mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 06:16:18 +00:00
Compare commits
4 commits
076abab943
...
fbf3cb8d88
Author | SHA1 | Date | |
---|---|---|---|
fbf3cb8d88 | |||
41c299a8ce | |||
4f3038eee9 | |||
d3befb157b |
13 changed files with 521 additions and 20 deletions
2
.github/workflows/sepa-precios-archiver.yml
vendored
2
.github/workflows/sepa-precios-archiver.yml
vendored
|
@ -2,7 +2,7 @@ name: Sepa Precios Archiver
|
||||||
|
|
||||||
on:
|
on:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: "0 */12 * * *" # Run every 6 hours
|
- cron: "0 */3 * * *" # Run every 3 hours
|
||||||
workflow_dispatch: # Allow manual trigger
|
workflow_dispatch: # Allow manual trigger
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
BIN
sepa/bun.lockb
BIN
sepa/bun.lockb
Binary file not shown.
|
@ -4,6 +4,7 @@
|
||||||
"workspaces": [
|
"workspaces": [
|
||||||
"sepa-precios-archiver",
|
"sepa-precios-archiver",
|
||||||
"sepa-precios-importer",
|
"sepa-precios-importer",
|
||||||
|
"sepa-index-gen",
|
||||||
"ckan"
|
"ckan"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
175
sepa/sepa-index-gen/.gitignore
vendored
Normal file
175
sepa/sepa-index-gen/.gitignore
vendored
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
|
||||||
|
logs
|
||||||
|
_.log
|
||||||
|
npm-debug.log_
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
lerna-debug.log*
|
||||||
|
.pnpm-debug.log*
|
||||||
|
|
||||||
|
# Caches
|
||||||
|
|
||||||
|
.cache
|
||||||
|
|
||||||
|
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||||
|
|
||||||
|
report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
|
||||||
|
|
||||||
|
# Runtime data
|
||||||
|
|
||||||
|
pids
|
||||||
|
_.pid
|
||||||
|
_.seed
|
||||||
|
*.pid.lock
|
||||||
|
|
||||||
|
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||||
|
|
||||||
|
lib-cov
|
||||||
|
|
||||||
|
# Coverage directory used by tools like istanbul
|
||||||
|
|
||||||
|
coverage
|
||||||
|
*.lcov
|
||||||
|
|
||||||
|
# nyc test coverage
|
||||||
|
|
||||||
|
.nyc_output
|
||||||
|
|
||||||
|
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||||
|
|
||||||
|
.grunt
|
||||||
|
|
||||||
|
# Bower dependency directory (https://bower.io/)
|
||||||
|
|
||||||
|
bower_components
|
||||||
|
|
||||||
|
# node-waf configuration
|
||||||
|
|
||||||
|
.lock-wscript
|
||||||
|
|
||||||
|
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||||
|
|
||||||
|
build/Release
|
||||||
|
|
||||||
|
# Dependency directories
|
||||||
|
|
||||||
|
node_modules/
|
||||||
|
jspm_packages/
|
||||||
|
|
||||||
|
# Snowpack dependency directory (https://snowpack.dev/)
|
||||||
|
|
||||||
|
web_modules/
|
||||||
|
|
||||||
|
# TypeScript cache
|
||||||
|
|
||||||
|
*.tsbuildinfo
|
||||||
|
|
||||||
|
# Optional npm cache directory
|
||||||
|
|
||||||
|
.npm
|
||||||
|
|
||||||
|
# Optional eslint cache
|
||||||
|
|
||||||
|
.eslintcache
|
||||||
|
|
||||||
|
# Optional stylelint cache
|
||||||
|
|
||||||
|
.stylelintcache
|
||||||
|
|
||||||
|
# Microbundle cache
|
||||||
|
|
||||||
|
.rpt2_cache/
|
||||||
|
.rts2_cache_cjs/
|
||||||
|
.rts2_cache_es/
|
||||||
|
.rts2_cache_umd/
|
||||||
|
|
||||||
|
# Optional REPL history
|
||||||
|
|
||||||
|
.node_repl_history
|
||||||
|
|
||||||
|
# Output of 'npm pack'
|
||||||
|
|
||||||
|
*.tgz
|
||||||
|
|
||||||
|
# Yarn Integrity file
|
||||||
|
|
||||||
|
.yarn-integrity
|
||||||
|
|
||||||
|
# dotenv environment variable files
|
||||||
|
|
||||||
|
.env
|
||||||
|
.env.development.local
|
||||||
|
.env.test.local
|
||||||
|
.env.production.local
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# parcel-bundler cache (https://parceljs.org/)
|
||||||
|
|
||||||
|
.parcel-cache
|
||||||
|
|
||||||
|
# Next.js build output
|
||||||
|
|
||||||
|
.next
|
||||||
|
out
|
||||||
|
|
||||||
|
# Nuxt.js build / generate output
|
||||||
|
|
||||||
|
.nuxt
|
||||||
|
dist
|
||||||
|
|
||||||
|
# Gatsby files
|
||||||
|
|
||||||
|
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||||
|
|
||||||
|
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||||
|
|
||||||
|
# public
|
||||||
|
|
||||||
|
# vuepress build output
|
||||||
|
|
||||||
|
.vuepress/dist
|
||||||
|
|
||||||
|
# vuepress v2.x temp and cache directory
|
||||||
|
|
||||||
|
.temp
|
||||||
|
|
||||||
|
# Docusaurus cache and generated files
|
||||||
|
|
||||||
|
.docusaurus
|
||||||
|
|
||||||
|
# Serverless directories
|
||||||
|
|
||||||
|
.serverless/
|
||||||
|
|
||||||
|
# FuseBox cache
|
||||||
|
|
||||||
|
.fusebox/
|
||||||
|
|
||||||
|
# DynamoDB Local files
|
||||||
|
|
||||||
|
.dynamodb/
|
||||||
|
|
||||||
|
# TernJS port file
|
||||||
|
|
||||||
|
.tern-port
|
||||||
|
|
||||||
|
# Stores VSCode versions used for testing VSCode extensions
|
||||||
|
|
||||||
|
.vscode-test
|
||||||
|
|
||||||
|
# yarn v2
|
||||||
|
|
||||||
|
.yarn/cache
|
||||||
|
.yarn/unplugged
|
||||||
|
.yarn/build-state.yml
|
||||||
|
.yarn/install-state.gz
|
||||||
|
.pnp.*
|
||||||
|
|
||||||
|
# IntelliJ based IDEs
|
||||||
|
.idea
|
||||||
|
|
||||||
|
# Finder (MacOS) folder config
|
||||||
|
.DS_Store
|
15
sepa/sepa-index-gen/README.md
Normal file
15
sepa/sepa-index-gen/README.md
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
# sepa-index-gen
|
||||||
|
|
||||||
|
To install dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bun install
|
||||||
|
```
|
||||||
|
|
||||||
|
To run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bun run index.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
This project was created using `bun init` in bun v1.1.26. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
|
67
sepa/sepa-index-gen/b2.ts
Normal file
67
sepa/sepa-index-gen/b2.ts
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
import {
|
||||||
|
S3Client,
|
||||||
|
HeadObjectCommand,
|
||||||
|
ListObjectsV2Command,
|
||||||
|
GetObjectCommand,
|
||||||
|
} from "@aws-sdk/client-s3";
|
||||||
|
import { Upload } from "@aws-sdk/lib-storage";
|
||||||
|
|
||||||
|
function checkEnvVariable(variableName: string) {
|
||||||
|
const value = process.env[variableName];
|
||||||
|
if (value) {
|
||||||
|
console.log(`✅ ${variableName} is set`);
|
||||||
|
return value;
|
||||||
|
} else {
|
||||||
|
console.log(`❌ ${variableName} is not set`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
export const B2_BUCKET_NAME = checkEnvVariable("B2_BUCKET_NAME");
|
||||||
|
const B2_BUCKET_KEY_ID = checkEnvVariable("B2_BUCKET_KEY_ID");
|
||||||
|
const B2_BUCKET_KEY = checkEnvVariable("B2_BUCKET_KEY");
|
||||||
|
export const s3 = new S3Client({
|
||||||
|
endpoint: "https://s3.us-west-004.backblazeb2.com",
|
||||||
|
region: "us-west-004",
|
||||||
|
credentials: {
|
||||||
|
accessKeyId: B2_BUCKET_KEY_ID,
|
||||||
|
secretAccessKey: B2_BUCKET_KEY,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
export async function listDirectory(directoryName: string) {
|
||||||
|
const command = new ListObjectsV2Command({
|
||||||
|
Bucket: B2_BUCKET_NAME,
|
||||||
|
Prefix: directoryName ? `${directoryName}/` : "",
|
||||||
|
Delimiter: "/",
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await s3.send(command);
|
||||||
|
return response.Contents?.map((item) => item.Key ?? "") ?? [];
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function getFileContent(fileName: string) {
|
||||||
|
const fileContent = await s3.send(
|
||||||
|
new GetObjectCommand({
|
||||||
|
Bucket: B2_BUCKET_NAME,
|
||||||
|
Key: fileName,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
return (await fileContent.Body?.transformToString()) ?? "";
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function checkFileExists(fileName: string): Promise<boolean> {
|
||||||
|
try {
|
||||||
|
await s3.send(
|
||||||
|
new HeadObjectCommand({
|
||||||
|
Bucket: B2_BUCKET_NAME,
|
||||||
|
Key: fileName,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
return true;
|
||||||
|
} catch (error) {
|
||||||
|
if ((error as any).name === "NotFound") {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
53
sepa/sepa-index-gen/index-resources.ts
Normal file
53
sepa/sepa-index-gen/index-resources.ts
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
import { zDatasetInfo, type DatasetInfo, type Resource } from "ckan/schemas";
|
||||||
|
import { getFileContent, listDirectory } from "./b2";
|
||||||
|
|
||||||
|
import { basename } from "path";
|
||||||
|
|
||||||
|
export async function indexResources() {
|
||||||
|
const fileNames = await listDirectory("timestamped-metadata");
|
||||||
|
|
||||||
|
let datasetInfos = new Map<string, Map<string, Resource>>();
|
||||||
|
|
||||||
|
await Promise.all(
|
||||||
|
fileNames.map(async (fileName) => {
|
||||||
|
let resources = new Map<string, Resource>();
|
||||||
|
|
||||||
|
const fileContentText = await getFileContent(fileName);
|
||||||
|
const json = JSON.parse(fileContentText);
|
||||||
|
if (!json.success) {
|
||||||
|
console.log(`skipping ${fileName} because of ${json.error.message}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let datasetInfo: DatasetInfo;
|
||||||
|
try {
|
||||||
|
datasetInfo = zDatasetInfo.parse(json.result);
|
||||||
|
} catch (e) {
|
||||||
|
console.log(fileName, e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const resource of datasetInfo.resources) {
|
||||||
|
const id = `${resource.id}-${resource.revision_id}`;
|
||||||
|
const existingResource = resources.get(id);
|
||||||
|
if (
|
||||||
|
existingResource &&
|
||||||
|
existingResource.modified &&
|
||||||
|
existingResource.modified > (resource.modified ?? 0)
|
||||||
|
) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
resources.set(id, resource);
|
||||||
|
}
|
||||||
|
const base = basename(fileName, ".json");
|
||||||
|
datasetInfos.set(base, resources);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
return Array.from(datasetInfos.entries()).reduce(
|
||||||
|
(acc, [fileName, resources]) => {
|
||||||
|
acc[fileName] = Array.from(resources.values());
|
||||||
|
return acc;
|
||||||
|
},
|
||||||
|
{} as Record<string, Resource[]>
|
||||||
|
);
|
||||||
|
}
|
141
sepa/sepa-index-gen/index.ts
Normal file
141
sepa/sepa-index-gen/index.ts
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
import { zResource, type Resource } from "ckan/schemas";
|
||||||
|
import { z } from "zod";
|
||||||
|
import { listDirectory } from "./b2";
|
||||||
|
import { isSameDay } from "date-fns";
|
||||||
|
import { indexResources } from "./index-resources";
|
||||||
|
|
||||||
|
export async function generateMarkdown() {
|
||||||
|
const resourcesIndex = await indexResources();
|
||||||
|
|
||||||
|
const datasets = z
|
||||||
|
.record(z.string(), z.array(zResource))
|
||||||
|
.parse(resourcesIndex);
|
||||||
|
const datasetsArray = Object.entries(datasets)
|
||||||
|
.map(([date, resources]) => ({
|
||||||
|
date: new Date(date),
|
||||||
|
resources,
|
||||||
|
}))
|
||||||
|
.sort((a, b) => +b.date - +a.date);
|
||||||
|
|
||||||
|
let latestResources = new Map<string, Resource>();
|
||||||
|
|
||||||
|
for (const { date, resources } of datasetsArray) {
|
||||||
|
for (const resource of resources) {
|
||||||
|
const id = `${resource.id}-revID-${resource.revision_id}`;
|
||||||
|
if (latestResources.has(id)) continue;
|
||||||
|
latestResources.set(id, resource);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function getDate(resource: Resource) {
|
||||||
|
{
|
||||||
|
const matches = resource.name.match(/precios_(\d{4})(\d{2})(\d{2})/);
|
||||||
|
if (matches) {
|
||||||
|
return new Date(
|
||||||
|
parseInt(matches[1]),
|
||||||
|
parseInt(matches[2]) - 1,
|
||||||
|
parseInt(matches[3])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const matches = resource.description.match(
|
||||||
|
/(?<day>\d{2})\/(?<month>\d{2})\/(?<year>\d{4})|(?<year2>\d{4})-(?<month2>\d{2})-(?<day2>\d{2})/
|
||||||
|
);
|
||||||
|
if (matches) {
|
||||||
|
const { day, month, year, day2, month2, year2 } = matches.groups!;
|
||||||
|
return new Date(
|
||||||
|
parseInt(year || year2),
|
||||||
|
parseInt(month || month2) - 1,
|
||||||
|
parseInt(day || day2)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
console.debug(resource);
|
||||||
|
throw new Error(`No date found for ${resource.name}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const fileList = await listDirectory("");
|
||||||
|
|
||||||
|
const zipResources = [...latestResources.values()].filter(
|
||||||
|
(r) => r.format === "ZIP"
|
||||||
|
);
|
||||||
|
const dates = [
|
||||||
|
...new Set(
|
||||||
|
zipResources.map((r) => getDate(r).toISOString().split("T")[0]).sort()
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
|
// check if dates are missing in between min and max date
|
||||||
|
const minDate = new Date(
|
||||||
|
Math.min(...[...dates].map((d) => new Date(d).getTime()))
|
||||||
|
);
|
||||||
|
const maxDate = new Date(
|
||||||
|
Math.max(...[...dates].map((d) => new Date(d).getTime()))
|
||||||
|
);
|
||||||
|
for (let d = minDate; d <= maxDate; d.setDate(d.getDate() + 1)) {
|
||||||
|
const dateStr = d.toISOString().split("T")[0];
|
||||||
|
if (!dates.includes(dateStr)) dates.push(dateStr);
|
||||||
|
}
|
||||||
|
dates.sort();
|
||||||
|
|
||||||
|
function getWeekDay(searchIn: string) {
|
||||||
|
if (/domingo/iu.test(searchIn)) return 0;
|
||||||
|
if (/lunes/iu.test(searchIn)) return 1;
|
||||||
|
if (/martes/iu.test(searchIn)) return 2;
|
||||||
|
if (/mi[eé]rcoles/iu.test(searchIn)) return 3;
|
||||||
|
if (/jueves/iu.test(searchIn)) return 4;
|
||||||
|
if (/viernes/iu.test(searchIn)) return 5;
|
||||||
|
if (/s[aá]bado/iu.test(searchIn)) return 6;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
function getWeekDayInResource(resource: Resource) {
|
||||||
|
return getWeekDay(resource.description) ?? getWeekDay(resource.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
let markdown = `# index de archivo de datasets de precios SEPA
|
||||||
|
|
||||||
|
esto esta automáticamente generado por sepa-index-gen dentro de preciazo.`;
|
||||||
|
|
||||||
|
const formatter = Intl.DateTimeFormat("es-AR", {
|
||||||
|
year: "numeric",
|
||||||
|
month: "2-digit",
|
||||||
|
day: "2-digit",
|
||||||
|
weekday: "long",
|
||||||
|
});
|
||||||
|
const dateTimeFormatter = Intl.DateTimeFormat("es-AR", {
|
||||||
|
year: "numeric",
|
||||||
|
month: "2-digit",
|
||||||
|
day: "2-digit",
|
||||||
|
hour: "2-digit",
|
||||||
|
minute: "2-digit",
|
||||||
|
});
|
||||||
|
for (const dateStr of dates) {
|
||||||
|
const date = new Date(dateStr);
|
||||||
|
markdown += `\n* ${formatter.format(date)}:`;
|
||||||
|
const resourcesInDate = zipResources.filter((r) =>
|
||||||
|
isSameDay(getDate(r), date)
|
||||||
|
);
|
||||||
|
if (!resourcesInDate.length) {
|
||||||
|
markdown += " ❌ no tengo recursos para esta fecha";
|
||||||
|
}
|
||||||
|
for (const resource of resourcesInDate) {
|
||||||
|
const id = `${resource.id}-revID-${resource.revision_id}`;
|
||||||
|
const fileExists = fileList.find((file) => file.startsWith(id));
|
||||||
|
const link =
|
||||||
|
fileExists ??
|
||||||
|
`https://f004.backblazeb2.com/file/precios-justos-datasets/${fileExists}`;
|
||||||
|
let warnings = "";
|
||||||
|
if (
|
||||||
|
getWeekDayInResource(resource) &&
|
||||||
|
date.getDay() !== getWeekDayInResource(resource)
|
||||||
|
) {
|
||||||
|
warnings +=
|
||||||
|
"⁉️⚠️ dia de semana incorrecto, puede haberse subido incorrectamente ";
|
||||||
|
}
|
||||||
|
markdown += `\n * ${id} ${warnings} ${fileExists ? `[✅ descargar](${link})` : "❌"} (${dateTimeFormatter.format(resource.modified)})`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return markdown;
|
||||||
|
}
|
19
sepa/sepa-index-gen/package.json
Normal file
19
sepa/sepa-index-gen/package.json
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
{
|
||||||
|
"name": "sepa-index-gen",
|
||||||
|
"module": "index.ts",
|
||||||
|
"type": "module",
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/bun": "latest",
|
||||||
|
"prettier": "^3.3.3"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"typescript": "^5.0.0"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@aws-sdk/client-s3": "^3.637.0",
|
||||||
|
"@aws-sdk/lib-storage": "^3.637.0",
|
||||||
|
"ckan": "workspace:*",
|
||||||
|
"date-fns": "^3.6.0",
|
||||||
|
"zod": "^3.23.8"
|
||||||
|
}
|
||||||
|
}
|
27
sepa/sepa-index-gen/tsconfig.json
Normal file
27
sepa/sepa-index-gen/tsconfig.json
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
// Enable latest features
|
||||||
|
"lib": ["ESNext", "DOM"],
|
||||||
|
"target": "ESNext",
|
||||||
|
"module": "ESNext",
|
||||||
|
"moduleDetection": "force",
|
||||||
|
"jsx": "react-jsx",
|
||||||
|
"allowJs": true,
|
||||||
|
|
||||||
|
// Bundler mode
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"allowImportingTsExtensions": true,
|
||||||
|
"verbatimModuleSyntax": true,
|
||||||
|
"noEmit": true,
|
||||||
|
|
||||||
|
// Best practices
|
||||||
|
"strict": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"noFallthroughCasesInSwitch": true,
|
||||||
|
|
||||||
|
// Some stricter flags (disabled by default)
|
||||||
|
"noUnusedLocals": false,
|
||||||
|
"noUnusedParameters": false,
|
||||||
|
"noPropertyAccessFromIndexSignature": false
|
||||||
|
}
|
||||||
|
}
|
|
@ -5,6 +5,7 @@ import { basename, extname, join } from "path";
|
||||||
import { $, write } from "bun";
|
import { $, write } from "bun";
|
||||||
import { S3Client, HeadObjectCommand } from "@aws-sdk/client-s3";
|
import { S3Client, HeadObjectCommand } from "@aws-sdk/client-s3";
|
||||||
import { Upload } from "@aws-sdk/lib-storage";
|
import { Upload } from "@aws-sdk/lib-storage";
|
||||||
|
import { generateMarkdown } from "sepa-index-gen";
|
||||||
|
|
||||||
function checkEnvVariable(variableName: string) {
|
function checkEnvVariable(variableName: string) {
|
||||||
const value = process.env[variableName];
|
const value = process.env[variableName];
|
||||||
|
@ -38,29 +39,25 @@ async function getRawDatasetInfo() {
|
||||||
console.error(
|
console.error(
|
||||||
`❌ Error fetching dataset info`,
|
`❌ Error fetching dataset info`,
|
||||||
error,
|
error,
|
||||||
`retrying in 5min...`,
|
`retrying in 5min...`
|
||||||
);
|
);
|
||||||
await new Promise((resolve) => setTimeout(resolve, 5 * 60 * 1000));
|
await new Promise((resolve) => setTimeout(resolve, 5 * 60 * 1000));
|
||||||
return await getRawDatasetInfo();
|
return await getRawDatasetInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
async function saveFileIntoRepo(fileName: string, fileContent: string) {
|
||||||
async function saveDatasetInfoIntoRepo(datasetInfo: any) {
|
|
||||||
const dir = await mkdtemp("/tmp/sepa-precios-archiver-metadata-repo-");
|
const dir = await mkdtemp("/tmp/sepa-precios-archiver-metadata-repo-");
|
||||||
try {
|
try {
|
||||||
await $`git clone https://catdevnull:${GITHUB_TOKEN}@github.com/catdevnull/sepa-precios-metadata.git ${dir}`;
|
await $`git clone https://catdevnull:${GITHUB_TOKEN}@github.com/catdevnull/sepa-precios-metadata.git ${dir}`;
|
||||||
await writeFile(
|
await writeFile(join(dir, fileName), fileContent);
|
||||||
dir + "/dataset-info.json",
|
|
||||||
JSON.stringify(datasetInfo, null, 2),
|
|
||||||
);
|
|
||||||
await $`cd ${dir} && git add dataset-info.json`;
|
|
||||||
await $`cd ${dir} && git config user.email "git@nulo.in" && git config user.name "github actions"`;
|
await $`cd ${dir} && git config user.email "git@nulo.in" && git config user.name "github actions"`;
|
||||||
await $`cd ${dir} && git diff --staged --quiet || git commit -m "Update dataset info"`;
|
await $`cd ${dir} && git add ${fileName}`;
|
||||||
|
await $`cd ${dir} && git diff --staged --quiet || git commit -m "Update ${fileName}"`;
|
||||||
await $`cd ${dir} && git push origin main`;
|
await $`cd ${dir} && git push origin main`;
|
||||||
} finally {
|
} finally {
|
||||||
await $`rm -rf ${dir}`;
|
await $`rm -rf ${dir}`;
|
||||||
}
|
}
|
||||||
console.log(`✅ Saved dataset info into repo`);
|
console.log(`✅ Saved ${fileName} into repo`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
||||||
|
@ -69,7 +66,7 @@ async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
||||||
new HeadObjectCommand({
|
new HeadObjectCommand({
|
||||||
Bucket: B2_BUCKET_NAME,
|
Bucket: B2_BUCKET_NAME,
|
||||||
Key: fileName,
|
Key: fileName,
|
||||||
}),
|
})
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -82,7 +79,7 @@ async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
||||||
|
|
||||||
async function uploadToB2Bucket(
|
async function uploadToB2Bucket(
|
||||||
fileName: string,
|
fileName: string,
|
||||||
fileContent: ReadableStream | Blob | string,
|
fileContent: ReadableStream | Blob | string
|
||||||
) {
|
) {
|
||||||
const upload = new Upload({
|
const upload = new Upload({
|
||||||
client: s3,
|
client: s3,
|
||||||
|
@ -98,12 +95,15 @@ async function uploadToB2Bucket(
|
||||||
|
|
||||||
const rawDatasetInfo = await getRawDatasetInfo();
|
const rawDatasetInfo = await getRawDatasetInfo();
|
||||||
|
|
||||||
await saveDatasetInfoIntoRepo(rawDatasetInfo);
|
await saveFileIntoRepo(
|
||||||
|
"dataset-info.json",
|
||||||
|
JSON.stringify(rawDatasetInfo, null, 2)
|
||||||
|
);
|
||||||
|
|
||||||
let errored = false;
|
let errored = false;
|
||||||
|
|
||||||
function checkRes(
|
function checkRes(
|
||||||
res: Response,
|
res: Response
|
||||||
): res is Response & { body: ReadableStream<Uint8Array> } {
|
): res is Response & { body: ReadableStream<Uint8Array> } {
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
console.error(`❌ Error downloading ${res.url}`);
|
console.error(`❌ Error downloading ${res.url}`);
|
||||||
|
@ -116,7 +116,7 @@ function checkRes(
|
||||||
|
|
||||||
await uploadToB2Bucket(
|
await uploadToB2Bucket(
|
||||||
`timestamped-metadata/${new Date().toISOString()}.json`,
|
`timestamped-metadata/${new Date().toISOString()}.json`,
|
||||||
JSON.stringify(rawDatasetInfo, null, 2),
|
JSON.stringify(rawDatasetInfo, null, 2)
|
||||||
);
|
);
|
||||||
|
|
||||||
const datasetInfo = z.object({ result: zDatasetInfo }).parse(rawDatasetInfo);
|
const datasetInfo = z.object({ result: zDatasetInfo }).parse(rawDatasetInfo);
|
||||||
|
@ -144,7 +144,7 @@ for (const resource of datasetInfo.result.resources) {
|
||||||
|
|
||||||
await writeFile(
|
await writeFile(
|
||||||
join(dir, "dataset-info.json"),
|
join(dir, "dataset-info.json"),
|
||||||
JSON.stringify(rawDatasetInfo, null, 2),
|
JSON.stringify(rawDatasetInfo, null, 2)
|
||||||
);
|
);
|
||||||
|
|
||||||
const compressed =
|
const compressed =
|
||||||
|
@ -163,6 +163,8 @@ for (const resource of datasetInfo.result.resources) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await saveFileIntoRepo("index.md", await generateMarkdown());
|
||||||
|
|
||||||
if (errored) {
|
if (errored) {
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/client-s3": "^3.637.0",
|
"@aws-sdk/client-s3": "^3.637.0",
|
||||||
"@aws-sdk/lib-storage": "^3.637.0",
|
"@aws-sdk/lib-storage": "^3.637.0",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8",
|
||||||
|
"sepa-index-gen": "workspace:*"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -126,6 +126,7 @@ async function importSucursales(
|
||||||
}
|
}
|
||||||
|
|
||||||
async function importDataset(dir: string) {
|
async function importDataset(dir: string) {
|
||||||
|
console.log(dir);
|
||||||
const date = basename(dir).match(/(\d{4}-\d{2}-\d{2})/)![1];
|
const date = basename(dir).match(/(\d{4}-\d{2}-\d{2})/)![1];
|
||||||
// TODO: parsear "Ultima actualizacion" al final del CSV y insertarlo en la tabla datasets
|
// TODO: parsear "Ultima actualizacion" al final del CSV y insertarlo en la tabla datasets
|
||||||
|
|
||||||
|
@ -246,13 +247,12 @@ async function importDataset(dir: string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const pQueue = new PQueue({ concurrency: 4 });
|
const pQueue = new PQueue({ concurrency: 2 });
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const glob = new Glob("**/productos.csv");
|
const glob = new Glob("**/productos.csv");
|
||||||
for await (const file of glob.scan(process.argv[2])) {
|
for await (const file of glob.scan(process.argv[2])) {
|
||||||
const dir = join(process.argv[2], dirname(file));
|
const dir = join(process.argv[2], dirname(file));
|
||||||
console.log(dir);
|
|
||||||
pQueue.add(() => importDataset(dir));
|
pQueue.add(() => importDataset(dir));
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
Loading…
Reference in a new issue