mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 14:16:19 +00:00
Compare commits
No commits in common. "fbf3cb8d88be595be168e3dd353c5dc1b174a2cc" and "076abab943c1211d05492b456732bc4c42994b0e" have entirely different histories.
fbf3cb8d88
...
076abab943
13 changed files with 20 additions and 521 deletions
2
.github/workflows/sepa-precios-archiver.yml
vendored
2
.github/workflows/sepa-precios-archiver.yml
vendored
|
@ -2,7 +2,7 @@ name: Sepa Precios Archiver
|
||||||
|
|
||||||
on:
|
on:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: "0 */3 * * *" # Run every 3 hours
|
- cron: "0 */12 * * *" # Run every 6 hours
|
||||||
workflow_dispatch: # Allow manual trigger
|
workflow_dispatch: # Allow manual trigger
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
BIN
sepa/bun.lockb
BIN
sepa/bun.lockb
Binary file not shown.
|
@ -4,7 +4,6 @@
|
||||||
"workspaces": [
|
"workspaces": [
|
||||||
"sepa-precios-archiver",
|
"sepa-precios-archiver",
|
||||||
"sepa-precios-importer",
|
"sepa-precios-importer",
|
||||||
"sepa-index-gen",
|
|
||||||
"ckan"
|
"ckan"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
175
sepa/sepa-index-gen/.gitignore
vendored
175
sepa/sepa-index-gen/.gitignore
vendored
|
@ -1,175 +0,0 @@
|
||||||
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
|
|
||||||
|
|
||||||
# Logs
|
|
||||||
|
|
||||||
logs
|
|
||||||
_.log
|
|
||||||
npm-debug.log_
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
.pnpm-debug.log*
|
|
||||||
|
|
||||||
# Caches
|
|
||||||
|
|
||||||
.cache
|
|
||||||
|
|
||||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
|
||||||
|
|
||||||
report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
|
|
||||||
pids
|
|
||||||
_.pid
|
|
||||||
_.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
|
|
||||||
coverage
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
|
|
||||||
node_modules/
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# Snowpack dependency directory (https://snowpack.dev/)
|
|
||||||
|
|
||||||
web_modules/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Optional stylelint cache
|
|
||||||
|
|
||||||
.stylelintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
|
|
||||||
*.tgz
|
|
||||||
|
|
||||||
# Yarn Integrity file
|
|
||||||
|
|
||||||
.yarn-integrity
|
|
||||||
|
|
||||||
# dotenv environment variable files
|
|
||||||
|
|
||||||
.env
|
|
||||||
.env.development.local
|
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
.env.local
|
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
|
||||||
|
|
||||||
.parcel-cache
|
|
||||||
|
|
||||||
# Next.js build output
|
|
||||||
|
|
||||||
.next
|
|
||||||
out
|
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
|
||||||
|
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
|
|
||||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
|
||||||
|
|
||||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
|
||||||
|
|
||||||
# public
|
|
||||||
|
|
||||||
# vuepress build output
|
|
||||||
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# vuepress v2.x temp and cache directory
|
|
||||||
|
|
||||||
.temp
|
|
||||||
|
|
||||||
# Docusaurus cache and generated files
|
|
||||||
|
|
||||||
.docusaurus
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# yarn v2
|
|
||||||
|
|
||||||
.yarn/cache
|
|
||||||
.yarn/unplugged
|
|
||||||
.yarn/build-state.yml
|
|
||||||
.yarn/install-state.gz
|
|
||||||
.pnp.*
|
|
||||||
|
|
||||||
# IntelliJ based IDEs
|
|
||||||
.idea
|
|
||||||
|
|
||||||
# Finder (MacOS) folder config
|
|
||||||
.DS_Store
|
|
|
@ -1,15 +0,0 @@
|
||||||
# sepa-index-gen
|
|
||||||
|
|
||||||
To install dependencies:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bun install
|
|
||||||
```
|
|
||||||
|
|
||||||
To run:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bun run index.ts
|
|
||||||
```
|
|
||||||
|
|
||||||
This project was created using `bun init` in bun v1.1.26. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
|
|
|
@ -1,67 +0,0 @@
|
||||||
import {
|
|
||||||
S3Client,
|
|
||||||
HeadObjectCommand,
|
|
||||||
ListObjectsV2Command,
|
|
||||||
GetObjectCommand,
|
|
||||||
} from "@aws-sdk/client-s3";
|
|
||||||
import { Upload } from "@aws-sdk/lib-storage";
|
|
||||||
|
|
||||||
function checkEnvVariable(variableName: string) {
|
|
||||||
const value = process.env[variableName];
|
|
||||||
if (value) {
|
|
||||||
console.log(`✅ ${variableName} is set`);
|
|
||||||
return value;
|
|
||||||
} else {
|
|
||||||
console.log(`❌ ${variableName} is not set`);
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
export const B2_BUCKET_NAME = checkEnvVariable("B2_BUCKET_NAME");
|
|
||||||
const B2_BUCKET_KEY_ID = checkEnvVariable("B2_BUCKET_KEY_ID");
|
|
||||||
const B2_BUCKET_KEY = checkEnvVariable("B2_BUCKET_KEY");
|
|
||||||
export const s3 = new S3Client({
|
|
||||||
endpoint: "https://s3.us-west-004.backblazeb2.com",
|
|
||||||
region: "us-west-004",
|
|
||||||
credentials: {
|
|
||||||
accessKeyId: B2_BUCKET_KEY_ID,
|
|
||||||
secretAccessKey: B2_BUCKET_KEY,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
export async function listDirectory(directoryName: string) {
|
|
||||||
const command = new ListObjectsV2Command({
|
|
||||||
Bucket: B2_BUCKET_NAME,
|
|
||||||
Prefix: directoryName ? `${directoryName}/` : "",
|
|
||||||
Delimiter: "/",
|
|
||||||
});
|
|
||||||
|
|
||||||
const response = await s3.send(command);
|
|
||||||
return response.Contents?.map((item) => item.Key ?? "") ?? [];
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function getFileContent(fileName: string) {
|
|
||||||
const fileContent = await s3.send(
|
|
||||||
new GetObjectCommand({
|
|
||||||
Bucket: B2_BUCKET_NAME,
|
|
||||||
Key: fileName,
|
|
||||||
})
|
|
||||||
);
|
|
||||||
return (await fileContent.Body?.transformToString()) ?? "";
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function checkFileExists(fileName: string): Promise<boolean> {
|
|
||||||
try {
|
|
||||||
await s3.send(
|
|
||||||
new HeadObjectCommand({
|
|
||||||
Bucket: B2_BUCKET_NAME,
|
|
||||||
Key: fileName,
|
|
||||||
})
|
|
||||||
);
|
|
||||||
return true;
|
|
||||||
} catch (error) {
|
|
||||||
if ((error as any).name === "NotFound") {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,53 +0,0 @@
|
||||||
import { zDatasetInfo, type DatasetInfo, type Resource } from "ckan/schemas";
|
|
||||||
import { getFileContent, listDirectory } from "./b2";
|
|
||||||
|
|
||||||
import { basename } from "path";
|
|
||||||
|
|
||||||
export async function indexResources() {
|
|
||||||
const fileNames = await listDirectory("timestamped-metadata");
|
|
||||||
|
|
||||||
let datasetInfos = new Map<string, Map<string, Resource>>();
|
|
||||||
|
|
||||||
await Promise.all(
|
|
||||||
fileNames.map(async (fileName) => {
|
|
||||||
let resources = new Map<string, Resource>();
|
|
||||||
|
|
||||||
const fileContentText = await getFileContent(fileName);
|
|
||||||
const json = JSON.parse(fileContentText);
|
|
||||||
if (!json.success) {
|
|
||||||
console.log(`skipping ${fileName} because of ${json.error.message}`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let datasetInfo: DatasetInfo;
|
|
||||||
try {
|
|
||||||
datasetInfo = zDatasetInfo.parse(json.result);
|
|
||||||
} catch (e) {
|
|
||||||
console.log(fileName, e);
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const resource of datasetInfo.resources) {
|
|
||||||
const id = `${resource.id}-${resource.revision_id}`;
|
|
||||||
const existingResource = resources.get(id);
|
|
||||||
if (
|
|
||||||
existingResource &&
|
|
||||||
existingResource.modified &&
|
|
||||||
existingResource.modified > (resource.modified ?? 0)
|
|
||||||
) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
resources.set(id, resource);
|
|
||||||
}
|
|
||||||
const base = basename(fileName, ".json");
|
|
||||||
datasetInfos.set(base, resources);
|
|
||||||
})
|
|
||||||
);
|
|
||||||
|
|
||||||
return Array.from(datasetInfos.entries()).reduce(
|
|
||||||
(acc, [fileName, resources]) => {
|
|
||||||
acc[fileName] = Array.from(resources.values());
|
|
||||||
return acc;
|
|
||||||
},
|
|
||||||
{} as Record<string, Resource[]>
|
|
||||||
);
|
|
||||||
}
|
|
|
@ -1,141 +0,0 @@
|
||||||
import { zResource, type Resource } from "ckan/schemas";
|
|
||||||
import { z } from "zod";
|
|
||||||
import { listDirectory } from "./b2";
|
|
||||||
import { isSameDay } from "date-fns";
|
|
||||||
import { indexResources } from "./index-resources";
|
|
||||||
|
|
||||||
export async function generateMarkdown() {
|
|
||||||
const resourcesIndex = await indexResources();
|
|
||||||
|
|
||||||
const datasets = z
|
|
||||||
.record(z.string(), z.array(zResource))
|
|
||||||
.parse(resourcesIndex);
|
|
||||||
const datasetsArray = Object.entries(datasets)
|
|
||||||
.map(([date, resources]) => ({
|
|
||||||
date: new Date(date),
|
|
||||||
resources,
|
|
||||||
}))
|
|
||||||
.sort((a, b) => +b.date - +a.date);
|
|
||||||
|
|
||||||
let latestResources = new Map<string, Resource>();
|
|
||||||
|
|
||||||
for (const { date, resources } of datasetsArray) {
|
|
||||||
for (const resource of resources) {
|
|
||||||
const id = `${resource.id}-revID-${resource.revision_id}`;
|
|
||||||
if (latestResources.has(id)) continue;
|
|
||||||
latestResources.set(id, resource);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function getDate(resource: Resource) {
|
|
||||||
{
|
|
||||||
const matches = resource.name.match(/precios_(\d{4})(\d{2})(\d{2})/);
|
|
||||||
if (matches) {
|
|
||||||
return new Date(
|
|
||||||
parseInt(matches[1]),
|
|
||||||
parseInt(matches[2]) - 1,
|
|
||||||
parseInt(matches[3])
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{
|
|
||||||
const matches = resource.description.match(
|
|
||||||
/(?<day>\d{2})\/(?<month>\d{2})\/(?<year>\d{4})|(?<year2>\d{4})-(?<month2>\d{2})-(?<day2>\d{2})/
|
|
||||||
);
|
|
||||||
if (matches) {
|
|
||||||
const { day, month, year, day2, month2, year2 } = matches.groups!;
|
|
||||||
return new Date(
|
|
||||||
parseInt(year || year2),
|
|
||||||
parseInt(month || month2) - 1,
|
|
||||||
parseInt(day || day2)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
console.debug(resource);
|
|
||||||
throw new Error(`No date found for ${resource.name}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const fileList = await listDirectory("");
|
|
||||||
|
|
||||||
const zipResources = [...latestResources.values()].filter(
|
|
||||||
(r) => r.format === "ZIP"
|
|
||||||
);
|
|
||||||
const dates = [
|
|
||||||
...new Set(
|
|
||||||
zipResources.map((r) => getDate(r).toISOString().split("T")[0]).sort()
|
|
||||||
),
|
|
||||||
];
|
|
||||||
|
|
||||||
// check if dates are missing in between min and max date
|
|
||||||
const minDate = new Date(
|
|
||||||
Math.min(...[...dates].map((d) => new Date(d).getTime()))
|
|
||||||
);
|
|
||||||
const maxDate = new Date(
|
|
||||||
Math.max(...[...dates].map((d) => new Date(d).getTime()))
|
|
||||||
);
|
|
||||||
for (let d = minDate; d <= maxDate; d.setDate(d.getDate() + 1)) {
|
|
||||||
const dateStr = d.toISOString().split("T")[0];
|
|
||||||
if (!dates.includes(dateStr)) dates.push(dateStr);
|
|
||||||
}
|
|
||||||
dates.sort();
|
|
||||||
|
|
||||||
function getWeekDay(searchIn: string) {
|
|
||||||
if (/domingo/iu.test(searchIn)) return 0;
|
|
||||||
if (/lunes/iu.test(searchIn)) return 1;
|
|
||||||
if (/martes/iu.test(searchIn)) return 2;
|
|
||||||
if (/mi[eé]rcoles/iu.test(searchIn)) return 3;
|
|
||||||
if (/jueves/iu.test(searchIn)) return 4;
|
|
||||||
if (/viernes/iu.test(searchIn)) return 5;
|
|
||||||
if (/s[aá]bado/iu.test(searchIn)) return 6;
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
function getWeekDayInResource(resource: Resource) {
|
|
||||||
return getWeekDay(resource.description) ?? getWeekDay(resource.name);
|
|
||||||
}
|
|
||||||
|
|
||||||
let markdown = `# index de archivo de datasets de precios SEPA
|
|
||||||
|
|
||||||
esto esta automáticamente generado por sepa-index-gen dentro de preciazo.`;
|
|
||||||
|
|
||||||
const formatter = Intl.DateTimeFormat("es-AR", {
|
|
||||||
year: "numeric",
|
|
||||||
month: "2-digit",
|
|
||||||
day: "2-digit",
|
|
||||||
weekday: "long",
|
|
||||||
});
|
|
||||||
const dateTimeFormatter = Intl.DateTimeFormat("es-AR", {
|
|
||||||
year: "numeric",
|
|
||||||
month: "2-digit",
|
|
||||||
day: "2-digit",
|
|
||||||
hour: "2-digit",
|
|
||||||
minute: "2-digit",
|
|
||||||
});
|
|
||||||
for (const dateStr of dates) {
|
|
||||||
const date = new Date(dateStr);
|
|
||||||
markdown += `\n* ${formatter.format(date)}:`;
|
|
||||||
const resourcesInDate = zipResources.filter((r) =>
|
|
||||||
isSameDay(getDate(r), date)
|
|
||||||
);
|
|
||||||
if (!resourcesInDate.length) {
|
|
||||||
markdown += " ❌ no tengo recursos para esta fecha";
|
|
||||||
}
|
|
||||||
for (const resource of resourcesInDate) {
|
|
||||||
const id = `${resource.id}-revID-${resource.revision_id}`;
|
|
||||||
const fileExists = fileList.find((file) => file.startsWith(id));
|
|
||||||
const link =
|
|
||||||
fileExists ??
|
|
||||||
`https://f004.backblazeb2.com/file/precios-justos-datasets/${fileExists}`;
|
|
||||||
let warnings = "";
|
|
||||||
if (
|
|
||||||
getWeekDayInResource(resource) &&
|
|
||||||
date.getDay() !== getWeekDayInResource(resource)
|
|
||||||
) {
|
|
||||||
warnings +=
|
|
||||||
"⁉️⚠️ dia de semana incorrecto, puede haberse subido incorrectamente ";
|
|
||||||
}
|
|
||||||
markdown += `\n * ${id} ${warnings} ${fileExists ? `[✅ descargar](${link})` : "❌"} (${dateTimeFormatter.format(resource.modified)})`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return markdown;
|
|
||||||
}
|
|
|
@ -1,19 +0,0 @@
|
||||||
{
|
|
||||||
"name": "sepa-index-gen",
|
|
||||||
"module": "index.ts",
|
|
||||||
"type": "module",
|
|
||||||
"devDependencies": {
|
|
||||||
"@types/bun": "latest",
|
|
||||||
"prettier": "^3.3.3"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"typescript": "^5.0.0"
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"@aws-sdk/client-s3": "^3.637.0",
|
|
||||||
"@aws-sdk/lib-storage": "^3.637.0",
|
|
||||||
"ckan": "workspace:*",
|
|
||||||
"date-fns": "^3.6.0",
|
|
||||||
"zod": "^3.23.8"
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,27 +0,0 @@
|
||||||
{
|
|
||||||
"compilerOptions": {
|
|
||||||
// Enable latest features
|
|
||||||
"lib": ["ESNext", "DOM"],
|
|
||||||
"target": "ESNext",
|
|
||||||
"module": "ESNext",
|
|
||||||
"moduleDetection": "force",
|
|
||||||
"jsx": "react-jsx",
|
|
||||||
"allowJs": true,
|
|
||||||
|
|
||||||
// Bundler mode
|
|
||||||
"moduleResolution": "bundler",
|
|
||||||
"allowImportingTsExtensions": true,
|
|
||||||
"verbatimModuleSyntax": true,
|
|
||||||
"noEmit": true,
|
|
||||||
|
|
||||||
// Best practices
|
|
||||||
"strict": true,
|
|
||||||
"skipLibCheck": true,
|
|
||||||
"noFallthroughCasesInSwitch": true,
|
|
||||||
|
|
||||||
// Some stricter flags (disabled by default)
|
|
||||||
"noUnusedLocals": false,
|
|
||||||
"noUnusedParameters": false,
|
|
||||||
"noPropertyAccessFromIndexSignature": false
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -5,7 +5,6 @@ import { basename, extname, join } from "path";
|
||||||
import { $, write } from "bun";
|
import { $, write } from "bun";
|
||||||
import { S3Client, HeadObjectCommand } from "@aws-sdk/client-s3";
|
import { S3Client, HeadObjectCommand } from "@aws-sdk/client-s3";
|
||||||
import { Upload } from "@aws-sdk/lib-storage";
|
import { Upload } from "@aws-sdk/lib-storage";
|
||||||
import { generateMarkdown } from "sepa-index-gen";
|
|
||||||
|
|
||||||
function checkEnvVariable(variableName: string) {
|
function checkEnvVariable(variableName: string) {
|
||||||
const value = process.env[variableName];
|
const value = process.env[variableName];
|
||||||
|
@ -39,25 +38,29 @@ async function getRawDatasetInfo() {
|
||||||
console.error(
|
console.error(
|
||||||
`❌ Error fetching dataset info`,
|
`❌ Error fetching dataset info`,
|
||||||
error,
|
error,
|
||||||
`retrying in 5min...`
|
`retrying in 5min...`,
|
||||||
);
|
);
|
||||||
await new Promise((resolve) => setTimeout(resolve, 5 * 60 * 1000));
|
await new Promise((resolve) => setTimeout(resolve, 5 * 60 * 1000));
|
||||||
return await getRawDatasetInfo();
|
return await getRawDatasetInfo();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
async function saveFileIntoRepo(fileName: string, fileContent: string) {
|
|
||||||
|
async function saveDatasetInfoIntoRepo(datasetInfo: any) {
|
||||||
const dir = await mkdtemp("/tmp/sepa-precios-archiver-metadata-repo-");
|
const dir = await mkdtemp("/tmp/sepa-precios-archiver-metadata-repo-");
|
||||||
try {
|
try {
|
||||||
await $`git clone https://catdevnull:${GITHUB_TOKEN}@github.com/catdevnull/sepa-precios-metadata.git ${dir}`;
|
await $`git clone https://catdevnull:${GITHUB_TOKEN}@github.com/catdevnull/sepa-precios-metadata.git ${dir}`;
|
||||||
await writeFile(join(dir, fileName), fileContent);
|
await writeFile(
|
||||||
|
dir + "/dataset-info.json",
|
||||||
|
JSON.stringify(datasetInfo, null, 2),
|
||||||
|
);
|
||||||
|
await $`cd ${dir} && git add dataset-info.json`;
|
||||||
await $`cd ${dir} && git config user.email "git@nulo.in" && git config user.name "github actions"`;
|
await $`cd ${dir} && git config user.email "git@nulo.in" && git config user.name "github actions"`;
|
||||||
await $`cd ${dir} && git add ${fileName}`;
|
await $`cd ${dir} && git diff --staged --quiet || git commit -m "Update dataset info"`;
|
||||||
await $`cd ${dir} && git diff --staged --quiet || git commit -m "Update ${fileName}"`;
|
|
||||||
await $`cd ${dir} && git push origin main`;
|
await $`cd ${dir} && git push origin main`;
|
||||||
} finally {
|
} finally {
|
||||||
await $`rm -rf ${dir}`;
|
await $`rm -rf ${dir}`;
|
||||||
}
|
}
|
||||||
console.log(`✅ Saved ${fileName} into repo`);
|
console.log(`✅ Saved dataset info into repo`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
||||||
|
@ -66,7 +69,7 @@ async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
||||||
new HeadObjectCommand({
|
new HeadObjectCommand({
|
||||||
Bucket: B2_BUCKET_NAME,
|
Bucket: B2_BUCKET_NAME,
|
||||||
Key: fileName,
|
Key: fileName,
|
||||||
})
|
}),
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -79,7 +82,7 @@ async function checkFileExistsInB2(fileName: string): Promise<boolean> {
|
||||||
|
|
||||||
async function uploadToB2Bucket(
|
async function uploadToB2Bucket(
|
||||||
fileName: string,
|
fileName: string,
|
||||||
fileContent: ReadableStream | Blob | string
|
fileContent: ReadableStream | Blob | string,
|
||||||
) {
|
) {
|
||||||
const upload = new Upload({
|
const upload = new Upload({
|
||||||
client: s3,
|
client: s3,
|
||||||
|
@ -95,15 +98,12 @@ async function uploadToB2Bucket(
|
||||||
|
|
||||||
const rawDatasetInfo = await getRawDatasetInfo();
|
const rawDatasetInfo = await getRawDatasetInfo();
|
||||||
|
|
||||||
await saveFileIntoRepo(
|
await saveDatasetInfoIntoRepo(rawDatasetInfo);
|
||||||
"dataset-info.json",
|
|
||||||
JSON.stringify(rawDatasetInfo, null, 2)
|
|
||||||
);
|
|
||||||
|
|
||||||
let errored = false;
|
let errored = false;
|
||||||
|
|
||||||
function checkRes(
|
function checkRes(
|
||||||
res: Response
|
res: Response,
|
||||||
): res is Response & { body: ReadableStream<Uint8Array> } {
|
): res is Response & { body: ReadableStream<Uint8Array> } {
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
console.error(`❌ Error downloading ${res.url}`);
|
console.error(`❌ Error downloading ${res.url}`);
|
||||||
|
@ -116,7 +116,7 @@ function checkRes(
|
||||||
|
|
||||||
await uploadToB2Bucket(
|
await uploadToB2Bucket(
|
||||||
`timestamped-metadata/${new Date().toISOString()}.json`,
|
`timestamped-metadata/${new Date().toISOString()}.json`,
|
||||||
JSON.stringify(rawDatasetInfo, null, 2)
|
JSON.stringify(rawDatasetInfo, null, 2),
|
||||||
);
|
);
|
||||||
|
|
||||||
const datasetInfo = z.object({ result: zDatasetInfo }).parse(rawDatasetInfo);
|
const datasetInfo = z.object({ result: zDatasetInfo }).parse(rawDatasetInfo);
|
||||||
|
@ -144,7 +144,7 @@ for (const resource of datasetInfo.result.resources) {
|
||||||
|
|
||||||
await writeFile(
|
await writeFile(
|
||||||
join(dir, "dataset-info.json"),
|
join(dir, "dataset-info.json"),
|
||||||
JSON.stringify(rawDatasetInfo, null, 2)
|
JSON.stringify(rawDatasetInfo, null, 2),
|
||||||
);
|
);
|
||||||
|
|
||||||
const compressed =
|
const compressed =
|
||||||
|
@ -163,8 +163,6 @@ for (const resource of datasetInfo.result.resources) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
await saveFileIntoRepo("index.md", await generateMarkdown());
|
|
||||||
|
|
||||||
if (errored) {
|
if (errored) {
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/client-s3": "^3.637.0",
|
"@aws-sdk/client-s3": "^3.637.0",
|
||||||
"@aws-sdk/lib-storage": "^3.637.0",
|
"@aws-sdk/lib-storage": "^3.637.0",
|
||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8"
|
||||||
"sepa-index-gen": "workspace:*"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -126,7 +126,6 @@ async function importSucursales(
|
||||||
}
|
}
|
||||||
|
|
||||||
async function importDataset(dir: string) {
|
async function importDataset(dir: string) {
|
||||||
console.log(dir);
|
|
||||||
const date = basename(dir).match(/(\d{4}-\d{2}-\d{2})/)![1];
|
const date = basename(dir).match(/(\d{4}-\d{2}-\d{2})/)![1];
|
||||||
// TODO: parsear "Ultima actualizacion" al final del CSV y insertarlo en la tabla datasets
|
// TODO: parsear "Ultima actualizacion" al final del CSV y insertarlo en la tabla datasets
|
||||||
|
|
||||||
|
@ -247,12 +246,13 @@ async function importDataset(dir: string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const pQueue = new PQueue({ concurrency: 2 });
|
const pQueue = new PQueue({ concurrency: 4 });
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const glob = new Glob("**/productos.csv");
|
const glob = new Glob("**/productos.csv");
|
||||||
for await (const file of glob.scan(process.argv[2])) {
|
for await (const file of glob.scan(process.argv[2])) {
|
||||||
const dir = join(process.argv[2], dirname(file));
|
const dir = join(process.argv[2], dirname(file));
|
||||||
|
console.log(dir);
|
||||||
pQueue.add(() => importDataset(dir));
|
pQueue.add(() => importDataset(dir));
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
|
Loading…
Reference in a new issue