Compare commits

..

21 commits

Author SHA1 Message Date
6b75cea27a guardar script util para diff 2023-12-29 12:37:14 -03:00
1e85db9443 permitir reintentar descargar después de que esté generado el dump 2023-12-29 12:27:16 -03:00
20c3deb18e borrar json duplicado
esto hace que se corrompa el dump
2023-12-29 12:25:44 -03:00
1f07c48cac mejorar lógica de ignorar tls 2023-12-29 12:23:34 -03:00
8c06f68484 agregar parches para sitios 2023-12-29 10:01:59 -03:00
6c3776ffd2 solo usar limites locales 2023-12-19 12:01:55 -03:00
5406113dbb refactor get json 2023-12-19 00:55:47 -03:00
eb18951686 reintentar cada más tiempo en ratelimit 2023-12-18 14:12:25 -03:00
5aca382027 usar un throttle para cdn.buenosaires.gob.ar 2023-12-18 13:44:28 -03:00
fb7dea8b7d mostrar reintentos y agregar random al wait de throttle 2023-12-18 13:44:28 -03:00
cf46317d7b reactivar keepalive y aumentar timeout connect 2023-12-18 12:46:34 -03:00
8708f13838 chore: limpiar imports 2023-12-18 12:34:41 -03:00
91035fa2b0 upgrade undici 2023-12-18 12:34:31 -03:00
64b1c8550d fix: arreglar uso de promises 2023-12-18 12:17:31 -03:00
697eb887cf refactor: network 2023-12-18 12:13:09 -03:00
8d401e6ca3 Revert "usar fetch"
This reverts commit 31b58d373c.
2023-12-16 11:27:51 -03:00
31b58d373c usar fetch
experimento
2023-12-16 11:26:55 -03:00
2f55749bcb usar api mas estándar para errorFile
funciona en bun
2023-12-16 11:25:46 -03:00
59db305e74 WIP: estandarizar pedidos http 2023-12-16 11:15:27 -03:00
5880f9f289 pami via https 2023-12-15 17:58:50 -03:00
3cb8fb16ff borrar arsat duplicado 2023-12-15 17:58:46 -03:00
8 changed files with 300 additions and 178 deletions

View file

@ -30,14 +30,6 @@ export const zData = z.object({
}); });
/** @typedef {z.infer<typeof zData>} Data */ /** @typedef {z.infer<typeof zData>} Data */
export const zError = z.object({
url: z.string().optional(),
datasetIdentifier: z.string(),
distributionIdentifier: z.string(),
kind: z.enum(["generic_error", "http_error", "infinite_redirect"]),
error: z.string().optional(),
});
export const zDumpMetadata = z.object({ export const zDumpMetadata = z.object({
sites: z.array( sites: z.array(
z.object({ z.object({
@ -49,3 +41,23 @@ export const zDumpMetadata = z.object({
), ),
}); });
/** @typedef {z.infer<typeof zDumpMetadata>} DumpMetadata */ /** @typedef {z.infer<typeof zDumpMetadata>} DumpMetadata */
const zDumpErrorAlways = {
url: z.string().optional(),
datasetIdentifier: z.string(),
distributionIdentifier: z.string(),
};
export const zDumpError = z.discriminatedUnion("kind", [
z.object({
...zDumpErrorAlways,
kind: z.literal("http_error"),
status_code: z.number(),
}),
z.object({ ...zDumpErrorAlways, kind: z.literal("infinite_redirect") }),
z.object({
...zDumpErrorAlways,
kind: z.literal("generic_error"),
error: z.string(),
}),
]);
/** @typedef {z.infer<typeof zDumpError>} DumpError */

View file

@ -1,7 +1,7 @@
import { request } from "undici";
import z from "zod"; import z from "zod";
import { userAgent } from "./config.js"; import pMap from "p-map";
import { basename } from "path"; import { basename } from "path";
import { customRequest } from "./network.js";
const zCkanPackageList = z.object({ const zCkanPackageList = z.object({
success: z.literal(true), success: z.literal(true),
@ -12,11 +12,7 @@ const zCkanPackageList = z.object({
* @param {string} url * @param {string} url
*/ */
async function getJson(url) { async function getJson(url) {
const res = await request(url, { const res = await customRequest(new URL(url));
headers: {
"User-Agent": userAgent,
},
});
const json = await res.body.json(); const json = await res.body.json();
return json; return json;
} }
@ -119,9 +115,9 @@ async function getCkanInfo(ckanUrl) {
export async function generateDataJsonFromCkan(ckanUrl) { export async function generateDataJsonFromCkan(ckanUrl) {
const list = await getCkanPackageList(ckanUrl); const list = await getCkanPackageList(ckanUrl);
const info = await getCkanInfo(ckanUrl); const info = await getCkanInfo(ckanUrl);
const packages = await Promise.all( const packages = await pMap(list, (link) => getCkanPackage(ckanUrl, link), {
list.map((n) => getCkanPackage(ckanUrl, n)) concurrency: 12,
); });
/** @type {import("common/schema.js").Data & { generatedBy: string }} */ /** @type {import("common/schema.js").Data & { generatedBy: string }} */
const data = { const data = {
generatedBy: generatedBy:

View file

@ -4,7 +4,6 @@ export const targetsPorDefecto = [
"datajson+https://datos.magyp.gob.ar/data.json", "datajson+https://datos.magyp.gob.ar/data.json",
"datajson+https://datos.acumar.gov.ar/data.json", "datajson+https://datos.acumar.gov.ar/data.json",
"datajson+https://datasets.datos.mincyt.gob.ar/data.json", "datajson+https://datasets.datos.mincyt.gob.ar/data.json",
"datajson+https://datos.arsat.com.ar/data.json",
"datajson+https://datos.cultura.gob.ar/data.json", "datajson+https://datos.cultura.gob.ar/data.json",
"datajson+https://datos.mininterior.gob.ar/data.json", "datajson+https://datos.mininterior.gob.ar/data.json",
"datajson+https://datos.produccion.gob.ar/data.json", "datajson+https://datos.produccion.gob.ar/data.json",
@ -36,14 +35,13 @@ export const targetsPorDefecto = [
// "datajson+http://andino.siu.edu.ar/data.json", // "datajson+http://andino.siu.edu.ar/data.json",
"datajson+https://monitoreo.datos.gob.ar/catalog/educacion/data.json", "datajson+https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
"datajson+https://monitoreo.datos.gob.ar/media/catalog/inti/data.json", "datajson+https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
"datajson+https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
"datajson+https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public", "datajson+https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
"datajson+https://transparencia.enargas.gob.ar/data.json", "datajson+https://transparencia.enargas.gob.ar/data.json",
"datajson+https://infra.datos.gob.ar/catalog/sspm/data.json", "datajson+https://infra.datos.gob.ar/catalog/sspm/data.json",
"datajson+https://monitoreo.datos.gob.ar/catalog/ssprys/data.json", "datajson+https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
"datajson+https://monitoreo.datos.gob.ar/catalog/siep/data.json", "datajson+https://monitoreo.datos.gob.ar/catalog/siep/data.json",
"datajson+https://monitoreo.datos.gob.ar/catalog/exterior/data.json", "datajson+https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
"datajson+http://datos.pami.org.ar/data.json", "datajson+https://datos.pami.org.ar/data.json",
"datajson+https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json", "datajson+https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
"datajson+https://datos.yvera.gob.ar/data.json", "datajson+https://datos.yvera.gob.ar/data.json",
"datajson+https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json", "datajson+https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",

View file

@ -1,33 +1,20 @@
import { mkdir, open, writeFile } from "node:fs/promises"; import { mkdir, readFile, writeFile } from "node:fs/promises";
import { Agent, fetch, request, setGlobalDispatcher } from "undici";
import { join, normalize } from "node:path"; import { join, normalize } from "node:path";
import pLimit from "p-limit"; import { targetsPorDefecto } from "./config.js";
import { targetsPorDefecto, userAgent } from "./config.js";
import { generateDataJsonFromCkan } from "./ckan_to_datajson.js"; import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
import { zData } from "common/schema.js"; import { zData, zDumpError } from "common/schema.js";
import {
StatusCodeError,
TooManyRedirectsError,
customRequestWithRetries,
} from "./network.js";
import { createWriteStream } from "node:fs";
import pMap from "p-map";
setGlobalDispatcher(
new Agent({
pipelining: 0,
}),
);
/** key es host
* @type {Map<string, import("p-limit").LimitFunction>} */
const limiters = new Map();
const nThreads = process.env.N_THREADS ? parseInt(process.env.N_THREADS) : 8;
class StatusCodeError extends Error {
/**
* @param {number} code
*/
constructor(code) {
super(`Status code: ${code}`);
this.code = code;
}
}
class TooManyRedirectsError extends Error {}
let urls = process.argv.slice(2); let urls = process.argv.slice(2);
if (process.argv[2] === "retry") {
urls = process.argv.slice(3);
}
if (urls.length < 1) { if (urls.length < 1) {
urls = targetsPorDefecto; urls = targetsPorDefecto;
} }
@ -41,64 +28,69 @@ const targets = urls.map((url) => {
return { type: "ckan", url: url.slice("ckan+".length) }; return { type: "ckan", url: url.slice("ckan+".length) };
} else return { type: "datajson", url }; } else return { type: "datajson", url };
}); });
const action = process.argv[2] === "retry" ? retryErrors : downloadEverything;
for (const target of targets) for (const target of targets)
downloadFromData(target).catch((error) => action(target).catch((error) =>
console.error(`${target.type}+${target.url} FALLÓ CON`, error), console.error(`${target.type}+${target.url} FALLÓ CON`, error)
); );
/** /**
* @param {Target} target * @param {Target} target
*/ */
async function downloadFromData(target) { async function downloadEverything(target) {
const outputPath = generateOutputPath(target.url); const outputPath = generateOutputPath(target.url);
let json; const json = await getDataJsonForTarget(target);
if (target.type === "ckan") {
json = await generateDataJsonFromCkan(target.url);
} else if (target.type === "datajson") {
const jsonRes = await fetch(target.url);
json = await jsonRes.json();
}
const parsed = zData.parse(json);
await mkdir(outputPath, { recursive: true }); await mkdir(outputPath, { recursive: true });
await writeFile(join(outputPath, "data.json"), JSON.stringify(json)); await writeFile(join(outputPath, "data.json"), json);
await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`); await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
const errorFile = (
await open(join(outputPath, "errors.jsonl"), "w") await downloadFiles(target);
).createWriteStream(); }
/**
* @param {Target} target
*/
async function retryErrors(target) {
const outputPath = generateOutputPath(target.url);
const jsonl = await readFile(join(outputPath, "errors.jsonl"), "utf-8");
const errors = jsonl
.split("\n")
.filter((l) => l.length > 0)
.map((line) => zDumpError.parse(JSON.parse(line)));
await downloadFiles(target, (job) =>
errors.some(
(e) =>
e.datasetIdentifier === job.dataset.identifier &&
e.distributionIdentifier === job.dist.identifier
)
);
}
/**
* @param {Target} target
* @param {(job: DownloadJob) => boolean=} filterJobs
*/
async function downloadFiles(target, filterJobs) {
const outputPath = generateOutputPath(target.url);
const json = await readFile(join(outputPath, "data.json"), "utf-8");
const parsed = zData.parse(JSON.parse(json));
let nFinished = 0;
let nErrors = 0;
/** @param {ReturnType<typeof encodeError>} err */
const onError = (err) => {
errorFile.write(JSON.stringify(err) + "\n");
nErrors++;
};
const errorFile = createWriteStream(join(outputPath, "errors.jsonl"), {
flags: "w",
});
try { try {
let nFinished = 0; let jobs = jobsFromDataset(parsed.dataset, onError, outputPath);
let nErrors = 0; if (filterJobs) jobs = jobs.filter(filterJobs);
/** @type {DownloadJob[]} */
const jobs = parsed.dataset.flatMap((dataset) =>
dataset.distribution
.filter(
/** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
(dist) => {
try {
if (!dist.downloadURL) {
throw new Error("No downloadURL in distribution");
}
patchUrl(new URL(dist.downloadURL));
return true;
} catch (error) {
errorFile.write(
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n",
);
nErrors++;
return false;
}
},
)
.map((dist) => ({
dataset,
dist,
url: patchUrl(new URL(dist.downloadURL)),
outputPath,
attempts: 0,
})),
);
const totalJobs = jobs.length; const totalJobs = jobs.length;
// por las dudas verificar que no hayan archivos duplicados // por las dudas verificar que no hayan archivos duplicados
@ -106,31 +98,27 @@ async function downloadFromData(target) {
shuffleArray(jobs); shuffleArray(jobs);
const promises = jobs.map((job) => { const promise = pMap(
let limit = limiters.get(job.url.host); jobs,
if (!limit) { async (job) => {
limit = pLimit(nThreads);
limiters.set(job.url.host, limit);
}
return limit(async () => {
try { try {
await downloadDistWithRetries(job); return await downloadDistWithRetries(job);
} catch (error) { } catch (error) {
errorFile.write(JSON.stringify(encodeError(job, error)) + "\n"); onError(encodeError(job, error));
nErrors++;
} finally { } finally {
nFinished++; nFinished++;
} }
}); },
}); { concurrency: 32 }
);
process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`); process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`);
const interval = setInterval(() => { const interval = setInterval(() => {
process.stderr.write( process.stderr.write(
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`, `info[${outputPath}]: ${nFinished}/${totalJobs} done\n`
); );
}, 30000); }, 30000);
await Promise.all(promises); await promise;
clearInterval(interval); clearInterval(interval);
if (nErrors > 0) if (nErrors > 0)
console.error(`${outputPath}: Finished with ${nErrors} errors`); console.error(`${outputPath}: Finished with ${nErrors} errors`);
@ -139,6 +127,53 @@ async function downloadFromData(target) {
} }
} }
/**
* @param {import("common/schema.js").Dataset[]} datasets
* @param {(err: ReturnType<typeof encodeError>) => void} onError
* @param {string} outputPath
* @returns {DownloadJob[]}
*/
function jobsFromDataset(datasets, onError, outputPath) {
return datasets.flatMap((dataset) =>
dataset.distribution
.filter(
/** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
(dist) => {
try {
if (!dist.downloadURL) {
throw new Error("No downloadURL in distribution");
}
patchUrl(new URL(dist.downloadURL));
return true;
} catch (error) {
onError(encodeError({ dataset, dist }, error));
return false;
}
}
)
.map((dist) => ({
dataset,
dist,
url: patchUrl(new URL(dist.downloadURL)),
outputPath,
attempts: 0,
}))
);
}
/**
* @param {Target} target
* @returns {Promise<string>}
*/
async function getDataJsonForTarget(target) {
if (target.type === "ckan") {
return JSON.stringify(await generateDataJsonFromCkan(target.url));
} else if (target.type === "datajson") {
const jsonRes = await customRequestWithRetries(new URL(target.url));
return await jsonRes.body.text();
} else throw new Error("?????????????");
}
/** /**
* @param {string} jsonUrlString * @param {string} jsonUrlString
*/ */
@ -150,67 +185,19 @@ export function generateOutputPath(jsonUrlString) {
/** /**
* @argument {DownloadJob} job * @argument {DownloadJob} job
* @argument {number} attempts
* @returns {Promise<void>}
*/ */
async function downloadDistWithRetries(job, attempts = 0) { async function downloadDistWithRetries({ dist, dataset, url, outputPath }) {
const { url } = job; const res = await customRequestWithRetries(url);
try {
await downloadDist(job);
} catch (error) {
// algunos servidores usan 403 como coso para decir "calmate"
// intentar hasta 15 veces con 15 segundos de por medio
if (
error instanceof StatusCodeError &&
((error.code === 403 && url.host === "minsegar-my.sharepoint.com") ||
(error.code === 503 && url.host === "cdn.buenosaires.gob.ar")) &&
attempts < 15
) {
await wait(15000);
return await downloadDistWithRetries(job, attempts + 1);
}
// si no fue un error de http, reintentar hasta 3 veces con 5 segundos de por medio
else if (
!(error instanceof StatusCodeError) &&
!(error instanceof TooManyRedirectsError) &&
attempts < 3
) {
await wait(5000 + Math.random() * 10000);
return await downloadDistWithRetries(job, attempts + 1);
} else throw error;
}
}
/**
* @argument {DownloadJob} job
*/
async function downloadDist({ dist, dataset, url, outputPath }) {
// sharepoint no le gusta compartir a bots lol
const spoofUserAgent = url.host.endsWith("sharepoint.com");
const res = await request(url.toString(), {
maxRedirections: 20,
headers: {
"User-Agent": spoofUserAgent
? "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0"
: userAgent,
},
});
if (res.statusCode >= 300 && res.statusCode <= 399)
throw new TooManyRedirectsError();
if (res.statusCode < 200 || res.statusCode > 299) {
throw new StatusCodeError(res.statusCode);
}
const fileDirPath = join( const fileDirPath = join(
outputPath, outputPath,
sanitizeSuffix(dataset.identifier), sanitizeSuffix(dataset.identifier),
sanitizeSuffix(dist.identifier), sanitizeSuffix(dist.identifier)
); );
await mkdir(fileDirPath, { recursive: true }); await mkdir(fileDirPath, { recursive: true });
const filePath = join( const filePath = join(
fileDirPath, fileDirPath,
sanitizeSuffix(dist.fileName || dist.identifier), sanitizeSuffix(dist.fileName || dist.identifier)
); );
if (!res.body) throw new Error("no body"); if (!res.body) throw new Error("no body");
@ -240,11 +227,11 @@ function sanitizeSuffix(path) {
*/ */
function chequearIdsDuplicados(jobs, id) { function chequearIdsDuplicados(jobs, id) {
const duplicated = hasDuplicates( const duplicated = hasDuplicates(
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`), jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`)
); );
if (duplicated) { if (duplicated) {
console.error( console.error(
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`, `ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`
); );
} }
} }
@ -254,14 +241,10 @@ function hasDuplicates(array) {
return new Set(array).size !== array.length; return new Set(array).size !== array.length;
} }
/** @argument {number} ms */
function wait(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/** /**
* @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job * @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job
* @param {any} error * @param {any} error
* @returns {import("common/schema.js").DumpError}
*/ */
function encodeError(job, error) { function encodeError(job, error) {
const always = { const always = {

113
downloader/network.js Normal file
View file

@ -0,0 +1,113 @@
import { Dispatcher, request, Agent } from "undici";
import { userAgent } from "./config.js";
const dispatcher = new Agent({
connect: { timeout: 60 * 1000 },
bodyTimeout: 15 * 60 * 1000,
maxRedirections: 20,
});
const ignoreTlsDispatcher = new Agent({
connect: {
timeout: 60 * 1000,
rejectUnauthorized: false,
checkServerIdentity() {
return undefined;
},
},
bodyTimeout: 15 * 60 * 1000,
maxRedirections: 20,
});
export class StatusCodeError extends Error {
/**
* @param {number} code
*/
constructor(code) {
super(`Status code: ${code}`);
this.code = code;
}
}
export class TooManyRedirectsError extends Error {}
const REPORT_RETRIES = process.env.REPORT_RETRIES === "true" || false;
/**
* @argument {URL} url
* @argument {number} attempts
* @returns {Promise<Dispatcher.ResponseData>}
*/
export async function customRequestWithRetries(url, attempts = 0) {
try {
return await customRequest(url);
} catch (error) {
// algunos servidores usan 403 como coso para decir "calmate"
// intentar hasta 15 veces con 15 segundos de por medio
if (
error instanceof StatusCodeError &&
((error.code === 403 && url.host === "minsegar-my.sharepoint.com") ||
(error.code === 503 && url.host === "cdn.buenosaires.gob.ar") ||
(error.code === 502 && url.host === "datos.jus.gob.ar")) &&
attempts < 15
) {
if (REPORT_RETRIES)
console.debug(`reintentando(status)[${attempts}] ${url.toString()}`);
await wait(1000 * (attempts + 1) ** 2 + Math.random() * 10000);
return await customRequestWithRetries(url, attempts + 1);
}
// si no fue un error de http, reintentar hasta 3 veces con ~10 segundos de por medio
else if (
!(error instanceof StatusCodeError) &&
!(error instanceof TooManyRedirectsError) &&
attempts < 7
) {
if (REPORT_RETRIES)
console.debug(`reintentando[${attempts}] ${url.toString()}`);
await wait(5000 + Math.random() * 10000);
return await customRequestWithRetries(url, attempts + 1);
} else throw error;
}
}
/** @argument {number} ms */
function wait(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* genera los headers para hacer un pedido dependiendo de la url
* @param {URL} url
*/
function getHeaders(url) {
// sharepoint no le gusta compartir a bots lol
const spoofUserAgent = url.host.endsWith("sharepoint.com");
return {
"User-Agent": spoofUserAgent
? "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0"
: userAgent,
};
}
/**
* @param {URL} url
*/
export async function customRequest(url) {
let d = dispatcher;
if (
url.hostname === "www.energia.gob.ar" ||
url.hostname === "datos.agroindustria.gob.ar" ||
url.hostname === "www.agroindustria.gob.ar"
) {
d = ignoreTlsDispatcher;
}
const res = await request(url.toString(), {
headers: getHeaders(url),
dispatcher,
});
if (res.statusCode >= 300 && res.statusCode <= 399)
throw new TooManyRedirectsError();
if (res.statusCode < 200 || res.statusCode > 299)
throw new StatusCodeError(res.statusCode);
return res;
}

View file

@ -13,7 +13,9 @@
"dependencies": { "dependencies": {
"common": "workspace:", "common": "workspace:",
"p-limit": "^5.0.0", "p-limit": "^5.0.0",
"undici": "^5.28.0", "p-map": "^7.0.0",
"p-throttle": "^6.1.0",
"undici": "^6.0.1",
"zod": "^3.22.4" "zod": "^3.22.4"
}, },
"devDependencies": { "devDependencies": {

View file

@ -27,6 +27,8 @@
<ul <ul
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700" class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
> >
<!-- para ver diferencias entre dumps descomprimidos (fish shell): diff -u (find data-2023-12-09 -printf '%P\n' | sort | psub) (find data -printf '%P\n' | sort | psub)|less -->
<!-- nPortales: find . -maxdepth 1 -mindepth 1 -type d | wc -l --> <!-- nPortales: find . -maxdepth 1 -mindepth 1 -type d | wc -l -->
<!-- nDatasets: jq '.dataset | length' */data.json | awk '{s+=$1} END {print s}' --> <!-- nDatasets: jq '.dataset | length' */data.json | awk '{s+=$1} END {print s}' -->
<!-- size: du -sh --> <!-- size: du -sh -->

View file

@ -20,9 +20,15 @@ importers:
p-limit: p-limit:
specifier: ^5.0.0 specifier: ^5.0.0
version: 5.0.0 version: 5.0.0
p-map:
specifier: ^7.0.0
version: 7.0.0
p-throttle:
specifier: ^6.1.0
version: 6.1.0
undici: undici:
specifier: ^5.28.0 specifier: ^6.0.1
version: 5.28.2 version: 6.0.1
zod: zod:
specifier: ^3.22.4 specifier: ^3.22.4
version: 3.22.4 version: 3.22.4
@ -57,7 +63,7 @@ importers:
devDependencies: devDependencies:
'@poppanator/sveltekit-svg': '@poppanator/sveltekit-svg':
specifier: ^4.1.3 specifier: ^4.1.3
version: 4.1.3(svelte@4.2.8)(svgo@3.0.5)(vite@5.0.7) version: 4.1.3(svelte@4.2.8)(svgo@3.1.0)(vite@5.0.7)
'@sveltejs/vite-plugin-svelte': '@sveltejs/vite-plugin-svelte':
specifier: ^3.0.0 specifier: ^3.0.0
version: 3.0.1(svelte@4.2.8)(vite@5.0.7) version: 3.0.1(svelte@4.2.8)(vite@5.0.7)
@ -373,7 +379,7 @@ packages:
fastq: 1.15.0 fastq: 1.15.0
dev: true dev: true
/@poppanator/sveltekit-svg@4.1.3(svelte@4.2.8)(svgo@3.0.5)(vite@5.0.7): /@poppanator/sveltekit-svg@4.1.3(svelte@4.2.8)(svgo@3.1.0)(vite@5.0.7):
resolution: {integrity: sha512-cKdFxFPPzS470xy2XFQ2m/URa9On4fw7n5wvBqAwVO4sY8dmski+2N3GKFELt4tvzM3JPjAqz76Ex7U5IpKeIg==} resolution: {integrity: sha512-cKdFxFPPzS470xy2XFQ2m/URa9On4fw7n5wvBqAwVO4sY8dmski+2N3GKFELt4tvzM3JPjAqz76Ex7U5IpKeIg==}
peerDependencies: peerDependencies:
svelte: '>=4.x' svelte: '>=4.x'
@ -381,7 +387,7 @@ packages:
vite: '>=4.x' vite: '>=4.x'
dependencies: dependencies:
svelte: 4.2.8 svelte: 4.2.8
svgo: 3.0.5 svgo: 3.1.0
vite: 5.0.7 vite: 5.0.7
dev: true dev: true
@ -1203,6 +1209,16 @@ packages:
yocto-queue: 1.0.0 yocto-queue: 1.0.0
dev: false dev: false
/p-map@7.0.0:
resolution: {integrity: sha512-EZl03dLKv3RypkrjlevZoNwQMSy4bAblWcR18zhonktnN4fUs3asFQKSe0awn982omGxamvbejqQKQYDJYHCEg==}
engines: {node: '>=18'}
dev: false
/p-throttle@6.1.0:
resolution: {integrity: sha512-eQMdGTxk2+047La67wefUtt0tEHh7D+C8Jl7QXoFCuIiNYeQ9zWs2AZiJdIAs72rSXZ06t11me2bgalRNdy3SQ==}
engines: {node: '>=18'}
dev: false
/parent-module@1.0.1: /parent-module@1.0.1:
resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==}
engines: {node: '>=6'} engines: {node: '>=6'}
@ -1626,8 +1642,8 @@ packages:
periscopic: 3.1.0 periscopic: 3.1.0
dev: true dev: true
/svgo@3.0.5: /svgo@3.1.0:
resolution: {integrity: sha512-HQKHEo73pMNOlDlBcLgZRcHW2+1wo7bFYayAXkGN0l/2+h68KjlfZyMRhdhaGvoHV2eApOovl12zoFz42sT6rQ==} resolution: {integrity: sha512-R5SnNA89w1dYgNv570591F66v34b3eQShpIBcQtZtM5trJwm1VvxbIoMpRYY3ybTAutcKTLEmTsdnaknOHbiQA==}
engines: {node: '>=14.0.0'} engines: {node: '>=14.0.0'}
hasBin: true hasBin: true
dependencies: dependencies:
@ -1709,9 +1725,9 @@ packages:
resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==}
dev: true dev: true
/undici@5.28.2: /undici@6.0.1:
resolution: {integrity: sha512-wh1pHJHnUeQV5Xa8/kyQhO7WFa8M34l026L5P/+2TYiakvGy5Rdc8jWZVyG7ieht/0WgJLEd3kcU5gKx+6GC8w==} resolution: {integrity: sha512-eZFYQLeS9BiXpsU0cuFhCwfeda2MnC48EVmmOz/eCjsTgmyTdaHdVsPSC/kwC2GtW2e0uH0HIPbadf3/bRWSxw==}
engines: {node: '>=14.0'} engines: {node: '>=18.0'}
dependencies: dependencies:
'@fastify/busboy': 2.1.0 '@fastify/busboy': 2.1.0
dev: false dev: false