downloader: usar el schema global

This commit is contained in:
Cat /dev/Nulo 2023-12-09 17:10:21 -03:00
parent 37756fbf3c
commit d8b2b29709

View file

@ -4,6 +4,7 @@ import { join, normalize } from "node:path";
import pLimit from "p-limit"; import pLimit from "p-limit";
import { targetsPorDefecto, userAgent } from "./config.js"; import { targetsPorDefecto, userAgent } from "./config.js";
import { generateDataJsonFromCkan } from "./ckan_to_datajson.js"; import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
import { zData } from "common/schema.js";
setGlobalDispatcher( setGlobalDispatcher(
new Agent({ new Agent({
@ -58,10 +59,10 @@ async function downloadFromData(target) {
json = await jsonRes.json(); json = await jsonRes.json();
} }
// prettier-ignore const parsed = zData.parse(json);
const parsed = /** @type {{ dataset: Dataset[] }} */(json)
await mkdir(outputPath, { recursive: true }); await mkdir(outputPath, { recursive: true });
await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed)); await writeFile(join(outputPath, "data.json"), JSON.stringify(json));
await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`); await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
const errorFile = ( const errorFile = (
await open(join(outputPath, "errors.jsonl"), "w") await open(join(outputPath, "errors.jsonl"), "w")
@ -70,17 +71,23 @@ async function downloadFromData(target) {
/** @type {DownloadJob[]} */ /** @type {DownloadJob[]} */
const jobs = parsed.dataset.flatMap((dataset) => const jobs = parsed.dataset.flatMap((dataset) =>
dataset.distribution dataset.distribution
.filter((dist) => { .filter(
try { /** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
patchUrl(new URL(dist.downloadURL)); (dist) => {
return true; try {
} catch (error) { if (!dist.downloadURL) {
errorFile.write( throw new Error("No downloadURL in distribution");
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n" }
); patchUrl(new URL(dist.downloadURL));
return false; return true;
} catch (error) {
errorFile.write(
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
);
return false;
}
} }
}) )
.map((dist) => ({ .map((dist) => ({
dataset, dataset,
dist, dist,
@ -210,22 +217,13 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
} }
/** @typedef DownloadJob /** @typedef DownloadJob
* @prop {Dataset} dataset * @prop {import("common/schema.js").Dataset} dataset
* @prop {Distribution} dist * @prop {import("common/schema.js").Distribution} dist
* @prop {URL} url * @prop {URL} url
* @prop {string} outputPath * @prop {string} outputPath
* @prop {number} attempts * @prop {number} attempts
* @prop {Date=} waitUntil * @prop {Date=} waitUntil
*/ */
/** @typedef Dataset
* @prop {string} identifier
* @prop {Distribution[]} distribution
*/
/** @typedef Distribution
* @prop {string} identifier
* @prop {string} fileName
* @prop {string} downloadURL
*/
// https://security.stackexchange.com/a/123723 // https://security.stackexchange.com/a/123723
/** /**
@ -261,7 +259,7 @@ function wait(ms) {
} }
/** /**
* @param {{ dataset: Dataset, dist: Distribution, url?: URL }} job * @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job
* @param {any} error * @param {any} error
*/ */
function encodeError(job, error) { function encodeError(job, error) {