mirror of
https://github.com/catdevnull/transicion-desordenada-diablo
synced 2024-11-15 02:21:39 +00:00
downloader: usar el schema global
This commit is contained in:
parent
37756fbf3c
commit
d8b2b29709
1 changed files with 23 additions and 25 deletions
|
@ -4,6 +4,7 @@ import { join, normalize } from "node:path";
|
||||||
import pLimit from "p-limit";
|
import pLimit from "p-limit";
|
||||||
import { targetsPorDefecto, userAgent } from "./config.js";
|
import { targetsPorDefecto, userAgent } from "./config.js";
|
||||||
import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
|
import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
|
||||||
|
import { zData } from "common/schema.js";
|
||||||
|
|
||||||
setGlobalDispatcher(
|
setGlobalDispatcher(
|
||||||
new Agent({
|
new Agent({
|
||||||
|
@ -58,10 +59,10 @@ async function downloadFromData(target) {
|
||||||
json = await jsonRes.json();
|
json = await jsonRes.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
// prettier-ignore
|
const parsed = zData.parse(json);
|
||||||
const parsed = /** @type {{ dataset: Dataset[] }} */(json)
|
|
||||||
await mkdir(outputPath, { recursive: true });
|
await mkdir(outputPath, { recursive: true });
|
||||||
await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed));
|
await writeFile(join(outputPath, "data.json"), JSON.stringify(json));
|
||||||
await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
|
await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
|
||||||
const errorFile = (
|
const errorFile = (
|
||||||
await open(join(outputPath, "errors.jsonl"), "w")
|
await open(join(outputPath, "errors.jsonl"), "w")
|
||||||
|
@ -70,17 +71,23 @@ async function downloadFromData(target) {
|
||||||
/** @type {DownloadJob[]} */
|
/** @type {DownloadJob[]} */
|
||||||
const jobs = parsed.dataset.flatMap((dataset) =>
|
const jobs = parsed.dataset.flatMap((dataset) =>
|
||||||
dataset.distribution
|
dataset.distribution
|
||||||
.filter((dist) => {
|
.filter(
|
||||||
try {
|
/** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
|
||||||
patchUrl(new URL(dist.downloadURL));
|
(dist) => {
|
||||||
return true;
|
try {
|
||||||
} catch (error) {
|
if (!dist.downloadURL) {
|
||||||
errorFile.write(
|
throw new Error("No downloadURL in distribution");
|
||||||
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
|
}
|
||||||
);
|
patchUrl(new URL(dist.downloadURL));
|
||||||
return false;
|
return true;
|
||||||
|
} catch (error) {
|
||||||
|
errorFile.write(
|
||||||
|
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
|
||||||
|
);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
)
|
||||||
.map((dist) => ({
|
.map((dist) => ({
|
||||||
dataset,
|
dataset,
|
||||||
dist,
|
dist,
|
||||||
|
@ -210,22 +217,13 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @typedef DownloadJob
|
/** @typedef DownloadJob
|
||||||
* @prop {Dataset} dataset
|
* @prop {import("common/schema.js").Dataset} dataset
|
||||||
* @prop {Distribution} dist
|
* @prop {import("common/schema.js").Distribution} dist
|
||||||
* @prop {URL} url
|
* @prop {URL} url
|
||||||
* @prop {string} outputPath
|
* @prop {string} outputPath
|
||||||
* @prop {number} attempts
|
* @prop {number} attempts
|
||||||
* @prop {Date=} waitUntil
|
* @prop {Date=} waitUntil
|
||||||
*/
|
*/
|
||||||
/** @typedef Dataset
|
|
||||||
* @prop {string} identifier
|
|
||||||
* @prop {Distribution[]} distribution
|
|
||||||
*/
|
|
||||||
/** @typedef Distribution
|
|
||||||
* @prop {string} identifier
|
|
||||||
* @prop {string} fileName
|
|
||||||
* @prop {string} downloadURL
|
|
||||||
*/
|
|
||||||
|
|
||||||
// https://security.stackexchange.com/a/123723
|
// https://security.stackexchange.com/a/123723
|
||||||
/**
|
/**
|
||||||
|
@ -261,7 +259,7 @@ function wait(ms) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {{ dataset: Dataset, dist: Distribution, url?: URL }} job
|
* @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job
|
||||||
* @param {any} error
|
* @param {any} error
|
||||||
*/
|
*/
|
||||||
function encodeError(job, error) {
|
function encodeError(job, error) {
|
||||||
|
|
Loading…
Reference in a new issue