mirror of
https://github.com/catdevnull/transicion-desordenada-diablo
synced 2024-11-26 11:26:18 +00:00
Compare commits
15 commits
d01d197bde
...
5aa96adc8d
Author | SHA1 | Date | |
---|---|---|---|
5aa96adc8d | |||
|
6e89cc0c49 | ||
d9ce50bc9c | |||
b060fc49ec | |||
d8b2b29709 | |||
37756fbf3c | |||
bbfec73b48 | |||
de45e30de9 | |||
81de080d22 | |||
11a64468e5 | |||
e9de6d00d4 | |||
6c65aa2ead | |||
840496a153 | |||
b87738ea13 | |||
99ab91c552 |
23 changed files with 416 additions and 160 deletions
|
@ -6,6 +6,6 @@ prueba
|
|||
datos.gob.ar*
|
||||
data/
|
||||
data*
|
||||
downloader/data
|
||||
downloader/data*
|
||||
|
||||
*.zip
|
|
@ -28,6 +28,7 @@ export const zData = z.object({
|
|||
homepage: z.string().optional(),
|
||||
dataset: z.array(zDataset),
|
||||
});
|
||||
/** @typedef {z.infer<typeof zData>} Data */
|
||||
|
||||
export const zError = z.object({
|
||||
url: z.string().optional(),
|
||||
|
|
|
@ -3,12 +3,16 @@
|
|||
"lib": ["es2023"],
|
||||
"module": "ES2020",
|
||||
"target": "es2022",
|
||||
"moduleResolution": "Bundler",
|
||||
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"emitDeclarationOnly": true,
|
||||
"declaration": true
|
||||
}
|
||||
"declaration": true,
|
||||
"noEmit": true,
|
||||
"allowJs": true,
|
||||
"checkJs": true
|
||||
},
|
||||
"include": ["*.js"]
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ FROM docker.io/alpine:3.18 as build
|
|||
RUN apk add --no-cache npm \
|
||||
&& npm install -g esbuild pnpm
|
||||
|
||||
COPY .. /tmp/build/
|
||||
COPY . /tmp/build/
|
||||
WORKDIR /tmp/build/downloader
|
||||
RUN pnpm install \
|
||||
&& esbuild --bundle --format=cjs --platform=node --outfile=download_json.build.js --sourcemap=inline download_json.js \
|
152
downloader/ckan_to_datajson.js
Normal file
152
downloader/ckan_to_datajson.js
Normal file
|
@ -0,0 +1,152 @@
|
|||
import { request } from "undici";
|
||||
import z from "zod";
|
||||
import { userAgent } from "./config.js";
|
||||
import { basename } from "path";
|
||||
|
||||
const zCkanPackageList = z.object({
|
||||
success: z.literal(true),
|
||||
result: z.array(z.string()),
|
||||
});
|
||||
|
||||
/**
 * Requests `url` sending the project's User-Agent and returns the response
 * body parsed as JSON. Callers are expected to validate the result (the
 * functions in this file do so with zod schemas).
 * @param {string} url
 * @throws {Error} if the server answers with a non-2xx status code
 */
async function getJson(url) {
  const res = await request(url, {
    headers: {
      "User-Agent": userAgent,
    },
  });
  // undici's `request` resolves on any HTTP status, so error responses must be
  // detected here; otherwise they surface later as a confusing parse error.
  if (res.statusCode < 200 || res.statusCode > 299) {
    // Drain the body so the underlying connection can be released.
    await res.body.dump();
    throw new Error(`HTTP ${res.statusCode} al descargar ${url}`);
  }
  return await res.body.json();
}
|
||||
|
||||
/**
 * Downloads the list of dataset names published by a CKAN portal, using the
 * `package_list` API action.
 * @param {string} ckanUrl
 * @returns {Promise<string[]>}
 */
async function getCkanPackageList(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/package_list`;
  const response = await getJson(endpoint);
  const { result } = zCkanPackageList.parse(response);
  return result;
}
|
||||
|
||||
const zCkanOrganization = z.object({
|
||||
name: z.string(),
|
||||
title: z.string(),
|
||||
id: z.string(),
|
||||
created: z.string(),
|
||||
});
|
||||
const zCkanResource = z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
description: z.string(),
|
||||
format: z.string(),
|
||||
url: z.string(),
|
||||
});
|
||||
const zCkanTag = z.object({
|
||||
id: z.string(),
|
||||
display_name: z.string(),
|
||||
name: z.string(),
|
||||
});
|
||||
const zCkanGroup = z.object({
|
||||
id: z.string(),
|
||||
display_name: z.string(),
|
||||
name: z.string(),
|
||||
description: z.string(),
|
||||
});
|
||||
const zCkanPackage = z.object({
|
||||
license_title: z.string(),
|
||||
license_id: z.string(),
|
||||
license_url: z.string().optional(),
|
||||
maintainer: z.string(),
|
||||
maintainer_email: z.string(),
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
title: z.string(),
|
||||
metadata_created: z.string(),
|
||||
metadata_modified: z.string(),
|
||||
author: z.string(),
|
||||
author_email: z.string(),
|
||||
resources: z.array(zCkanResource),
|
||||
tags: z.array(zCkanTag),
|
||||
groups: z.array(zCkanGroup),
|
||||
organization: zCkanOrganization,
|
||||
url: z.string(),
|
||||
notes: z.string(),
|
||||
});
|
||||
const zCkanPackageShow = z.object({
|
||||
success: z.literal(true),
|
||||
result: zCkanPackage,
|
||||
});
|
||||
|
||||
/**
 * Fetches a single package (dataset) from a CKAN portal through the
 * `package_show` API action and validates it against `zCkanPackageShow`.
 * @param {string} ckanUrl
 * @param {string} packageName
 */
async function getCkanPackage(ckanUrl, packageName) {
  const id = encodeURIComponent(packageName);
  const response = await getJson(
    `${ckanUrl}/api/3/action/package_show?id=${id}`
  );
  const { result } = zCkanPackageShow.parse(response);
  return result;
}
|
||||
|
||||
// Shape of the `status_show` API response, as consumed by `getCkanInfo`.
const zCkanStatusShow = z.object({
  success: z.literal(true),
  result: z.object({
    site_url: z.string().describe("URL del portal. A veces vacio."),
    site_description: z
      .string()
      .describe("Descripción del portal. A veces vacio."),
    site_title: z.string().describe("Titulo del portal. A veces vacio."),
    error_emails_to: z.string().nullable(),
  }),
});
|
||||
|
||||
/**
 * Retrieves general information about the portal through the `status_show`
 * API action and validates it against `zCkanStatusShow`.
 * @param {string} ckanUrl
 */
async function getCkanInfo(ckanUrl) {
  const statusUrl = `${ckanUrl}/api/3/action/status_show`;
  const response = await getJson(statusUrl);
  const { result } = zCkanStatusShow.parse(response);
  return result;
}
|
||||
|
||||
/**
 * Generates a data.json-shaped catalog from a CKAN portal that may not publish
 * an official data.json. It lists every package, fetches each one and maps
 * packages to datasets and resources to distributions.
 * @param {string} ckanUrl
 * @param {number} [concurrency] maximum number of simultaneous `package_show`
 *   requests sent to the portal (defaults to 8)
 */
export async function generateDataJsonFromCkan(ckanUrl, concurrency = 8) {
  // The package list and the portal status do not depend on each other.
  const [list, info] = await Promise.all([
    getCkanPackageList(ckanUrl),
    getCkanInfo(ckanUrl),
  ]);

  // Fetch packages with a bounded pool of workers instead of firing one
  // request per package at once, which can mean thousands of simultaneous
  // requests against a single server.
  /** @type {Awaited<ReturnType<typeof getCkanPackage>>[]} */
  const packages = new Array(list.length);
  let next = 0;
  let failed = false;
  const worker = async () => {
    while (!failed && next < list.length) {
      const index = next++;
      try {
        // Store by index so the result keeps the order of `list`.
        packages[index] = await getCkanPackage(ckanUrl, list[index]);
      } catch (error) {
        // Stop the remaining workers from starting new requests.
        failed = true;
        throw error;
      }
    }
  };
  const nWorkers = Math.max(
    1,
    Math.min(Math.floor(concurrency) || 1, list.length)
  );
  await Promise.all(Array.from({ length: nWorkers }, () => worker()));

  /** @type {import("common/schema.js").Data & { generatedBy: string }} */
  const data = {
    generatedBy:
      "archivador de datos abiertos (ckan_to_datajson) <https://github.com/catdevnull/transicion-desordenada-diablo>",
    // `||` (not `??`) on purpose: these fields fall back when they are empty strings.
    title: info.site_title || ckanUrl,
    description: info.site_description || "",
    homepage: info.site_url || ckanUrl,
    dataset: packages.map((p) => ({
      title: p.title,
      description: p.notes,
      identifier: p.id,
      publisher: {
        name: p.maintainer,
        mbox: p.maintainer_email,
      },
      landingPage: p.url,
      distribution: p.resources.map((r) => ({
        identifier: r.id,
        title: r.name,
        description: r.description,
        fileName: basename(r.url),
        format: r.format,
        downloadURL: r.url,
      })),
    })),
  };
  return data;
}
|
66
downloader/config.js
Normal file
66
downloader/config.js
Normal file
|
@ -0,0 +1,66 @@
|
|||
// Default download targets, used when no target is passed on the command line.
// Each entry is `<type>+<url>`, where type is `datajson` (URL of a data.json
// catalog) or `ckan` (base URL of a CKAN portal).
// Entries must be unique: a repeated target would be downloaded twice at the
// same time.
export const targetsPorDefecto = [
  "datajson+https://datos.gob.ar/data.json",
  "datajson+http://datos.energia.gob.ar/data.json",
  "datajson+https://datos.magyp.gob.ar/data.json",
  "datajson+https://datos.acumar.gov.ar/data.json",
  "datajson+https://datasets.datos.mincyt.gob.ar/data.json",
  "datajson+https://datos.arsat.com.ar/data.json",
  "datajson+https://datos.cultura.gob.ar/data.json",
  "datajson+https://datos.mininterior.gob.ar/data.json",
  "datajson+https://datos.produccion.gob.ar/data.json",
  "datajson+https://datos.salud.gob.ar/data.json",
  "datajson+https://datos.transporte.gob.ar/data.json",
  "datajson+https://ckan.ciudaddemendoza.gov.ar/data.json",
  "datajson+https://datos.santafe.gob.ar/data.json",
  "datajson+https://datosabiertos.chaco.gob.ar/data.json",
  "datajson+https://datosabiertos.mercedes.gob.ar/data.json",
  "datajson+http://luj-bue-datos.paisdigital.innovacion.gob.ar/data.json",
  "datajson+https://datosabiertos.desarrollosocial.gob.ar/data.json",
  "datajson+http://datos.mindef.gov.ar/data.json",
  "datajson+http://datos.legislatura.gob.ar/data.json",
  "datajson+https://portal.hcdiputados-ba.gov.ar/data.json", // Buenos Aires Province Chamber of Deputies

  "datajson+https://monitoreo.datos.gob.ar/catalog/jgm/data.json",
  // "datajson+https://datosabiertos.enacom.gob.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/otros/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/aaip/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/sedronar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/modernizacion/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/shn/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/smn/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/ign/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/justicia/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/seguridad/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/ambiente/data.json",
  // "datajson+http://andino.siu.edu.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
  "datajson+https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
  "datajson+https://transparencia.enargas.gob.ar/data.json",
  "datajson+https://infra.datos.gob.ar/catalog/sspm/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/siep/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
  "datajson+http://datos.pami.org.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
  "datajson+https://datos.yvera.gob.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/dine/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/obras/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/generos/data.json",

  "ckan+http://datos.jus.gob.ar", // national justice
  "ckan+https://datos.csjn.gov.ar", // national supreme court of justice
  "ckan+https://datos.hcdn.gob.ar", // national chamber of deputies
  "ckan+https://data.buenosaires.gob.ar", // CABA (City of Buenos Aires)
  "ckan+https://datos.tsjbaires.gov.ar", // CABA superior court of justice
];
|
||||
|
||||
// desactivado porque va MUY lento: datosabiertos.gualeguaychu.gov.ar
|
||||
|
||||
// FYI: al menos los siguientes dominios no tienen la cadena completa de certificados en HTTPS. tenemos que usar un hack (node_extra_ca_certs_mozilla_bundle) para conectarnos a estos sitios. (se puede ver con ssllabs.com) ojalá lxs administradorxs de estos servidores lo arreglen.
|
||||
// www.enargas.gov.ar, transparencia.enargas.gov.ar, www.energia.gob.ar, www.economia.gob.ar, datos.yvera.gob.ar
|
||||
|
||||
// Value sent in the "User-Agent" header of the downloader's HTTP requests.
export const userAgent = "transicion-desordenada (https://nulo.ar)";
|
|
@ -1,64 +1,10 @@
|
|||
// @ts-check
|
||||
import { mkdir, open, writeFile } from "node:fs/promises";
|
||||
import { Agent, fetch, request, setGlobalDispatcher } from "undici";
|
||||
import { join, normalize } from "node:path";
|
||||
import pLimit from "p-limit";
|
||||
|
||||
export const sitiosPorDefecto = [
|
||||
"https://datos.gob.ar/data.json",
|
||||
"http://datos.energia.gob.ar/data.json",
|
||||
"https://datos.magyp.gob.ar/data.json",
|
||||
"https://datos.acumar.gov.ar/data.json",
|
||||
"https://datasets.datos.mincyt.gob.ar/data.json",
|
||||
"https://datos.arsat.com.ar/data.json",
|
||||
"https://datos.cultura.gob.ar/data.json",
|
||||
"https://datos.mininterior.gob.ar/data.json",
|
||||
"https://datos.produccion.gob.ar/data.json",
|
||||
"https://datos.salud.gob.ar/data.json",
|
||||
"https://datos.transporte.gob.ar/data.json",
|
||||
"https://ckan.ciudaddemendoza.gov.ar/data.json",
|
||||
"https://datos.santafe.gob.ar/data.json",
|
||||
"https://datosabiertos.chaco.gob.ar/data.json",
|
||||
"https://datosabiertos.mercedes.gob.ar/data.json",
|
||||
"http://luj-bue-datos.paisdigital.innovacion.gob.ar/data.json",
|
||||
"https://datosabiertos.desarrollosocial.gob.ar/data.json",
|
||||
"http://datos.mindef.gov.ar/data.json",
|
||||
|
||||
"https://monitoreo.datos.gob.ar/catalog/jgm/data.json",
|
||||
// 'https://datosabiertos.enacom.gob.ar/data.json',
|
||||
"https://monitoreo.datos.gob.ar/catalog/otros/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/aaip/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/sedronar/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/modernizacion/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/shn/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/smn/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/ign/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/justicia/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/seguridad/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/ambiente/data.json",
|
||||
// "http://andino.siu.edu.ar/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
|
||||
"https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
|
||||
"https://transparencia.enargas.gob.ar/data.json",
|
||||
"https://infra.datos.gob.ar/catalog/sspm/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/siep/data.json",
|
||||
"https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
|
||||
"http://datos.pami.org.ar/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
|
||||
"https://datos.yvera.gob.ar/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/dine/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/obras/data.json",
|
||||
"https://monitoreo.datos.gob.ar/media/catalog/generos/data.json",
|
||||
];
|
||||
|
||||
// desactivado porque va MUY lento: datosabiertos.gualeguaychu.gov.ar
|
||||
|
||||
// FYI: al menos los siguientes dominios no tienen la cadena completa de certificados en HTTPS. tenemos que usar un hack (node_extra_ca_certs_mozilla_bundle) para conectarnos a estos sitios. (se puede ver con ssllabs.com) ojalá lxs administradorxs de estos servidores lo arreglen.
|
||||
// www.enargas.gov.ar, transparencia.enargas.gov.ar, www.energia.gob.ar, www.economia.gob.ar, datos.yvera.gob.ar
|
||||
import { targetsPorDefecto, userAgent } from "./config.js";
|
||||
import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
|
||||
import { zData } from "common/schema.js";
|
||||
|
||||
setGlobalDispatcher(
|
||||
new Agent({
|
||||
|
@ -81,26 +27,43 @@ class StatusCodeError extends Error {
|
|||
}
|
||||
}
|
||||
class TooManyRedirectsError extends Error {}
|
||||
let jsonUrls = process.argv.slice(2);
|
||||
if (jsonUrls.length < 1) {
|
||||
jsonUrls = sitiosPorDefecto;
|
||||
let urls = process.argv.slice(2);
|
||||
if (urls.length < 1) {
|
||||
urls = targetsPorDefecto;
|
||||
}
|
||||
for (const url of jsonUrls)
|
||||
downloadFromData(url).catch((error) =>
|
||||
console.error(`${url} FALLÓ CON`, error)
|
||||
/** @typedef {{type: "datajson" | "ckan"; url: string;}} Target */
|
||||
|
||||
/** @type {Target[]} */
|
||||
const targets = urls.map((url) => {
|
||||
if (url.startsWith("datajson+")) {
|
||||
return { type: "datajson", url: url.slice("datajson+".length) };
|
||||
} else if (url.startsWith("ckan+")) {
|
||||
return { type: "ckan", url: url.slice("ckan+".length) };
|
||||
} else return { type: "datajson", url };
|
||||
});
|
||||
for (const target of targets)
|
||||
downloadFromData(target).catch((error) =>
|
||||
console.error(`${target.type}+${target.url} FALLÓ CON`, error)
|
||||
);
|
||||
|
||||
/**
|
||||
* @param {string} jsonUrl
|
||||
* @param {Target} target
|
||||
*/
|
||||
async function downloadFromData(jsonUrl) {
|
||||
const outputPath = generateOutputPath(jsonUrl);
|
||||
const jsonRes = await fetch(jsonUrl);
|
||||
// prettier-ignore
|
||||
const parsed = /** @type {{ dataset: Dataset[] }} */(await jsonRes.json())
|
||||
async function downloadFromData(target) {
|
||||
const outputPath = generateOutputPath(target.url);
|
||||
let json;
|
||||
if (target.type === "ckan") {
|
||||
json = await generateDataJsonFromCkan(target.url);
|
||||
} else if (target.type === "datajson") {
|
||||
const jsonRes = await fetch(target.url);
|
||||
json = await jsonRes.json();
|
||||
}
|
||||
|
||||
const parsed = zData.parse(json);
|
||||
|
||||
await mkdir(outputPath, { recursive: true });
|
||||
await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed));
|
||||
await writeFile(join(outputPath, "url.txt"), jsonUrl);
|
||||
await writeFile(join(outputPath, "data.json"), JSON.stringify(json));
|
||||
await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
|
||||
const errorFile = (
|
||||
await open(join(outputPath, "errors.jsonl"), "w")
|
||||
).createWriteStream();
|
||||
|
@ -108,17 +71,23 @@ async function downloadFromData(jsonUrl) {
|
|||
/** @type {DownloadJob[]} */
|
||||
const jobs = parsed.dataset.flatMap((dataset) =>
|
||||
dataset.distribution
|
||||
.filter((dist) => {
|
||||
try {
|
||||
patchUrl(new URL(dist.downloadURL));
|
||||
return true;
|
||||
} catch (error) {
|
||||
errorFile.write(
|
||||
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
|
||||
);
|
||||
return false;
|
||||
.filter(
|
||||
/** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
|
||||
(dist) => {
|
||||
try {
|
||||
if (!dist.downloadURL) {
|
||||
throw new Error("No downloadURL in distribution");
|
||||
}
|
||||
patchUrl(new URL(dist.downloadURL));
|
||||
return true;
|
||||
} catch (error) {
|
||||
errorFile.write(
|
||||
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
})
|
||||
)
|
||||
.map((dist) => ({
|
||||
dataset,
|
||||
dist,
|
||||
|
@ -181,6 +150,7 @@ export function generateOutputPath(jsonUrlString) {
|
|||
/**
|
||||
* @argument {DownloadJob} job
|
||||
* @argument {number} attempts
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async function downloadDistWithRetries(job, attempts = 0) {
|
||||
const { url } = job;
|
||||
|
@ -222,7 +192,7 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
|
|||
headers: {
|
||||
"User-Agent": spoofUserAgent
|
||||
? "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0"
|
||||
: "transicion-desordenada (https://nulo.ar)",
|
||||
: userAgent,
|
||||
},
|
||||
});
|
||||
if (res.statusCode >= 300 && res.statusCode <= 399)
|
||||
|
@ -247,22 +217,13 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
|
|||
}
|
||||
|
||||
/** @typedef DownloadJob
|
||||
* @prop {Dataset} dataset
|
||||
* @prop {Distribution} dist
|
||||
* @prop {import("common/schema.js").Dataset} dataset
|
||||
* @prop {import("common/schema.js").Distribution} dist
|
||||
* @prop {URL} url
|
||||
* @prop {string} outputPath
|
||||
* @prop {number} attempts
|
||||
* @prop {Date=} waitUntil
|
||||
*/
|
||||
/** @typedef Dataset
|
||||
* @prop {string} identifier
|
||||
* @prop {Distribution[]} distribution
|
||||
*/
|
||||
/** @typedef Distribution
|
||||
* @prop {string} identifier
|
||||
* @prop {string} fileName
|
||||
* @prop {string} downloadURL
|
||||
*/
|
||||
|
||||
// https://security.stackexchange.com/a/123723
|
||||
/**
|
||||
|
@ -298,7 +259,7 @@ function wait(ms) {
|
|||
}
|
||||
|
||||
/**
|
||||
* @param {{ dataset: Dataset, dist: Distribution, url?: URL }} job
|
||||
* @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job
|
||||
* @param {any} error
|
||||
*/
|
||||
function encodeError(job, error) {
|
||||
|
|
|
@ -22,12 +22,15 @@ async function generateMetadata(dumpDir) {
|
|||
.map(async (file) => {
|
||||
const path = join(file.path, file.name);
|
||||
const data = await loadDataJson(path);
|
||||
const url = await readFile(join(path, "url.txt"), "utf-8");
|
||||
let url = await readFile(join(path, "url.txt"), "utf-8");
|
||||
if (url.startsWith("datajson+") || url.startsWith("ckan+"))
|
||||
url = url.slice(url.indexOf("+") + 1);
|
||||
return {
|
||||
title: data.title,
|
||||
description: data.description,
|
||||
url,
|
||||
path: file.name,
|
||||
nDatasets: data.dataset.length,
|
||||
};
|
||||
})
|
||||
);
|
||||
|
|
|
@ -11,9 +11,10 @@
|
|||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"common": "workspace:",
|
||||
"p-limit": "^5.0.0",
|
||||
"undici": "^5.28.0",
|
||||
"common": "workspace:"
|
||||
"zod": "^3.22.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tsconfig/node20": "^20.1.2",
|
||||
|
|
|
@ -14,7 +14,7 @@ pnpm install
|
|||
|
||||
```
|
||||
# descargar portal datos.gob.ar
|
||||
pnpm run run https://datos.gob.ar/data.json
|
||||
pnpm run run datajson+https://datos.gob.ar/data.json
|
||||
# guarda en data/datos.gob.ar_data.json
|
||||
|
||||
# descargar todos los portales conocidos
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"checkJs": true,
|
||||
"noEmit": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"moduleResolution": "node16"
|
||||
},
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
"vite": "^5.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"compression-streams-polyfill": "^0.1.6",
|
||||
"eva-icons": "^1.1.3",
|
||||
"navaid": "^1.2.0",
|
||||
"regexparam": "^3.0.0",
|
||||
|
|
|
@ -25,7 +25,15 @@ async function fetchGzipped(url: string): Promise<Response> {
|
|||
res = await fetch(url.slice(0, url.length - ".gz".length));
|
||||
return res;
|
||||
}
|
||||
const ds = new DecompressionStream("gzip");
|
||||
let DecStream;
|
||||
if ("DecompressionStream" in window) DecStream = window.DecompressionStream;
|
||||
else {
|
||||
const { makeDecompressionStream } = await import(
|
||||
"compression-streams-polyfill/ponyfill"
|
||||
);
|
||||
DecStream = makeDecompressionStream(TransformStream);
|
||||
}
|
||||
const ds = new DecStream("gzip");
|
||||
const decompressedStream = res.body!.pipeThrough(ds);
|
||||
const resD = new Response(decompressedStream);
|
||||
return resD;
|
||||
|
|
|
@ -11,9 +11,9 @@
|
|||
export let params: { dumpUrl: string; portal: string; id: string };
|
||||
$: url = decodeURIComponent(params.dumpUrl) + "/" + params.portal;
|
||||
|
||||
$: data = Promise.all([fetchData(url), fetchErrors(url)]).then(
|
||||
([data, errors]) => ({ data, errors }),
|
||||
);
|
||||
$: data = Promise.all([fetchData(url), fetchErrors(url)])
|
||||
.then(([data, errors]) => ({ data, errors }))
|
||||
.catch(alert);
|
||||
</script>
|
||||
|
||||
<main class="mx-auto max-w-3xl">
|
||||
|
@ -28,9 +28,9 @@
|
|||
<NotFound />
|
||||
{:else}
|
||||
<header
|
||||
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700"
|
||||
class="border-b border-b-gray-200 px-6 py-5 dark:border-b-gray-700"
|
||||
>
|
||||
<h1 class="font-bold text-3xl">{dataset.title}</h1>
|
||||
<h1 class="text-3xl font-bold">{dataset.title}</h1>
|
||||
<p class="text-xl">{dataset.description}</p>
|
||||
<!--
|
||||
lo saqué porque aún antes de que venga la motosierra estos links no funcionan...
|
||||
|
@ -54,13 +54,13 @@
|
|||
e.datasetIdentifier === dataset.identifier &&
|
||||
e.distributionIdentifier === dist.identifier,
|
||||
)}
|
||||
<li class="flex px-6 py-5 justify-between items-center">
|
||||
<li class="flex items-center justify-between px-6 py-5">
|
||||
<div>
|
||||
<h3>
|
||||
{dist.title}
|
||||
{#if dist.format}
|
||||
<span
|
||||
class="border border-current text-blue-800 dark:text-blue-400 relative inline-flex items-center text-xs font-semibold px-2 py-1 rounded-full ml-1"
|
||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
<span>{dist.format}</span>
|
||||
</span>
|
||||
|
@ -85,7 +85,7 @@
|
|||
{#if !error}
|
||||
<button
|
||||
type="button"
|
||||
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none"
|
||||
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
|
||||
on:click={() => downloadFile(url, dataset.identifier, dist)}
|
||||
>Descargar</button
|
||||
>
|
||||
|
|
|
@ -17,17 +17,20 @@
|
|||
<p class="p-6">Cargando..</p>
|
||||
{:then metadata}
|
||||
<header
|
||||
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700 leading-none"
|
||||
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
|
||||
>
|
||||
<small>
|
||||
Viendo archivo en
|
||||
<a
|
||||
class="underline text-blue-500 dark:text-blue-300"
|
||||
class="text-blue-500 underline dark:text-blue-300"
|
||||
target="_blank"
|
||||
rel="noopener"
|
||||
href={url}>{url}</a
|
||||
>
|
||||
</small>
|
||||
<h1 class="mt-2 text-3xl font-bold">
|
||||
Portales ({metadata.sites.length})
|
||||
</h1>
|
||||
</header>
|
||||
|
||||
<ul class="divide-y divide-gray-100 dark:divide-gray-700">
|
||||
|
@ -37,15 +40,15 @@
|
|||
portal: site.path,
|
||||
})}
|
||||
<li>
|
||||
<div class="flex px-6 py-5 justify-between gap-3">
|
||||
<div class="flex justify-between gap-3 px-6 py-5">
|
||||
<div class="flex flex-col">
|
||||
<h3 class="text-lg">{site.title}</h3>
|
||||
<p class="text-sm">{site.description}</p>
|
||||
</div>
|
||||
<div class="flex flex-col items-center justify-center shrink-0">
|
||||
<div class="flex shrink-0 flex-col items-center justify-center">
|
||||
<a
|
||||
href={portalLink}
|
||||
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none"
|
||||
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
|
||||
>Ver portal</a
|
||||
>
|
||||
<SourceLink href={site.url} />
|
||||
|
|
|
@ -1,41 +1,41 @@
|
|||
<script lang="ts">
|
||||
import { inject } from "regexparam";
|
||||
import { routes } from "../router";
|
||||
import Container from "../components/Container.svelte";
|
||||
import Portal from "./Home/Portal.svelte";
|
||||
</script>
|
||||
|
||||
<main class="mx-auto prose dark:prose-invert">
|
||||
<main class="prose mx-auto dark:prose-invert">
|
||||
<Container>
|
||||
<div class="py-5 px-6">
|
||||
<h1>Archivo de portales de datos abiertos</h1>
|
||||
<div class="px-6 pt-5">
|
||||
<h1>Archivo de portales de datos abiertos de Argentina</h1>
|
||||
<p>
|
||||
Esta herramienta permite ver datos en archivos de portales de datos
|
||||
abiertos de <a
|
||||
href="https://github.com/catdevnull/transicion-desordenada-diablo/"
|
||||
rel="noopener">transicion-desordenada-diablo</a
|
||||
Explorá respaldos de los datos publicados en 43 portales de datos
|
||||
abiertos.
|
||||
</p>
|
||||
<p>
|
||||
Este proyecto surge en el marco de la solicitada "Por un gobierno de
|
||||
datos abiertos" publicada el 5/12 por el grupo de usuaries de datos de
|
||||
Argentina y firmada por una serie de organizaciones de la sociedad civil
|
||||
y daterxs. <a href="https://bit.ly/CartaDatosAbiertos"
|
||||
>Leer la solicitada</a
|
||||
>
|
||||
(un mejor nombre sería genial), creada en el marco de
|
||||
<a href="https://bit.ly/CartaDatosAbiertos">un pedido hecho</a> al gobierno
|
||||
entrante el 10 de diciembre de 2023 por garantizar el mantenimiento de las
|
||||
políticas de datos públicos en Argentina.
|
||||
</p>
|
||||
|
||||
<div class="not-prose flex place-content-center">
|
||||
<a
|
||||
href={inject(routes.Dump, {
|
||||
dumpUrl: encodeURIComponent(
|
||||
"https://archivos.nulo.ar/dump-2023-12-08/",
|
||||
),
|
||||
})}
|
||||
class="flex items-center justify-center px-4 py-2 text-xl font-medium text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none text-center"
|
||||
>
|
||||
Acceder al archivo creado el 8 de diciembre de 2023
|
||||
</a>
|
||||
</div>
|
||||
<h2 class="my-4">Respaldos</h2>
|
||||
</div>
|
||||
|
||||
<div class="not-prose">
|
||||
<ul
|
||||
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
|
||||
>
|
||||
<Portal />
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="px-6 py-5">
|
||||
<p>
|
||||
Los archivos y las herramientas fueron creados por
|
||||
<a href="https://nulo.ar">Nulo</a> con ayuda de varias personas. El
|
||||
Las herramientas para descargar masivamente los archivos fueron
|
||||
desarrolladas por
|
||||
<a href="https://nulo.ar">Nulo</a> (y con ayuda de varias personas). El
|
||||
código está disponible
|
||||
<a
|
||||
href="https://github.com/catdevnull/transicion-desordenada-diablo/"
|
||||
|
|
33
frontend/src/lib/routes/Home/Portal.svelte
Normal file
33
frontend/src/lib/routes/Home/Portal.svelte
Normal file
|
@ -0,0 +1,33 @@
|
|||
<script lang="ts">
|
||||
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
|
||||
import { inject } from "regexparam";
|
||||
import { routes } from "../../router";
|
||||
</script>
|
||||
|
||||
<li>
|
||||
<a
|
||||
class="shadow-glow flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
|
||||
href={inject(routes.Dump, {
|
||||
dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
|
||||
})}
|
||||
>
|
||||
<div>
|
||||
<h3 class="text-lg">8 de diciembre de 2023</h3>
|
||||
<!-- <span
|
||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
<span>portales</span>
|
||||
</span>
|
||||
<span
|
||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
<span>datasets</span>
|
||||
</span> -->
|
||||
</div>
|
||||
<ArrowForward
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
class="w-6 shrink-0 text-gray-600 dark:text-gray-400 "
|
||||
/>
|
||||
</a>
|
||||
</li>
|
|
@ -11,9 +11,9 @@
|
|||
export let params: { dumpUrl: string; portal: string };
|
||||
$: url = `${decodeURIComponent(params.dumpUrl)}/${params.portal}`;
|
||||
|
||||
$: data = Promise.all([fetchData(url), fetchErrors(url)]).then(
|
||||
([data, errors]) => ({ data, errors }),
|
||||
);
|
||||
$: data = Promise.all([fetchData(url), fetchErrors(url)])
|
||||
.then(([data, errors]) => ({ data, errors }))
|
||||
.catch(alert);
|
||||
|
||||
function arreglarHomepageUrl(url: string): string {
|
||||
if (!url.startsWith("http://") && !url.startsWith("https://"))
|
||||
|
@ -51,29 +51,29 @@
|
|||
<p class="p-6">Cargando..</p>
|
||||
{:then { data, errors }}
|
||||
<header
|
||||
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700 leading-none"
|
||||
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
|
||||
>
|
||||
<small>
|
||||
Viendo portal archivado de
|
||||
<a
|
||||
class="underline text-blue-500 dark:text-blue-300"
|
||||
class="text-blue-500 underline dark:text-blue-300"
|
||||
target="_blank"
|
||||
rel="noopener"
|
||||
href={url}>{url}</a
|
||||
>
|
||||
</small>
|
||||
<h1 class="font-bold text-3xl">{data.title}</h1>
|
||||
<h1 class="text-3xl font-bold">{data.title}</h1>
|
||||
<p class="text-xl">{data.description}</p>
|
||||
{#if data.homepage}
|
||||
<SourceLink href={arreglarHomepageUrl(data.homepage)} />
|
||||
{/if}
|
||||
</header>
|
||||
|
||||
<div class="w-full mx-auto px-6 py-2">
|
||||
<div class="mx-auto w-full px-6 py-2">
|
||||
<input
|
||||
type="text"
|
||||
placeholder="Buscar..."
|
||||
class="flex w-full h-10 px-3 py-2 text-sm bg-white dark:bg-gray-800 border rounded-md border-neutral-300 dark:border-gray-700 ring-offset-background placeholder:text-neutral-500 dark:placeholder:text-gray-500 focus:border-neutral-300 dark:focus:border-gray-700 focus:outline-none focus:ring-2 focus:ring-neutral-400 dark:focus:ring-gray-600 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
class="ring-offset-background flex h-10 w-full rounded-md border border-neutral-300 bg-white px-3 py-2 text-sm placeholder:text-neutral-500 focus:border-neutral-300 focus:outline-none focus:ring-2 focus:ring-neutral-400 disabled:cursor-not-allowed disabled:opacity-50 dark:border-gray-700 dark:bg-gray-800 dark:placeholder:text-gray-500 dark:focus:border-gray-700 dark:focus:ring-gray-600"
|
||||
bind:value={query}
|
||||
/>
|
||||
</div>
|
||||
|
@ -87,7 +87,7 @@
|
|||
})}
|
||||
<li>
|
||||
<a
|
||||
class="flex px-6 py-5 hover:bg-gray-50 dark:hover:bg-gray-700 justify-between"
|
||||
class="flex justify-between px-6 py-5 hover:bg-gray-50 dark:hover:bg-gray-700"
|
||||
href={datasetLink}
|
||||
>
|
||||
<div>
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import './app.css'
|
||||
import App from './App.svelte'
|
||||
import "./app.css";
|
||||
import App from "./App.svelte";
|
||||
|
||||
const app = new App({
|
||||
target: document.getElementById('app'),
|
||||
})
|
||||
target: document.getElementById("app")!,
|
||||
});
|
||||
|
||||
export default app
|
||||
export default app;
|
||||
|
|
|
@ -2,7 +2,11 @@
|
|||
export default {
|
||||
content: ["./index.html", "./src/**/*.svelte"],
|
||||
theme: {
|
||||
extend: {},
|
||||
extend: {
|
||||
boxShadow: {
|
||||
glow: "0 0px 35px rgb(0 0 0 / .2)",
|
||||
},
|
||||
},
|
||||
},
|
||||
plugins: [require("@tailwindcss/typography")],
|
||||
};
|
||||
|
|
|
@ -13,7 +13,8 @@
|
|||
*/
|
||||
"allowJs": true,
|
||||
"checkJs": true,
|
||||
"isolatedModules": true
|
||||
"isolatedModules": true,
|
||||
"noEmit": true
|
||||
},
|
||||
"include": [
|
||||
"src/**/*.ts",
|
||||
|
|
|
@ -23,6 +23,9 @@ importers:
|
|||
undici:
|
||||
specifier: ^5.28.0
|
||||
version: 5.28.2
|
||||
zod:
|
||||
specifier: ^3.22.4
|
||||
version: 3.22.4
|
||||
devDependencies:
|
||||
'@tsconfig/node20':
|
||||
specifier: ^20.1.2
|
||||
|
@ -33,6 +36,9 @@ importers:
|
|||
|
||||
frontend:
|
||||
dependencies:
|
||||
compression-streams-polyfill:
|
||||
specifier: ^0.1.6
|
||||
version: 0.1.6
|
||||
eva-icons:
|
||||
specifier: ^1.1.3
|
||||
version: 1.1.3
|
||||
|
@ -703,6 +709,12 @@ packages:
|
|||
engines: {node: '>= 10'}
|
||||
dev: true
|
||||
|
||||
/compression-streams-polyfill@0.1.6:
|
||||
resolution: {integrity: sha512-vYXHeCzZPfKqz/ppInffY2HkevGbV6zm5wlSgtrW0+6neApbA5qZdG48KaDEhRmbIFhLgmVUlUc+szH9NrcaBA==}
|
||||
dependencies:
|
||||
fflate: 0.8.1
|
||||
dev: false
|
||||
|
||||
/concat-map@0.0.1:
|
||||
resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
|
||||
dev: true
|
||||
|
@ -888,6 +900,10 @@ packages:
|
|||
reusify: 1.0.4
|
||||
dev: true
|
||||
|
||||
/fflate@0.8.1:
|
||||
resolution: {integrity: sha512-/exOvEuc+/iaUm105QIiOt4LpBdMTWsXxqR0HDF35vx3fmaKzw7354gTilCh5rkzEt8WYyG//ku3h3nRmd7CHQ==}
|
||||
dev: false
|
||||
|
||||
/fill-range@7.0.1:
|
||||
resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==}
|
||||
engines: {node: '>=8'}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Transicion Desordeanada (diablo)
|
||||
# Archivador de Datos Abiertos
|
||||
|
||||
Herramientas para descargar masivamente portales de datos abiertos y generar un archivo, que luego se puede ver en una página web.
|
||||
|
||||
|
|
Loading…
Reference in a new issue