Compare commits

...

15 commits

Author SHA1 Message Date
5aa96adc8d downloader: corregir readme con nuevo formato url 2023-12-09 17:47:40 -03:00
Nulo
6e89cc0c49
Update readme.md 2023-12-09 17:46:53 -03:00
d9ce50bc9c agregar Cámara de Diputados de la Provincia de Buenos Aires 2023-12-09 17:43:13 -03:00
b060fc49ec agregar portal arsat 2023-12-09 17:39:03 -03:00
d8b2b29709 downloader: usar el schema global 2023-12-09 17:10:21 -03:00
37756fbf3c arreglar sistema de varios tipos de target 2023-12-09 17:10:10 -03:00
bbfec73b48 arreglar schema ckan api 2023-12-09 17:09:40 -03:00
de45e30de9 arreglar typescript 2023-12-09 17:02:47 -03:00
81de080d22 descargar de ckan directo inventando un data.json
por si no soporta data.json

y guardar cantidad de datasets en metadata
2023-12-09 16:53:49 -03:00
11a64468e5 mejorar home
gracias data
2023-12-09 16:51:38 -03:00
e9de6d00d4 frontend: mandar alert en error 2023-12-09 14:09:50 -03:00
6c65aa2ead frontend: decompressionstream polyfill 2023-12-09 14:05:44 -03:00
840496a153 borrar link duplicado a github 2023-12-09 13:45:11 -03:00
b87738ea13 downloader: mover containerfile a lugar mas comodo 2023-12-09 13:12:57 -03:00
99ab91c552 downloader: mejorar types 2023-12-09 13:12:43 -03:00
23 changed files with 416 additions and 160 deletions

View file

@ -6,6 +6,6 @@ prueba
datos.gob.ar* datos.gob.ar*
data/ data/
data* data*
downloader/data downloader/data*
*.zip *.zip

View file

@ -28,6 +28,7 @@ export const zData = z.object({
homepage: z.string().optional(), homepage: z.string().optional(),
dataset: z.array(zDataset), dataset: z.array(zDataset),
}); });
/** @typedef {z.infer<typeof zData>} Data */
export const zError = z.object({ export const zError = z.object({
url: z.string().optional(), url: z.string().optional(),

View file

@ -3,12 +3,16 @@
"lib": ["es2023"], "lib": ["es2023"],
"module": "ES2020", "module": "ES2020",
"target": "es2022", "target": "es2022",
"moduleResolution": "Bundler",
"strict": true, "strict": true,
"esModuleInterop": true, "esModuleInterop": true,
"skipLibCheck": true, "skipLibCheck": true,
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"emitDeclarationOnly": true, "declaration": true,
"declaration": true "noEmit": true,
} "allowJs": true,
"checkJs": true
},
"include": ["*.js"]
} }

View file

@ -3,7 +3,7 @@ FROM docker.io/alpine:3.18 as build
RUN apk add --no-cache npm \ RUN apk add --no-cache npm \
&& npm install -g esbuild pnpm && npm install -g esbuild pnpm
COPY .. /tmp/build/ COPY . /tmp/build/
WORKDIR /tmp/build/downloader WORKDIR /tmp/build/downloader
RUN pnpm install \ RUN pnpm install \
&& esbuild --bundle --format=cjs --platform=node --outfile=download_json.build.js --sourcemap=inline download_json.js \ && esbuild --bundle --format=cjs --platform=node --outfile=download_json.build.js --sourcemap=inline download_json.js \

View file

@ -0,0 +1,152 @@
import { request } from "undici";
import z from "zod";
import { userAgent } from "./config.js";
import { basename } from "path";
/**
 * Expected response envelope for the CKAN `package_list` action: `success`
 * must be literally `true` and `result` must be an array of strings
 * (used by `getCkanPackageList` as the list of dataset names).
 */
const zCkanPackageList = z.object({
success: z.literal(true),
result: z.array(z.string()),
});
/**
 * Performs a GET request against `url`, identifying itself with the
 * project's User-Agent, and returns the response body decoded as JSON.
 *
 * A non-2xx answer is reported as an error that names the URL and status
 * code, instead of letting an HTML error page reach the JSON parser and
 * surface as an opaque syntax error.
 *
 * @param {string} url
 * @returns the decoded JSON payload (not validated; callers run it through a schema)
 * @throws {Error} when the status code is outside 200-299 or the body is not valid JSON
 */
async function getJson(url) {
  const res = await request(url, {
    headers: {
      "User-Agent": userAgent,
    },
  });
  if (res.statusCode < 200 || res.statusCode > 299) {
    // The body is read even on failure: it has to be consumed anyway, and a
    // short excerpt makes the error actionable.
    const body = await res.body.text();
    throw new Error(
      `GET ${url} respondió con status ${res.statusCode}: ${body.slice(0, 200)}`
    );
  }
  return await res.body.json();
}
/**
 * Downloads the list of dataset names published by a CKAN portal.
 * @param {string} ckanUrl base URL; `/api/3/action/package_list` is appended to it as-is
 * @returns {Promise<string[]>}
 */
async function getCkanPackageList(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/package_list`;
  const payload = await getJson(endpoint);
  const { result } = zCkanPackageList.parse(payload);
  return result;
}
/**
 * Schema for the `organization` object embedded in a CKAN package
 * (see `zCkanPackage`). All four fields are required strings.
 */
const zCkanOrganization = z.object({
name: z.string(),
title: z.string(),
id: z.string(),
created: z.string(),
});
/**
 * Schema for each entry of a CKAN package's `resources` array.
 * `generateDataJsonFromCkan` turns every resource into one data.json
 * distribution, using `url` both as the download URL and to derive a file name.
 */
const zCkanResource = z.object({
id: z.string(),
name: z.string(),
description: z.string(),
format: z.string(),
url: z.string(),
});
const zCkanTag = z.object({
id: z.string(),
display_name: z.string(),
name: z.string(),
});
const zCkanGroup = z.object({
id: z.string(),
display_name: z.string(),
name: z.string(),
description: z.string(),
});
/**
 * Schema for a single CKAN package (dataset) as returned inside the
 * `package_show` response. Every field except `license_url` is required.
 *
 * NOTE(review): all metadata fields are declared as plain strings; if a
 * portal returns `null` for any of them (e.g. maintainer/author), parsing
 * fails for that package — confirm against real portal responses.
 */
const zCkanPackage = z.object({
license_title: z.string(),
license_id: z.string(),
license_url: z.string().optional(),
maintainer: z.string(),
maintainer_email: z.string(),
id: z.string(),
name: z.string(),
title: z.string(),
metadata_created: z.string(),
metadata_modified: z.string(),
author: z.string(),
author_email: z.string(),
resources: z.array(zCkanResource),
tags: z.array(zCkanTag),
groups: z.array(zCkanGroup),
organization: zCkanOrganization,
url: z.string(),
notes: z.string(),
});
/**
 * Expected response envelope for the CKAN `package_show` action: `success`
 * must be literally `true` and `result` must satisfy `zCkanPackage`.
 */
const zCkanPackageShow = z.object({
success: z.literal(true),
result: zCkanPackage,
});
/**
 * Fetches a single package (dataset) from a CKAN portal through the
 * `package_show` action and validates the answer against `zCkanPackageShow`.
 * @param {string} ckanUrl base URL; the API path is appended to it as-is
 * @param {string} packageName sent, URI-encoded, as the `id` query parameter
 */
async function getCkanPackage(ckanUrl, packageName) {
  const id = encodeURIComponent(packageName);
  const endpoint = `${ckanUrl}/api/3/action/package_show?id=${id}`;
  const payload = await getJson(endpoint);
  const { result } = zCkanPackageShow.parse(payload);
  return result;
}
/**
 * Expected response envelope for the CKAN `status_show` action.
 *
 * The field descriptions are attached to the fields they actually describe:
 * the "portal title" description used to sit on `site_url`.
 * `generateDataJsonFromCkan` falls back to the portal URL when `site_title`
 * or `site_url` come back empty.
 */
const zCkanStatusShow = z.object({
  success: z.literal(true),
  result: z.object({
    site_url: z.string().describe("URL del portal. A veces vacía."),
    site_description: z
      .string()
      .describe("Descripción del portal. A veces vacio."),
    site_title: z.string().describe("Titulo del portal. A veces vacio."),
    error_emails_to: z.string().nullable(),
  }),
});
/**
 * Retrieves general information about the portal through the CKAN
 * `status_show` action, validated against `zCkanStatusShow`.
 * @param {string} ckanUrl base URL; the API path is appended to it as-is
 */
async function getCkanInfo(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/status_show`;
  const payload = await getJson(endpoint);
  const { result } = zCkanStatusShow.parse(payload);
  return result;
}
/**
 * Maps `items` through the async `fn` while keeping at most `limit` calls in
 * flight. Results keep the order of `items`. After the first failure no new
 * calls are started and the returned promise rejects with that error.
 *
 * @template T, R
 * @param {T[]} items
 * @param {number} limit values that are not a positive number are treated as 1
 * @param {(item: T) => Promise<R>} fn
 * @returns {Promise<R[]>}
 */
async function mapWithConcurrency(items, limit, fn) {
  /** @type {R[]} */
  const results = new Array(items.length);
  const poolSize = Math.min(
    Math.max(1, Math.floor(limit) || 1),
    items.length
  );
  let nextIndex = 0;
  let failed = false;
  const worker = async () => {
    while (!failed && nextIndex < items.length) {
      // Claiming the index is synchronous, so two workers never share an item
      const index = nextIndex++;
      try {
        results[index] = await fn(items[index]);
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  };
  await Promise.all(Array.from({ length: poolSize }, () => worker()));
  return results;
}

/**
 * Generates a data.json from a CKAN portal that may not publish an official
 * data.json.
 *
 * The package list and the portal status are requested in parallel; then
 * every package is fetched with a bounded number of simultaneous requests,
 * so a portal with many datasets is not hit with one request per dataset all
 * at once.
 *
 * @param {string} ckanUrl base URL of the CKAN portal
 * @param {number} [concurrency] maximum simultaneous `package_show` requests (default 10)
 */
export async function generateDataJsonFromCkan(ckanUrl, concurrency = 10) {
  const [list, info] = await Promise.all([
    getCkanPackageList(ckanUrl),
    getCkanInfo(ckanUrl),
  ]);
  const packages = await mapWithConcurrency(list, concurrency, (n) =>
    getCkanPackage(ckanUrl, n)
  );
  /** @type {import("common/schema.js").Data & { generatedBy: string }} */
  const data = {
    generatedBy:
      "archivador de datos abiertos (ckan_to_datajson) <https://github.com/catdevnull/transicion-desordenada-diablo>",
    // Portals sometimes leave these fields empty; fall back to the URL we were given
    title: info.site_title || ckanUrl,
    description: info.site_description || "",
    homepage: info.site_url || ckanUrl,
    dataset: packages.map((p) => ({
      title: p.title,
      description: p.notes,
      identifier: p.id,
      publisher: {
        name: p.maintainer,
        mbox: p.maintainer_email,
      },
      landingPage: p.url,
      distribution: p.resources.map((r) => ({
        identifier: r.id,
        title: r.name,
        description: r.description,
        fileName: basename(r.url),
        format: r.format,
        downloadURL: r.url,
      })),
    })),
  };
  return data;
}

66
downloader/config.js Normal file
View file

@ -0,0 +1,66 @@
/**
 * Portals downloaded when no target is passed on the command line.
 *
 * Each entry is `<type>+<url>`: `datajson+` points directly at a data.json
 * catalog, `ckan+` points at the base URL of a CKAN portal whose catalog is
 * rebuilt through the CKAN API.
 *
 * Every target appears exactly once: a repeated entry would be downloaded
 * twice into the same output directory.
 */
export const targetsPorDefecto = [
  "datajson+https://datos.gob.ar/data.json",
  "datajson+http://datos.energia.gob.ar/data.json",
  "datajson+https://datos.magyp.gob.ar/data.json",
  "datajson+https://datos.acumar.gov.ar/data.json",
  "datajson+https://datasets.datos.mincyt.gob.ar/data.json",
  "datajson+https://datos.arsat.com.ar/data.json",
  "datajson+https://datos.cultura.gob.ar/data.json",
  "datajson+https://datos.mininterior.gob.ar/data.json",
  "datajson+https://datos.produccion.gob.ar/data.json",
  "datajson+https://datos.salud.gob.ar/data.json",
  "datajson+https://datos.transporte.gob.ar/data.json",
  "datajson+https://ckan.ciudaddemendoza.gov.ar/data.json",
  "datajson+https://datos.santafe.gob.ar/data.json",
  "datajson+https://datosabiertos.chaco.gob.ar/data.json",
  "datajson+https://datosabiertos.mercedes.gob.ar/data.json",
  "datajson+http://luj-bue-datos.paisdigital.innovacion.gob.ar/data.json",
  "datajson+https://datosabiertos.desarrollosocial.gob.ar/data.json",
  "datajson+http://datos.mindef.gov.ar/data.json",
  "datajson+http://datos.legislatura.gob.ar/data.json",
  "datajson+https://portal.hcdiputados-ba.gov.ar/data.json", // Chamber of Deputies of Buenos Aires Province
  "datajson+https://monitoreo.datos.gob.ar/catalog/jgm/data.json",
  // "datajson+https://datosabiertos.enacom.gob.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/otros/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/aaip/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/sedronar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/modernizacion/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/shn/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/smn/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/ign/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/justicia/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/seguridad/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/ambiente/data.json",
  // "datajson+http://andino.siu.edu.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
  "datajson+https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
  "datajson+https://transparencia.enargas.gob.ar/data.json",
  "datajson+https://infra.datos.gob.ar/catalog/sspm/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/siep/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
  "datajson+http://datos.pami.org.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
  "datajson+https://datos.yvera.gob.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/dine/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/obras/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/generos/data.json",
  "ckan+http://datos.jus.gob.ar", // national justice
  "ckan+https://datos.csjn.gov.ar", // national supreme court of justice
  "ckan+https://datos.hcdn.gob.ar", // national chamber of deputies
  "ckan+https://data.buenosaires.gob.ar", // CABA
  "ckan+https://datos.tsjbaires.gov.ar", // CABA superior court of justice
];
// desactivado porque va MUY lento: datosabiertos.gualeguaychu.gov.ar
// FYI: al menos los siguientes dominios no tienen la cadena completa de certificados en HTTPS. tenemos que usar un hack (node_extra_ca_certs_mozilla_bundle) para conectarnos a estos sitios. (se puede ver con ssllabs.com) ojalá lxs administradorxs de estos servidores lo arreglen.
// www.enargas.gov.ar, transparencia.enargas.gov.ar, www.energia.gob.ar, www.economia.gob.ar, datos.yvera.gob.ar
export const userAgent = "transicion-desordenada (https://nulo.ar)";

View file

@ -1,64 +1,10 @@
// @ts-check
import { mkdir, open, writeFile } from "node:fs/promises"; import { mkdir, open, writeFile } from "node:fs/promises";
import { Agent, fetch, request, setGlobalDispatcher } from "undici"; import { Agent, fetch, request, setGlobalDispatcher } from "undici";
import { join, normalize } from "node:path"; import { join, normalize } from "node:path";
import pLimit from "p-limit"; import pLimit from "p-limit";
import { targetsPorDefecto, userAgent } from "./config.js";
export const sitiosPorDefecto = [ import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
"https://datos.gob.ar/data.json", import { zData } from "common/schema.js";
"http://datos.energia.gob.ar/data.json",
"https://datos.magyp.gob.ar/data.json",
"https://datos.acumar.gov.ar/data.json",
"https://datasets.datos.mincyt.gob.ar/data.json",
"https://datos.arsat.com.ar/data.json",
"https://datos.cultura.gob.ar/data.json",
"https://datos.mininterior.gob.ar/data.json",
"https://datos.produccion.gob.ar/data.json",
"https://datos.salud.gob.ar/data.json",
"https://datos.transporte.gob.ar/data.json",
"https://ckan.ciudaddemendoza.gov.ar/data.json",
"https://datos.santafe.gob.ar/data.json",
"https://datosabiertos.chaco.gob.ar/data.json",
"https://datosabiertos.mercedes.gob.ar/data.json",
"http://luj-bue-datos.paisdigital.innovacion.gob.ar/data.json",
"https://datosabiertos.desarrollosocial.gob.ar/data.json",
"http://datos.mindef.gov.ar/data.json",
"https://monitoreo.datos.gob.ar/catalog/jgm/data.json",
// 'https://datosabiertos.enacom.gob.ar/data.json',
"https://monitoreo.datos.gob.ar/catalog/otros/data.json",
"https://monitoreo.datos.gob.ar/catalog/aaip/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/sedronar/data.json",
"https://monitoreo.datos.gob.ar/catalog/modernizacion/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/shn/data.json",
"https://monitoreo.datos.gob.ar/catalog/smn/data.json",
"https://monitoreo.datos.gob.ar/catalog/ign/data.json",
"https://monitoreo.datos.gob.ar/catalog/justicia/data.json",
"https://monitoreo.datos.gob.ar/catalog/seguridad/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/ambiente/data.json",
// "http://andino.siu.edu.ar/data.json",
"https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
"https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
"https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
"https://transparencia.enargas.gob.ar/data.json",
"https://infra.datos.gob.ar/catalog/sspm/data.json",
"https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
"https://monitoreo.datos.gob.ar/catalog/siep/data.json",
"https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
"http://datos.pami.org.ar/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
"https://datos.yvera.gob.ar/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/dine/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/obras/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/generos/data.json",
];
// desactivado porque va MUY lento: datosabiertos.gualeguaychu.gov.ar
// FYI: al menos los siguientes dominios no tienen la cadena completa de certificados en HTTPS. tenemos que usar un hack (node_extra_ca_certs_mozilla_bundle) para conectarnos a estos sitios. (se puede ver con ssllabs.com) ojalá lxs administradorxs de estos servidores lo arreglen.
// www.enargas.gov.ar, transparencia.enargas.gov.ar, www.energia.gob.ar, www.economia.gob.ar, datos.yvera.gob.ar
setGlobalDispatcher( setGlobalDispatcher(
new Agent({ new Agent({
@ -81,26 +27,43 @@ class StatusCodeError extends Error {
} }
} }
class TooManyRedirectsError extends Error {} class TooManyRedirectsError extends Error {}
let jsonUrls = process.argv.slice(2); let urls = process.argv.slice(2);
if (jsonUrls.length < 1) { if (urls.length < 1) {
jsonUrls = sitiosPorDefecto; urls = targetsPorDefecto;
} }
for (const url of jsonUrls) /** @typedef {{type: "datajson" | "ckan"; url: string;}} Target */
downloadFromData(url).catch((error) =>
console.error(`${url} FALLÓ CON`, error) /** @type {Target[]} */
const targets = urls.map((url) => {
if (url.startsWith("datajson+")) {
return { type: "datajson", url: url.slice("datajson+".length) };
} else if (url.startsWith("ckan+")) {
return { type: "ckan", url: url.slice("ckan+".length) };
} else return { type: "datajson", url };
});
for (const target of targets)
downloadFromData(target).catch((error) =>
console.error(`${target.type}+${target.url} FALLÓ CON`, error)
); );
/** /**
* @param {string} jsonUrl * @param {Target} target
*/ */
async function downloadFromData(jsonUrl) { async function downloadFromData(target) {
const outputPath = generateOutputPath(jsonUrl); const outputPath = generateOutputPath(target.url);
const jsonRes = await fetch(jsonUrl); let json;
// prettier-ignore if (target.type === "ckan") {
const parsed = /** @type {{ dataset: Dataset[] }} */(await jsonRes.json()) json = await generateDataJsonFromCkan(target.url);
} else if (target.type === "datajson") {
const jsonRes = await fetch(target.url);
json = await jsonRes.json();
}
const parsed = zData.parse(json);
await mkdir(outputPath, { recursive: true }); await mkdir(outputPath, { recursive: true });
await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed)); await writeFile(join(outputPath, "data.json"), JSON.stringify(json));
await writeFile(join(outputPath, "url.txt"), jsonUrl); await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
const errorFile = ( const errorFile = (
await open(join(outputPath, "errors.jsonl"), "w") await open(join(outputPath, "errors.jsonl"), "w")
).createWriteStream(); ).createWriteStream();
@ -108,8 +71,13 @@ async function downloadFromData(jsonUrl) {
/** @type {DownloadJob[]} */ /** @type {DownloadJob[]} */
const jobs = parsed.dataset.flatMap((dataset) => const jobs = parsed.dataset.flatMap((dataset) =>
dataset.distribution dataset.distribution
.filter((dist) => { .filter(
/** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
(dist) => {
try { try {
if (!dist.downloadURL) {
throw new Error("No downloadURL in distribution");
}
patchUrl(new URL(dist.downloadURL)); patchUrl(new URL(dist.downloadURL));
return true; return true;
} catch (error) { } catch (error) {
@ -118,7 +86,8 @@ async function downloadFromData(jsonUrl) {
); );
return false; return false;
} }
}) }
)
.map((dist) => ({ .map((dist) => ({
dataset, dataset,
dist, dist,
@ -181,6 +150,7 @@ export function generateOutputPath(jsonUrlString) {
/** /**
* @argument {DownloadJob} job * @argument {DownloadJob} job
* @argument {number} attempts * @argument {number} attempts
* @returns {Promise<void>}
*/ */
async function downloadDistWithRetries(job, attempts = 0) { async function downloadDistWithRetries(job, attempts = 0) {
const { url } = job; const { url } = job;
@ -222,7 +192,7 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
headers: { headers: {
"User-Agent": spoofUserAgent "User-Agent": spoofUserAgent
? "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0" ? "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0"
: "transicion-desordenada (https://nulo.ar)", : userAgent,
}, },
}); });
if (res.statusCode >= 300 && res.statusCode <= 399) if (res.statusCode >= 300 && res.statusCode <= 399)
@ -247,22 +217,13 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
} }
/** @typedef DownloadJob /** @typedef DownloadJob
* @prop {Dataset} dataset * @prop {import("common/schema.js").Dataset} dataset
* @prop {Distribution} dist * @prop {import("common/schema.js").Distribution} dist
* @prop {URL} url * @prop {URL} url
* @prop {string} outputPath * @prop {string} outputPath
* @prop {number} attempts * @prop {number} attempts
* @prop {Date=} waitUntil * @prop {Date=} waitUntil
*/ */
/** @typedef Dataset
* @prop {string} identifier
* @prop {Distribution[]} distribution
*/
/** @typedef Distribution
* @prop {string} identifier
* @prop {string} fileName
* @prop {string} downloadURL
*/
// https://security.stackexchange.com/a/123723 // https://security.stackexchange.com/a/123723
/** /**
@ -298,7 +259,7 @@ function wait(ms) {
} }
/** /**
* @param {{ dataset: Dataset, dist: Distribution, url?: URL }} job * @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job
* @param {any} error * @param {any} error
*/ */
function encodeError(job, error) { function encodeError(job, error) {

View file

@ -22,12 +22,15 @@ async function generateMetadata(dumpDir) {
.map(async (file) => { .map(async (file) => {
const path = join(file.path, file.name); const path = join(file.path, file.name);
const data = await loadDataJson(path); const data = await loadDataJson(path);
const url = await readFile(join(path, "url.txt"), "utf-8"); let url = await readFile(join(path, "url.txt"), "utf-8");
if (url.startsWith("datajson+") || url.startsWith("ckan+"))
url = url.slice(url.indexOf("+") + 1);
return { return {
title: data.title, title: data.title,
description: data.description, description: data.description,
url, url,
path: file.name, path: file.name,
nDatasets: data.dataset.length,
}; };
}) })
); );

View file

@ -11,9 +11,10 @@
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"common": "workspace:",
"p-limit": "^5.0.0", "p-limit": "^5.0.0",
"undici": "^5.28.0", "undici": "^5.28.0",
"common": "workspace:" "zod": "^3.22.4"
}, },
"devDependencies": { "devDependencies": {
"@tsconfig/node20": "^20.1.2", "@tsconfig/node20": "^20.1.2",

View file

@ -14,7 +14,7 @@ pnpm install
``` ```
# descargar portal datos.gob.ar # descargar portal datos.gob.ar
pnpm run run https://datos.gob.ar/data.json pnpm run run datajson+https://datos.gob.ar/data.json
# guarda en data/datos.gob.ar_data.json # guarda en data/datos.gob.ar_data.json
# descargar todos los portales conocidos # descargar todos los portales conocidos

View file

@ -11,6 +11,8 @@
"strict": true, "strict": true,
"esModuleInterop": true, "esModuleInterop": true,
"skipLibCheck": true, "skipLibCheck": true,
"checkJs": true,
"noEmit": true,
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"moduleResolution": "node16" "moduleResolution": "node16"
}, },

View file

@ -28,6 +28,7 @@
"vite": "^5.0.0" "vite": "^5.0.0"
}, },
"dependencies": { "dependencies": {
"compression-streams-polyfill": "^0.1.6",
"eva-icons": "^1.1.3", "eva-icons": "^1.1.3",
"navaid": "^1.2.0", "navaid": "^1.2.0",
"regexparam": "^3.0.0", "regexparam": "^3.0.0",

View file

@ -25,7 +25,15 @@ async function fetchGzipped(url: string): Promise<Response> {
res = await fetch(url.slice(0, url.length - ".gz".length)); res = await fetch(url.slice(0, url.length - ".gz".length));
return res; return res;
} }
const ds = new DecompressionStream("gzip"); let DecStream;
if ("DecompressionStream" in window) DecStream = window.DecompressionStream;
else {
const { makeDecompressionStream } = await import(
"compression-streams-polyfill/ponyfill"
);
DecStream = makeDecompressionStream(TransformStream);
}
const ds = new DecStream("gzip");
const decompressedStream = res.body!.pipeThrough(ds); const decompressedStream = res.body!.pipeThrough(ds);
const resD = new Response(decompressedStream); const resD = new Response(decompressedStream);
return resD; return resD;

View file

@ -11,9 +11,9 @@
export let params: { dumpUrl: string; portal: string; id: string }; export let params: { dumpUrl: string; portal: string; id: string };
$: url = decodeURIComponent(params.dumpUrl) + "/" + params.portal; $: url = decodeURIComponent(params.dumpUrl) + "/" + params.portal;
$: data = Promise.all([fetchData(url), fetchErrors(url)]).then( $: data = Promise.all([fetchData(url), fetchErrors(url)])
([data, errors]) => ({ data, errors }), .then(([data, errors]) => ({ data, errors }))
); .catch(alert);
</script> </script>
<main class="mx-auto max-w-3xl"> <main class="mx-auto max-w-3xl">
@ -28,9 +28,9 @@
<NotFound /> <NotFound />
{:else} {:else}
<header <header
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700" class="border-b border-b-gray-200 px-6 py-5 dark:border-b-gray-700"
> >
<h1 class="font-bold text-3xl">{dataset.title}</h1> <h1 class="text-3xl font-bold">{dataset.title}</h1>
<p class="text-xl">{dataset.description}</p> <p class="text-xl">{dataset.description}</p>
<!-- <!--
lo saqué porque aún antes de que venga la motosierra estos links no funcionan... lo saqué porque aún antes de que venga la motosierra estos links no funcionan...
@ -54,13 +54,13 @@
e.datasetIdentifier === dataset.identifier && e.datasetIdentifier === dataset.identifier &&
e.distributionIdentifier === dist.identifier, e.distributionIdentifier === dist.identifier,
)} )}
<li class="flex px-6 py-5 justify-between items-center"> <li class="flex items-center justify-between px-6 py-5">
<div> <div>
<h3> <h3>
{dist.title} {dist.title}
{#if dist.format} {#if dist.format}
<span <span
class="border border-current text-blue-800 dark:text-blue-400 relative inline-flex items-center text-xs font-semibold px-2 py-1 rounded-full ml-1" class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
> >
<span>{dist.format}</span> <span>{dist.format}</span>
</span> </span>
@ -85,7 +85,7 @@
{#if !error} {#if !error}
<button <button
type="button" type="button"
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none" class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
on:click={() => downloadFile(url, dataset.identifier, dist)} on:click={() => downloadFile(url, dataset.identifier, dist)}
>Descargar</button >Descargar</button
> >

View file

@ -17,17 +17,20 @@
<p class="p-6">Cargando..</p> <p class="p-6">Cargando..</p>
{:then metadata} {:then metadata}
<header <header
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700 leading-none" class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
> >
<small> <small>
Viendo archivo en Viendo archivo en
<a <a
class="underline text-blue-500 dark:text-blue-300" class="text-blue-500 underline dark:text-blue-300"
target="_blank" target="_blank"
rel="noopener" rel="noopener"
href={url}>{url}</a href={url}>{url}</a
> >
</small> </small>
<h1 class="mt-2 text-3xl font-bold">
Portales ({metadata.sites.length})
</h1>
</header> </header>
<ul class="divide-y divide-gray-100 dark:divide-gray-700"> <ul class="divide-y divide-gray-100 dark:divide-gray-700">
@ -37,15 +40,15 @@
portal: site.path, portal: site.path,
})} })}
<li> <li>
<div class="flex px-6 py-5 justify-between gap-3"> <div class="flex justify-between gap-3 px-6 py-5">
<div class="flex flex-col"> <div class="flex flex-col">
<h3 class="text-lg">{site.title}</h3> <h3 class="text-lg">{site.title}</h3>
<p class="text-sm">{site.description}</p> <p class="text-sm">{site.description}</p>
</div> </div>
<div class="flex flex-col items-center justify-center shrink-0"> <div class="flex shrink-0 flex-col items-center justify-center">
<a <a
href={portalLink} href={portalLink}
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none" class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
>Ver portal</a >Ver portal</a
> >
<SourceLink href={site.url} /> <SourceLink href={site.url} />

View file

@ -1,41 +1,41 @@
<script lang="ts"> <script lang="ts">
import { inject } from "regexparam";
import { routes } from "../router";
import Container from "../components/Container.svelte"; import Container from "../components/Container.svelte";
import Portal from "./Home/Portal.svelte";
</script> </script>
<main class="mx-auto prose dark:prose-invert"> <main class="prose mx-auto dark:prose-invert">
<Container> <Container>
<div class="py-5 px-6"> <div class="px-6 pt-5">
<h1>Archivo de portales de datos abiertos</h1> <h1>Archivo de portales de datos abiertos de Argentina</h1>
<p> <p>
Esta herramienta permite ver datos en archivos de portales de datos Explorá respaldos de los datos publicados en 43 portales de datos
abiertos de <a abiertos.
href="https://github.com/catdevnull/transicion-desordenada-diablo/" </p>
rel="noopener">transicion-desordenada-diablo</a <p>
Este proyecto surge en el marco de la solicitada "Por un gobierno de
datos abiertos" publicada el 5/12 por el grupo de usuaries de datos de
Argentina y firmada por una serie de organizaciones de la sociedad civil
y daterxs. <a href="https://bit.ly/CartaDatosAbiertos"
>Leer la solicitada</a
> >
(un mejor nombre sería genial), creada en el marco de
<a href="https://bit.ly/CartaDatosAbiertos">un pedido hecho</a> al gobierno
entrante el 10 de diciembre de 2023 por garantizar el mantenimiento de las
políticas de datos públicos en Argentina.
</p> </p>
<div class="not-prose flex place-content-center"> <h2 class="my-4">Respaldos</h2>
<a
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent(
"https://archivos.nulo.ar/dump-2023-12-08/",
),
})}
class="flex items-center justify-center px-4 py-2 text-xl font-medium text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none text-center"
>
Acceder al archivo creado el 8 de diciembre de 2023
</a>
</div> </div>
<div class="not-prose">
<ul
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
>
<Portal />
</ul>
</div>
<div class="px-6 py-5">
<p> <p>
Los archivos y las herramientas fueron creados por Las herramientas para descargar masivamente los archivos fueron
<a href="https://nulo.ar">Nulo</a> con ayuda de varias personas. El desarrolladas por
<a href="https://nulo.ar">Nulo</a> (y con ayuda de varias personas). El
código está disponible código está disponible
<a <a
href="https://github.com/catdevnull/transicion-desordenada-diablo/" href="https://github.com/catdevnull/transicion-desordenada-diablo/"

View file

@ -0,0 +1,33 @@
<!--
  List item for the home page that links to one archived dump.
  The dump URL and its date label are hard-coded to the 2023-12-08 dump.
-->
<script lang="ts">
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
import { inject } from "regexparam";
import { routes } from "../../router";
</script>
<li>
<!-- The href is the Dump route with the dump URL URI-encoded as its `dumpUrl` param -->
<a
class="shadow-glow flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
})}
>
<div>
<h3 class="text-lg">8 de diciembre de 2023</h3>
<!-- <span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>portales</span>
</span>
<span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>datasets</span>
</span> -->
</div>
<!-- Decorative arrow; hidden from assistive technology via aria-hidden -->
<ArrowForward
fill="currentColor"
aria-hidden="true"
class="w-6 shrink-0 text-gray-600 dark:text-gray-400 "
/>
</a>
</li>

View file

@ -11,9 +11,9 @@
export let params: { dumpUrl: string; portal: string }; export let params: { dumpUrl: string; portal: string };
$: url = `${decodeURIComponent(params.dumpUrl)}/${params.portal}`; $: url = `${decodeURIComponent(params.dumpUrl)}/${params.portal}`;
$: data = Promise.all([fetchData(url), fetchErrors(url)]).then( $: data = Promise.all([fetchData(url), fetchErrors(url)])
([data, errors]) => ({ data, errors }), .then(([data, errors]) => ({ data, errors }))
); .catch(alert);
function arreglarHomepageUrl(url: string): string { function arreglarHomepageUrl(url: string): string {
if (!url.startsWith("http://") && !url.startsWith("https://")) if (!url.startsWith("http://") && !url.startsWith("https://"))
@ -51,29 +51,29 @@
<p class="p-6">Cargando..</p> <p class="p-6">Cargando..</p>
{:then { data, errors }} {:then { data, errors }}
<header <header
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700 leading-none" class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
> >
<small> <small>
Viendo portal archivado de Viendo portal archivado de
<a <a
class="underline text-blue-500 dark:text-blue-300" class="text-blue-500 underline dark:text-blue-300"
target="_blank" target="_blank"
rel="noopener" rel="noopener"
href={url}>{url}</a href={url}>{url}</a
> >
</small> </small>
<h1 class="font-bold text-3xl">{data.title}</h1> <h1 class="text-3xl font-bold">{data.title}</h1>
<p class="text-xl">{data.description}</p> <p class="text-xl">{data.description}</p>
{#if data.homepage} {#if data.homepage}
<SourceLink href={arreglarHomepageUrl(data.homepage)} /> <SourceLink href={arreglarHomepageUrl(data.homepage)} />
{/if} {/if}
</header> </header>
<div class="w-full mx-auto px-6 py-2"> <div class="mx-auto w-full px-6 py-2">
<input <input
type="text" type="text"
placeholder="Buscar..." placeholder="Buscar..."
class="flex w-full h-10 px-3 py-2 text-sm bg-white dark:bg-gray-800 border rounded-md border-neutral-300 dark:border-gray-700 ring-offset-background placeholder:text-neutral-500 dark:placeholder:text-gray-500 focus:border-neutral-300 dark:focus:border-gray-700 focus:outline-none focus:ring-2 focus:ring-neutral-400 dark:focus:ring-gray-600 disabled:cursor-not-allowed disabled:opacity-50" class="ring-offset-background flex h-10 w-full rounded-md border border-neutral-300 bg-white px-3 py-2 text-sm placeholder:text-neutral-500 focus:border-neutral-300 focus:outline-none focus:ring-2 focus:ring-neutral-400 disabled:cursor-not-allowed disabled:opacity-50 dark:border-gray-700 dark:bg-gray-800 dark:placeholder:text-gray-500 dark:focus:border-gray-700 dark:focus:ring-gray-600"
bind:value={query} bind:value={query}
/> />
</div> </div>
@ -87,7 +87,7 @@
})} })}
<li> <li>
<a <a
class="flex px-6 py-5 hover:bg-gray-50 dark:hover:bg-gray-700 justify-between" class="flex justify-between px-6 py-5 hover:bg-gray-50 dark:hover:bg-gray-700"
href={datasetLink} href={datasetLink}
> >
<div> <div>

View file

@ -1,8 +1,8 @@
import './app.css' import "./app.css";
import App from './App.svelte' import App from "./App.svelte";
const app = new App({ const app = new App({
target: document.getElementById('app'), target: document.getElementById("app")!,
}) });
export default app export default app;

View file

@ -2,7 +2,11 @@
export default { export default {
content: ["./index.html", "./src/**/*.svelte"], content: ["./index.html", "./src/**/*.svelte"],
theme: { theme: {
extend: {}, extend: {
boxShadow: {
glow: "0 0px 35px rgb(0 0 0 / .2)",
},
},
}, },
plugins: [require("@tailwindcss/typography")], plugins: [require("@tailwindcss/typography")],
}; };

View file

@ -13,7 +13,8 @@
*/ */
"allowJs": true, "allowJs": true,
"checkJs": true, "checkJs": true,
"isolatedModules": true "isolatedModules": true,
"noEmit": true
}, },
"include": [ "include": [
"src/**/*.ts", "src/**/*.ts",

View file

@ -23,6 +23,9 @@ importers:
undici: undici:
specifier: ^5.28.0 specifier: ^5.28.0
version: 5.28.2 version: 5.28.2
zod:
specifier: ^3.22.4
version: 3.22.4
devDependencies: devDependencies:
'@tsconfig/node20': '@tsconfig/node20':
specifier: ^20.1.2 specifier: ^20.1.2
@ -33,6 +36,9 @@ importers:
frontend: frontend:
dependencies: dependencies:
compression-streams-polyfill:
specifier: ^0.1.6
version: 0.1.6
eva-icons: eva-icons:
specifier: ^1.1.3 specifier: ^1.1.3
version: 1.1.3 version: 1.1.3
@ -703,6 +709,12 @@ packages:
engines: {node: '>= 10'} engines: {node: '>= 10'}
dev: true dev: true
/compression-streams-polyfill@0.1.6:
resolution: {integrity: sha512-vYXHeCzZPfKqz/ppInffY2HkevGbV6zm5wlSgtrW0+6neApbA5qZdG48KaDEhRmbIFhLgmVUlUc+szH9NrcaBA==}
dependencies:
fflate: 0.8.1
dev: false
/concat-map@0.0.1: /concat-map@0.0.1:
resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
dev: true dev: true
@ -888,6 +900,10 @@ packages:
reusify: 1.0.4 reusify: 1.0.4
dev: true dev: true
/fflate@0.8.1:
resolution: {integrity: sha512-/exOvEuc+/iaUm105QIiOt4LpBdMTWsXxqR0HDF35vx3fmaKzw7354gTilCh5rkzEt8WYyG//ku3h3nRmd7CHQ==}
dev: false
/fill-range@7.0.1: /fill-range@7.0.1:
resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==} resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==}
engines: {node: '>=8'} engines: {node: '>=8'}

View file

@ -1,4 +1,4 @@
# Transicion Desordeanada (diablo) # Archivador de Datos Abiertos
Herramientas para descargar masivamente portales de datos abiertos y generar un archivo, que luego se puede ver en una página web. Herramientas para descargar masivamente portales de datos abiertos y generar un archivo, que luego se puede ver en una página web.