Compare commits

...

15 commits

Author SHA1 Message Date
5aa96adc8d downloader: corregir readme con nuevo formato url 2023-12-09 17:47:40 -03:00
Nulo
6e89cc0c49
Update readme.md 2023-12-09 17:46:53 -03:00
d9ce50bc9c agregar Cámara de Diputados de la Provincia de Buenos Aires 2023-12-09 17:43:13 -03:00
b060fc49ec agregar portal arsat 2023-12-09 17:39:03 -03:00
d8b2b29709 downloader: usar el schema global 2023-12-09 17:10:21 -03:00
37756fbf3c arreglar sistema de varios tipos de target 2023-12-09 17:10:10 -03:00
bbfec73b48 arreglar schema ckan api 2023-12-09 17:09:40 -03:00
de45e30de9 arreglar typescript 2023-12-09 17:02:47 -03:00
81de080d22 descargar de ckan directo inventando un data.json
por si no soporta data.json

y guardar cantidad de datasets en metadata
2023-12-09 16:53:49 -03:00
11a64468e5 mejorar home
gracias data
2023-12-09 16:51:38 -03:00
e9de6d00d4 frontend: mandar alert en error 2023-12-09 14:09:50 -03:00
6c65aa2ead frontend: decompressionstream polyfill 2023-12-09 14:05:44 -03:00
840496a153 borrar link duplicado a github 2023-12-09 13:45:11 -03:00
b87738ea13 downloader: mover containerfile a lugar mas comodo 2023-12-09 13:12:57 -03:00
99ab91c552 downloader: mejorar types 2023-12-09 13:12:43 -03:00
23 changed files with 416 additions and 160 deletions

View file

@ -6,6 +6,6 @@ prueba
datos.gob.ar*
data/
data*
downloader/data
downloader/data*
*.zip

View file

@ -28,6 +28,7 @@ export const zData = z.object({
homepage: z.string().optional(),
dataset: z.array(zDataset),
});
/** @typedef {z.infer<typeof zData>} Data */
export const zError = z.object({
url: z.string().optional(),

View file

@ -3,12 +3,16 @@
"lib": ["es2023"],
"module": "ES2020",
"target": "es2022",
"moduleResolution": "Bundler",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"emitDeclarationOnly": true,
"declaration": true
}
"declaration": true,
"noEmit": true,
"allowJs": true,
"checkJs": true
},
"include": ["*.js"]
}

View file

@ -3,7 +3,7 @@ FROM docker.io/alpine:3.18 as build
RUN apk add --no-cache npm \
&& npm install -g esbuild pnpm
COPY .. /tmp/build/
COPY . /tmp/build/
WORKDIR /tmp/build/downloader
RUN pnpm install \
&& esbuild --bundle --format=cjs --platform=node --outfile=download_json.build.js --sourcemap=inline download_json.js \

View file

@ -0,0 +1,152 @@
import { request } from "undici";
import z from "zod";
import { userAgent } from "./config.js";
import { basename } from "path";
// Schema for the response of CKAN's `package_list` action as consumed by
// getCkanPackageList: `success` must be literally `true` and `result` must be
// an array of strings (the dataset names).
const zCkanPackageList = z.object({
success: z.literal(true),
result: z.array(z.string()),
});
/**
 * Requests `url` sending the project's User-Agent header and resolves with
 * the response body parsed as JSON. The HTTP status code is not inspected.
 * @param {string} url
 */
async function getJson(url) {
  const headers = { "User-Agent": userAgent };
  const { body } = await request(url, { headers });
  return await body.json();
}
/**
 * Downloads the list of dataset names from a CKAN portal through its
 * `package_list` action and validates the response shape.
 * @param {string} ckanUrl
 * @returns {Promise<string[]>}
 */
async function getCkanPackageList(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/package_list`;
  const { result } = zCkanPackageList.parse(await getJson(endpoint));
  return result;
}
// Fields validated on the `organization` object of a CKAN package.
const zCkanOrganization = z.object({
name: z.string(),
title: z.string(),
id: z.string(),
created: z.string(),
});
// Fields validated on each entry of a package's `resources` array.
// generateDataJsonFromCkan maps every resource to a data.json distribution.
const zCkanResource = z.object({
id: z.string(),
name: z.string(),
description: z.string(),
format: z.string(),
url: z.string(),
});
// Fields validated on each entry of a package's `tags` array.
const zCkanTag = z.object({
id: z.string(),
display_name: z.string(),
name: z.string(),
});
// Fields validated on each entry of a package's `groups` array.
const zCkanGroup = z.object({
id: z.string(),
display_name: z.string(),
name: z.string(),
description: z.string(),
});
// Fields validated on a CKAN package (dataset) as returned by `package_show`.
// `license_url` is the only field declared optional here; all the others are
// declared as plain strings, arrays or objects.
const zCkanPackage = z.object({
license_title: z.string(),
license_id: z.string(),
license_url: z.string().optional(),
maintainer: z.string(),
maintainer_email: z.string(),
id: z.string(),
name: z.string(),
title: z.string(),
metadata_created: z.string(),
metadata_modified: z.string(),
author: z.string(),
author_email: z.string(),
resources: z.array(zCkanResource),
tags: z.array(zCkanTag),
groups: z.array(zCkanGroup),
organization: zCkanOrganization,
url: z.string(),
notes: z.string(),
});
// Schema for the response of the `package_show` action: `success` must be
// literally `true` and `result` must match zCkanPackage.
const zCkanPackageShow = z.object({
success: z.literal(true),
result: zCkanPackage,
});
/**
 * Fetches one dataset ("package") from a CKAN portal through its
 * `package_show` action and validates the response shape.
 * @param {string} ckanUrl
 * @param {string} packageName name of the package; it is URL-encoded before
 *   being placed in the query string
 */
async function getCkanPackage(ckanUrl, packageName) {
  const id = encodeURIComponent(packageName);
  const response = await getJson(
    `${ckanUrl}/api/3/action/package_show?id=${id}`
  );
  const { result } = zCkanPackageShow.parse(response);
  return result;
}
// Schema for the response of CKAN's `status_show` action as consumed by
// getCkanInfo. The field descriptions are attached to the field they describe:
// previously the title description sat on `site_url` and `site_title` had none.
const zCkanStatusShow = z.object({
  success: z.literal(true),
  result: z.object({
    site_url: z.string().describe("URL del portal. A veces vacio."),
    site_description: z
      .string()
      .describe("Descripción del portal. A veces vacio."),
    site_title: z.string().describe("Titulo del portal. A veces vacio."),
    error_emails_to: z.string().nullable(),
  }),
});
/**
 * Retrieves general information about the portal through CKAN's
 * `status_show` action and validates the response shape.
 * @param {string} ckanUrl
 */
async function getCkanInfo(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/status_show`;
  const { result } = zCkanStatusShow.parse(await getJson(endpoint));
  return result;
}
/**
 * Generates a data.json-style catalog from a CKAN portal that may not publish
 * an official data.json.
 *
 * It reads the portal's package list and general info, then fetches every
 * package and maps each one to a dataset entry (and each CKAN resource to a
 * distribution entry).
 *
 * @param {string} ckanUrl base URL of the CKAN portal
 */
export async function generateDataJsonFromCkan(ckanUrl) {
  // The package list and the portal info are independent requests, so they
  // are issued together instead of one after the other.
  const [list, info] = await Promise.all([
    getCkanPackageList(ckanUrl),
    getCkanInfo(ckanUrl),
  ]);
  const packages = await Promise.all(
    list.map((n) => getCkanPackage(ckanUrl, n))
  );
  /** @type {import("common/schema.js").Data & { generatedBy: string }} */
  const data = {
    generatedBy:
      "archivador de datos abiertos (ckan_to_datajson) <https://github.com/catdevnull/transicion-desordenada-diablo>",
    // Empty strings reported by the portal fall back to the portal URL
    // (or to an empty description).
    title: info.site_title || ckanUrl,
    description: info.site_description || "",
    homepage: info.site_url || ckanUrl,
    dataset: packages.map((p) => ({
      title: p.title,
      description: p.notes,
      identifier: p.id,
      publisher: {
        name: p.maintainer,
        mbox: p.maintainer_email,
      },
      landingPage: p.url,
      distribution: p.resources.map((r) => ({
        identifier: r.id,
        title: r.name,
        description: r.description,
        // basename() runs on the raw URL string, so anything after the last
        // "/" (including a query string, if present) becomes the file name.
        fileName: basename(r.url),
        format: r.format,
        downloadURL: r.url,
      })),
    })),
  };
  return data;
}

66
downloader/config.js Normal file
View file

@ -0,0 +1,66 @@
// Default download targets, used when no targets are given on the command
// line. Each entry is "<type>+<url>", where <type> is "datajson" (the URL
// points to a data.json catalog) or "ckan" (the URL is the base of a CKAN
// portal). Entries must be unique: the downloader starts one download per
// entry, so a repeated entry would download the same portal twice.
export const targetsPorDefecto = [
  "datajson+https://datos.gob.ar/data.json",
  "datajson+http://datos.energia.gob.ar/data.json",
  "datajson+https://datos.magyp.gob.ar/data.json",
  "datajson+https://datos.acumar.gov.ar/data.json",
  "datajson+https://datasets.datos.mincyt.gob.ar/data.json",
  "datajson+https://datos.arsat.com.ar/data.json",
  "datajson+https://datos.cultura.gob.ar/data.json",
  "datajson+https://datos.mininterior.gob.ar/data.json",
  "datajson+https://datos.produccion.gob.ar/data.json",
  "datajson+https://datos.salud.gob.ar/data.json",
  "datajson+https://datos.transporte.gob.ar/data.json",
  "datajson+https://ckan.ciudaddemendoza.gov.ar/data.json",
  "datajson+https://datos.santafe.gob.ar/data.json",
  "datajson+https://datosabiertos.chaco.gob.ar/data.json",
  "datajson+https://datosabiertos.mercedes.gob.ar/data.json",
  "datajson+http://luj-bue-datos.paisdigital.innovacion.gob.ar/data.json",
  "datajson+https://datosabiertos.desarrollosocial.gob.ar/data.json",
  "datajson+http://datos.mindef.gov.ar/data.json",
  "datajson+http://datos.legislatura.gob.ar/data.json",
  "datajson+https://portal.hcdiputados-ba.gov.ar/data.json", // Cámara de Diputados de la Provincia de Buenos Aires
  "datajson+https://monitoreo.datos.gob.ar/catalog/jgm/data.json",
  // "datajson+https://datosabiertos.enacom.gob.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/otros/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/aaip/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/sedronar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/modernizacion/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/shn/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/smn/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/ign/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/justicia/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/seguridad/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/ambiente/data.json",
  // "datajson+http://andino.siu.edu.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
  "datajson+https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
  "datajson+https://transparencia.enargas.gob.ar/data.json",
  "datajson+https://infra.datos.gob.ar/catalog/sspm/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/siep/data.json",
  "datajson+https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
  "datajson+http://datos.pami.org.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
  "datajson+https://datos.yvera.gob.ar/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/dine/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/obras/data.json",
  "datajson+https://monitoreo.datos.gob.ar/media/catalog/generos/data.json",
  "ckan+http://datos.jus.gob.ar", // national justice
  "ckan+https://datos.csjn.gov.ar", // national Supreme Court of Justice
  "ckan+https://datos.hcdn.gob.ar", // national Chamber of Deputies
  "ckan+https://data.buenosaires.gob.ar", // City of Buenos Aires (CABA)
  "ckan+https://datos.tsjbaires.gov.ar", // CABA Superior Court of Justice
];

// Disabled because it is VERY slow: datosabiertos.gualeguaychu.gov.ar

// FYI: at least the following domains do not serve the complete HTTPS certificate chain. We have to use a hack (node_extra_ca_certs_mozilla_bundle) to connect to these sites. (It can be checked with ssllabs.com.) Hopefully the administrators of these servers will fix it.
// www.enargas.gov.ar, transparencia.enargas.gov.ar, www.energia.gob.ar, www.economia.gob.ar, datos.yvera.gob.ar
// Value sent in the "User-Agent" header of the HTTP requests made by the downloader.
export const userAgent = "transicion-desordenada (https://nulo.ar)";

View file

@ -1,64 +1,10 @@
// @ts-check
import { mkdir, open, writeFile } from "node:fs/promises";
import { Agent, fetch, request, setGlobalDispatcher } from "undici";
import { join, normalize } from "node:path";
import pLimit from "p-limit";
export const sitiosPorDefecto = [
"https://datos.gob.ar/data.json",
"http://datos.energia.gob.ar/data.json",
"https://datos.magyp.gob.ar/data.json",
"https://datos.acumar.gov.ar/data.json",
"https://datasets.datos.mincyt.gob.ar/data.json",
"https://datos.arsat.com.ar/data.json",
"https://datos.cultura.gob.ar/data.json",
"https://datos.mininterior.gob.ar/data.json",
"https://datos.produccion.gob.ar/data.json",
"https://datos.salud.gob.ar/data.json",
"https://datos.transporte.gob.ar/data.json",
"https://ckan.ciudaddemendoza.gov.ar/data.json",
"https://datos.santafe.gob.ar/data.json",
"https://datosabiertos.chaco.gob.ar/data.json",
"https://datosabiertos.mercedes.gob.ar/data.json",
"http://luj-bue-datos.paisdigital.innovacion.gob.ar/data.json",
"https://datosabiertos.desarrollosocial.gob.ar/data.json",
"http://datos.mindef.gov.ar/data.json",
"https://monitoreo.datos.gob.ar/catalog/jgm/data.json",
// 'https://datosabiertos.enacom.gob.ar/data.json',
"https://monitoreo.datos.gob.ar/catalog/otros/data.json",
"https://monitoreo.datos.gob.ar/catalog/aaip/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/sedronar/data.json",
"https://monitoreo.datos.gob.ar/catalog/modernizacion/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/shn/data.json",
"https://monitoreo.datos.gob.ar/catalog/smn/data.json",
"https://monitoreo.datos.gob.ar/catalog/ign/data.json",
"https://monitoreo.datos.gob.ar/catalog/justicia/data.json",
"https://monitoreo.datos.gob.ar/catalog/seguridad/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/ambiente/data.json",
// "http://andino.siu.edu.ar/data.json",
"https://monitoreo.datos.gob.ar/catalog/educacion/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/inti/data.json",
"https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
"https://www.presupuestoabierto.gob.ar/sici/rest-api/catalog/public",
"https://transparencia.enargas.gob.ar/data.json",
"https://infra.datos.gob.ar/catalog/sspm/data.json",
"https://monitoreo.datos.gob.ar/catalog/ssprys/data.json",
"https://monitoreo.datos.gob.ar/catalog/siep/data.json",
"https://monitoreo.datos.gob.ar/catalog/exterior/data.json",
"http://datos.pami.org.ar/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/trabajo/data.json",
"https://datos.yvera.gob.ar/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/renaper/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/dine/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/obras/data.json",
"https://monitoreo.datos.gob.ar/media/catalog/generos/data.json",
];
// desactivado porque va MUY lento: datosabiertos.gualeguaychu.gov.ar
// FYI: al menos los siguientes dominios no tienen la cadena completa de certificados en HTTPS. tenemos que usar un hack (node_extra_ca_certs_mozilla_bundle) para conectarnos a estos sitios. (se puede ver con ssllabs.com) ojalá lxs administradorxs de estos servidores lo arreglen.
// www.enargas.gov.ar, transparencia.enargas.gov.ar, www.energia.gob.ar, www.economia.gob.ar, datos.yvera.gob.ar
import { targetsPorDefecto, userAgent } from "./config.js";
import { generateDataJsonFromCkan } from "./ckan_to_datajson.js";
import { zData } from "common/schema.js";
setGlobalDispatcher(
new Agent({
@ -81,26 +27,43 @@ class StatusCodeError extends Error {
}
}
class TooManyRedirectsError extends Error {}
let jsonUrls = process.argv.slice(2);
if (jsonUrls.length < 1) {
jsonUrls = sitiosPorDefecto;
let urls = process.argv.slice(2);
if (urls.length < 1) {
urls = targetsPorDefecto;
}
for (const url of jsonUrls)
downloadFromData(url).catch((error) =>
console.error(`${url} FALLÓ CON`, error)
/** @typedef {{type: "datajson" | "ckan"; url: string;}} Target */

// Each entry is "<type>+<url>" or a bare URL; a bare URL (no known prefix)
// is treated as a data.json target.
/** @type {Target[]} */
const targets = urls.map((raw) => {
  if (raw.startsWith("ckan+")) {
    return { type: "ckan", url: raw.slice("ckan+".length) };
  }
  const hasPrefix = raw.startsWith("datajson+");
  return {
    type: "datajson",
    url: hasPrefix ? raw.slice("datajson+".length) : raw,
  };
});
for (const target of targets)
downloadFromData(target).catch((error) =>
console.error(`${target.type}+${target.url} FALLÓ CON`, error)
);
/**
* @param {string} jsonUrl
* @param {Target} target
*/
async function downloadFromData(jsonUrl) {
const outputPath = generateOutputPath(jsonUrl);
const jsonRes = await fetch(jsonUrl);
// prettier-ignore
const parsed = /** @type {{ dataset: Dataset[] }} */(await jsonRes.json())
async function downloadFromData(target) {
const outputPath = generateOutputPath(target.url);
let json;
if (target.type === "ckan") {
json = await generateDataJsonFromCkan(target.url);
} else if (target.type === "datajson") {
const jsonRes = await fetch(target.url);
json = await jsonRes.json();
}
const parsed = zData.parse(json);
await mkdir(outputPath, { recursive: true });
await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed));
await writeFile(join(outputPath, "url.txt"), jsonUrl);
await writeFile(join(outputPath, "data.json"), JSON.stringify(json));
await writeFile(join(outputPath, "url.txt"), `${target.type}+${target.url}`);
const errorFile = (
await open(join(outputPath, "errors.jsonl"), "w")
).createWriteStream();
@ -108,8 +71,13 @@ async function downloadFromData(jsonUrl) {
/** @type {DownloadJob[]} */
const jobs = parsed.dataset.flatMap((dataset) =>
dataset.distribution
.filter((dist) => {
.filter(
/** @returns {dist is import("common/schema.js").Distribution & {downloadURL: string}} */
(dist) => {
try {
if (!dist.downloadURL) {
throw new Error("No downloadURL in distribution");
}
patchUrl(new URL(dist.downloadURL));
return true;
} catch (error) {
@ -118,7 +86,8 @@ async function downloadFromData(jsonUrl) {
);
return false;
}
})
}
)
.map((dist) => ({
dataset,
dist,
@ -181,6 +150,7 @@ export function generateOutputPath(jsonUrlString) {
/**
* @argument {DownloadJob} job
* @argument {number} attempts
* @returns {Promise<void>}
*/
async function downloadDistWithRetries(job, attempts = 0) {
const { url } = job;
@ -222,7 +192,7 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
headers: {
"User-Agent": spoofUserAgent
? "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0"
: "transicion-desordenada (https://nulo.ar)",
: userAgent,
},
});
if (res.statusCode >= 300 && res.statusCode <= 399)
@ -247,22 +217,13 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
}
/** @typedef DownloadJob
* @prop {Dataset} dataset
* @prop {Distribution} dist
* @prop {import("common/schema.js").Dataset} dataset
* @prop {import("common/schema.js").Distribution} dist
* @prop {URL} url
* @prop {string} outputPath
* @prop {number} attempts
* @prop {Date=} waitUntil
*/
/** @typedef Dataset
* @prop {string} identifier
* @prop {Distribution[]} distribution
*/
/** @typedef Distribution
* @prop {string} identifier
* @prop {string} fileName
* @prop {string} downloadURL
*/
// https://security.stackexchange.com/a/123723
/**
@ -298,7 +259,7 @@ function wait(ms) {
}
/**
* @param {{ dataset: Dataset, dist: Distribution, url?: URL }} job
* @param {{ dataset: import("common/schema.js").Dataset, dist: import("common/schema.js").Distribution, url?: URL }} job
* @param {any} error
*/
function encodeError(job, error) {

View file

@ -22,12 +22,15 @@ async function generateMetadata(dumpDir) {
.map(async (file) => {
const path = join(file.path, file.name);
const data = await loadDataJson(path);
const url = await readFile(join(path, "url.txt"), "utf-8");
let url = await readFile(join(path, "url.txt"), "utf-8");
if (url.startsWith("datajson+") || url.startsWith("ckan+"))
url = url.slice(url.indexOf("+") + 1);
return {
title: data.title,
description: data.description,
url,
path: file.name,
nDatasets: data.dataset.length,
};
})
);

View file

@ -11,9 +11,10 @@
"author": "",
"license": "ISC",
"dependencies": {
"common": "workspace:",
"p-limit": "^5.0.0",
"undici": "^5.28.0",
"common": "workspace:"
"zod": "^3.22.4"
},
"devDependencies": {
"@tsconfig/node20": "^20.1.2",

View file

@ -14,7 +14,7 @@ pnpm install
```
# descargar portal datos.gob.ar
pnpm run run https://datos.gob.ar/data.json
pnpm run run datajson+https://datos.gob.ar/data.json
# guarda en data/datos.gob.ar_data.json
# descargar todos los portales conocidos

View file

@ -11,6 +11,8 @@
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"checkJs": true,
"noEmit": true,
"forceConsistentCasingInFileNames": true,
"moduleResolution": "node16"
},

View file

@ -28,6 +28,7 @@
"vite": "^5.0.0"
},
"dependencies": {
"compression-streams-polyfill": "^0.1.6",
"eva-icons": "^1.1.3",
"navaid": "^1.2.0",
"regexparam": "^3.0.0",

View file

@ -25,7 +25,15 @@ async function fetchGzipped(url: string): Promise<Response> {
res = await fetch(url.slice(0, url.length - ".gz".length));
return res;
}
const ds = new DecompressionStream("gzip");
let DecStream;
if ("DecompressionStream" in window) DecStream = window.DecompressionStream;
else {
const { makeDecompressionStream } = await import(
"compression-streams-polyfill/ponyfill"
);
DecStream = makeDecompressionStream(TransformStream);
}
const ds = new DecStream("gzip");
const decompressedStream = res.body!.pipeThrough(ds);
const resD = new Response(decompressedStream);
return resD;

View file

@ -11,9 +11,9 @@
export let params: { dumpUrl: string; portal: string; id: string };
$: url = decodeURIComponent(params.dumpUrl) + "/" + params.portal;
$: data = Promise.all([fetchData(url), fetchErrors(url)]).then(
([data, errors]) => ({ data, errors }),
);
$: data = Promise.all([fetchData(url), fetchErrors(url)])
.then(([data, errors]) => ({ data, errors }))
.catch(alert);
</script>
<main class="mx-auto max-w-3xl">
@ -28,9 +28,9 @@
<NotFound />
{:else}
<header
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700"
class="border-b border-b-gray-200 px-6 py-5 dark:border-b-gray-700"
>
<h1 class="font-bold text-3xl">{dataset.title}</h1>
<h1 class="text-3xl font-bold">{dataset.title}</h1>
<p class="text-xl">{dataset.description}</p>
<!--
lo saqué porque aún antes de que venga la motosierra estos links no funcionan...
@ -54,13 +54,13 @@
e.datasetIdentifier === dataset.identifier &&
e.distributionIdentifier === dist.identifier,
)}
<li class="flex px-6 py-5 justify-between items-center">
<li class="flex items-center justify-between px-6 py-5">
<div>
<h3>
{dist.title}
{#if dist.format}
<span
class="border border-current text-blue-800 dark:text-blue-400 relative inline-flex items-center text-xs font-semibold px-2 py-1 rounded-full ml-1"
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>{dist.format}</span>
</span>
@ -85,7 +85,7 @@
{#if !error}
<button
type="button"
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none"
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
on:click={() => downloadFile(url, dataset.identifier, dist)}
>Descargar</button
>

View file

@ -17,17 +17,20 @@
<p class="p-6">Cargando..</p>
{:then metadata}
<header
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700 leading-none"
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
>
<small>
Viendo archivo en
<a
class="underline text-blue-500 dark:text-blue-300"
class="text-blue-500 underline dark:text-blue-300"
target="_blank"
rel="noopener"
href={url}>{url}</a
>
</small>
<h1 class="mt-2 text-3xl font-bold">
Portales ({metadata.sites.length})
</h1>
</header>
<ul class="divide-y divide-gray-100 dark:divide-gray-700">
@ -37,15 +40,15 @@
portal: site.path,
})}
<li>
<div class="flex px-6 py-5 justify-between gap-3">
<div class="flex justify-between gap-3 px-6 py-5">
<div class="flex flex-col">
<h3 class="text-lg">{site.title}</h3>
<p class="text-sm">{site.description}</p>
</div>
<div class="flex flex-col items-center justify-center shrink-0">
<div class="flex shrink-0 flex-col items-center justify-center">
<a
href={portalLink}
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none"
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
>Ver portal</a
>
<SourceLink href={site.url} />

View file

@ -1,41 +1,41 @@
<script lang="ts">
import { inject } from "regexparam";
import { routes } from "../router";
import Container from "../components/Container.svelte";
import Portal from "./Home/Portal.svelte";
</script>
<main class="mx-auto prose dark:prose-invert">
<main class="prose mx-auto dark:prose-invert">
<Container>
<div class="py-5 px-6">
<h1>Archivo de portales de datos abiertos</h1>
<div class="px-6 pt-5">
<h1>Archivo de portales de datos abiertos de Argentina</h1>
<p>
Esta herramienta permite ver datos en archivos de portales de datos
abiertos de <a
href="https://github.com/catdevnull/transicion-desordenada-diablo/"
rel="noopener">transicion-desordenada-diablo</a
Explorá respaldos de los datos publicados en 43 portales de datos
abiertos.
</p>
<p>
Este proyecto surge en el marco de la solicitada "Por un gobierno de
datos abiertos" publicada el 5/12 por el grupo de usuaries de datos de
Argentina y firmada por una serie de organizaciones de la sociedad civil
y daterxs. <a href="https://bit.ly/CartaDatosAbiertos"
>Leer la solicitada</a
>
(un mejor nombre sería genial), creada en el marco de
<a href="https://bit.ly/CartaDatosAbiertos">un pedido hecho</a> al gobierno
entrante el 10 de diciembre de 2023 por garantizar el mantenimiento de las
políticas de datos públicos en Argentina.
</p>
<div class="not-prose flex place-content-center">
<a
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent(
"https://archivos.nulo.ar/dump-2023-12-08/",
),
})}
class="flex items-center justify-center px-4 py-2 text-xl font-medium text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none text-center"
>
Acceder al archivo creado el 8 de diciembre de 2023
</a>
<h2 class="my-4">Respaldos</h2>
</div>
<div class="not-prose">
<ul
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
>
<Portal />
</ul>
</div>
<div class="px-6 py-5">
<p>
Los archivos y las herramientas fueron creados por
<a href="https://nulo.ar">Nulo</a> con ayuda de varias personas. El
Las herramientas para descargar masivamente los archivos fueron
desarrolladas por
<a href="https://nulo.ar">Nulo</a> (y con ayuda de varias personas). El
código está disponible
<a
href="https://github.com/catdevnull/transicion-desordenada-diablo/"

View file

@ -0,0 +1,33 @@
<script lang="ts">
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
import { inject } from "regexparam";
import { routes } from "../../router";
</script>
<li>
<a
class="shadow-glow flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
})}
>
<div>
<h3 class="text-lg">8 de diciembre de 2023</h3>
<!-- <span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>portales</span>
</span>
<span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>datasets</span>
</span> -->
</div>
<ArrowForward
fill="currentColor"
aria-hidden="true"
class="w-6 shrink-0 text-gray-600 dark:text-gray-400 "
/>
</a>
</li>

View file

@ -11,9 +11,9 @@
export let params: { dumpUrl: string; portal: string };
$: url = `${decodeURIComponent(params.dumpUrl)}/${params.portal}`;
$: data = Promise.all([fetchData(url), fetchErrors(url)]).then(
([data, errors]) => ({ data, errors }),
);
$: data = Promise.all([fetchData(url), fetchErrors(url)])
.then(([data, errors]) => ({ data, errors }))
.catch(alert);
function arreglarHomepageUrl(url: string): string {
if (!url.startsWith("http://") && !url.startsWith("https://"))
@ -51,29 +51,29 @@
<p class="p-6">Cargando..</p>
{:then { data, errors }}
<header
class="py-5 px-6 border-b border-b-gray-200 dark:border-b-gray-700 leading-none"
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
>
<small>
Viendo portal archivado de
<a
class="underline text-blue-500 dark:text-blue-300"
class="text-blue-500 underline dark:text-blue-300"
target="_blank"
rel="noopener"
href={url}>{url}</a
>
</small>
<h1 class="font-bold text-3xl">{data.title}</h1>
<h1 class="text-3xl font-bold">{data.title}</h1>
<p class="text-xl">{data.description}</p>
{#if data.homepage}
<SourceLink href={arreglarHomepageUrl(data.homepage)} />
{/if}
</header>
<div class="w-full mx-auto px-6 py-2">
<div class="mx-auto w-full px-6 py-2">
<input
type="text"
placeholder="Buscar..."
class="flex w-full h-10 px-3 py-2 text-sm bg-white dark:bg-gray-800 border rounded-md border-neutral-300 dark:border-gray-700 ring-offset-background placeholder:text-neutral-500 dark:placeholder:text-gray-500 focus:border-neutral-300 dark:focus:border-gray-700 focus:outline-none focus:ring-2 focus:ring-neutral-400 dark:focus:ring-gray-600 disabled:cursor-not-allowed disabled:opacity-50"
class="ring-offset-background flex h-10 w-full rounded-md border border-neutral-300 bg-white px-3 py-2 text-sm placeholder:text-neutral-500 focus:border-neutral-300 focus:outline-none focus:ring-2 focus:ring-neutral-400 disabled:cursor-not-allowed disabled:opacity-50 dark:border-gray-700 dark:bg-gray-800 dark:placeholder:text-gray-500 dark:focus:border-gray-700 dark:focus:ring-gray-600"
bind:value={query}
/>
</div>
@ -87,7 +87,7 @@
})}
<li>
<a
class="flex px-6 py-5 hover:bg-gray-50 dark:hover:bg-gray-700 justify-between"
class="flex justify-between px-6 py-5 hover:bg-gray-50 dark:hover:bg-gray-700"
href={datasetLink}
>
<div>

View file

@ -1,8 +1,8 @@
import './app.css'
import App from './App.svelte'
import "./app.css";
import App from "./App.svelte";
const app = new App({
target: document.getElementById('app'),
})
target: document.getElementById("app")!,
});
export default app
export default app;

View file

@ -2,7 +2,11 @@
export default {
content: ["./index.html", "./src/**/*.svelte"],
theme: {
extend: {},
extend: {
boxShadow: {
glow: "0 0px 35px rgb(0 0 0 / .2)",
},
},
},
plugins: [require("@tailwindcss/typography")],
};

View file

@ -13,7 +13,8 @@
*/
"allowJs": true,
"checkJs": true,
"isolatedModules": true
"isolatedModules": true,
"noEmit": true
},
"include": [
"src/**/*.ts",

View file

@ -23,6 +23,9 @@ importers:
undici:
specifier: ^5.28.0
version: 5.28.2
zod:
specifier: ^3.22.4
version: 3.22.4
devDependencies:
'@tsconfig/node20':
specifier: ^20.1.2
@ -33,6 +36,9 @@ importers:
frontend:
dependencies:
compression-streams-polyfill:
specifier: ^0.1.6
version: 0.1.6
eva-icons:
specifier: ^1.1.3
version: 1.1.3
@ -703,6 +709,12 @@ packages:
engines: {node: '>= 10'}
dev: true
/compression-streams-polyfill@0.1.6:
resolution: {integrity: sha512-vYXHeCzZPfKqz/ppInffY2HkevGbV6zm5wlSgtrW0+6neApbA5qZdG48KaDEhRmbIFhLgmVUlUc+szH9NrcaBA==}
dependencies:
fflate: 0.8.1
dev: false
/concat-map@0.0.1:
resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
dev: true
@ -888,6 +900,10 @@ packages:
reusify: 1.0.4
dev: true
/fflate@0.8.1:
resolution: {integrity: sha512-/exOvEuc+/iaUm105QIiOt4LpBdMTWsXxqR0HDF35vx3fmaKzw7354gTilCh5rkzEt8WYyG//ku3h3nRmd7CHQ==}
dev: false
/fill-range@7.0.1:
resolution: {integrity: sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==}
engines: {node: '>=8'}

View file

@ -1,4 +1,4 @@
# Transicion Desordeanada (diablo)
# Archivador de Datos Abiertos
Herramientas para descargar masivamente portales de datos abiertos y generar un archivo, que luego se puede ver en una página web.