Compare commits


13 commits

25 changed files with 2295 additions and 138 deletions

.dockerignore (new file, 11 lines added)

@@ -0,0 +1,11 @@
*/node_modules/
node_modules/
dataJsons/
log
prueba
datos.gob.ar*
data/
data*
downloader/data
*.zip


@@ -1,19 +0,0 @@
FROM docker.io/alpine:3.18 as build
RUN apk add --no-cache npm
RUN npm install -g esbuild
WORKDIR /tmp/build
COPY package.json .
RUN npm install
COPY download_json.js .
RUN esbuild --bundle --format=cjs --platform=node --outfile=build.js --sourcemap=inline download_json.js
FROM docker.io/alpine:3.18
RUN apk add --no-cache nodejs-current tini
COPY pki/ca_intermediate_root_bundle.pem /usr/lib/ca_intermediate_root_bundle.pem
COPY --from=build /tmp/build/build.js /usr/local/bin/download_json.js
ENV NODE_EXTRA_CA_CERTS=/usr/lib/ca_intermediate_root_bundle.pem
WORKDIR /data
CMD ["/sbin/tini", "node", "--enable-source-maps", "/usr/local/bin/download_json.js"]

common/package.json (new file, 16 lines added)

@@ -0,0 +1,16 @@
{
"name": "common",
"type": "module",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"zod": "^3.22.4"
}
}


@@ -12,7 +12,7 @@ export const zDistribution = z.object({
title: z.string(),
description: z.string().optional(),
});
-export type Distribution = z.infer<typeof zDistribution>;
+/** @typedef {z.infer<typeof zDistribution>} Distribution */
export const zDataset = z.object({
identifier: z.string(),
title: z.string(),
@@ -21,7 +21,7 @@ export const zDataset = z.object({
distribution: z.array(zDistribution),
landingPage: z.string().optional(),
});
-export type Dataset = z.infer<typeof zDataset>;
+/** @typedef {z.infer<typeof zDataset>} Dataset */
export const zData = z.object({
title: z.string(),
description: z.string(),
@@ -36,3 +36,15 @@ export const zError = z.object({
kind: z.enum(["generic_error", "http_error", "infinite_redirect"]),
error: z.string().optional(),
});
+export const zDumpMetadata = z.object({
+sites: z.array(
+z.object({
+title: z.string(),
+description: z.string(),
+url: z.string(),
+path: z.string(),
+})
+),
+});
+/** @typedef {z.infer<typeof zDumpMetadata>} DumpMetadata */
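
These hunks swap the TypeScript-only `export type` aliases for JSDoc `@typedef`s so the shared schemas can be used from plain JavaScript (the downloader) as well as from TypeScript (the frontend). A minimal sketch of the pattern on the consumer side, assuming the package is linked as `common`; the helper below is illustrative, not part of the diff:

```js
// @ts-check
/** @typedef {import("common/schema.js").Dataset} Dataset */

/**
 * Illustrative helper: plain JS, but type-checked against the shared typedef.
 * @param {Dataset} dataset
 * @returns {string[]}
 */
function distributionIds(dataset) {
  return dataset.distribution.map((dist) => dist.identifier);
}
```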

common/tsconfig.json (new file, 14 lines added)

@@ -0,0 +1,14 @@
{
"compilerOptions": {
"lib": ["es2023"],
"module": "ES2020",
"target": "es2022",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"emitDeclarationOnly": true,
"declaration": true
}
}

downloader/Containerfile (new file, 19 lines added)

@@ -0,0 +1,19 @@
FROM docker.io/alpine:3.18 as build
RUN apk add --no-cache npm \
&& npm install -g esbuild pnpm
COPY .. /tmp/build/
WORKDIR /tmp/build/downloader
RUN pnpm install \
&& esbuild --bundle --format=cjs --platform=node --outfile=download_json.build.js --sourcemap=inline download_json.js \
&& esbuild --bundle --format=cjs --platform=node --outfile=generate_dump_metadata.build.js --sourcemap=inline generate_dump_metadata.js
FROM docker.io/alpine:3.18
RUN apk add --no-cache nodejs-current tini
COPY downloader/pki/ca_intermediate_root_bundle.pem /usr/lib/ca_intermediate_root_bundle.pem
COPY --from=build /tmp/build/downloader/download_json.build.js /usr/local/bin/download_json.js
COPY --from=build /tmp/build/downloader/generate_dump_metadata.build.js /usr/local/bin/generate_dump_metadata.js
ENV NODE_EXTRA_CA_CERTS=/usr/lib/ca_intermediate_root_bundle.pem
WORKDIR /data
CMD ["/sbin/tini", "node", "--enable-source-maps", "/usr/local/bin/download_json.js"]


@@ -4,7 +4,7 @@ import { Agent, fetch, request, setGlobalDispatcher } from "undici";
import { join, normalize } from "node:path";
import pLimit from "p-limit";
-const sitiosPorDefecto = [
+export const sitiosPorDefecto = [
"https://datos.gob.ar/data.json",
"http://datos.energia.gob.ar/data.json",
"https://datos.magyp.gob.ar/data.json",
@@ -63,7 +63,7 @@ const sitiosPorDefecto = [
setGlobalDispatcher(
new Agent({
pipelining: 0,
-}),
+})
);
/** key es host
@@ -85,29 +85,26 @@ let jsonUrls = process.argv.slice(2);
if (jsonUrls.length < 1) {
jsonUrls = sitiosPorDefecto;
}
-writeFile("readme.txt", generateReadme(jsonUrls));
for (const url of jsonUrls)
downloadFromData(url).catch((error) =>
-console.error(`${url} FALLÓ CON`, error),
+console.error(`${url} FALLÓ CON`, error)
);
/**
-* @param {string} jsonUrlString
+* @param {string} jsonUrl
*/
-async function downloadFromData(jsonUrlString) {
-const jsonUrl = new URL(jsonUrlString);
-const outputPath = `${jsonUrl.host}${jsonUrl.pathname}`.replaceAll("/", "_");
+async function downloadFromData(jsonUrl) {
+const outputPath = generateOutputPath(jsonUrl);
+const jsonRes = await fetch(jsonUrl);
+// prettier-ignore
+const parsed = /** @type {{ dataset: Dataset[] }} */(await jsonRes.json())
await mkdir(outputPath, { recursive: true });
+await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed));
+await writeFile(join(outputPath, "url.txt"), jsonUrl);
const errorFile = (
await open(join(outputPath, "errors.jsonl"), "w")
).createWriteStream();
try {
-const jsonRes = await fetch(jsonUrl);
-// prettier-ignore
-const parsed = /** @type {{ dataset: Dataset[] }} */(await jsonRes.json())
-await writeFile(join(outputPath, "data.json"), JSON.stringify(parsed));
/** @type {DownloadJob[]} */
const jobs = parsed.dataset.flatMap((dataset) =>
dataset.distribution
@@ -117,7 +114,7 @@ async function downloadFromData(jsonUrlString) {
return true;
} catch (error) {
errorFile.write(
-JSON.stringify(encodeError({ dataset, dist }, error)) + "\n",
+JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
);
return false;
}
@@ -128,7 +125,7 @@ async function downloadFromData(jsonUrlString) {
url: patchUrl(new URL(dist.downloadURL)),
outputPath,
attempts: 0,
-})),
+}))
);
const totalJobs = jobs.length;
let nFinished = 0;
@@ -160,7 +157,7 @@ async function downloadFromData(jsonUrlString) {
process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`);
const interval = setInterval(() => {
process.stderr.write(
-`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`,
+`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`
);
}, 30000);
await Promise.all(promises);
@@ -172,6 +169,15 @@ async function downloadFromData(jsonUrlString) {
}
}
+/**
+* @param {string} jsonUrlString
+*/
+export function generateOutputPath(jsonUrlString) {
+const jsonUrl = new URL(jsonUrlString);
+const outputPath = `${jsonUrl.host}${jsonUrl.pathname}`.replaceAll("/", "_");
+return outputPath;
+}
/**
* @argument {DownloadJob} job
* @argument {number} attempts
@@ -228,12 +234,12 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
const fileDirPath = join(
outputPath,
sanitizeSuffix(dataset.identifier),
-sanitizeSuffix(dist.identifier),
+sanitizeSuffix(dist.identifier)
);
await mkdir(fileDirPath, { recursive: true });
const filePath = join(
fileDirPath,
-sanitizeSuffix(dist.fileName || dist.identifier),
+sanitizeSuffix(dist.fileName || dist.identifier)
);
if (!res.body) throw new Error("no body");
@@ -272,11 +278,11 @@ function sanitizeSuffix(path) {
*/
function chequearIdsDuplicados(jobs, id) {
const duplicated = hasDuplicates(
-jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`),
+jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`)
);
if (duplicated) {
console.error(
-`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`,
+`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`
);
}
}
@@ -334,45 +340,3 @@ function shuffleArray(array) {
[array[i], array[j]] = [array[j], array[i]];
}
}
-/**
-* @param {string[]} portales
-*/
-function generateReadme(portales) {
-// basado en el readme de Patricio
-return `Dumps de Portales de Datos Abiertos de la República Argentina
-=============================================================
-El zip contiene todo lo que se pudo descargar de los portales seleccionados, que fueron:
-${portales.map((p) => `- ${p}`).join("\n")}
-La carpeta está ordenada en subcarpetas cuyo nombre corresponde al ID del dataset/distribución del portal. De esta forma,
-leyendo el data.json se puede programaticamente y de manera simple volver a mapear qué archivo le corresponde a cada
-distribución.
-Formato:
-- {url de data.json sin protocolo y con / reemplazado por _}/
-- data.json
-- errors.jsonl: archivo con todos los errores que se obtuvieron al intentar descargar todo.
-- {identifier de dataset}/
-- {identifier de distribution}/
-- {fileName (o, si no existe, identifier de distribution)}
-Ejemplo:
-- datos.gob.ar_data.json/
-- data.json
-- errors.jsonl
-- turismo_fbc269ea-5f71-45b6-b70c-8eb38a03b8db/
-  - turismo_0774a0bb-71c2-44d7-9ea6-780e6bd06d50/
-  - cruceristas-por-puerto-residencia-desagregado-por-pais-mes.csv
-- ...
-- energia_0d4a18ee-9371-439a-8a94-4f53a9822664/
-  - energia_9f602b6e-2bef-4ac4-895d-f6ecd6bb1866/
-  - energia_9f602b6e-2bef-4ac4-895d-f6ecd6bb1866 (este archivo no tiene fileName en el data.json, entonces se reutiliza el identifier)
-- ...
-Este dump fue generado con transicion-desordenada-diablo: https://gitea.nulo.in/Nulo/transicion-desordenada-diablo
-`;
-}
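
For reference, generateOutputPath (extracted and exported above) derives the per-portal directory name from the data.json URL. A quick sketch of its behaviour, assuming it is imported from download_json.js:

```js
import { generateOutputPath } from "./download_json.js";

// host + pathname, with every "/" flattened to "_"
generateOutputPath("https://datos.gob.ar/data.json");
// => "datos.gob.ar_data.json"
generateOutputPath("http://datos.energia.gob.ar/data.json");
// => "datos.energia.gob.ar_data.json"
```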


@@ -0,0 +1,98 @@
// @ts-check
import { readFile, readdir, writeFile } from "node:fs/promises";
import { zData } from "common/schema.js";
import { join } from "node:path";
const dumpDir = process.argv[2];
generateMetadata(dumpDir);
/**
* @param {string} dumpDir
*/
async function generateMetadata(dumpDir) {
if (!dumpDir) {
console.error("Especifica una carpeta para generar los metadatos, porfa.");
process.exit(1);
}
const files = await readdir(dumpDir, { withFileTypes: true });
const sites = await Promise.all(
files
.filter((file) => file.isDirectory())
.map(async (file) => {
const path = join(file.path, file.name);
const data = await loadDataJson(path);
const url = await readFile(join(path, "url.txt"), "utf-8");
return {
title: data.title,
description: data.description,
url,
path: file.name,
};
})
);
/** @type {import("common/schema.js").DumpMetadata} */
const dumpMetadata = { sites };
await writeFile(
join(dumpDir, "dump-metadata.json"),
JSON.stringify(dumpMetadata)
);
await writeFile(
join(dumpDir, "readme.txt"),
generateReadme(sites.map((s) => s.url))
);
}
/**
* @param {string[]} portales
*/
function generateReadme(portales) {
// basado en el readme de Patricio
return `Dumps de Portales de Datos Abiertos de la República Argentina
=============================================================
Esta carpeta contiene todo lo que se pudo descargar de los portales seleccionados, que fueron:
${portales.map((p) => `- ${p}`).join("\n")}
La carpeta está ordenada en subcarpetas cuyo nombre corresponde al ID del dataset/distribución del portal. De esta forma,
leyendo el data.json se puede programaticamente y de manera simple volver a mapear qué archivo le corresponde a cada
distribución.
Formato:
- {url de data.json sin protocolo y con / reemplazado por _}/
- data.json
- errors.jsonl: archivo con todos los errores que se obtuvieron al intentar descargar todo.
- {identifier de dataset}/
- {identifier de distribution}/
- {fileName (o, si no existe, identifier de distribution)}
Ejemplo:
- datos.gob.ar_data.json/
- data.json
- errors.jsonl
- turismo_fbc269ea-5f71-45b6-b70c-8eb38a03b8db/
  - turismo_0774a0bb-71c2-44d7-9ea6-780e6bd06d50/
  - cruceristas-por-puerto-residencia-desagregado-por-pais-mes.csv
- ...
- energia_0d4a18ee-9371-439a-8a94-4f53a9822664/
   - energia_9f602b6e-2bef-4ac4-895d-f6ecd6bb1866/
   - energia_9f602b6e-2bef-4ac4-895d-f6ecd6bb1866 (este archivo no tiene fileName en el data.json, entonces se reutiliza el identifier)
- ...
Este dump fue generado con transicion-desordenada-diablo: https://gitea.nulo.in/Nulo/transicion-desordenada-diablo
Se puede usar el frontend en esa repo para ver el dump.
`;
}
/**
* @param {string} dir carpeta del dump
*/
async function loadDataJson(dir) {
const text = await readFile(join(dir, "data.json"), "utf-8");
const json = JSON.parse(text);
const data = zData.parse(json);
return data;
}
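
Per the zDumpMetadata schema, the dump-metadata.json this script writes is shaped roughly like the sketch below; values are illustrative, the real title/description come from each portal's data.json and the url from url.txt:

```js
// Illustrative dump-metadata.json contents for a dump with one portal.
const exampleDumpMetadata = {
  sites: [
    {
      title: "…",                            // data.title of the portal
      description: "…",                      // data.description
      url: "https://datos.gob.ar/data.json", // contents of <portal>/url.txt
      path: "datos.gob.ar_data.json",        // subdirectory inside the dump
    },
  ],
};
```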


@@ -1,5 +1,5 @@
{
-"name": "js",
+"name": "downloader",
"type": "module",
"version": "1.0.0",
"description": "",
@@ -12,10 +12,11 @@
"license": "ISC",
"dependencies": {
"p-limit": "^5.0.0",
-"undici": "^5.28.0"
+"undici": "^5.28.0",
+"common": "workspace:"
},
"devDependencies": {
"@tsconfig/node20": "^20.1.2",
"@types/node": "^20.10.0"
}
}


@@ -29,6 +29,15 @@ docker run --rm -it -v ./data:/data gitea.nulo.in/nulo/transicion-desordenada-di
# descarga datos.gob.ar
```
+## terminar dump
+```
+# generar dump-metadata.json (útil para el frontend) y readme.txt
+node generate_dump_metadata.js data/
+# comprimir todo excepto readme.txt
+pigz -1r data/*/
+```
## formato de repo guardado
- `{url de data.json sin protocolo y con / reemplazado por _}/`
@@ -44,10 +53,10 @@ docker run --rm -it -v ./data:/data gitea.nulo.in/nulo/transicion-desordenada-di
- `data.json`
- `errors.jsonl`
- `turismo_fbc269ea-5f71-45b6-b70c-8eb38a03b8db/`
  - `turismo_0774a0bb-71c2-44d7-9ea6-780e6bd06d50/`
  - `cruceristas-por-puerto-residencia-desagregado-por-pais-mes.csv`
- ...
- `energia_0d4a18ee-9371-439a-8a94-4f53a9822664/`
  - `energia_9f602b6e-2bef-4ac4-895d-f6ecd6bb1866/`
  - `energia_9f602b6e-2bef-4ac4-895d-f6ecd6bb1866` (este archivo no tiene fileName en el data.json, entonces se reutiliza el `identifier`)
- ...


@@ -13,5 +13,11 @@
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"moduleResolution": "node16"
-}
+},
+"include": [
+"**/*.ts",
+"**/*.js",
+// https://github.com/microsoft/TypeScript/issues/33136#issuecomment-578699134
+"../common/**/*.js"
+]
}


@@ -11,9 +11,11 @@
},
"devDependencies": {
"@sveltejs/vite-plugin-svelte": "^3.0.0",
+"@tailwindcss/typography": "^0.5.10",
"@tsconfig/svelte": "^5.0.2",
"@types/streamsaver": "^2.0.4",
"autoprefixer": "^10.4.16",
+"common": "workspace:",
"postcss": "^8.4.32",
"prettier": "^3.1.0",
"prettier-plugin-tailwindcss": "^0.5.9",


@@ -1,19 +1,24 @@
<script lang="ts">
-import type { Params } from "navaid";
import { currentRoute, type ComponentType } from "./lib/router";
import NotFound from "./lib/routes/NotFound.svelte";
-import DumpIndex from "./lib/routes/DumpIndex.svelte";
+import Home from "./lib/routes/Home.svelte";
+import Dump from "./lib/routes/Dump.svelte";
+import Portal from "./lib/routes/Portal.svelte";
import Dataset from "./lib/routes/Dataset.svelte";
function chooseComponent(route: ComponentType) {
if (route === "NotFound") return NotFound;
+else if (route === "Home") return Home;
else if (route === "Dataset") return Dataset;
-else if (route === "DumpIndex") return DumpIndex;
+else if (route === "Portal") return Portal;
+else if (route === "Dump") return Dump;
}
-$: component = chooseComponent($currentRoute.component);
-$: params = $currentRoute.params as any;
+$: r = {
+component: chooseComponent($currentRoute.component) as any,
+params: $currentRoute.params as any,
+};
</script>
-<svelte:component this={component} {params} />
+<svelte:component this={r.component} params={r.params} />


@@ -1,5 +1,5 @@
import streamSaver from "streamsaver";
-import { zData, type Distribution, zError } from "./schema";
+import { zData, type Distribution, zError, zDumpMetadata } from "common/schema";
export async function downloadFile(
dataPath: string,
@@ -48,13 +48,16 @@ async function loadGzippedJson(url: string): Promise<unknown> {
return json;
}
-const endpoint = "http://localhost:8081";
-export const gobData = `${endpoint}/datos.gob.ar_data.json`;
-export async function fetchData(url: string) {
-const json = await loadGzippedJson(`${url}/data.json.gz`);
+export async function fetchData(portalUrl: string) {
+const json = await loadGzippedJson(`${portalUrl}/data.json.gz`);
if (import.meta.env.DEV) console.debug(json);
return zData.parse(json);
}
+export async function fetchDumpMetadata(dumpUrl: string) {
+const json = await loadGzippedJson(`${dumpUrl}/dump-metadata.json.gz`);
+if (import.meta.env.DEV) console.debug(json);
+return zDumpMetadata.parse(json);
+}
export async function fetchErrors(url: string) {
const res = await fetchGzipped(`${url}/errors.jsonl.gz`);
const text = await res.text();
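
A sketch of how the routes end up using these two helpers, with an illustrative dump URL; the import path depends on where it is used (the Svelte components import `../fetch`):

```js
import { fetchData, fetchDumpMetadata } from "./fetch";

const dumpUrl = "https://archivos.nulo.ar/dump-2023-12-08";
// Dump.svelte: lists the portals recorded in <dump>/dump-metadata.json.gz
const metadata = await fetchDumpMetadata(dumpUrl);
// Portal.svelte / Dataset.svelte: read <dump>/<portal>/data.json.gz
const portalData = await fetchData(`${dumpUrl}/${metadata.sites[0].path}`);
console.log(portalData.title);
```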


@@ -0,0 +1,55 @@
<script lang="ts">
import { inject } from "regexparam";
import ChevronRight from "eva-icons/outline/svg/chevron-right-outline.svg?component";
import { routes } from "../router";
import Portal from "../routes/Portal.svelte";
export let params:
| { dumpUrl: string }
| { dumpUrl: string; portal: string }
| { dumpUrl: string; portal: string; id: string };
function generateDumpName(dumpUrl: string) {
const clean = decodeURIComponent(dumpUrl).replace(/\/+$/, "");
return clean.slice(clean.lastIndexOf("/") + 1);
}
$: dumpName = generateDumpName(params.dumpUrl);
</script>
<nav class="flex justify-between m-2">
<ol
class="flex items-center mb-3 text-sm text-neutral-500 [&_.active-breadcrumb]:text-neutral-600 [&_.active-breadcrumb]:font-medium sm:mb-0"
>
<li class="flex items-center h-full">
<a
href={inject(routes.Dump, params)}
class="inline-flex items-center px-2 py-1.5 space-x-1.5 rounded-md hover:text-neutral-900 hover:bg-neutral-100"
>
<span>{dumpName}</span>
</a>
</li>
{#if "portal" in params}
<ChevronRight class="w-5 h-5 text-gray-400" fill="currentColor" />
<li>
<a
href={inject(routes.Portal, params)}
class="inline-flex items-center px-2 py-1.5 space-x-1.5 font-normal rounded-md hover:bg-neutral-100 hover:text-neutral-900"
>
<span>{params.portal}</span>
</a>
</li>
{/if}
{#if "id" in params}
<ChevronRight class="w-5 h-5 text-gray-400" fill="currentColor" />
<li>
<a
href={inject(routes.Dataset, params)}
class="inline-flex items-center px-2 py-1.5 space-x-1.5 font-normal rounded-md hover:bg-neutral-100 hover:text-neutral-900"
>
<span>{params.id}</span>
</a>
</li>
{/if}
</ol>
</nav>
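
The breadcrumb label comes from generateDumpName; copied standalone below to show what it produces for the dump that Home.svelte links to:

```js
// Standalone copy of Nav.svelte's helper (usage illustrative).
function generateDumpName(dumpUrl) {
  const clean = decodeURIComponent(dumpUrl).replace(/\/+$/, "");
  return clean.slice(clean.lastIndexOf("/") + 1);
}

console.log(
  generateDumpName(encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/")),
); // "dump-2023-12-08"
```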


@@ -2,11 +2,13 @@ import navaid, { type Params } from "navaid";
import { writable } from "svelte/store";
export const routes = {
-DumpIndex: "/d/:dumpUrl",
-Dataset: "/d/:dumpUrl/dataset/:id",
+Home: "/",
+Dump: "/dump/:dumpUrl",
+Portal: "/dump/:dumpUrl/:portal",
+Dataset: "/dump/:dumpUrl/:portal/dataset/:id",
};
-export type ComponentType = "NotFound" | "DumpIndex" | "Dataset";
+export type ComponentType = "NotFound" | keyof typeof routes;
type Route = {
component: ComponentType;
@@ -15,12 +17,11 @@ type Route = {
export const currentRoute = writable<Route>();
export const router = navaid(undefined, () =>
-currentRoute.set({ component: "NotFound" })
+currentRoute.set({ component: "NotFound" }),
);
-router.on(routes.DumpIndex, (params) =>
-currentRoute.set({ component: "DumpIndex", params })
-);
-router.on(routes.Dataset, (params) =>
-currentRoute.set({ component: "Dataset", params })
-);
+for (const [component, path] of Object.entries(routes)) {
+router.on(path, (params) =>
+currentRoute.set({ component: component as keyof typeof routes, params }),
+);
+}
router.listen();
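
With the new patterns, a dataset URL carries the dump, the portal directory, and the dataset id. A sketch using regexparam's inject, the same helper the route components use, with illustrative params and the components' import path:

```js
import { inject } from "regexparam";
import { routes } from "../router";

// Params as navaid would parse them for routes.Dataset
// ("/dump/:dumpUrl/:portal/dataset/:id"); values illustrative.
const params = {
  dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
  portal: "datos.gob.ar_data.json",
  id: "turismo_fbc269ea-5f71-45b6-b70c-8eb38a03b8db",
};
const href = inject(routes.Dataset, params);
// -> /dump/https%3A%2F%2Farchivos.nulo.ar%2Fdump-2023-12-08%2F/datos.gob.ar_data.json/dataset/turismo_fbc269ea-5f71-45b6-b70c-8eb38a03b8db
```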


@@ -1,20 +1,23 @@
<script lang="ts">
import ArrowBack from "eva-icons/outline/svg/arrow-back-outline.svg?component";
import ExternalLink from "eva-icons/outline/svg/external-link-outline.svg?component";
-import { downloadFile, fetchData, fetchErrors } from "../dump";
+import { downloadFile, fetchData, fetchErrors } from "../fetch";
import NotFound from "./NotFound.svelte";
import { inject } from "regexparam";
import { routes } from "../router";
+import Nav from "../nav/Nav.svelte";
-export let params: { dumpUrl: string; id: string };
+export let params: { dumpUrl: string; portal: string; id: string };
-const url = decodeURIComponent(params.dumpUrl);
-const data = Promise.all([fetchData(url), fetchErrors(url)]).then(
+$: url = decodeURIComponent(params.dumpUrl) + "/" + params.portal;
+$: data = Promise.all([fetchData(url), fetchErrors(url)]).then(
([data, errors]) => ({ data, errors }),
);
</script>
<main class="mx-auto max-w-3xl">
+<Nav {params} />
<div class="rounded-lg border bg-white m-2">
{#await data}
<p class="p-6">Cargando dataset...</p>
@@ -27,7 +30,10 @@
<small>
<a
class="flex text-blue-500 leading-none gap-1 items-center"
-href={inject(routes.DumpIndex, { dumpUrl: params.dumpUrl })}
+href={inject(routes.Portal, {
+dumpUrl: params.dumpUrl,
+portal: params.portal,
+})}
>
<ArrowBack fill="currentColor" class="h-[1.25em]" /> Viendo {data.title}
</a>
@@ -68,9 +74,15 @@
</span>
{/if}
</h3>
-{#if error}
+{#if !dist.downloadURL}
<small class="block text-red-700">
-No está en este dump porque hubo un error al descargarlo :(
+No está en este archivo porque el link de descarga estaba
+roto en la fuente al momento de descargarlo :(
+</small>
+{:else if error}
+<small class="block text-red-700">
+No está en este archivo porque hubo un error al descargarlo
+:(
</small>
{/if}
{#if dist.fileName}
@@ -86,15 +98,17 @@
>Descargar</button
>
{/if}
+{#if dist.downloadURL}
<a
class="flex items-center leading-none text-gray-600 gap-1 pt-2"
href={dist.downloadURL}
target="_blank"
rel="noopener"
>
<ExternalLink fill="currentColor" class="h-4" />
Fuente
</a>
+{/if}
</div>
</li>
{/each}


@@ -0,0 +1,79 @@
<script lang="ts">
import { inject } from "regexparam";
import ExternalLink from "eva-icons/outline/svg/external-link-outline.svg?component";
import { fetchDumpMetadata } from "../fetch";
import { routes } from "../router";
export let params: { dumpUrl: string };
$: url = decodeURIComponent(params.dumpUrl);
$: metadataPromise = fetchDumpMetadata(url);
</script>
<main class="mx-auto max-w-3xl">
<div class="rounded-lg border bg-white m-2">
{#await metadataPromise}
<p class="p-6">Cargando..</p>
{:then metadata}
<header class="py-5 px-6 border-b border-b-gray-200 leading-none">
<small>
Viendo archivo en
<a
class="underline text-blue-500"
target="_blank"
rel="noopener"
href={url}>{url}</a
>
</small>
<!-- <h1 class="font-bold text-3xl">{data.title}</h1>
<p class="text-xl">{data.description}</p>
{#if data.homepage}
<a
class="flex items-center leading-none text-gray-600 gap-1 pt-2"
href={arreglarHomepageUrl(data.homepage)}
target="_blank"
rel="noopener"
>
<ExternalLink fill="currentColor" class="h-4" />
Fuente
</a>
{/if} -->
</header>
<ul class="divide-y divide-gray-100">
{#each metadata.sites as site}
{@const portalLink = inject(routes.Portal, {
dumpUrl: params.dumpUrl,
portal: site.path,
})}
<li>
<div class="flex px-6 py-5 justify-between gap-3">
<div class="flex flex-col">
<h3 class="text-lg">{site.title}</h3>
<p class="text-sm">{site.description}</p>
</div>
<div class="flex flex-col items-center justify-center shrink-0">
<a
href={portalLink}
class="inline-flex items-center justify-center px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none"
>Ver portal</a
>
<a
class="flex items-center leading-none text-gray-600 gap-1 pt-2"
href={site.url}
target="_blank"
rel="noopener"
>
<ExternalLink fill="currentColor" class="h-4" />
Fuente
</a>
</div>
</div>
</li>
{/each}
</ul>
{:catch error}
Hubo un error intenando cargar este archivo. <pre>{error}</pre>
{/await}
</div>
</main>


@@ -0,0 +1,44 @@
<script lang="ts">
import { inject } from "regexparam";
import { routes } from "../router";
</script>
<main class="p-2">
<div class="mx-auto rounded-lg border bg-white py-5 px-6 prose">
<h1>Archivo de portales de datos abiertos</h1>
<p>
Esta herramienta permite ver datos en archivos de portales de datos
abiertos de <a
href="https://github.com/catdevnull/transicion-desordenada-diablo/"
rel="noopener">transicion-desordenada-diablo</a
>
(un mejor nombre sería genial), creada en el marco de
<a href="https://bit.ly/CartaDatosAbiertos">un pedido hecho</a> al gobierno
entrante el 10 de diciembre de 2023 por garantizar el mantenimiento de las
políticas de datos públicos en Argentina.
</p>
<div class="not-prose flex place-content-center">
<a
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent(
"https://archivos.nulo.ar/dump-2023-12-08/",
),
})}
class="flex items-center justify-center px-4 py-2 text-xl font-medium text-white transition-colors duration-200 bg-blue-600 rounded-md hover:bg-blue-700 focus:ring-2 focus:ring-offset-2 focus:ring-blue-700 focus:shadow-outline focus:outline-none text-center"
>
Acceder al archivo creado el 8 de diciembre de 2023
</a>
</div>
<p>
Los archivos y las herramientas fueron creados por
<a href="https://nulo.ar">Nulo</a> con ayuda de varias personas. El código
está disponible
<a
href="https://github.com/catdevnull/transicion-desordenada-diablo/"
rel="noopener">en GitHub</a
>.
</p>
</div>
</main>


@@ -2,14 +2,15 @@
import { inject } from "regexparam";
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
import ExternalLink from "eva-icons/outline/svg/external-link-outline.svg?component";
-import { fetchData, fetchErrors } from "../dump";
+import { fetchData, fetchErrors } from "../fetch";
import { routes } from "../router";
-import type { Dataset } from "../schema";
+import type { Dataset } from "common/schema";
+import Nav from "../nav/Nav.svelte";
-export let params: { dumpUrl: string };
+export let params: { dumpUrl: string; portal: string };
-const url = decodeURIComponent(params.dumpUrl);
-const data = Promise.all([fetchData(url), fetchErrors(url)]).then(
+$: url = `${decodeURIComponent(params.dumpUrl)}/${params.portal}`;
+$: data = Promise.all([fetchData(url), fetchErrors(url)]).then(
([data, errors]) => ({ data, errors }),
);
@@ -19,21 +20,36 @@
return url;
}
+function processStringForSearch(str: string): string {
+return str
+.toLowerCase()
+.replaceAll("á", "a")
+.replaceAll("é", "e")
+.replaceAll("í", "i")
+.replaceAll("ó", "o")
+.replaceAll("ú", "u")
+.replaceAll("ñ", "n");
+}
let query: string = "";
function filterDatasets(datasets: Dataset[], query: string): Dataset[] {
+const q = processStringForSearch(query);
return datasets.filter(
(dataset) =>
-dataset.identifier.includes(query) || dataset.title.includes(query),
+processStringForSearch(dataset.identifier).includes(q) ||
+processStringForSearch(dataset.title).includes(q),
);
}
</script>
<main class="mx-auto max-w-3xl">
+<Nav {params} />
<div class="rounded-lg border bg-white m-2">
{#await data}
<p class="p-6">Cargando..</p>
{:then { data, errors }}
-<header class="py-5 px-6 border-b border-b-gray-200">
+<header class="py-5 px-6 border-b border-b-gray-200 leading-none">
<small>
Viendo portal archivado de
<a
@@ -71,6 +87,7 @@
{#each filterDatasets(data.dataset, query) as dataset}
{@const datasetLink = inject(routes.Dataset, {
dumpUrl: params.dumpUrl,
+portal: params.portal,
id: dataset.identifier,
})}
<li>
@@ -92,7 +109,7 @@
{/each}
</ul>
{:catch error}
-Hubo un error intenando cargar este dump. <pre>{error}</pre>
+Hubo un error intenando cargar este portal archivado. <pre>{error}</pre>
{/await}
</div>
</main>
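
The search normalization added here makes filtering case- and accent-insensitive; a standalone copy of the helper shows the effect:

```js
// Standalone copy of Portal.svelte's processStringForSearch.
function processStringForSearch(str) {
  return str
    .toLowerCase()
    .replaceAll("á", "a")
    .replaceAll("é", "e")
    .replaceAll("í", "i")
    .replaceAll("ó", "o")
    .replaceAll("ú", "u")
    .replaceAll("ñ", "n");
}

console.log(processStringForSearch("Energía Eléctrica")); // "energia electrica"
// so a query like "energia" now matches datasets titled "Energía …"
```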


@@ -4,5 +4,5 @@ export default {
theme: {
extend: {},
},
-plugins: [],
+plugins: [require("@tailwindcss/typography")],
};


@@ -15,6 +15,12 @@
"checkJs": true,
"isolatedModules": true
},
-"include": ["src/**/*.ts", "src/**/*.js", "src/**/*.svelte"],
+"include": [
+"src/**/*.ts",
+"src/**/*.js",
+"src/**/*.svelte",
+// https://github.com/microsoft/TypeScript/issues/33136#issuecomment-578699134
+"../common/**/*.js"
+],
"references": [{ "path": "./tsconfig.node.json" }]
}

pnpm-lock.yaml (new file, 1779 lines added)

File diff suppressed because it is too large.

pnpm-workspace.yaml (new file, 4 lines added)

@@ -0,0 +1,4 @@
packages:
- "frontend/"
- "downloader/"
- "common/"

readme.md (new file, 17 lines added)

@@ -0,0 +1,17 @@
# Transicion Desordeanada (diablo)
Herramientas para descargar masivamente portales de datos abiertos y generar un archivo, que luego se puede ver en una página web.
## [Downloader](./downloader)
El descargador.
## [Frontend](./frontend)
La página web para ver el archivo generado.
## Glosario
- Portal (de datos): algo que tiene un data.json en un formato similar a [DCAT 2](https://www.w3.org/TR/vocab-dcat-2/) (suelen ser portales [CKAN](https://ckan.org/))
- Archivo (dump): una versión descargada de uno o varios portales de datos
- Dataset: conjunto de archivos que suelen estar relacionados