mirror of
https://github.com/catdevnull/transicion-desordenada-diablo
synced 2024-11-26 11:26:18 +00:00
Compare commits
10 commits
5aa96adc8d
...
edb1a03803
Author | SHA1 | Date | |
---|---|---|---|
edb1a03803 | |||
489934583e | |||
c4a86419fb | |||
5eb83fd746 | |||
ce9604b3d7 | |||
634b4255d7 | |||
5a62f78753 | |||
9a623133d3 | |||
5607d46726 | |||
49bde3947c |
9 changed files with 84 additions and 57 deletions
|
@ -9,7 +9,7 @@ import { zData } from "common/schema.js";
|
||||||
setGlobalDispatcher(
|
setGlobalDispatcher(
|
||||||
new Agent({
|
new Agent({
|
||||||
pipelining: 0,
|
pipelining: 0,
|
||||||
})
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
/** key es host
|
/** key es host
|
||||||
|
@ -43,7 +43,7 @@ const targets = urls.map((url) => {
|
||||||
});
|
});
|
||||||
for (const target of targets)
|
for (const target of targets)
|
||||||
downloadFromData(target).catch((error) =>
|
downloadFromData(target).catch((error) =>
|
||||||
console.error(`${target.type}+${target.url} FALLÓ CON`, error)
|
console.error(`${target.type}+${target.url} FALLÓ CON`, error),
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -68,6 +68,8 @@ async function downloadFromData(target) {
|
||||||
await open(join(outputPath, "errors.jsonl"), "w")
|
await open(join(outputPath, "errors.jsonl"), "w")
|
||||||
).createWriteStream();
|
).createWriteStream();
|
||||||
try {
|
try {
|
||||||
|
let nFinished = 0;
|
||||||
|
let nErrors = 0;
|
||||||
/** @type {DownloadJob[]} */
|
/** @type {DownloadJob[]} */
|
||||||
const jobs = parsed.dataset.flatMap((dataset) =>
|
const jobs = parsed.dataset.flatMap((dataset) =>
|
||||||
dataset.distribution
|
dataset.distribution
|
||||||
|
@ -82,11 +84,12 @@ async function downloadFromData(target) {
|
||||||
return true;
|
return true;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
errorFile.write(
|
errorFile.write(
|
||||||
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
|
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n",
|
||||||
);
|
);
|
||||||
|
nErrors++;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
.map((dist) => ({
|
.map((dist) => ({
|
||||||
dataset,
|
dataset,
|
||||||
|
@ -94,11 +97,9 @@ async function downloadFromData(target) {
|
||||||
url: patchUrl(new URL(dist.downloadURL)),
|
url: patchUrl(new URL(dist.downloadURL)),
|
||||||
outputPath,
|
outputPath,
|
||||||
attempts: 0,
|
attempts: 0,
|
||||||
}))
|
})),
|
||||||
);
|
);
|
||||||
const totalJobs = jobs.length;
|
const totalJobs = jobs.length;
|
||||||
let nFinished = 0;
|
|
||||||
let nErrors = 0;
|
|
||||||
|
|
||||||
// por las dudas verificar que no hayan archivos duplicados
|
// por las dudas verificar que no hayan archivos duplicados
|
||||||
chequearIdsDuplicados(jobs, outputPath);
|
chequearIdsDuplicados(jobs, outputPath);
|
||||||
|
@ -126,7 +127,7 @@ async function downloadFromData(target) {
|
||||||
process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`);
|
process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`);
|
||||||
const interval = setInterval(() => {
|
const interval = setInterval(() => {
|
||||||
process.stderr.write(
|
process.stderr.write(
|
||||||
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`
|
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`,
|
||||||
);
|
);
|
||||||
}, 30000);
|
}, 30000);
|
||||||
await Promise.all(promises);
|
await Promise.all(promises);
|
||||||
|
@ -161,8 +162,8 @@ async function downloadDistWithRetries(job, attempts = 0) {
|
||||||
// intentar hasta 15 veces con 15 segundos de por medio
|
// intentar hasta 15 veces con 15 segundos de por medio
|
||||||
if (
|
if (
|
||||||
error instanceof StatusCodeError &&
|
error instanceof StatusCodeError &&
|
||||||
error.code === 403 &&
|
((error.code === 403 && url.host === "minsegar-my.sharepoint.com") ||
|
||||||
url.host === "minsegar-my.sharepoint.com" &&
|
(error.code === 503 && url.host === "cdn.buenosaires.gob.ar")) &&
|
||||||
attempts < 15
|
attempts < 15
|
||||||
) {
|
) {
|
||||||
await wait(15000);
|
await wait(15000);
|
||||||
|
@ -204,12 +205,12 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
|
||||||
const fileDirPath = join(
|
const fileDirPath = join(
|
||||||
outputPath,
|
outputPath,
|
||||||
sanitizeSuffix(dataset.identifier),
|
sanitizeSuffix(dataset.identifier),
|
||||||
sanitizeSuffix(dist.identifier)
|
sanitizeSuffix(dist.identifier),
|
||||||
);
|
);
|
||||||
await mkdir(fileDirPath, { recursive: true });
|
await mkdir(fileDirPath, { recursive: true });
|
||||||
const filePath = join(
|
const filePath = join(
|
||||||
fileDirPath,
|
fileDirPath,
|
||||||
sanitizeSuffix(dist.fileName || dist.identifier)
|
sanitizeSuffix(dist.fileName || dist.identifier),
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!res.body) throw new Error("no body");
|
if (!res.body) throw new Error("no body");
|
||||||
|
@ -239,11 +240,11 @@ function sanitizeSuffix(path) {
|
||||||
*/
|
*/
|
||||||
function chequearIdsDuplicados(jobs, id) {
|
function chequearIdsDuplicados(jobs, id) {
|
||||||
const duplicated = hasDuplicates(
|
const duplicated = hasDuplicates(
|
||||||
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`)
|
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`),
|
||||||
);
|
);
|
||||||
if (duplicated) {
|
if (duplicated) {
|
||||||
console.error(
|
console.error(
|
||||||
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`
|
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,11 +5,13 @@
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<a
|
<a
|
||||||
class="flex items-center leading-none text-gray-600 dark:text-gray-300 gap-1 pt-2"
|
class="flex items-center gap-1 leading-none text-gray-600 dark:text-gray-300"
|
||||||
{href}
|
{href}
|
||||||
target="_blank"
|
target="_blank"
|
||||||
rel="noopener"
|
rel="noopener"
|
||||||
>
|
>
|
||||||
<ExternalLink fill="currentColor" class="h-4" />
|
<ExternalLink fill="currentColor" class="h-4" />
|
||||||
<span class="underline">Fuente</span>
|
<span class="underline">
|
||||||
|
<slot>Fuente</slot>
|
||||||
|
</span>
|
||||||
</a>
|
</a>
|
||||||
|
|
|
@ -81,7 +81,7 @@
|
||||||
<small>{dist.fileName}</small>
|
<small>{dist.fileName}</small>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
<div class="flex flex-col items-center">
|
<div class="flex flex-col items-center gap-2">
|
||||||
{#if !error}
|
{#if !error}
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
|
|
|
@ -41,8 +41,9 @@
|
||||||
})}
|
})}
|
||||||
<li>
|
<li>
|
||||||
<div class="flex justify-between gap-3 px-6 py-5">
|
<div class="flex justify-between gap-3 px-6 py-5">
|
||||||
<div class="flex flex-col">
|
<div class="flex flex-col gap-2">
|
||||||
<h3 class="text-lg">{site.title}</h3>
|
<h3 class="text-lg leading-none">{site.title}</h3>
|
||||||
|
<SourceLink href={site.url}>{site.url}</SourceLink>
|
||||||
<p class="text-sm">{site.description}</p>
|
<p class="text-sm">{site.description}</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex shrink-0 flex-col items-center justify-center">
|
<div class="flex shrink-0 flex-col items-center justify-center">
|
||||||
|
@ -51,14 +52,16 @@
|
||||||
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
|
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
|
||||||
>Ver portal</a
|
>Ver portal</a
|
||||||
>
|
>
|
||||||
<SourceLink href={site.url} />
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</li>
|
</li>
|
||||||
{/each}
|
{/each}
|
||||||
</ul>
|
</ul>
|
||||||
{:catch error}
|
{:catch error}
|
||||||
Hubo un error intenando cargar este archivo. <pre>{error}</pre>
|
<div class="p-6">
|
||||||
|
<p>Hubo un error intenando cargar este archivo.</p>
|
||||||
|
<p class="text-red-700 dark:text-red-500">{error}</p>
|
||||||
|
</div>
|
||||||
{/await}
|
{/await}
|
||||||
</Container>
|
</Container>
|
||||||
</main>
|
</main>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import Container from "../components/Container.svelte";
|
import Container from "../components/Container.svelte";
|
||||||
import Portal from "./Home/Portal.svelte";
|
import Dump from "./Home/Dump.svelte";
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<main class="prose mx-auto dark:prose-invert">
|
<main class="prose mx-auto dark:prose-invert">
|
||||||
|
@ -27,7 +27,22 @@
|
||||||
<ul
|
<ul
|
||||||
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
|
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
|
||||||
>
|
>
|
||||||
<Portal />
|
<!-- nPortales: find . -maxdepth 1 -mindepth 1 -type d | wc -l -->
|
||||||
|
<!-- nDatasets: jq '.dataset | length' */data.json | awk '{s+=$1} END {print s}' -->
|
||||||
|
<!-- size: du -sh -->
|
||||||
|
<Dump
|
||||||
|
dumpUrl="https://archivos.nulo.ar/portales-de-datos/dump-2023-12-09/"
|
||||||
|
nPortales={50}
|
||||||
|
nDatasets={4098}
|
||||||
|
size="147 GB"
|
||||||
|
glow>9 de diciembre de 2023</Dump
|
||||||
|
>
|
||||||
|
<Dump
|
||||||
|
dumpUrl="https://archivos.nulo.ar/dump-2023-12-08/"
|
||||||
|
nPortales={43}
|
||||||
|
nDatasets={3277}
|
||||||
|
size="100 GB">8 de diciembre de 2023</Dump
|
||||||
|
>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
34
frontend/src/lib/routes/Home/Dump.svelte
Normal file
34
frontend/src/lib/routes/Home/Dump.svelte
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
<script lang="ts">
|
||||||
|
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
|
||||||
|
import { inject } from "regexparam";
|
||||||
|
import { routes } from "../../router";
|
||||||
|
import DumpBadge from "./DumpBadge.svelte";
|
||||||
|
|
||||||
|
export let dumpUrl: string;
|
||||||
|
export let nPortales: number;
|
||||||
|
export let nDatasets: number;
|
||||||
|
export let size: string;
|
||||||
|
export let glow: boolean = false;
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<li>
|
||||||
|
<a
|
||||||
|
class="flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
|
||||||
|
class:shadow-glow={glow}
|
||||||
|
href={inject(routes.Dump, {
|
||||||
|
dumpUrl: encodeURIComponent(dumpUrl),
|
||||||
|
})}
|
||||||
|
>
|
||||||
|
<div>
|
||||||
|
<h3 class="text-lg"><slot /></h3>
|
||||||
|
<DumpBadge>{nPortales} portales</DumpBadge>
|
||||||
|
<DumpBadge>{nDatasets} datasets</DumpBadge>
|
||||||
|
<DumpBadge>{size}</DumpBadge>
|
||||||
|
</div>
|
||||||
|
<ArrowForward
|
||||||
|
fill="currentColor"
|
||||||
|
aria-hidden="true"
|
||||||
|
class="w-6 shrink-0 text-gray-600 dark:text-gray-400"
|
||||||
|
/>
|
||||||
|
</a>
|
||||||
|
</li>
|
5
frontend/src/lib/routes/Home/DumpBadge.svelte
Normal file
5
frontend/src/lib/routes/Home/DumpBadge.svelte
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
<span
|
||||||
|
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||||
|
>
|
||||||
|
<span><slot /></span>
|
||||||
|
</span>
|
|
@ -1,33 +0,0 @@
|
||||||
<script lang="ts">
|
|
||||||
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
|
|
||||||
import { inject } from "regexparam";
|
|
||||||
import { routes } from "../../router";
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<li>
|
|
||||||
<a
|
|
||||||
class="shadow-glow flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
|
|
||||||
href={inject(routes.Dump, {
|
|
||||||
dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
|
|
||||||
})}
|
|
||||||
>
|
|
||||||
<div>
|
|
||||||
<h3 class="text-lg">8 de diciembre de 2023</h3>
|
|
||||||
<!-- <span
|
|
||||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
|
||||||
>
|
|
||||||
<span>portales</span>
|
|
||||||
</span>
|
|
||||||
<span
|
|
||||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
|
||||||
>
|
|
||||||
<span>datasets</span>
|
|
||||||
</span> -->
|
|
||||||
</div>
|
|
||||||
<ArrowForward
|
|
||||||
fill="currentColor"
|
|
||||||
aria-hidden="true"
|
|
||||||
class="w-6 shrink-0 text-gray-600 dark:text-gray-400 "
|
|
||||||
/>
|
|
||||||
</a>
|
|
||||||
</li>
|
|
|
@ -51,7 +51,7 @@
|
||||||
<p class="p-6">Cargando..</p>
|
<p class="p-6">Cargando..</p>
|
||||||
{:then { data, errors }}
|
{:then { data, errors }}
|
||||||
<header
|
<header
|
||||||
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
|
class="flex flex-col gap-1 border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
|
||||||
>
|
>
|
||||||
<small>
|
<small>
|
||||||
Viendo portal archivado de
|
Viendo portal archivado de
|
||||||
|
|
Loading…
Reference in a new issue