mirror of
https://github.com/catdevnull/transicion-desordenada-diablo
synced 2024-11-26 11:26:18 +00:00
Compare commits
10 commits
5aa96adc8d
...
edb1a03803
Author | SHA1 | Date | |
---|---|---|---|
edb1a03803 | |||
489934583e | |||
c4a86419fb | |||
5eb83fd746 | |||
ce9604b3d7 | |||
634b4255d7 | |||
5a62f78753 | |||
9a623133d3 | |||
5607d46726 | |||
49bde3947c |
9 changed files with 84 additions and 57 deletions
|
@ -9,7 +9,7 @@ import { zData } from "common/schema.js";
|
|||
setGlobalDispatcher(
|
||||
new Agent({
|
||||
pipelining: 0,
|
||||
})
|
||||
}),
|
||||
);
|
||||
|
||||
/** key es host
|
||||
|
@ -43,7 +43,7 @@ const targets = urls.map((url) => {
|
|||
});
|
||||
for (const target of targets)
|
||||
downloadFromData(target).catch((error) =>
|
||||
console.error(`${target.type}+${target.url} FALLÓ CON`, error)
|
||||
console.error(`${target.type}+${target.url} FALLÓ CON`, error),
|
||||
);
|
||||
|
||||
/**
|
||||
|
@ -68,6 +68,8 @@ async function downloadFromData(target) {
|
|||
await open(join(outputPath, "errors.jsonl"), "w")
|
||||
).createWriteStream();
|
||||
try {
|
||||
let nFinished = 0;
|
||||
let nErrors = 0;
|
||||
/** @type {DownloadJob[]} */
|
||||
const jobs = parsed.dataset.flatMap((dataset) =>
|
||||
dataset.distribution
|
||||
|
@ -82,11 +84,12 @@ async function downloadFromData(target) {
|
|||
return true;
|
||||
} catch (error) {
|
||||
errorFile.write(
|
||||
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
|
||||
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n",
|
||||
);
|
||||
nErrors++;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
.map((dist) => ({
|
||||
dataset,
|
||||
|
@ -94,11 +97,9 @@ async function downloadFromData(target) {
|
|||
url: patchUrl(new URL(dist.downloadURL)),
|
||||
outputPath,
|
||||
attempts: 0,
|
||||
}))
|
||||
})),
|
||||
);
|
||||
const totalJobs = jobs.length;
|
||||
let nFinished = 0;
|
||||
let nErrors = 0;
|
||||
|
||||
// por las dudas verificar que no hayan archivos duplicados
|
||||
chequearIdsDuplicados(jobs, outputPath);
|
||||
|
@ -126,7 +127,7 @@ async function downloadFromData(target) {
|
|||
process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`);
|
||||
const interval = setInterval(() => {
|
||||
process.stderr.write(
|
||||
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`
|
||||
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`,
|
||||
);
|
||||
}, 30000);
|
||||
await Promise.all(promises);
|
||||
|
@ -161,8 +162,8 @@ async function downloadDistWithRetries(job, attempts = 0) {
|
|||
// intentar hasta 15 veces con 15 segundos de por medio
|
||||
if (
|
||||
error instanceof StatusCodeError &&
|
||||
error.code === 403 &&
|
||||
url.host === "minsegar-my.sharepoint.com" &&
|
||||
((error.code === 403 && url.host === "minsegar-my.sharepoint.com") ||
|
||||
(error.code === 503 && url.host === "cdn.buenosaires.gob.ar")) &&
|
||||
attempts < 15
|
||||
) {
|
||||
await wait(15000);
|
||||
|
@ -204,12 +205,12 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
|
|||
const fileDirPath = join(
|
||||
outputPath,
|
||||
sanitizeSuffix(dataset.identifier),
|
||||
sanitizeSuffix(dist.identifier)
|
||||
sanitizeSuffix(dist.identifier),
|
||||
);
|
||||
await mkdir(fileDirPath, { recursive: true });
|
||||
const filePath = join(
|
||||
fileDirPath,
|
||||
sanitizeSuffix(dist.fileName || dist.identifier)
|
||||
sanitizeSuffix(dist.fileName || dist.identifier),
|
||||
);
|
||||
|
||||
if (!res.body) throw new Error("no body");
|
||||
|
@ -239,11 +240,11 @@ function sanitizeSuffix(path) {
|
|||
*/
|
||||
function chequearIdsDuplicados(jobs, id) {
|
||||
const duplicated = hasDuplicates(
|
||||
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`)
|
||||
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`),
|
||||
);
|
||||
if (duplicated) {
|
||||
console.error(
|
||||
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`
|
||||
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,11 +5,13 @@
|
|||
</script>
|
||||
|
||||
<a
|
||||
class="flex items-center leading-none text-gray-600 dark:text-gray-300 gap-1 pt-2"
|
||||
class="flex items-center gap-1 leading-none text-gray-600 dark:text-gray-300"
|
||||
{href}
|
||||
target="_blank"
|
||||
rel="noopener"
|
||||
>
|
||||
<ExternalLink fill="currentColor" class="h-4" />
|
||||
<span class="underline">Fuente</span>
|
||||
<span class="underline">
|
||||
<slot>Fuente</slot>
|
||||
</span>
|
||||
</a>
|
||||
|
|
|
@ -81,7 +81,7 @@
|
|||
<small>{dist.fileName}</small>
|
||||
{/if}
|
||||
</div>
|
||||
<div class="flex flex-col items-center">
|
||||
<div class="flex flex-col items-center gap-2">
|
||||
{#if !error}
|
||||
<button
|
||||
type="button"
|
||||
|
|
|
@ -41,8 +41,9 @@
|
|||
})}
|
||||
<li>
|
||||
<div class="flex justify-between gap-3 px-6 py-5">
|
||||
<div class="flex flex-col">
|
||||
<h3 class="text-lg">{site.title}</h3>
|
||||
<div class="flex flex-col gap-2">
|
||||
<h3 class="text-lg leading-none">{site.title}</h3>
|
||||
<SourceLink href={site.url}>{site.url}</SourceLink>
|
||||
<p class="text-sm">{site.description}</p>
|
||||
</div>
|
||||
<div class="flex shrink-0 flex-col items-center justify-center">
|
||||
|
@ -51,14 +52,16 @@
|
|||
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
|
||||
>Ver portal</a
|
||||
>
|
||||
<SourceLink href={site.url} />
|
||||
</div>
|
||||
</div>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{:catch error}
|
||||
Hubo un error intenando cargar este archivo. <pre>{error}</pre>
|
||||
<div class="p-6">
|
||||
<p>Hubo un error intenando cargar este archivo.</p>
|
||||
<p class="text-red-700 dark:text-red-500">{error}</p>
|
||||
</div>
|
||||
{/await}
|
||||
</Container>
|
||||
</main>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<script lang="ts">
|
||||
import Container from "../components/Container.svelte";
|
||||
import Portal from "./Home/Portal.svelte";
|
||||
import Dump from "./Home/Dump.svelte";
|
||||
</script>
|
||||
|
||||
<main class="prose mx-auto dark:prose-invert">
|
||||
|
@ -27,7 +27,22 @@
|
|||
<ul
|
||||
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
|
||||
>
|
||||
<Portal />
|
||||
<!-- nPortales: find . -maxdepth 1 -mindepth 1 -type d | wc -l -->
|
||||
<!-- nDatasets: jq '.dataset | length' */data.json | awk '{s+=$1} END {print s}' -->
|
||||
<!-- size: du -sh -->
|
||||
<Dump
|
||||
dumpUrl="https://archivos.nulo.ar/portales-de-datos/dump-2023-12-09/"
|
||||
nPortales={50}
|
||||
nDatasets={4098}
|
||||
size="147 GB"
|
||||
glow>9 de diciembre de 2023</Dump
|
||||
>
|
||||
<Dump
|
||||
dumpUrl="https://archivos.nulo.ar/dump-2023-12-08/"
|
||||
nPortales={43}
|
||||
nDatasets={3277}
|
||||
size="100 GB">8 de diciembre de 2023</Dump
|
||||
>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
|
|
34
frontend/src/lib/routes/Home/Dump.svelte
Normal file
34
frontend/src/lib/routes/Home/Dump.svelte
Normal file
|
@ -0,0 +1,34 @@
|
|||
<script lang="ts">
|
||||
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
|
||||
import { inject } from "regexparam";
|
||||
import { routes } from "../../router";
|
||||
import DumpBadge from "./DumpBadge.svelte";
|
||||
|
||||
export let dumpUrl: string;
|
||||
export let nPortales: number;
|
||||
export let nDatasets: number;
|
||||
export let size: string;
|
||||
export let glow: boolean = false;
|
||||
</script>
|
||||
|
||||
<li>
|
||||
<a
|
||||
class="flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
|
||||
class:shadow-glow={glow}
|
||||
href={inject(routes.Dump, {
|
||||
dumpUrl: encodeURIComponent(dumpUrl),
|
||||
})}
|
||||
>
|
||||
<div>
|
||||
<h3 class="text-lg"><slot /></h3>
|
||||
<DumpBadge>{nPortales} portales</DumpBadge>
|
||||
<DumpBadge>{nDatasets} datasets</DumpBadge>
|
||||
<DumpBadge>{size}</DumpBadge>
|
||||
</div>
|
||||
<ArrowForward
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
class="w-6 shrink-0 text-gray-600 dark:text-gray-400"
|
||||
/>
|
||||
</a>
|
||||
</li>
|
5
frontend/src/lib/routes/Home/DumpBadge.svelte
Normal file
5
frontend/src/lib/routes/Home/DumpBadge.svelte
Normal file
|
@ -0,0 +1,5 @@
|
|||
<span
|
||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
<span><slot /></span>
|
||||
</span>
|
|
@ -1,33 +0,0 @@
|
|||
<script lang="ts">
|
||||
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
|
||||
import { inject } from "regexparam";
|
||||
import { routes } from "../../router";
|
||||
</script>
|
||||
|
||||
<li>
|
||||
<a
|
||||
class="shadow-glow flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
|
||||
href={inject(routes.Dump, {
|
||||
dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
|
||||
})}
|
||||
>
|
||||
<div>
|
||||
<h3 class="text-lg">8 de diciembre de 2023</h3>
|
||||
<!-- <span
|
||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
<span>portales</span>
|
||||
</span>
|
||||
<span
|
||||
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
<span>datasets</span>
|
||||
</span> -->
|
||||
</div>
|
||||
<ArrowForward
|
||||
fill="currentColor"
|
||||
aria-hidden="true"
|
||||
class="w-6 shrink-0 text-gray-600 dark:text-gray-400 "
|
||||
/>
|
||||
</a>
|
||||
</li>
|
|
@ -51,7 +51,7 @@
|
|||
<p class="p-6">Cargando..</p>
|
||||
{:then { data, errors }}
|
||||
<header
|
||||
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
|
||||
class="flex flex-col gap-1 border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
|
||||
>
|
||||
<small>
|
||||
Viendo portal archivado de
|
||||
|
|
Loading…
Reference in a new issue