Compare commits

...

10 commits

9 changed files with 84 additions and 57 deletions

View file

@ -9,7 +9,7 @@ import { zData } from "common/schema.js";
setGlobalDispatcher(
new Agent({
pipelining: 0,
})
}),
);
/** key es host
@ -43,7 +43,7 @@ const targets = urls.map((url) => {
});
for (const target of targets)
downloadFromData(target).catch((error) =>
console.error(`${target.type}+${target.url} FALLÓ CON`, error)
console.error(`${target.type}+${target.url} FALLÓ CON`, error),
);
/**
@ -68,6 +68,8 @@ async function downloadFromData(target) {
await open(join(outputPath, "errors.jsonl"), "w")
).createWriteStream();
try {
let nFinished = 0;
let nErrors = 0;
/** @type {DownloadJob[]} */
const jobs = parsed.dataset.flatMap((dataset) =>
dataset.distribution
@ -82,11 +84,12 @@ async function downloadFromData(target) {
return true;
} catch (error) {
errorFile.write(
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n"
JSON.stringify(encodeError({ dataset, dist }, error)) + "\n",
);
nErrors++;
return false;
}
}
},
)
.map((dist) => ({
dataset,
@ -94,11 +97,9 @@ async function downloadFromData(target) {
url: patchUrl(new URL(dist.downloadURL)),
outputPath,
attempts: 0,
}))
})),
);
const totalJobs = jobs.length;
let nFinished = 0;
let nErrors = 0;
// por las dudas verificar que no hayan archivos duplicados
chequearIdsDuplicados(jobs, outputPath);
@ -126,7 +127,7 @@ async function downloadFromData(target) {
process.stderr.write(`info[${outputPath}]: 0/${totalJobs} done\n`);
const interval = setInterval(() => {
process.stderr.write(
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`
`info[${outputPath}]: ${nFinished}/${totalJobs} done\n`,
);
}, 30000);
await Promise.all(promises);
@ -161,8 +162,8 @@ async function downloadDistWithRetries(job, attempts = 0) {
// intentar hasta 15 veces con 15 segundos de por medio
if (
error instanceof StatusCodeError &&
error.code === 403 &&
url.host === "minsegar-my.sharepoint.com" &&
((error.code === 403 && url.host === "minsegar-my.sharepoint.com") ||
(error.code === 503 && url.host === "cdn.buenosaires.gob.ar")) &&
attempts < 15
) {
await wait(15000);
@ -204,12 +205,12 @@ async function downloadDist({ dist, dataset, url, outputPath }) {
const fileDirPath = join(
outputPath,
sanitizeSuffix(dataset.identifier),
sanitizeSuffix(dist.identifier)
sanitizeSuffix(dist.identifier),
);
await mkdir(fileDirPath, { recursive: true });
const filePath = join(
fileDirPath,
sanitizeSuffix(dist.fileName || dist.identifier)
sanitizeSuffix(dist.fileName || dist.identifier),
);
if (!res.body) throw new Error("no body");
@ -239,11 +240,11 @@ function sanitizeSuffix(path) {
*/
function chequearIdsDuplicados(jobs, id) {
// Build one key per job from the dataset identifier and the distribution identifier,
// joined by a slash, and hand the list of keys to hasDuplicates.
const duplicated = hasDuplicates(
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`)
jobs.map((j) => `${j.dataset.identifier}/${j.dist.identifier}`),
);
// A truthy result only logs a warning tagged with id (the message says files may
// overwrite each other); nothing is thrown and nothing is returned.
if (duplicated) {
console.error(
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`
`ADVERTENCIA[${id}]: ¡encontré duplicados! es posible que se pisen archivos entre si`,
);
}
}

View file

@ -5,11 +5,13 @@
</script>
<a
class="flex items-center leading-none text-gray-600 dark:text-gray-300 gap-1 pt-2"
class="flex items-center gap-1 leading-none text-gray-600 dark:text-gray-300"
{href}
target="_blank"
rel="noopener"
>
<ExternalLink fill="currentColor" class="h-4" />
<span class="underline">Fuente</span>
<span class="underline">
<slot>Fuente</slot>
</span>
</a>

View file

@ -81,7 +81,7 @@
<small>{dist.fileName}</small>
{/if}
</div>
<div class="flex flex-col items-center">
<div class="flex flex-col items-center gap-2">
{#if !error}
<button
type="button"

View file

@ -41,8 +41,9 @@
})}
<li>
<div class="flex justify-between gap-3 px-6 py-5">
<div class="flex flex-col">
<h3 class="text-lg">{site.title}</h3>
<div class="flex flex-col gap-2">
<h3 class="text-lg leading-none">{site.title}</h3>
<SourceLink href={site.url}>{site.url}</SourceLink>
<p class="text-sm">{site.description}</p>
</div>
<div class="flex shrink-0 flex-col items-center justify-center">
@ -51,14 +52,16 @@
class="focus:shadow-outline inline-flex items-center justify-center rounded-md bg-blue-600 px-4 py-2 text-sm font-medium tracking-wide text-white transition-colors duration-200 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-700 focus:ring-offset-2"
>Ver portal</a
>
<SourceLink href={site.url} />
</div>
</div>
</li>
{/each}
</ul>
{:catch error}
Hubo un error intenando cargar este archivo. <pre>{error}</pre>
<div class="p-6">
<p>Hubo un error intenando cargar este archivo.</p>
<p class="text-red-700 dark:text-red-500">{error}</p>
</div>
{/await}
</Container>
</main>

View file

@ -1,6 +1,6 @@
<script lang="ts">
import Container from "../components/Container.svelte";
import Portal from "./Home/Portal.svelte";
import Dump from "./Home/Dump.svelte";
</script>
<main class="prose mx-auto dark:prose-invert">
@ -27,7 +27,22 @@
<ul
class="divide-y divide-gray-100 border-y border-y-gray-100 dark:divide-gray-700 dark:border-y-gray-700"
>
<Portal />
<!-- nPortales: find . -maxdepth 1 -mindepth 1 -type d | wc -l -->
<!-- nDatasets: jq '.dataset | length' */data.json | awk '{s+=$1} END {print s}' -->
<!-- size: du -sh -->
<Dump
dumpUrl="https://archivos.nulo.ar/portales-de-datos/dump-2023-12-09/"
nPortales={50}
nDatasets={4098}
size="147 GB"
glow>9 de diciembre de 2023</Dump
>
<Dump
dumpUrl="https://archivos.nulo.ar/dump-2023-12-08/"
nPortales={43}
nDatasets={3277}
size="100 GB">8 de diciembre de 2023</Dump
>
</ul>
</div>

View file

@ -0,0 +1,34 @@
<script lang="ts">
// List item that links to the Dump route for one dump. The default slot is rendered
// as the heading, followed by three badges (portal count, dataset count, size).
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
import { inject } from "regexparam";
import { routes } from "../../router";
import DumpBadge from "./DumpBadge.svelte";
// URL of the dump; it is URI-encoded and injected as the dumpUrl parameter of routes.Dump.
export let dumpUrl: string;
// Number shown in the first badge, followed by the word portales.
export let nPortales: number;
// Number shown in the second badge, followed by the word datasets.
export let nDatasets: number;
// Text shown as-is in the third badge.
export let size: string;
// When true, the shadow-glow class is applied to the link; off by default.
export let glow: boolean = false;
</script>
<li>
<a
class="flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
class:shadow-glow={glow}
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent(dumpUrl),
})}
>
<div>
<h3 class="text-lg"><slot /></h3>
<DumpBadge>{nPortales} portales</DumpBadge>
<DumpBadge>{nDatasets} datasets</DumpBadge>
<DumpBadge>{size}</DumpBadge>
</div>
<ArrowForward
fill="currentColor"
aria-hidden="true"
class="w-6 shrink-0 text-gray-600 dark:text-gray-400"
/>
</a>
</li>

View file

@ -0,0 +1,5 @@
<!-- Small pill-shaped label (fully rounded, bordered, blue text) that renders its default slot content. -->
<span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span><slot /></span>
</span>

View file

@ -1,33 +0,0 @@
<script lang="ts">
import ArrowForward from "eva-icons/outline/svg/arrow-forward-outline.svg?component";
import { inject } from "regexparam";
import { routes } from "../../router";
</script>
<li>
<a
class="shadow-glow flex justify-between px-6 py-5 shadow-blue-300 hover:bg-gray-50 dark:hover:bg-gray-700"
href={inject(routes.Dump, {
dumpUrl: encodeURIComponent("https://archivos.nulo.ar/dump-2023-12-08/"),
})}
>
<div>
<h3 class="text-lg">8 de diciembre de 2023</h3>
<!-- <span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>portales</span>
</span>
<span
class="relative ml-1 inline-flex items-center rounded-full border border-current px-2 py-1 text-xs font-semibold text-blue-800 dark:text-blue-400"
>
<span>datasets</span>
</span> -->
</div>
<ArrowForward
fill="currentColor"
aria-hidden="true"
class="w-6 shrink-0 text-gray-600 dark:text-gray-400 "
/>
</a>
</li>

View file

@ -51,7 +51,7 @@
<p class="p-6">Cargando..</p>
{:then { data, errors }}
<header
class="border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
class="flex flex-col gap-1 border-b border-b-gray-200 px-6 py-5 leading-none dark:border-b-gray-700"
>
<small>
Viendo portal archivado de