script que descarga todos los dumps

This commit is contained in:
Cat /dev/Nulo 2024-09-12 18:47:43 -03:00
parent 97ff29186c
commit 02f63ab709
3 changed files with 43 additions and 5 deletions

Binary file not shown.

View file

@ -4,14 +4,17 @@ import { listDirectory } from "./b2";
import { isSameDay } from "date-fns"; import { isSameDay } from "date-fns";
import { indexResources } from "./index-resources"; import { indexResources } from "./index-resources";
const IndexEntry = z.object({ export const IndexEntry = z.object({
id: z.string(), id: z.string(),
warnings: z.string(), warnings: z.string(),
name: z.string().optional(), name: z.string().optional(),
link: z.string().optional(), link: z.string().optional(),
firstSeenAt: z.string(), firstSeenAt: z.coerce.date(),
}); });
type IndexEntry = z.infer<typeof IndexEntry>; export type IndexEntry = z.infer<typeof IndexEntry>;
export const IndexJson = z.record(z.string(), z.array(IndexEntry));
export type IndexJson = z.infer<typeof IndexJson>;
export async function generateIndexes() { export async function generateIndexes() {
const resourcesIndex = await indexResources(); const resourcesIndex = await indexResources();
@ -121,7 +124,7 @@ esto esta automáticamente generado por sepa-index-gen dentro de preciazo.`;
minute: "2-digit", minute: "2-digit",
}); });
let jsonIndex: Record<string, IndexEntry[]> = {}; let jsonIndex: IndexJson = {};
for (const dateStr of dates) { for (const dateStr of dates) {
const date = new Date(dateStr); const date = new Date(dateStr);
@ -154,7 +157,7 @@ esto esta automáticamente generado por sepa-index-gen dentro de preciazo.`;
warnings: warnings.trim(), warnings: warnings.trim(),
name: fileExists, name: fileExists,
link, link,
firstSeenAt: resource.firstSeenAt.toISOString(), firstSeenAt: resource.firstSeenAt,
}); });
} }
} }

View file

@ -0,0 +1,35 @@
import PQueue from "p-queue";
import { IndexEntry, IndexJson } from "../index-gen";
import { $ } from "bun";
import { existsSync } from "fs";
/**
 * Fetches the published `index.json` of the sepa-precios-metadata repository
 * and validates it against the `IndexJson` schema.
 *
 * @returns The parsed index as produced by `IndexJson.parse`.
 * @throws Error when the HTTP response is not successful, so that an error
 *   page is never handed to the JSON/schema parser. `IndexJson.parse` itself
 *   throws when the payload does not match the schema.
 */
async function getIndex() {
  const res = await fetch(
    "https://raw.githubusercontent.com/catdevnull/sepa-precios-metadata/main/index.json"
  );
  if (!res.ok) {
    throw new Error(
      `failed to fetch index.json: ${res.status} ${res.statusText}`
    );
  }
  return IndexJson.parse(await res.json());
}
const index = await getIndex();

// For every group in the index keep only the most recently seen resource that
// has no warnings and a download link. Groups without any usable resource are
// dropped here instead of yielding `undefined`, which would crash later on
// `resource.link`.
const latestResources = Object.values(index).flatMap((entries) => {
  const newest = entries
    .filter(
      (r): r is IndexEntry & { link: string } => !!(!r.warnings && r.link)
    )
    .sort((a, b) => +b.firstSeenAt - +a.firstSeenAt)[0];
  return newest ? [newest] : [];
});

// Download with at most 10 transfers in flight at a time.
const queue = new PQueue({ concurrency: 10 });
const downloads = latestResources.map((resource) =>
  queue.add(async () => {
    const filename = resource.link.split("/").pop();
    if (!filename) {
      throw new Error(`cannot derive a filename from ${resource.link}`);
    }
    // Already downloaded by a previous run.
    if (existsSync(filename)) return;
    // --fail makes curl exit non-zero on HTTP errors, so an error page is never
    // renamed into place as if it were a valid dump.
    await $`curl --fail ${resource.link} -o ${filename}.temp`;
    // Only move the file to its final name once the download has completed, so
    // an interrupted transfer is retried on the next run.
    await $`mv ${filename}.temp ${filename}`;
  })
);

// Wait for every download to settle so one failure does not hide the others,
// then fail the script if any of them went wrong.
const results = await Promise.allSettled(downloads);
const failures = results.filter(
  (r): r is PromiseRejectedResult => r.status === "rejected"
);
if (failures.length > 0) {
  const reasons = failures.map((f) => String(f.reason)).join("\n");
  throw new Error(
    `${failures.length} of ${results.length} downloads failed:\n${reasons}`
  );
}