preciazo/sepa/index-gen/index.ts

167 lines
5.1 KiB
TypeScript
Raw Normal View History

2024-09-12 21:17:10 +00:00
import { zResource, type Resource } from "../ckan/schemas";
2024-09-07 23:41:44 +00:00
import { z } from "zod";
import { listDirectory } from "./b2";
import { isSameDay } from "date-fns";
import { indexResources } from "./index-resources";
2024-09-12 21:47:43 +00:00
/**
 * Schema for one archived resource as listed in the JSON index produced by
 * `generateIndexes`.
 *
 * - `id`: `<resource id>-revID-<revision id>`.
 * - `warnings`: human-readable warnings about the resource; empty string when
 *   there are none.
 * - `name`: name of the stored file whose name starts with `id`, if one exists.
 * - `link`: download URL for that file, if one exists.
 * - `firstSeenAt`: date at which the resource revision was first seen; coerced
 *   to a `Date`, so serialized date strings are accepted when parsing.
 */
export const IndexEntry = z.object({
  id: z.string(),
  warnings: z.string(),
  name: z.string().optional(),
  link: z.string().optional(),
  firstSeenAt: z.coerce.date(),
});
export type IndexEntry = z.infer<typeof IndexEntry>;

/**
 * Schema for the whole JSON index: a record whose keys are date strings
 * (`generateIndexes` uses `YYYY-MM-DD`) and whose values are the entries
 * available for that date.
 */
export const IndexJson = z.record(z.string(), z.array(IndexEntry));
export type IndexJson = z.infer<typeof IndexJson>;
2024-09-12 21:08:57 +00:00
/** Milliseconds in one UTC day (UTC has no DST, so this is exact). */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/** Matches the `precios_YYYYMMDD` stamp inside a resource name. */
const NAME_DATE_PATTERN = /precios_(\d{4})(\d{2})(\d{2})/;

/** Matches either `DD/MM/YYYY` or `YYYY-MM-DD` inside a resource description. */
const DESCRIPTION_DATE_PATTERN =
  /(?<day>\d{2})\/(?<month>\d{2})\/(?<year>\d{4})|(?<year2>\d{4})-(?<month2>\d{2})-(?<day2>\d{2})/;

/**
 * Spanish weekday names, positioned so that the array index equals the value
 * returned by `Date.prototype.getUTCDay()` (0 = Sunday … 6 = Saturday).
 */
const WEEKDAY_PATTERNS: readonly RegExp[] = [
  /domingo/iu,
  /lunes/iu,
  /martes/iu,
  /mi[eé]rcoles/iu,
  /jueves/iu,
  /viernes/iu,
  /s[aá]bado/iu,
];

/** Builds a UTC-midnight `Date` from decimal year / month (1-12) / day strings. */
function utcDateFromParts(year: string, month: string, day: string): Date {
  return new Date(
    Date.UTC(parseInt(year, 10), parseInt(month, 10) - 1, parseInt(day, 10))
  );
}

/** Formats a date as its UTC calendar day, `YYYY-MM-DD`. */
function toDateKey(date: Date): string {
  return date.toISOString().slice(0, 10);
}

/**
 * Extracts the calendar day a resource refers to, as a UTC-midnight `Date`.
 *
 * The `precios_YYYYMMDD` stamp in the name takes precedence; otherwise the
 * first `DD/MM/YYYY` or `YYYY-MM-DD` date in the description is used.
 *
 * @throws Error when neither the name nor the description contains a date.
 */
function getDate(resource: Resource): Date {
  const inName = NAME_DATE_PATTERN.exec(resource.name);
  if (inName) {
    const [, year = "", month = "", day = ""] = inName;
    return utcDateFromParts(year, month, day);
  }

  const inDescription = DESCRIPTION_DATE_PATTERN.exec(resource.description);
  if (inDescription) {
    // Only one alternative of the pattern participates in a match, so exactly
    // one of each (x, x2) pair is defined.
    const { day, month, year, day2, month2, year2 } =
      inDescription.groups ?? {};
    return utcDateFromParts(
      year ?? year2 ?? "",
      month ?? month2 ?? "",
      day ?? day2 ?? ""
    );
  }

  console.debug(resource);
  throw new Error(`No date found for ${resource.name}`);
}

/**
 * Finds a Spanish weekday name in `searchIn`.
 *
 * @returns 0 (Sunday) … 6 (Saturday), or `null` when no weekday is mentioned.
 */
function getWeekDay(searchIn: string): number | null {
  const index = WEEKDAY_PATTERNS.findIndex((pattern) => pattern.test(searchIn));
  return index === -1 ? null : index;
}

/** Weekday mentioned by a resource; the description is checked before the name. */
function getWeekDayInResource(resource: Resource): number | null {
  return getWeekDay(resource.description) ?? getWeekDay(resource.name);
}

/**
 * Builds the archive index in two forms: a markdown document and a JSON
 * structure keyed by `YYYY-MM-DD` date.
 *
 * Every day between the earliest and latest dated ZIP resource gets an entry,
 * including days for which no resource exists. Dataset dates are handled as
 * UTC calendar days throughout, so the result does not depend on the time
 * zone of the machine running this.
 *
 * @returns The markdown text and the JSON index.
 * @throws Error when a ZIP resource has no recognizable date in its name or
 *   description, or when the resources index does not match the expected
 *   schema.
 */
export async function generateIndexes(): Promise<{
  markdown: string;
  jsonIndex: IndexJson;
}> {
  const resourcesIndex = await indexResources();
  const datasets = z
    .record(z.string(), z.array(zResource))
    .parse(resourcesIndex);
  const snapshots = Object.entries(datasets)
    .map(([date, resources]) => ({ date: new Date(date), resources }))
    .sort((a, b) => b.date.getTime() - a.date.getTime());

  // One entry per resource revision, stamped with the date of the earliest
  // snapshot that contains it.
  const latestResources = new Map<string, Resource & { firstSeenAt: Date }>();
  for (const { date, resources } of snapshots) {
    for (const resource of resources) {
      const id = `${resource.id}-revID-${resource.revision_id}`;
      const existing = latestResources.get(id);
      if (existing && existing.firstSeenAt < date) continue;
      latestResources.set(id, { ...resource, firstSeenAt: date });
    }
  }

  const fileList = await listDirectory("");
  const zipResources = [...latestResources.values()].filter(
    (r) => r.format === "ZIP"
  );

  // Group once by date key instead of re-scanning every resource per date.
  const resourcesByDate = new Map<string, typeof zipResources>();
  for (const resource of zipResources) {
    const key = toDateKey(getDate(resource));
    const bucket = resourcesByDate.get(key);
    if (bucket) bucket.push(resource);
    else resourcesByDate.set(key, [resource]);
  }

  // Every day from the earliest to the latest known date, so that gaps show
  // up explicitly in the index. ISO date strings sort chronologically.
  const knownDates = [...resourcesByDate.keys()].sort();
  const firstKnown = knownDates[0];
  const lastKnown = knownDates[knownDates.length - 1];
  const dates: string[] = [];
  if (firstKnown !== undefined && lastKnown !== undefined) {
    const end = Date.parse(lastKnown);
    for (let t = Date.parse(firstKnown); t <= end; t += MS_PER_DAY) {
      dates.push(toDateKey(new Date(t)));
    }
  }

  let markdown =
    "# index de archivo de datasets de precios SEPA\n" +
    "esto esta automáticamente generado por sepa-index-gen dentro de preciazo.";

  // Dataset dates are UTC calendar days, so they must be rendered in UTC.
  const formatter = new Intl.DateTimeFormat("es-AR", {
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
    weekday: "long",
    timeZone: "UTC",
  });
  const dateTimeFormatter = new Intl.DateTimeFormat("es-AR", {
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
  });

  const jsonIndex: IndexJson = {};

  for (const dateStr of dates) {
    // Date-only ISO strings are parsed as UTC midnight.
    const date = new Date(dateStr);
    markdown += `\n* ${formatter.format(date)}:`;

    const resourcesInDate = resourcesByDate.get(dateStr) ?? [];
    if (!resourcesInDate.length) {
      markdown += " ❌ no tengo recursos para esta fecha";
    }

    const entries: IndexEntry[] = [];
    for (const resource of resourcesInDate) {
      const id = `${resource.id}-revID-${resource.revision_id}`;
      const fileExists = fileList.find((file) => file.startsWith(id));
      const link = fileExists
        ? `https://f004.backblazeb2.com/file/precios-justos-datasets/${fileExists}`
        : undefined;

      let warnings = "";
      // Compare against null explicitly: Sunday is 0, which is falsy.
      const declaredWeekDay = getWeekDayInResource(resource);
      if (declaredWeekDay !== null && date.getUTCDay() !== declaredWeekDay) {
        warnings +=
          "⁉️⚠️ dia de semana incorrecto, puede haberse subido incorrectamente ";
      }

      markdown += `\n * ${id} ${warnings} ${fileExists ? `[✅ descargar](${link})` : "❌"} (primera vez visto: ${dateTimeFormatter.format(resource.firstSeenAt)})`;
      entries.push({
        id,
        warnings: warnings.trim(),
        name: fileExists,
        link,
        firstSeenAt: resource.firstSeenAt,
      });
    }
    jsonIndex[dateStr] = entries;
  }

  return { markdown, jsonIndex };
}