guardar id de registro WARC y versión del parser

This commit is contained in:
Cat /dev/Nulo 2023-12-23 20:35:43 -03:00
parent c0c02313f1
commit b66cb6782c
5 changed files with 96 additions and 3 deletions

View file

@ -0,0 +1,2 @@
-- Migration: add the `warc_record_id` (text) and `parser_version` (integer)
-- columns to the `precios` table.
-- Neither column is declared NOT NULL or given a DEFAULT, so both are nullable
-- and rows that existed before this migration read back NULL for them.
ALTER TABLE precios ADD `warc_record_id` text;--> statement-breakpoint
ALTER TABLE precios ADD `parser_version` integer;

View file

@ -0,0 +1,79 @@
{
"version": "5",
"dialect": "sqlite",
"id": "a565621c-046e-4f4d-b505-104e2c4f2b6c",
"prevId": "88aa0254-106e-424e-ab66-417ff44bbf0b",
"tables": {
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
}

View file

@ -8,6 +8,13 @@
"when": 1703373860006, "when": 1703373860006,
"tag": "0000_blushing_sabretooth", "tag": "0000_blushing_sabretooth",
"breakpoints": true "breakpoints": true
},
{
"idx": 1,
"version": "5",
"when": 1703374278842,
"tag": "0001_spotty_red_hulk",
"breakpoints": true
} }
] ]
} }

View file

@ -7,8 +7,8 @@ export const precios = sqliteTable("precios", {
precioCentavos: integer("precio_centavos"), precioCentavos: integer("precio_centavos"),
inStock: integer("in_stock", { mode: "boolean" }), inStock: integer("in_stock", { mode: "boolean" }),
url: text("url").notNull(), url: text("url").notNull(),
// warcRecordId: text("warc_record_id"), warcRecordId: text("warc_record_id"),
// parserVersion: integer("parser_version"), parserVersion: integer("parser_version"),
}); });
export type Precio = typeof precios.$inferSelect; export type Precio = typeof precios.$inferSelect;

View file

@ -29,7 +29,10 @@ await pMap(process.argv.slice(2), (path) => parseWarc(path), {
}); });
export type Precio = typeof schema.precios.$inferInsert; export type Precio = typeof schema.precios.$inferInsert;
export type Precioish = Omit<Precio, "fetchedAt" | "url" | "id">; export type Precioish = Omit<
Precio,
"fetchedAt" | "url" | "id" | "warcRecordId" | "parserVersion"
>;
async function storePrecioPoint(point: Precio) { async function storePrecioPoint(point: Precio) {
await db.insert(schema.precios).values(point); await db.insert(schema.precios).values(point);
@ -63,6 +66,8 @@ async function parseWarc(path: string) {
...ish, ...ish,
fetchedAt: new Date(record.warcDate!), fetchedAt: new Date(record.warcDate!),
url: record.warcTargetURI, url: record.warcTargetURI,
warcRecordId: record.warcHeader("WARC-Record-ID"),
parserVersion: PARSER_VERSION,
}; };
if (ish) await storePrecioPoint(p); if (ish) await storePrecioPoint(p);