From b66cb6782cd8e3c0998f42172f1c05ee71952346 Mon Sep 17 00:00:00 2001 From: Nulo Date: Sat, 23 Dec 2023 20:35:43 -0300 Subject: [PATCH] guardar id de WARC y parser --- db-datos/drizzle/0001_spotty_red_hulk.sql | 2 + db-datos/drizzle/meta/0001_snapshot.json | 79 +++++++++++++++++++++++ db-datos/drizzle/meta/_journal.json | 7 ++ db-datos/schema.ts | 4 +- scraper/scrap.ts | 7 +- 5 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 db-datos/drizzle/0001_spotty_red_hulk.sql create mode 100644 db-datos/drizzle/meta/0001_snapshot.json diff --git a/db-datos/drizzle/0001_spotty_red_hulk.sql b/db-datos/drizzle/0001_spotty_red_hulk.sql new file mode 100644 index 0000000..abfb279 --- /dev/null +++ b/db-datos/drizzle/0001_spotty_red_hulk.sql @@ -0,0 +1,2 @@ +ALTER TABLE precios ADD `warc_record_id` text;--> statement-breakpoint +ALTER TABLE precios ADD `parser_version` integer; \ No newline at end of file diff --git a/db-datos/drizzle/meta/0001_snapshot.json b/db-datos/drizzle/meta/0001_snapshot.json new file mode 100644 index 0000000..b39b90e --- /dev/null +++ b/db-datos/drizzle/meta/0001_snapshot.json @@ -0,0 +1,79 @@ +{ + "version": "5", + "dialect": "sqlite", + "id": "a565621c-046e-4f4d-b505-104e2c4f2b6c", + "prevId": "88aa0254-106e-424e-ab66-417ff44bbf0b", + "tables": { + "precios": { + "name": "precios", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "ean": { + "name": "ean", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "fetched_at": { + "name": "fetched_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "precio_centavos": { + "name": "precio_centavos", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "in_stock": { + "name": "in_stock", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "warc_record_id": { + "name": "warc_record_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "parser_version": { + "name": "parser_version", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + } + }, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + } +} \ No newline at end of file diff --git a/db-datos/drizzle/meta/_journal.json b/db-datos/drizzle/meta/_journal.json index b7ea9e7..4425484 100644 --- a/db-datos/drizzle/meta/_journal.json +++ b/db-datos/drizzle/meta/_journal.json @@ -8,6 +8,13 @@ "when": 1703373860006, "tag": "0000_blushing_sabretooth", "breakpoints": true + }, + { + "idx": 1, + "version": "5", + "when": 1703374278842, + "tag": "0001_spotty_red_hulk", + "breakpoints": true } ] } \ No newline at end of file diff --git a/db-datos/schema.ts b/db-datos/schema.ts index 437b44f..4f15736 100644 --- a/db-datos/schema.ts +++ b/db-datos/schema.ts @@ -7,8 +7,8 @@ export const precios = sqliteTable("precios", { precioCentavos: integer("precio_centavos"), inStock: integer("in_stock", { mode: "boolean" }), url: text("url").notNull(), - // warcRecordId: text("warc_record_id"), - // parserVersion: integer("parser_version"), + warcRecordId: text("warc_record_id"), + parserVersion: integer("parser_version"), }); export type Precio = typeof precios.$inferSelect; diff --git a/scraper/scrap.ts b/scraper/scrap.ts index c9e0c3d..6a9a511 100644 --- a/scraper/scrap.ts +++ b/scraper/scrap.ts @@ -29,7 +29,10 @@ await pMap(process.argv.slice(2), (path) => parseWarc(path), { }); export type Precio = typeof schema.precios.$inferInsert; -export type Precioish = Omit; +export type Precioish = Omit< + Precio, + "fetchedAt" | "url" | "id" | "warcRecordId" | "parserVersion" +>; async function storePrecioPoint(point: Precio) { await db.insert(schema.precios).values(point); @@ -63,6 +66,8 @@ async function parseWarc(path: string) { ...ish, fetchedAt: new Date(record.warcDate!), url: record.warcTargetURI, + warcRecordId: record.warcHeader("WARC-Record-ID"), + parserVersion: PARSER_VERSION, }; if (ish) await storePrecioPoint(p);