Merge branch 'new-rust-api'

This commit is contained in:
Cat /dev/Nulo 2024-08-19 20:57:26 -03:00
commit 8f82631b77
46 changed files with 694 additions and 290 deletions

View file

@ -2,6 +2,8 @@ data/warcs/
data/carrefour/
*/*.db*
sqlite.db
db.db
db.db-wal
downloader/
node_modules/
*/node_modules/

4
.gitignore vendored
View file

@ -3,5 +3,7 @@ node_modules/
*.db-shm
*.db-wal
target/
.env.*
*.local
.DS_Store

View file

@ -0,0 +1 @@
DB_PATH=../db.db

View file

@ -1,4 +1,4 @@
export const DB_PATH = process.env.DB_PATH ?? "../sqlite.db";
export const DB_PATH = process.env.DB_PATH ?? "../db.db";
/** @type { import("drizzle-kit").Config } */
export default {

View file

@ -0,0 +1 @@
CREATE INDEX `precios_ean_fetched_at_idx` ON `precios` (`ean`,`fetched_at`);

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "88aa0254-106e-424e-ab66-417ff44bbf0b",
"prevId": "00000000-0000-0000-0000-000000000000",
"tables": {
"precios": {
"name": "precios",
@ -58,8 +56,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "88aa0254-106e-424e-ab66-417ff44bbf0b",
"prevId": "00000000-0000-0000-0000-000000000000"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "a565621c-046e-4f4d-b505-104e2c4f2b6c",
"prevId": "88aa0254-106e-424e-ab66-417ff44bbf0b",
"tables": {
"precios": {
"name": "precios",
@ -72,8 +70,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "a565621c-046e-4f4d-b505-104e2c4f2b6c",
"prevId": "88aa0254-106e-424e-ab66-417ff44bbf0b"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "cbd90a60-7568-489f-ac45-95bd8818ffbd",
"prevId": "a565621c-046e-4f4d-b505-104e2c4f2b6c",
"tables": {
"precios": {
"name": "precios",
@ -86,8 +84,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "cbd90a60-7568-489f-ac45-95bd8818ffbd",
"prevId": "a565621c-046e-4f4d-b505-104e2c4f2b6c"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "e1217fdb-6f54-44c5-a04b-c5aebf202102",
"prevId": "cbd90a60-7568-489f-ac45-95bd8818ffbd",
"tables": {
"precios": {
"name": "precios",
@ -94,8 +92,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "e1217fdb-6f54-44c5-a04b-c5aebf202102",
"prevId": "cbd90a60-7568-489f-ac45-95bd8818ffbd"
}

View file

@ -1,7 +1,5 @@
{
"id": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
"prevId": "e1217fdb-6f54-44c5-a04b-c5aebf202102",
"version": "5",
"version": "6",
"dialect": "sqlite",
"tables": {
"precios": {
@ -94,8 +92,9 @@
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
"tables": {},
"columns": {}
},
"id": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
"prevId": "e1217fdb-6f54-44c5-a04b-c5aebf202102"
}

View file

@ -1,7 +1,5 @@
{
"id": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
"prevId": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958",
"version": "5",
"version": "6",
"dialect": "sqlite",
"tables": {
"precios": {
@ -94,8 +92,9 @@
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
"tables": {},
"columns": {}
},
"id": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
"prevId": "bf90a1cd-ae6a-4dba-a1aa-79f14a11d958"
}

View file

@ -1,7 +1,5 @@
{
"id": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
"prevId": "f2cf47b9-e137-41c9-b7fb-6bc016588db0",
"version": "5",
"version": "6",
"dialect": "sqlite",
"tables": {
"precios": {
@ -94,8 +92,9 @@
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
"tables": {},
"columns": {}
},
"id": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
"prevId": "f2cf47b9-e137-41c9-b7fb-6bc016588db0"
}

View file

@ -1,7 +1,5 @@
{
"id": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
"prevId": "ac099405-ecd0-4637-ae5e-fb29c9847e45",
"version": "5",
"version": "6",
"dialect": "sqlite",
"tables": {
"precios": {
@ -94,8 +92,9 @@
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
"tables": {},
"columns": {}
},
"id": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
"prevId": "ac099405-ecd0-4637-ae5e-fb29c9847e45"
}

View file

@ -1,7 +1,5 @@
{
"id": "082630a9-3744-4e33-bde5-06045ca57d36",
"prevId": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25",
"version": "5",
"version": "6",
"dialect": "sqlite",
"tables": {
"precios": {
@ -94,8 +92,9 @@
},
"enums": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
"tables": {},
"columns": {}
},
"id": "082630a9-3744-4e33-bde5-06045ca57d36",
"prevId": "9d2f23bf-dc60-4adb-b1bd-ec75e90dda25"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "2e398920-ffaf-4d55-ae13-d906cb9e0efa",
"prevId": "082630a9-3744-4e33-bde5-06045ca57d36",
"tables": {
"precios": {
"name": "precios",
@ -139,8 +137,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "2e398920-ffaf-4d55-ae13-d906cb9e0efa",
"prevId": "082630a9-3744-4e33-bde5-06045ca57d36"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "c8297337-4ed8-432e-8782-65d41be42e00",
"prevId": "2e398920-ffaf-4d55-ae13-d906cb9e0efa",
"tables": {
"db_best_selling": {
"name": "db_best_selling",
@ -176,8 +174,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "c8297337-4ed8-432e-8782-65d41be42e00",
"prevId": "2e398920-ffaf-4d55-ae13-d906cb9e0efa"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84",
"prevId": "c8297337-4ed8-432e-8782-65d41be42e00",
"tables": {
"db_best_selling": {
"name": "db_best_selling",
@ -183,8 +181,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84",
"prevId": "c8297337-4ed8-432e-8782-65d41be42e00"
}

View file

@ -1,8 +1,6 @@
{
"version": "5",
"version": "6",
"dialect": "sqlite",
"id": "16046188-ab24-4bd4-bfb4-8a81f24c6f28",
"prevId": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84",
"tables": {
"db_best_selling": {
"name": "db_best_selling",
@ -190,8 +188,9 @@
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
},
"id": "16046188-ab24-4bd4-bfb4-8a81f24c6f28",
"prevId": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84"
}

View file

@ -0,0 +1,208 @@
{
"version": "6",
"dialect": "sqlite",
"id": "c95c6547-d540-45cf-aa9d-9d828efb468e",
"prevId": "16046188-ab24-4bd4-bfb4-8a81f24c6f28",
"tables": {
"db_best_selling": {
"name": "db_best_selling",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"category": {
"name": "category",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"eans_json": {
"name": "eans_json",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
},
"precios_url_idx": {
"name": "precios_url_idx",
"columns": [
"url"
],
"isUnique": false
},
"precios_fetched_at_idx": {
"name": "precios_fetched_at_idx",
"columns": [
"fetched_at"
],
"isUnique": false
},
"precios_ean_fetched_at_idx": {
"name": "precios_ean_fetched_at_idx",
"columns": [
"ean",
"fetched_at"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"producto_urls": {
"name": "producto_urls",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"first_seen": {
"name": "first_seen",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"last_seen": {
"name": "last_seen",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"producto_urls_url_unique": {
"name": "producto_urls_url_unique",
"columns": [
"url"
],
"isUnique": true
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View file

@ -92,6 +92,13 @@
"when": 1719680946811,
"tag": "0012_hard_red_wolf",
"breakpoints": true
},
{
"idx": 13,
"version": "6",
"when": 1722796469056,
"tag": "0013_harsh_starbolt",
"breakpoints": true
}
]
}

View file

@ -5,7 +5,7 @@
"description": "",
"main": "index.js",
"scripts": {
"generate": "drizzle-kit generate:sqlite",
"generate": "drizzle-kit generate",
"migrate": "node migrate-cli.js"
},
"keywords": [],

View file

@ -22,6 +22,10 @@ export const precios = sqliteTable(
preciosFetchedAtIdx: index("precios_fetched_at_idx").on(
precios.fetchedAt
),
preciosEanFetchedAtIdx: index("precios_ean_fetched_at_idx").on(
precios.ean,
precios.fetchedAt
),
};
}
);

View file

@ -39,12 +39,12 @@ importers:
dayjs:
specifier: ^1.11.10
version: 1.11.10
drizzle-kit:
specifier: ^0.23.0
version: 0.23.0
drizzle-orm:
specifier: ^0.32.0
version: 0.32.0(@types/better-sqlite3@7.6.9)(better-sqlite3@11.1.2)
ky:
specifier: ^1.5.0
version: 1.5.0
zod:
specifier: ^3.22.4
version: 3.22.4
@ -1227,6 +1227,10 @@ packages:
resolution: {integrity: sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==}
engines: {node: '>=6'}
ky@1.5.0:
resolution: {integrity: sha512-bkQo+UqryW6Zmo/DsixYZE4Z9t2mzvNMhceyIhuMuInb3knm5Q+GNGMKveydJAj+Z6piN1SwI6eR/V0G+Z0BtA==}
engines: {node: '>=18'}
lilconfig@2.1.0:
resolution: {integrity: sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==}
engines: {node: '>=10'}
@ -2678,6 +2682,8 @@ snapshots:
kleur@4.1.5: {}
ky@1.5.0: {}
lilconfig@2.1.0: {}
lilconfig@3.1.1: {}

View file

@ -1 +1 @@
DATABASE_URL=sqlite://../sqlite.db
DATABASE_URL=sqlite://../db.db

View file

@ -6,7 +6,7 @@
{
"name": "count",
"ordinal": 0,
"type_info": "Int"
"type_info": "Integer"
}
],
"parameters": {

View file

@ -6,7 +6,7 @@
{
"name": "count",
"ordinal": 0,
"type_info": "Int"
"type_info": "Integer"
}
],
"parameters": {

135
rust/Cargo.lock generated
View file

@ -35,7 +35,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"getrandom 0.2.15",
"once_cell",
"version_check",
"zerocopy",
@ -163,7 +162,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -354,6 +353,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-targets 0.52.6",
]
@ -386,10 +386,10 @@ version = "4.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
dependencies = [
"heck 0.5.0",
"heck",
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -404,6 +404,15 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
[[package]]
name = "concurrent-queue"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "const-oid"
version = "0.9.6"
@ -552,9 +561,14 @@ dependencies = [
[[package]]
name = "event-listener"
version = "2.5.3"
version = "5.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba"
dependencies = [
"concurrent-queue",
"parking",
"pin-project-lite",
]
[[package]]
name = "fastrand"
@ -665,7 +679,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -748,22 +762,13 @@ dependencies = [
[[package]]
name = "hashlink"
version = "0.8.4"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
dependencies = [
"hashbrown",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "heck"
version = "0.5.0"
@ -1033,9 +1038,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libsqlite3-sys"
version = "0.27.0"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716"
checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f"
dependencies = [
"cc",
"pkg-config",
@ -1234,6 +1239,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae"
[[package]]
name = "parking_lot"
version = "0.11.2"
@ -1320,7 +1331,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -1794,7 +1805,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -1900,6 +1911,9 @@ name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
dependencies = [
"serde",
]
[[package]]
name = "socket2"
@ -1942,9 +1956,9 @@ dependencies = [
[[package]]
name = "sqlx"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa"
checksum = "27144619c6e5802f1380337a209d2ac1c431002dd74c6e60aebff3c506dc4f0c"
dependencies = [
"sqlx-core",
"sqlx-macros",
@ -1955,11 +1969,10 @@ dependencies = [
[[package]]
name = "sqlx-core"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6"
checksum = "a999083c1af5b5d6c071d34a708a19ba3e02106ad82ef7bbd69f5e48266b613b"
dependencies = [
"ahash",
"atoi",
"byteorder",
"bytes",
@ -1973,6 +1986,7 @@ dependencies = [
"futures-intrusive",
"futures-io",
"futures-util",
"hashbrown",
"hashlink",
"hex",
"indexmap",
@ -1995,26 +2009,26 @@ dependencies = [
[[package]]
name = "sqlx-macros"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127"
checksum = "a23217eb7d86c584b8cbe0337b9eacf12ab76fe7673c513141ec42565698bb88"
dependencies = [
"proc-macro2",
"quote",
"sqlx-core",
"sqlx-macros-core",
"syn 1.0.109",
"syn",
]
[[package]]
name = "sqlx-macros-core"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8"
checksum = "1a099220ae541c5db479c6424bdf1b200987934033c2584f79a0e1693601e776"
dependencies = [
"dotenvy",
"either",
"heck 0.4.1",
"heck",
"hex",
"once_cell",
"proc-macro2",
@ -2026,7 +2040,7 @@ dependencies = [
"sqlx-mysql",
"sqlx-postgres",
"sqlx-sqlite",
"syn 1.0.109",
"syn",
"tempfile",
"tokio",
"url",
@ -2034,12 +2048,12 @@ dependencies = [
[[package]]
name = "sqlx-mysql"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418"
checksum = "5afe4c38a9b417b6a9a5eeffe7235d0a106716495536e7727d1c7f4b1ff3eba6"
dependencies = [
"atoi",
"base64 0.21.7",
"base64 0.22.1",
"bitflags 2.6.0",
"byteorder",
"bytes",
@ -2077,12 +2091,12 @@ dependencies = [
[[package]]
name = "sqlx-postgres"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e"
checksum = "b1dbb157e65f10dbe01f729339c06d239120221c9ad9fa0ba8408c4cc18ecf21"
dependencies = [
"atoi",
"base64 0.21.7",
"base64 0.22.1",
"bitflags 2.6.0",
"byteorder",
"chrono",
@ -2116,9 +2130,9 @@ dependencies = [
[[package]]
name = "sqlx-sqlite"
version = "0.7.4"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
checksum = "9b2cdd83c008a622d94499c0006d8ee5f821f36c89b7d625c900e5dc30b5c5ee"
dependencies = [
"atoi",
"chrono",
@ -2132,10 +2146,10 @@ dependencies = [
"log",
"percent-encoding",
"serde",
"serde_urlencoded",
"sqlx-core",
"tracing",
"url",
"urlencoding",
]
[[package]]
@ -2161,17 +2175,6 @@ version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.71"
@ -2224,7 +2227,7 @@ checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -2285,7 +2288,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -2383,7 +2386,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]
@ -2464,12 +2467,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291"
[[package]]
name = "unicode-segmentation"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode_categories"
version = "0.1.1"
@ -2493,12 +2490,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf8-width"
version = "0.1.7"
@ -2577,7 +2568,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
"wasm-bindgen-shared",
]
@ -2611,7 +2602,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -2863,7 +2854,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
"syn",
]
[[package]]

View file

@ -9,10 +9,10 @@ edition = "2021"
again = "0.1.2"
anyhow = "1.0.79"
base64 = "0.21.7"
chrono = "0.4"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4.15", features = ["derive"] }
cron = "0.12.0"
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono" ] }
sqlx = { version = "0.8", features = [ "runtime-tokio", "sqlite", "chrono", "json" ] }
futures = "0.3.30"
html-escape = "0.2.13"
itertools = "0.12.0"

View file

@ -1,8 +1,16 @@
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
use axum::{
extract::{Path, State},
http::StatusCode,
response::IntoResponse,
routing::get,
Json, Router,
};
use chrono::{DateTime, Utc};
use clap::ValueEnum;
use futures::future::join_all;
use itertools::Itertools;
use preciazo::supermercado::Supermercado;
use serde::Serialize;
use sqlx::{
sqlite::{SqliteConnectOptions, SqlitePoolOptions},
SqlitePool,
@ -94,31 +102,220 @@ async fn healthcheck(State(pool): State<SqlitePool>) -> impl IntoResponse {
}
}
/// One element of the JSON array returned by `get_best_selling`: a category name together with
/// the products listed for it.
#[derive(Serialize)]
struct CategoryWithProducts {
// Category name, taken from the `category` column of `db_best_selling`.
category: String,
// Products whose EAN appears in that category's `eans_json` list.
products: Vec<Product>,
}
/// Minimal product description serialized inside `CategoryWithProducts`.
#[derive(Serialize)]
struct Product {
// EAN of the product.
ean: String,
// Display name; optional because it is read from an `Option<String>` row field.
name: Option<String>,
// Image URL; optional because it is read from an `Option<String>` row field.
image_url: Option<String>,
}
async fn get_best_selling(State(pool): State<SqlitePool>) -> impl IntoResponse {
#[derive(sqlx::FromRow, Debug)]
struct ProductWithCategory {
category: String,
ean: String,
name: Option<String>,
image_url: Option<String>,
}
let products_with_category = sqlx::query_as::<_, ProductWithCategory>(
"with latest_best_selling as (
select category, eans_json
from db_best_selling
group by category
having max(fetched_at)
),
extracted_eans as (
select latest_best_selling.category, json.value as ean
from latest_best_selling, json_each(latest_best_selling.eans_json) json
)
select extracted_eans.category, extracted_eans.ean, precios.image_url, name
from extracted_eans
join precios
on extracted_eans.ean = precios.ean
where
precios.fetched_at = (
SELECT MAX(fetched_at)
FROM precios
WHERE ean = extracted_eans.ean
)",
)
.fetch_all(&pool)
.await
.unwrap();
let categories = products_with_category
.iter()
.map(|p| p.category.clone())
.unique()
.collect_vec();
let categories_with_products = categories
.into_iter()
.map(|c| CategoryWithProducts {
category: c.clone(),
products: products_with_category
.iter()
.filter(|p| p.category == c)
.map(|p| Product {
ean: p.ean.clone(),
image_url: p.image_url.clone(),
name: p.name.clone(),
})
.collect_vec(),
})
.collect_vec();
Json(categories_with_products)
}
async fn get_product_history(
State(pool): State<SqlitePool>,
Path(ean): Path<String>,
) -> impl IntoResponse {
#[derive(sqlx::FromRow, Debug, Serialize)]
struct Precio {
ean: String,
fetched_at: chrono::DateTime<Utc>,
precio_centavos: Option<i64>,
in_stock: Option<bool>,
url: String,
name: Option<String>,
image_url: Option<String>,
}
let precios = sqlx::query!(
"
select ean,fetched_at,precio_centavos,in_stock,url,name,image_url from precios
where ean = ?
order by fetched_at
",
ean
)
.map(|r| Precio {
ean: r.ean,
url: r.url,
fetched_at: DateTime::from_timestamp(r.fetched_at, 0).unwrap(),
image_url: r.image_url,
name: r.name,
in_stock: r.in_stock.map(|x| x == 1),
precio_centavos: r.precio_centavos,
})
.fetch_all(&pool)
.await
.unwrap();
Json(precios)
}
async fn search(State(pool): State<SqlitePool>, Path(query): Path<String>) -> impl IntoResponse {
let sql_query = query
.clone()
.replace("\"", "\"\"")
.split(" ")
.map(|x| format!("\"{}\"", x))
.join(" ");
#[derive(Serialize)]
struct Result {
ean: String,
name: String,
image_url: String,
}
let results = sqlx::query!(
"with search_results as (
select f.ean from precios_fts f
where f.name match ? and f.ean != ''
group by f.ean
limit 100
)
select p.id, p.ean, p.name, p.image_url from search_results as s
join precios as p
on p.ean = s.ean
where p.fetched_at = (
SELECT MAX(fetched_at)
FROM precios as pf
WHERE pf.ean = s.ean and pf.name is not null
);",
sql_query
)
.fetch_all(&pool)
.await
.unwrap()
.into_iter()
.map(|r| Result {
ean: r.ean,
image_url: r.image_url.unwrap(),
name: r.name.unwrap(),
})
.collect_vec();
Json(results)
}
async fn get_info(State(pool): State<SqlitePool>) -> impl IntoResponse {
#[derive(Serialize)]
struct Info {
count: i64,
}
let count = sqlx::query!("select count(distinct ean) as count from precios")
.fetch_one(&pool)
.await
.unwrap()
.count;
Json(Info { count })
}
#[tokio::main]
async fn main() {
tracing_subscriber::fmt::init();
let pool = SqlitePoolOptions::new()
.max_connections(1)
.max_connections(10)
.connect_with(
SqliteConnectOptions::from_str(&format!(
"sqlite://{}",
env::var("DB_PATH").unwrap_or("../sqlite.db".to_string())
env::var("DB_PATH").unwrap_or("../db.db".to_string())
))
.unwrap()
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
.busy_timeout(Duration::from_secs(15))
.synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
.busy_timeout(Duration::from_secs(30))
.optimize_on_close(true, None),
)
.await
.expect("can't connect to database");
// Tuning pragmas, each issued as a one-off statement through the pool right after connecting.
// NOTE(review): `execute(&pool)` runs a statement on a single pooled connection, while
// `temp_store` and `mmap_size` are presumably per-connection settings in SQLite. With more than
// one connection in the pool the others would keep their defaults - confirm, and consider
// setting these via `SqliteConnectOptions::pragma` so every new connection gets them.
sqlx::query("pragma temp_store = memory;")
.execute(&pool)
.await
.unwrap();
// Requests a memory-map limit of 30000000000 bytes for database I/O.
sqlx::query("pragma mmap_size = 30000000000;")
.execute(&pool)
.await
.unwrap();
// NOTE(review): `page_size` presumably has no effect on a database file that already exists
// (and cannot change while in WAL mode) - confirm whether this statement is needed at all.
sqlx::query("pragma page_size = 4096;")
.execute(&pool)
.await
.unwrap();
let app = Router::new()
.route("/", get(index))
.route("/api/healthcheck", get(healthcheck))
.route("/api/0/best-selling-products", get(get_best_selling))
.route("/api/0/ean/:ean/history", get(get_product_history))
.route("/api/0/info", get(get_info))
.route("/api/0/search/:query", get(search))
.with_state(pool);
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();
tracing::debug!("listening on {}", listener.local_addr().unwrap());
tracing::info!("listening on {}", listener.local_addr().unwrap());
axum::serve(listener, app).await.unwrap();
}

View file

@ -1,3 +1,5 @@
use std::env;
use super::now_sec;
use super::AutoArgs;
use super::AutoTelegram;
@ -64,7 +66,16 @@ impl Auto {
// }
{
let t0 = now_sec();
let counters = self.scraper.fetch_list(&self.db, links).await;
let n_coroutines = if supermercado == Supermercado::Coto {
50
} else {
env::var("N_COROUTINES")
.map_or(Ok(24), |s| s.parse::<usize>())
.expect("N_COROUTINES no es un número")
};
let counters = self.scraper.fetch_list(&self.db, links, n_coroutines).await;
self.inform(&format!(
"Downloaded {:?}: {:?} (took {})",
&supermercado,

View file

@ -17,7 +17,7 @@ pub struct Db {
impl Db {
pub async fn connect() -> anyhow::Result<Self> {
let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
let db_path = env::var("DB_PATH").unwrap_or("../db.db".to_string());
info!("Opening DB at {}", db_path);
let read_pool = connect_to_db(&db_path, 32).await?;
let write_pool = connect_to_db(&db_path, 1).await?;

View file

@ -128,11 +128,7 @@ impl Scraper {
counters
}
pub async fn fetch_list(&self, db: &Db, links: Vec<String>) -> Counters {
let n_coroutines = env::var("N_COROUTINES")
.map_or(Ok(24), |s| s.parse::<usize>())
.expect("N_COROUTINES no es un número");
pub async fn fetch_list(&self, db: &Db, links: Vec<String>, n_coroutines: usize) -> Counters {
stream::iter(links)
.map(|url| {
let db = db.clone();

View file

@ -56,7 +56,11 @@ pub fn parse(url: String, dom: &tl::VDom) -> Result<PrecioPoint, anyhow::Error>
.find_map(|n| n.as_tag())
.map(|t| t.inner_text(dom.parser()))
// https://github.com/catdevnull/preciazo/issues/24
.map(|s| html_escape::decode_html_entities(s.trim()).to_string());
.map(|s| {
html_escape::decode_html_entities(s.trim())
.trim()
.to_string()
});
let image_url = dom
.query_selector(".zoomImage1")

2
sitio/.env.development Normal file
View file

@ -0,0 +1,2 @@
DB_PATH=../db.db
VITE_API_HOST=http://localhost:8000

2
sitio/.gitignore vendored
View file

@ -4,7 +4,7 @@ node_modules
/.svelte-kit
/package
.env
.env.*
*.local
!.env.example
vite.config.js.timestamp-*
vite.config.ts.timestamp-*

View file

@ -40,6 +40,7 @@
"chartjs-adapter-dayjs-4": "^1.0.4",
"dayjs": "^1.11.10",
"drizzle-orm": "^0.32.0",
"ky": "^1.5.0",
"zod": "^3.22.4"
},
"packageManager": "pnpm@9.5.0+sha512.140036830124618d624a2187b50d04289d5a087f326c9edfc0ccd733d76c4f52c3a313d4fc148794a2a9d81553016004e6742e8cf850670268a7387fc220c903"

View file

@ -1,5 +1,9 @@
<script lang="ts" context="module">
export type Product = { ean: string; name: string; imageUrl: string | null };
export type Product = {
ean: string;
name: string | null;
image_url: string | null;
};
</script>
<script lang="ts">
@ -7,9 +11,9 @@
</script>
<a href={`/ean/${product.ean}`} class="flex gap-2">
{#if product.imageUrl}
{#if product.image_url}
<img
src={product.imageUrl}
src={product.image_url}
alt={product.name}
class="max-h-48"
loading="lazy"

View file

@ -1 +1,2 @@
// place files you want to import through the `$lib` alias in this folder.
export const API_HOST = import.meta.env.VITE_API_HOST;

View file

@ -1,2 +0,0 @@
export { getDb } from "db-datos/db.js";
export * as schema from "db-datos/schema.js";

View file

@ -1,15 +1,17 @@
import { countDistinct } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db";
const { precios } = schema;
import { z } from "zod";
import ky from "ky";
import { API_HOST } from "$lib";
/**
 * Requests `/api/0/info` from the API host and returns the validated payload.
 *
 * @returns An object whose `count` property is a number.
 * @throws When the request fails or the response does not match the expected shape.
 */
async function getInfo() {
  const infoSchema = z.object({
    count: z.number(),
  });
  const payload = await ky.get(`${API_HOST}/api/0/info`).json();
  return infoSchema.parse(payload);
}
export const load: PageServerLoad = async () => {
const db = await getDb();
const nProductosR = await db
.select({
count: countDistinct(precios.ean),
})
.from(precios);
const nProductos = nProductosR[0].count;
const nProductos = (await getInfo()).count;
return { nProductos };
};

View file

@ -1,68 +1,29 @@
import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db";
const { precios, bestSelling } = schema;
import { max, sql } from "drizzle-orm";
import z from "zod";
import type { Product } from "$lib/ProductPreview.svelte";
type Data = {
category: string;
products: Product[];
}[];
let cache: Promise<{ key: Date; data: Data }> = doQuery();
// NOTE(review): this span appears to interleave two versions of the module:
//  - `doQuery`: selects rows from `bestSelling` grouped by category, parses
//    each row's `eansJson` as a string array, looks up matching `precios`
//    rows, and returns `{ key, data }` with products kept in EAN order.
//  - `getBestSelling`: fetches `/api/0/best-selling-products` from
//    `VITE_API_HOST` and validates the JSON with zod.
// The `);` after the fetch URL and the final `}` are shared by both versions.
// Confirm which lines are live before editing.
async function doQuery() {
const db = await getDb();
const categories = await db
.select({
fetchedAt: bestSelling.fetchedAt,
category: bestSelling.category,
eansJson: bestSelling.eansJson,
})
.from(bestSelling)
.groupBy(bestSelling.category)
.having(max(bestSelling.fetchedAt));
const categoriesWithProducts = await Promise.all(
categories.map(async (category) => {
const eans = z.array(z.string()).parse(JSON.parse(category.eansJson));
const products = await db
.select({
ean: precios.ean,
name: precios.name,
imageUrl: precios.imageUrl,
})
.from(precios)
.where(sql`${precios.ean} in ${eans}`)
.groupBy(precios.ean)
.having(max(precios.fetchedAt));
return {
category: category.category,
products: eans
.map((ean) => products.find((p) => p.ean === ean))
.filter((x): x is Product => !!x && !!x.name),
};
}),
async function getBestSelling() {
const res = await fetch(
`${import.meta.env.VITE_API_HOST}/api/0/best-selling-products`,
);
return { key: new Date(), data: categoriesWithProducts };
const json = await res.json();
return z
.array(
z.object({
category: z.string(),
products: z.array(
z.object({
ean: z.string(),
name: z.string().nullable(),
image_url: z.string().nullable(),
}),
),
}),
)
.parse(json);
}
// Periodic refresh: every 4 hours (4 * 60 * 60 * 1000 ms) re-run `doQuery()`
// and replace `cache` with an already-resolved promise of the fresh result.
// `cache` is only reassigned after `doQuery()` has resolved.
console.log("setting up interval");
setInterval(
async () => {
const c = await doQuery();
cache = Promise.resolve(c);
},
4 * 60 * 60 * 1000,
);
/**
 * Page load: returns the best-selling data to the page under `data`.
 *
 * NOTE(review): two function headers and two return bodies appear below —
 * one reading `(await cache).data`, one awaiting `getBestSelling()` — which
 * looks like two versions interleaved. `params` is destructured but not used
 * in either body. Confirm which variant is live before editing.
 */
export const load: PageServerLoad = async ({
params,
}): Promise<{ data: Data }> => {
return { data: (await cache).data };
export const load: PageServerLoad = async ({ params }) => {
return {
data: await getBestSelling(),
};
};

View file

@ -1,20 +1,23 @@
import { error } from "@sveltejs/kit";
import { eq } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { getDb, schema } from "$lib/server/db";
const { precios } = schema;
import { z } from "zod";
import { zPrecio, type Precio } from "./common";
import { API_HOST } from "$lib";
/**
 * Fetches the price history of one product from the API and validates it.
 *
 * @param ean - Product EAN. It is percent-encoded before being placed in the
 *   URL path so that characters such as `/`, `?` or `#` cannot alter the
 *   request target (same treatment `search` gives its query).
 * @returns The response body parsed as an array of `zPrecio` records.
 * @throws If the request fails, the body is not JSON, or it does not match
 *   the schema.
 */
async function getProductHistory(ean: string) {
  const res = await fetch(
    `${API_HOST}/api/0/ean/${encodeURIComponent(ean)}/history`,
  );
  // NOTE(review): the HTTP status is not inspected here; a non-2xx response
  // surfaces as a JSON or schema parse error below.
  const json: unknown = await res.json();
  return z.array(zPrecio).parse(json);
}
/**
 * Page load for a single product: returns its price records as `precios`
 * plus `meta`, the last record carrying display metadata. Responds with a
 * 404 error when there are no records for `params.ean`.
 *
 * NOTE(review): `res` and `meta` are each declared twice below — a
 * database query ordered by `fetchedAt` vs. `getProductHistory(params.ean)`,
 * and a `findLast` on `name` vs. one requiring both `name` and `image_url` —
 * which looks like two versions interleaved. Confirm which variant is live.
 */
export const load: PageServerLoad = async ({ params }) => {
const db = await getDb();
const q = db
.select()
.from(precios)
.where(eq(precios.ean, params.ean))
.orderBy(precios.fetchedAt);
const res = await q;
const res = await getProductHistory(params.ean);
if (res.length === 0) return error(404, "Not Found");
const meta = res.findLast((p) => p.name);
const meta = res.findLast(
(p): p is Precio & { name: string; image_url: string } =>
!!(p.name && p.image_url),
);
return { precios: res, meta };
};

View file

@ -1,18 +1,18 @@
<script lang="ts">
import { Supermercado, hosts } from "db-datos/supermercado";
import * as schema from "db-datos/schema";
import type { PageData } from "./$types";
import Chart from "./Chart.svelte";
import type { Precio } from "./common";
export let data: PageData;
let urls: Map<Supermercado, schema.Precio>;
let urls: Map<Supermercado, Precio>;
$: urls = data.precios.reduce((prev, curr) => {
const url = new URL(curr.url);
const supermercado = hosts[url.hostname];
prev.set(supermercado, curr);
return prev;
}, new Map<Supermercado, schema.Precio>());
}, new Map<Supermercado, Precio>());
const classBySupermercado: { [supermercado in Supermercado]: string } = {
[Supermercado.Dia]: "bg-[#d52b1e] focus:ring-[#d52b1e]",
@ -30,18 +30,18 @@
{#if data.meta}
<h1 class="text-3xl font-bold">{data.meta.name}</h1>
<img src={data.meta.imageUrl} alt={data.meta.name} class="max-h-48" />
<img src={data.meta.image_url} alt={data.meta.name} class="max-h-48" />
<div class="flex gap-2">
{#each urls as [supermercado, { url, precioCentavos }]}
{#each urls as [supermercado, { url, precio_centavos }]}
<a
href={url}
rel="noreferrer noopener"
target="_blank"
class={`focus:shadow-outline inline-flex flex-col items-center justify-center rounded-md ${classBySupermercado[supermercado]} px-4 py-2 font-medium tracking-wide text-white transition-colors duration-200 hover:bg-opacity-80 focus:outline-none focus:ring-2 focus:ring-offset-2`}
>
{#if precioCentavos}
{#if precio_centavos}
<span class="text-lg font-bold"
>{formatter.format(precioCentavos / 100)}</span
>{formatter.format(precio_centavos / 100)}</span
>
{/if}
<span class="text-sm">{supermercado}</span>

View file

@ -1,8 +1,8 @@
<script lang="ts">
import type { Precio } from "db-datos/schema";
// import dayjs from "dayjs";
import ChartJs from "./ChartJs.svelte";
import { hosts, colorBySupermercado } from "db-datos/supermercado";
import type { Precio } from "./common";
export let precios: Precio[];
@ -15,15 +15,15 @@
const ps = precios
.filter((p) => new URL(p.url!).hostname === host)
.filter(
(p): p is Precio & { precioCentavos: number } =>
p.precioCentavos !== null,
(p): p is Precio & { precio_centavos: number } =>
p.precio_centavos !== null,
);
return {
label: supermercado,
data: [
...ps.map((p) => ({
x: p.fetchedAt,
y: p.precioCentavos / 100,
x: p.fetched_at,
y: p.precio_centavos / 100,
})),
// lie
// ...ps.map((p) => ({

View file

@ -0,0 +1,12 @@
import { z } from "zod";
/** Schema for a text field that may be `null`. */
const zNullableString = z.string().nullable();

/**
 * Schema for one price record.
 *
 * `fetched_at` is coerced to a `Date`; `precio_centavos`, `in_stock`, `name`
 * and `image_url` accept `null`.
 */
export const zPrecio = z.object({
  ean: z.string(),
  fetched_at: z.coerce.date(),
  precio_centavos: z.number().nullable(),
  in_stock: z.boolean().nullable(),
  url: z.string(),
  name: zNullableString,
  image_url: zNullableString,
});

/** A validated price record, inferred from `zPrecio`. */
export type Precio = z.infer<typeof zPrecio>;

View file

@ -1,26 +1,29 @@
import { sql } from "drizzle-orm";
import { z } from "zod";
import type { PageServerLoad } from "./$types";
import { getDb } from "$lib/server/db";
import { API_HOST } from "$lib";
import ky from "ky";
/** Schema for one search hit: all three fields are required strings. */
const zProductResult = z.object({
  ean: z.string(),
  name: z.string(),
  image_url: z.string(),
});

/**
 * Runs a product search against the API.
 *
 * @param query - Free-text query; percent-encoded into the URL path.
 * @returns The response body validated as an array of `zProductResult`.
 * @throws If the request fails or the body does not match the schema.
 */
async function search(query: string) {
  const zResults = z.array(zProductResult);
  const endpoint = `${API_HOST}/api/0/search/${encodeURIComponent(query)}`;
  const payload = await ky.get(endpoint).json();
  return zResults.parse(payload);
}
/**
 * Page load for the search page: reads the `q` query parameter and returns
 * `{ query, results }`, where `results` is `null` when there is no query.
 *
 * NOTE(review): `results` is declared twice below — once filled from a
 * full-text SQL query against `precios_fts` (each space-separated term is
 * double-quoted, with embedded `"` doubled) and once from `search(query)` —
 * which looks like two versions interleaved. Confirm which variant is live.
 */
export const load: PageServerLoad = async ({ url }) => {
const db = await getDb();
const query = url.searchParams.get("q");
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
if (query) {
const sQuery = query
.replaceAll(`"`, `""`)
.split(" ")
.map((s) => `"${s}"`)
.join(" ");
console.debug(sQuery);
const sqlQuery = sql`select p.ean, p.name, p.image_url as imageUrl from precios_fts f
join precios p on p.ean = f.ean
where f.name match ${sQuery}
group by p.ean
having max(p.fetched_at)
order by p.in_stock desc;`;
results = db.all(sqlQuery);
}
let results: null | { ean: string; name: string; image_url: string }[] = query
? await search(query)
: null;
return { query, results };
};