mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-23 06:36:19 +00:00
Compare commits
No commits in common. "91c7087bdc18f01554ed9e8a1076e51fd32569e5" and "6d32c897acc535401ae54571c769009579a66131" have entirely different histories.
91c7087bdc
...
6d32c897ac
50 changed files with 218 additions and 1679 deletions
|
@ -2,8 +2,6 @@ data/warcs/
|
||||||
data/carrefour/
|
data/carrefour/
|
||||||
*/*.db*
|
*/*.db*
|
||||||
sqlite.db
|
sqlite.db
|
||||||
db.db
|
|
||||||
db.db-wal
|
|
||||||
downloader/
|
downloader/
|
||||||
node_modules/
|
node_modules/
|
||||||
*/node_modules/
|
*/node_modules/
|
||||||
|
|
31
.github/workflows/sepa-precios-archiver.yml
vendored
31
.github/workflows/sepa-precios-archiver.yml
vendored
|
@ -1,31 +0,0 @@
|
||||||
name: Sepa Precios Archiver
|
|
||||||
|
|
||||||
on:
|
|
||||||
schedule:
|
|
||||||
- cron: "0 */12 * * *" # Run every 12 hours
|
|
||||||
workflow_dispatch: # Allow manual trigger
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
archive-prices:
|
|
||||||
runs-on: ubicloud-standard-4
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- uses: oven-sh/setup-bun@v2
|
|
||||||
with:
|
|
||||||
bun-version: latest
|
|
||||||
# - name: Setup tmate session
|
|
||||||
# uses: mxschmitt/action-tmate@v3
|
|
||||||
# with:
|
|
||||||
# limit-access-to-actor: true
|
|
||||||
|
|
||||||
- name: Run archiver script
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.ARCHIVE_GITHUB_TOKEN }}
|
|
||||||
B2_BUCKET_NAME: ${{ secrets.B2_BUCKET_NAME }}
|
|
||||||
B2_BUCKET_KEY_ID: ${{ secrets.B2_BUCKET_KEY_ID }}
|
|
||||||
B2_BUCKET_KEY: ${{ secrets.B2_BUCKET_KEY }}
|
|
||||||
run: |
|
|
||||||
cd sepa-precios-archiver
|
|
||||||
bun install --frozen-lockfile
|
|
||||||
bun index.ts
|
|
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -3,7 +3,5 @@ node_modules/
|
||||||
*.db-shm
|
*.db-shm
|
||||||
*.db-wal
|
*.db-wal
|
||||||
target/
|
target/
|
||||||
*.local
|
.env.*
|
||||||
|
|
||||||
|
|
||||||
.DS_Store
|
|
|
@ -1 +0,0 @@
|
||||||
DB_PATH=../db.db
|
|
|
@ -1,4 +1,4 @@
|
||||||
export const DB_PATH = process.env.DB_PATH ?? "../db.db";
|
export const DB_PATH = process.env.DB_PATH ?? "../sqlite.db";
|
||||||
|
|
||||||
/** @type { import("drizzle-kit").Config } */
|
/** @type { import("drizzle-kit").Config } */
|
||||||
export default {
|
export default {
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
-- Custom SQL migration file, put your code below! --
|
|
||||||
CREATE VIRTUAL TABLE productos_fts USING fts5 (ean, name, content = precios, content_rowid = idd);
|
|
|
@ -1,208 +0,0 @@
|
||||||
{
|
|
||||||
"id": "f981b295-c9eb-4df5-88b1-d3765e4cc314",
|
|
||||||
"prevId": "c95c6547-d540-45cf-aa9d-9d828efb468e",
|
|
||||||
"version": "6",
|
|
||||||
"dialect": "sqlite",
|
|
||||||
"tables": {
|
|
||||||
"db_best_selling": {
|
|
||||||
"name": "db_best_selling",
|
|
||||||
"columns": {
|
|
||||||
"id": {
|
|
||||||
"name": "id",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": true,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": true
|
|
||||||
},
|
|
||||||
"fetched_at": {
|
|
||||||
"name": "fetched_at",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"category": {
|
|
||||||
"name": "category",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"eans_json": {
|
|
||||||
"name": "eans_json",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"indexes": {},
|
|
||||||
"foreignKeys": {},
|
|
||||||
"compositePrimaryKeys": {},
|
|
||||||
"uniqueConstraints": {}
|
|
||||||
},
|
|
||||||
"precios": {
|
|
||||||
"name": "precios",
|
|
||||||
"columns": {
|
|
||||||
"id": {
|
|
||||||
"name": "id",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": true,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": true
|
|
||||||
},
|
|
||||||
"ean": {
|
|
||||||
"name": "ean",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"fetched_at": {
|
|
||||||
"name": "fetched_at",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"precio_centavos": {
|
|
||||||
"name": "precio_centavos",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": false,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"in_stock": {
|
|
||||||
"name": "in_stock",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": false,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"url": {
|
|
||||||
"name": "url",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"warc_record_id": {
|
|
||||||
"name": "warc_record_id",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": false,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"parser_version": {
|
|
||||||
"name": "parser_version",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": false,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"name": {
|
|
||||||
"name": "name",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": false,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"image_url": {
|
|
||||||
"name": "image_url",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": false,
|
|
||||||
"autoincrement": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"indexes": {
|
|
||||||
"precios_ean_idx": {
|
|
||||||
"name": "precios_ean_idx",
|
|
||||||
"columns": [
|
|
||||||
"ean"
|
|
||||||
],
|
|
||||||
"isUnique": false
|
|
||||||
},
|
|
||||||
"precios_url_idx": {
|
|
||||||
"name": "precios_url_idx",
|
|
||||||
"columns": [
|
|
||||||
"url"
|
|
||||||
],
|
|
||||||
"isUnique": false
|
|
||||||
},
|
|
||||||
"precios_fetched_at_idx": {
|
|
||||||
"name": "precios_fetched_at_idx",
|
|
||||||
"columns": [
|
|
||||||
"fetched_at"
|
|
||||||
],
|
|
||||||
"isUnique": false
|
|
||||||
},
|
|
||||||
"precios_ean_fetched_at_idx": {
|
|
||||||
"name": "precios_ean_fetched_at_idx",
|
|
||||||
"columns": [
|
|
||||||
"ean",
|
|
||||||
"fetched_at"
|
|
||||||
],
|
|
||||||
"isUnique": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"foreignKeys": {},
|
|
||||||
"compositePrimaryKeys": {},
|
|
||||||
"uniqueConstraints": {}
|
|
||||||
},
|
|
||||||
"producto_urls": {
|
|
||||||
"name": "producto_urls",
|
|
||||||
"columns": {
|
|
||||||
"id": {
|
|
||||||
"name": "id",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": true,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": true
|
|
||||||
},
|
|
||||||
"url": {
|
|
||||||
"name": "url",
|
|
||||||
"type": "text",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"first_seen": {
|
|
||||||
"name": "first_seen",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
},
|
|
||||||
"last_seen": {
|
|
||||||
"name": "last_seen",
|
|
||||||
"type": "integer",
|
|
||||||
"primaryKey": false,
|
|
||||||
"notNull": true,
|
|
||||||
"autoincrement": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"indexes": {
|
|
||||||
"producto_urls_url_unique": {
|
|
||||||
"name": "producto_urls_url_unique",
|
|
||||||
"columns": [
|
|
||||||
"url"
|
|
||||||
],
|
|
||||||
"isUnique": true
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"foreignKeys": {},
|
|
||||||
"compositePrimaryKeys": {},
|
|
||||||
"uniqueConstraints": {}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"enums": {},
|
|
||||||
"_meta": {
|
|
||||||
"columns": {},
|
|
||||||
"schemas": {},
|
|
||||||
"tables": {}
|
|
||||||
},
|
|
||||||
"internal": {
|
|
||||||
"indexes": {}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -39,12 +39,12 @@ importers:
|
||||||
dayjs:
|
dayjs:
|
||||||
specifier: ^1.11.10
|
specifier: ^1.11.10
|
||||||
version: 1.11.10
|
version: 1.11.10
|
||||||
|
drizzle-kit:
|
||||||
|
specifier: ^0.23.0
|
||||||
|
version: 0.23.0
|
||||||
drizzle-orm:
|
drizzle-orm:
|
||||||
specifier: ^0.32.0
|
specifier: ^0.32.0
|
||||||
version: 0.32.0(@types/better-sqlite3@7.6.9)(better-sqlite3@11.1.2)
|
version: 0.32.0(@types/better-sqlite3@7.6.9)(better-sqlite3@11.1.2)
|
||||||
ky:
|
|
||||||
specifier: ^1.5.0
|
|
||||||
version: 1.5.0
|
|
||||||
zod:
|
zod:
|
||||||
specifier: ^3.22.4
|
specifier: ^3.22.4
|
||||||
version: 3.22.4
|
version: 3.22.4
|
||||||
|
@ -1227,10 +1227,6 @@ packages:
|
||||||
resolution: {integrity: sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==}
|
resolution: {integrity: sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==}
|
||||||
engines: {node: '>=6'}
|
engines: {node: '>=6'}
|
||||||
|
|
||||||
ky@1.5.0:
|
|
||||||
resolution: {integrity: sha512-bkQo+UqryW6Zmo/DsixYZE4Z9t2mzvNMhceyIhuMuInb3knm5Q+GNGMKveydJAj+Z6piN1SwI6eR/V0G+Z0BtA==}
|
|
||||||
engines: {node: '>=18'}
|
|
||||||
|
|
||||||
lilconfig@2.1.0:
|
lilconfig@2.1.0:
|
||||||
resolution: {integrity: sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==}
|
resolution: {integrity: sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==}
|
||||||
engines: {node: '>=10'}
|
engines: {node: '>=10'}
|
||||||
|
@ -2682,8 +2678,6 @@ snapshots:
|
||||||
|
|
||||||
kleur@4.1.5: {}
|
kleur@4.1.5: {}
|
||||||
|
|
||||||
ky@1.5.0: {}
|
|
||||||
|
|
||||||
lilconfig@2.1.0: {}
|
lilconfig@2.1.0: {}
|
||||||
|
|
||||||
lilconfig@3.1.1: {}
|
lilconfig@3.1.1: {}
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
DATABASE_URL=sqlite://../db.db
|
DATABASE_URL=sqlite://../sqlite.db
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
{
|
|
||||||
"db_name": "SQLite",
|
|
||||||
"query": "select count(distinct ean) as count from precios",
|
|
||||||
"describe": {
|
|
||||||
"columns": [
|
|
||||||
{
|
|
||||||
"name": "count",
|
|
||||||
"ordinal": 0,
|
|
||||||
"type_info": "Integer"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"parameters": {
|
|
||||||
"Right": 0
|
|
||||||
},
|
|
||||||
"nullable": [
|
|
||||||
false
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"hash": "2e632fbda989abf0d8a88a1d3bc1de0a9aefb0d3f3cdc33d26158d09faed97b2"
|
|
||||||
}
|
|
|
@ -1,38 +0,0 @@
|
||||||
{
|
|
||||||
"db_name": "SQLite",
|
|
||||||
"query": "with search_results as (\n select f.ean from precios_fts f\n where f.name match ? and f.ean != ''\n group by f.ean\n\t\t\tlimit 100\n )\n select p.id, p.ean, p.name, p.image_url from search_results as s\n join precios as p\n on p.ean = s.ean\n where p.fetched_at = (\n SELECT MAX(fetched_at)\n FROM precios as pf\n WHERE pf.ean = s.ean and pf.name is not null\n );",
|
|
||||||
"describe": {
|
|
||||||
"columns": [
|
|
||||||
{
|
|
||||||
"name": "id",
|
|
||||||
"ordinal": 0,
|
|
||||||
"type_info": "Integer"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ean",
|
|
||||||
"ordinal": 1,
|
|
||||||
"type_info": "Text"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "name",
|
|
||||||
"ordinal": 2,
|
|
||||||
"type_info": "Text"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "image_url",
|
|
||||||
"ordinal": 3,
|
|
||||||
"type_info": "Text"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"parameters": {
|
|
||||||
"Right": 1
|
|
||||||
},
|
|
||||||
"nullable": [
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
true
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"hash": "3ee249afda554bbffe736257af05aba689c71188ce1a869e01988ac7ca1220a2"
|
|
||||||
}
|
|
|
@ -6,7 +6,7 @@
|
||||||
{
|
{
|
||||||
"name": "count",
|
"name": "count",
|
||||||
"ordinal": 0,
|
"ordinal": 0,
|
||||||
"type_info": "Integer"
|
"type_info": "Int"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"parameters": {
|
"parameters": {
|
||||||
|
|
|
@ -1,56 +0,0 @@
|
||||||
{
|
|
||||||
"db_name": "SQLite",
|
|
||||||
"query": "\nselect ean,fetched_at,precio_centavos,in_stock,url,name,image_url from precios\nwhere ean = ?\norder by fetched_at\n",
|
|
||||||
"describe": {
|
|
||||||
"columns": [
|
|
||||||
{
|
|
||||||
"name": "ean",
|
|
||||||
"ordinal": 0,
|
|
||||||
"type_info": "Text"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "fetched_at",
|
|
||||||
"ordinal": 1,
|
|
||||||
"type_info": "Integer"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "precio_centavos",
|
|
||||||
"ordinal": 2,
|
|
||||||
"type_info": "Integer"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "in_stock",
|
|
||||||
"ordinal": 3,
|
|
||||||
"type_info": "Integer"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "url",
|
|
||||||
"ordinal": 4,
|
|
||||||
"type_info": "Text"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "name",
|
|
||||||
"ordinal": 5,
|
|
||||||
"type_info": "Text"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "image_url",
|
|
||||||
"ordinal": 6,
|
|
||||||
"type_info": "Text"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"parameters": {
|
|
||||||
"Right": 1
|
|
||||||
},
|
|
||||||
"nullable": [
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
true
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"hash": "88a597e29390fb04bbc48d9f88303551e068ddc478b037354c62bc77bc70ad96"
|
|
||||||
}
|
|
|
@ -6,7 +6,7 @@
|
||||||
{
|
{
|
||||||
"name": "count",
|
"name": "count",
|
||||||
"ordinal": 0,
|
"ordinal": 0,
|
||||||
"type_info": "Integer"
|
"type_info": "Int"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"parameters": {
|
"parameters": {
|
||||||
|
|
135
rust/Cargo.lock
generated
135
rust/Cargo.lock
generated
|
@ -35,6 +35,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
"getrandom 0.2.15",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"version_check",
|
"version_check",
|
||||||
"zerocopy",
|
"zerocopy",
|
||||||
|
@ -162,7 +163,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -353,7 +354,6 @@ dependencies = [
|
||||||
"iana-time-zone",
|
"iana-time-zone",
|
||||||
"js-sys",
|
"js-sys",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"serde",
|
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
"windows-targets 0.52.6",
|
"windows-targets 0.52.6",
|
||||||
]
|
]
|
||||||
|
@ -386,10 +386,10 @@ version = "4.5.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
|
checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"heck",
|
"heck 0.5.0",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -404,15 +404,6 @@ version = "1.0.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
|
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "concurrent-queue"
|
|
||||||
version = "2.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
|
|
||||||
dependencies = [
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "const-oid"
|
name = "const-oid"
|
||||||
version = "0.9.6"
|
version = "0.9.6"
|
||||||
|
@ -561,14 +552,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "event-listener"
|
name = "event-listener"
|
||||||
version = "5.3.1"
|
version = "2.5.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba"
|
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
|
||||||
dependencies = [
|
|
||||||
"concurrent-queue",
|
|
||||||
"parking",
|
|
||||||
"pin-project-lite",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fastrand"
|
name = "fastrand"
|
||||||
|
@ -679,7 +665,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -762,13 +748,22 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hashlink"
|
name = "hashlink"
|
||||||
version = "0.9.1"
|
version = "0.8.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
|
checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hashbrown",
|
"hashbrown",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-segmentation",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
|
@ -1038,9 +1033,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libsqlite3-sys"
|
name = "libsqlite3-sys"
|
||||||
version = "0.28.0"
|
version = "0.27.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f"
|
checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"pkg-config",
|
"pkg-config",
|
||||||
|
@ -1239,12 +1234,6 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "parking"
|
|
||||||
version = "2.2.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parking_lot"
|
name = "parking_lot"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
|
@ -1331,7 +1320,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1805,7 +1794,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1911,9 +1900,6 @@ name = "smallvec"
|
||||||
version = "1.13.2"
|
version = "1.13.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
||||||
dependencies = [
|
|
||||||
"serde",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "socket2"
|
name = "socket2"
|
||||||
|
@ -1956,9 +1942,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx"
|
name = "sqlx"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "27144619c6e5802f1380337a209d2ac1c431002dd74c6e60aebff3c506dc4f0c"
|
checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"sqlx-core",
|
"sqlx-core",
|
||||||
"sqlx-macros",
|
"sqlx-macros",
|
||||||
|
@ -1969,10 +1955,11 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx-core"
|
name = "sqlx-core"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a999083c1af5b5d6c071d34a708a19ba3e02106ad82ef7bbd69f5e48266b613b"
|
checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"ahash",
|
||||||
"atoi",
|
"atoi",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
@ -1986,7 +1973,6 @@ dependencies = [
|
||||||
"futures-intrusive",
|
"futures-intrusive",
|
||||||
"futures-io",
|
"futures-io",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"hashbrown",
|
|
||||||
"hashlink",
|
"hashlink",
|
||||||
"hex",
|
"hex",
|
||||||
"indexmap",
|
"indexmap",
|
||||||
|
@ -2009,26 +1995,26 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx-macros"
|
name = "sqlx-macros"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a23217eb7d86c584b8cbe0337b9eacf12ab76fe7673c513141ec42565698bb88"
|
checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"sqlx-core",
|
"sqlx-core",
|
||||||
"sqlx-macros-core",
|
"sqlx-macros-core",
|
||||||
"syn",
|
"syn 1.0.109",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx-macros-core"
|
name = "sqlx-macros-core"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1a099220ae541c5db479c6424bdf1b200987934033c2584f79a0e1693601e776"
|
checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"dotenvy",
|
"dotenvy",
|
||||||
"either",
|
"either",
|
||||||
"heck",
|
"heck 0.4.1",
|
||||||
"hex",
|
"hex",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
|
@ -2040,7 +2026,7 @@ dependencies = [
|
||||||
"sqlx-mysql",
|
"sqlx-mysql",
|
||||||
"sqlx-postgres",
|
"sqlx-postgres",
|
||||||
"sqlx-sqlite",
|
"sqlx-sqlite",
|
||||||
"syn",
|
"syn 1.0.109",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"tokio",
|
"tokio",
|
||||||
"url",
|
"url",
|
||||||
|
@ -2048,12 +2034,12 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx-mysql"
|
name = "sqlx-mysql"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5afe4c38a9b417b6a9a5eeffe7235d0a106716495536e7727d1c7f4b1ff3eba6"
|
checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
"base64 0.22.1",
|
"base64 0.21.7",
|
||||||
"bitflags 2.6.0",
|
"bitflags 2.6.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
@ -2091,12 +2077,12 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx-postgres"
|
name = "sqlx-postgres"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b1dbb157e65f10dbe01f729339c06d239120221c9ad9fa0ba8408c4cc18ecf21"
|
checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
"base64 0.22.1",
|
"base64 0.21.7",
|
||||||
"bitflags 2.6.0",
|
"bitflags 2.6.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
@ -2130,9 +2116,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sqlx-sqlite"
|
name = "sqlx-sqlite"
|
||||||
version = "0.8.0"
|
version = "0.7.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9b2cdd83c008a622d94499c0006d8ee5f821f36c89b7d625c900e5dc30b5c5ee"
|
checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
@ -2146,10 +2132,10 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_urlencoded",
|
|
||||||
"sqlx-core",
|
"sqlx-core",
|
||||||
"tracing",
|
"tracing",
|
||||||
"url",
|
"url",
|
||||||
|
"urlencoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2175,6 +2161,17 @@ version = "2.6.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "1.0.109"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.71"
|
version = "2.0.71"
|
||||||
|
@ -2227,7 +2224,7 @@ checksum = "d20468752b09f49e909e55a5d338caa8bedf615594e9d80bc4c565d30faf798c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2288,7 +2285,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2386,7 +2383,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2467,6 +2464,12 @@ version = "0.1.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291"
|
checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-segmentation"
|
||||||
|
version = "1.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode_categories"
|
name = "unicode_categories"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
|
@ -2490,6 +2493,12 @@ dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "urlencoding"
|
||||||
|
version = "2.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8-width"
|
name = "utf8-width"
|
||||||
version = "0.1.7"
|
version = "0.1.7"
|
||||||
|
@ -2568,7 +2577,7 @@ dependencies = [
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -2602,7 +2611,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
"wasm-bindgen-backend",
|
"wasm-bindgen-backend",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
@ -2854,7 +2863,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn 2.0.71",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -9,10 +9,10 @@ edition = "2021"
|
||||||
again = "0.1.2"
|
again = "0.1.2"
|
||||||
anyhow = "1.0.79"
|
anyhow = "1.0.79"
|
||||||
base64 = "0.21.7"
|
base64 = "0.21.7"
|
||||||
chrono = { version = "0.4", features = ["serde"] }
|
chrono = "0.4"
|
||||||
clap = { version = "4.4.15", features = ["derive"] }
|
clap = { version = "4.4.15", features = ["derive"] }
|
||||||
cron = "0.12.0"
|
cron = "0.12.0"
|
||||||
sqlx = { version = "0.8", features = [ "runtime-tokio", "sqlite", "chrono", "json" ] }
|
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono" ] }
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
html-escape = "0.2.13"
|
html-escape = "0.2.13"
|
||||||
itertools = "0.12.0"
|
itertools = "0.12.0"
|
||||||
|
|
|
@ -1,16 +1,8 @@
|
||||||
use axum::{
|
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
|
||||||
extract::{Path, State},
|
|
||||||
http::StatusCode,
|
|
||||||
response::IntoResponse,
|
|
||||||
routing::get,
|
|
||||||
Json, Router,
|
|
||||||
};
|
|
||||||
use chrono::{DateTime, Utc};
|
|
||||||
use clap::ValueEnum;
|
use clap::ValueEnum;
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use preciazo::supermercado::Supermercado;
|
use preciazo::supermercado::Supermercado;
|
||||||
use serde::Serialize;
|
|
||||||
use sqlx::{
|
use sqlx::{
|
||||||
sqlite::{SqliteConnectOptions, SqlitePoolOptions},
|
sqlite::{SqliteConnectOptions, SqlitePoolOptions},
|
||||||
SqlitePool,
|
SqlitePool,
|
||||||
|
@ -102,220 +94,31 @@ async fn healthcheck(State(pool): State<SqlitePool>) -> impl IntoResponse {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct CategoryWithProducts {
|
|
||||||
category: String,
|
|
||||||
products: Vec<Product>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Product {
|
|
||||||
ean: String,
|
|
||||||
name: Option<String>,
|
|
||||||
image_url: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_best_selling(State(pool): State<SqlitePool>) -> impl IntoResponse {
|
|
||||||
#[derive(sqlx::FromRow, Debug)]
|
|
||||||
struct ProductWithCategory {
|
|
||||||
category: String,
|
|
||||||
ean: String,
|
|
||||||
name: Option<String>,
|
|
||||||
image_url: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
let products_with_category = sqlx::query_as::<_, ProductWithCategory>(
|
|
||||||
"with latest_best_selling as (
|
|
||||||
select category, eans_json
|
|
||||||
from db_best_selling
|
|
||||||
group by category
|
|
||||||
having max(fetched_at)
|
|
||||||
),
|
|
||||||
extracted_eans as (
|
|
||||||
select latest_best_selling.category, json.value as ean
|
|
||||||
from latest_best_selling, json_each(latest_best_selling.eans_json) json
|
|
||||||
)
|
|
||||||
select extracted_eans.category, extracted_eans.ean, precios.image_url, name
|
|
||||||
from extracted_eans
|
|
||||||
join precios
|
|
||||||
on extracted_eans.ean = precios.ean
|
|
||||||
where
|
|
||||||
precios.fetched_at = (
|
|
||||||
SELECT MAX(fetched_at)
|
|
||||||
FROM precios
|
|
||||||
WHERE ean = extracted_eans.ean
|
|
||||||
)",
|
|
||||||
)
|
|
||||||
.fetch_all(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let categories = products_with_category
|
|
||||||
.iter()
|
|
||||||
.map(|p| p.category.clone())
|
|
||||||
.unique()
|
|
||||||
.collect_vec();
|
|
||||||
|
|
||||||
let categories_with_products = categories
|
|
||||||
.into_iter()
|
|
||||||
.map(|c| CategoryWithProducts {
|
|
||||||
category: c.clone(),
|
|
||||||
products: products_with_category
|
|
||||||
.iter()
|
|
||||||
.filter(|p| p.category == c)
|
|
||||||
.map(|p| Product {
|
|
||||||
ean: p.ean.clone(),
|
|
||||||
image_url: p.image_url.clone(),
|
|
||||||
name: p.name.clone(),
|
|
||||||
})
|
|
||||||
.collect_vec(),
|
|
||||||
})
|
|
||||||
.collect_vec();
|
|
||||||
|
|
||||||
Json(categories_with_products)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_product_history(
|
|
||||||
State(pool): State<SqlitePool>,
|
|
||||||
Path(ean): Path<String>,
|
|
||||||
) -> impl IntoResponse {
|
|
||||||
#[derive(sqlx::FromRow, Debug, Serialize)]
|
|
||||||
struct Precio {
|
|
||||||
ean: String,
|
|
||||||
fetched_at: chrono::DateTime<Utc>,
|
|
||||||
precio_centavos: Option<i64>,
|
|
||||||
in_stock: Option<bool>,
|
|
||||||
url: String,
|
|
||||||
name: Option<String>,
|
|
||||||
image_url: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
let precios = sqlx::query!(
|
|
||||||
"
|
|
||||||
select ean,fetched_at,precio_centavos,in_stock,url,name,image_url from precios
|
|
||||||
where ean = ?
|
|
||||||
order by fetched_at
|
|
||||||
",
|
|
||||||
ean
|
|
||||||
)
|
|
||||||
.map(|r| Precio {
|
|
||||||
ean: r.ean,
|
|
||||||
url: r.url,
|
|
||||||
fetched_at: DateTime::from_timestamp(r.fetched_at, 0).unwrap(),
|
|
||||||
image_url: r.image_url,
|
|
||||||
name: r.name,
|
|
||||||
in_stock: r.in_stock.map(|x| x == 1),
|
|
||||||
precio_centavos: r.precio_centavos,
|
|
||||||
})
|
|
||||||
.fetch_all(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
Json(precios)
|
|
||||||
}
|
|
||||||
async fn search(State(pool): State<SqlitePool>, Path(query): Path<String>) -> impl IntoResponse {
|
|
||||||
let sql_query = query
|
|
||||||
.clone()
|
|
||||||
.replace("\"", "\"\"")
|
|
||||||
.split(" ")
|
|
||||||
.map(|x| format!("\"{}\"", x))
|
|
||||||
.join(" ");
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Result {
|
|
||||||
ean: String,
|
|
||||||
name: String,
|
|
||||||
image_url: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
let results = sqlx::query!(
|
|
||||||
"with search_results as (
|
|
||||||
select f.ean from precios_fts f
|
|
||||||
where f.name match ? and f.ean != ''
|
|
||||||
group by f.ean
|
|
||||||
limit 100
|
|
||||||
)
|
|
||||||
select p.id, p.ean, p.name, p.image_url from search_results as s
|
|
||||||
join precios as p
|
|
||||||
on p.ean = s.ean
|
|
||||||
where p.fetched_at = (
|
|
||||||
SELECT MAX(fetched_at)
|
|
||||||
FROM precios as pf
|
|
||||||
WHERE pf.ean = s.ean and pf.name is not null
|
|
||||||
);",
|
|
||||||
sql_query
|
|
||||||
)
|
|
||||||
.fetch_all(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.into_iter()
|
|
||||||
.map(|r| Result {
|
|
||||||
ean: r.ean,
|
|
||||||
image_url: r.image_url.unwrap(),
|
|
||||||
name: r.name.unwrap(),
|
|
||||||
})
|
|
||||||
.collect_vec();
|
|
||||||
|
|
||||||
Json(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn get_info(State(pool): State<SqlitePool>) -> impl IntoResponse {
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Info {
|
|
||||||
count: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
let count = sqlx::query!("select count(distinct ean) as count from precios")
|
|
||||||
.fetch_one(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap()
|
|
||||||
.count;
|
|
||||||
Json(Info { count })
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
tracing_subscriber::fmt::init();
|
tracing_subscriber::fmt::init();
|
||||||
|
|
||||||
let pool = SqlitePoolOptions::new()
|
let pool = SqlitePoolOptions::new()
|
||||||
.max_connections(10)
|
.max_connections(1)
|
||||||
.connect_with(
|
.connect_with(
|
||||||
SqliteConnectOptions::from_str(&format!(
|
SqliteConnectOptions::from_str(&format!(
|
||||||
"sqlite://{}",
|
"sqlite://{}",
|
||||||
env::var("DB_PATH").unwrap_or("../db.db".to_string())
|
env::var("DB_PATH").unwrap_or("../sqlite.db".to_string())
|
||||||
))
|
))
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
|
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
|
||||||
.synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
|
.busy_timeout(Duration::from_secs(15))
|
||||||
.busy_timeout(Duration::from_secs(30))
|
|
||||||
.optimize_on_close(true, None),
|
.optimize_on_close(true, None),
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.expect("can't connect to database");
|
.expect("can't connect to database");
|
||||||
|
|
||||||
sqlx::query("pragma temp_store = memory;")
|
|
||||||
.execute(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
sqlx::query("pragma mmap_size = 30000000000;")
|
|
||||||
.execute(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
sqlx::query("pragma page_size = 4096;")
|
|
||||||
.execute(&pool)
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let app = Router::new()
|
let app = Router::new()
|
||||||
.route("/", get(index))
|
.route("/", get(index))
|
||||||
.route("/api/healthcheck", get(healthcheck))
|
.route("/api/healthcheck", get(healthcheck))
|
||||||
.route("/api/0/best-selling-products", get(get_best_selling))
|
|
||||||
.route("/api/0/ean/:ean/history", get(get_product_history))
|
|
||||||
.route("/api/0/info", get(get_info))
|
|
||||||
.route("/api/0/search/:query", get(search))
|
|
||||||
.with_state(pool);
|
.with_state(pool);
|
||||||
|
|
||||||
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();
|
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();
|
||||||
tracing::info!("listening on {}", listener.local_addr().unwrap());
|
tracing::debug!("listening on {}", listener.local_addr().unwrap());
|
||||||
axum::serve(listener, app).await.unwrap();
|
axum::serve(listener, app).await.unwrap();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
use std::env;
|
|
||||||
|
|
||||||
use super::now_sec;
|
use super::now_sec;
|
||||||
use super::AutoArgs;
|
use super::AutoArgs;
|
||||||
use super::AutoTelegram;
|
use super::AutoTelegram;
|
||||||
|
@ -66,16 +64,7 @@ impl Auto {
|
||||||
// }
|
// }
|
||||||
{
|
{
|
||||||
let t0 = now_sec();
|
let t0 = now_sec();
|
||||||
|
let counters = self.scraper.fetch_list(&self.db, links).await;
|
||||||
let n_coroutines = if supermercado == Supermercado::Coto {
|
|
||||||
50
|
|
||||||
} else {
|
|
||||||
env::var("N_COROUTINES")
|
|
||||||
.map_or(Ok(24), |s| s.parse::<usize>())
|
|
||||||
.expect("N_COROUTINES no es un número")
|
|
||||||
};
|
|
||||||
|
|
||||||
let counters = self.scraper.fetch_list(&self.db, links, n_coroutines).await;
|
|
||||||
self.inform(&format!(
|
self.inform(&format!(
|
||||||
"Downloaded {:?}: {:?} (took {})",
|
"Downloaded {:?}: {:?} (took {})",
|
||||||
&supermercado,
|
&supermercado,
|
||||||
|
|
|
@ -17,7 +17,7 @@ pub struct Db {
|
||||||
|
|
||||||
impl Db {
|
impl Db {
|
||||||
pub async fn connect() -> anyhow::Result<Self> {
|
pub async fn connect() -> anyhow::Result<Self> {
|
||||||
let db_path = env::var("DB_PATH").unwrap_or("../db.db".to_string());
|
let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
|
||||||
info!("Opening DB at {}", db_path);
|
info!("Opening DB at {}", db_path);
|
||||||
let read_pool = connect_to_db(&db_path, 32).await?;
|
let read_pool = connect_to_db(&db_path, 32).await?;
|
||||||
let write_pool = connect_to_db(&db_path, 1).await?;
|
let write_pool = connect_to_db(&db_path, 1).await?;
|
||||||
|
|
|
@ -99,7 +99,7 @@ async fn fetch_list_cli(links_list_path: String) -> anyhow::Result<()> {
|
||||||
|
|
||||||
let db = Db::connect().await?;
|
let db = Db::connect().await?;
|
||||||
let scraper = Scraper::from_env().await?;
|
let scraper = Scraper::from_env().await?;
|
||||||
let counters = scraper.fetch_list(&db, links, 100).await;
|
let counters = scraper.fetch_list(&db, links).await;
|
||||||
|
|
||||||
println!("Finished: {:?}", counters);
|
println!("Finished: {:?}", counters);
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -128,7 +128,11 @@ impl Scraper {
|
||||||
counters
|
counters
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn fetch_list(&self, db: &Db, links: Vec<String>, n_coroutines: usize) -> Counters {
|
pub async fn fetch_list(&self, db: &Db, links: Vec<String>) -> Counters {
|
||||||
|
let n_coroutines = env::var("N_COROUTINES")
|
||||||
|
.map_or(Ok(24), |s| s.parse::<usize>())
|
||||||
|
.expect("N_COROUTINES no es un número");
|
||||||
|
|
||||||
stream::iter(links)
|
stream::iter(links)
|
||||||
.map(|url| {
|
.map(|url| {
|
||||||
let db = db.clone();
|
let db = db.clone();
|
||||||
|
|
|
@ -56,11 +56,7 @@ pub fn parse(url: String, dom: &tl::VDom) -> Result<PrecioPoint, anyhow::Error>
|
||||||
.find_map(|n| n.as_tag())
|
.find_map(|n| n.as_tag())
|
||||||
.map(|t| t.inner_text(dom.parser()))
|
.map(|t| t.inner_text(dom.parser()))
|
||||||
// https://github.com/catdevnull/preciazo/issues/24
|
// https://github.com/catdevnull/preciazo/issues/24
|
||||||
.map(|s| {
|
.map(|s| html_escape::decode_html_entities(s.trim()).to_string());
|
||||||
html_escape::decode_html_entities(s.trim())
|
|
||||||
.trim()
|
|
||||||
.to_string()
|
|
||||||
});
|
|
||||||
|
|
||||||
let image_url = dom
|
let image_url = dom
|
||||||
.query_selector(".zoomImage1")
|
.query_selector(".zoomImage1")
|
||||||
|
|
|
@ -207,15 +207,14 @@ pub async fn get_best_selling_by_category(
|
||||||
.append_pair("extensions", &{
|
.append_pair("extensions", &{
|
||||||
let variables_obj = json!({"hideUnavailableItems":true,"skusFilter":"FIRST_AVAILABLE","simulationBehavior":"default","installmentCriteria":"MAX_WITHOUT_INTEREST","productOriginVtex":false,"map":"c","query":query,"orderBy":"OrderByTopSaleDESC","from":0,"to":99,"selectedFacets":
|
let variables_obj = json!({"hideUnavailableItems":true,"skusFilter":"FIRST_AVAILABLE","simulationBehavior":"default","installmentCriteria":"MAX_WITHOUT_INTEREST","productOriginVtex":false,"map":"c","query":query,"orderBy":"OrderByTopSaleDESC","from":0,"to":99,"selectedFacets":
|
||||||
query.split('/').map(|f| json!({"key":"c","value":f})).collect::<Vec<_>>()
|
query.split('/').map(|f| json!({"key":"c","value":f})).collect::<Vec<_>>()
|
||||||
,"facetsBehavior":"Static","categoryTreeBehavior":"default",
|
,"facetsBehavior":"Static","categoryTreeBehavior":"default","withFacets":false,"showSponsored":false});
|
||||||
"withFacets":false,"showSponsored":false,"advertisementOptions":{"showSponsored":false,"sponsoredCount":0,"advertisementPlacement":"top_search","repeatSponsoredProducts":true}});
|
|
||||||
let b64=base64::prelude::BASE64_STANDARD.encode(variables_obj.to_string());
|
let b64=base64::prelude::BASE64_STANDARD.encode(variables_obj.to_string());
|
||||||
|
|
||||||
format!(
|
format!(
|
||||||
r#"{{
|
r#"{{
|
||||||
"persistedQuery": {{
|
"persistedQuery": {{
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"sha256Hash": "8e3fd5f65d7d83516bfea23051b11e7aa469d85f26906f27e18afbee52c56ce4",
|
"sha256Hash": "fd92698fe375e8e4fa55d26fa62951d979b790fcf1032a6f02926081d199f550",
|
||||||
"sender": "vtex.store-resources@0.x",
|
"sender": "vtex.store-resources@0.x",
|
||||||
"provider": "vtex.search-graphql@0.x"
|
"provider": "vtex.search-graphql@0.x"
|
||||||
}},
|
}},
|
||||||
|
|
175
sepa-precios-archiver/.gitignore
vendored
175
sepa-precios-archiver/.gitignore
vendored
|
@ -1,175 +0,0 @@
|
||||||
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
|
|
||||||
|
|
||||||
# Logs
|
|
||||||
|
|
||||||
logs
|
|
||||||
_.log
|
|
||||||
npm-debug.log_
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
.pnpm-debug.log*
|
|
||||||
|
|
||||||
# Caches
|
|
||||||
|
|
||||||
.cache
|
|
||||||
|
|
||||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
|
||||||
|
|
||||||
report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
|
|
||||||
pids
|
|
||||||
_.pid
|
|
||||||
_.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
|
|
||||||
coverage
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
|
|
||||||
node_modules/
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# Snowpack dependency directory (https://snowpack.dev/)
|
|
||||||
|
|
||||||
web_modules/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Optional stylelint cache
|
|
||||||
|
|
||||||
.stylelintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
|
|
||||||
*.tgz
|
|
||||||
|
|
||||||
# Yarn Integrity file
|
|
||||||
|
|
||||||
.yarn-integrity
|
|
||||||
|
|
||||||
# dotenv environment variable files
|
|
||||||
|
|
||||||
.env
|
|
||||||
.env.development.local
|
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
.env.local
|
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
|
||||||
|
|
||||||
.parcel-cache
|
|
||||||
|
|
||||||
# Next.js build output
|
|
||||||
|
|
||||||
.next
|
|
||||||
out
|
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
|
||||||
|
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
|
|
||||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
|
||||||
|
|
||||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
|
||||||
|
|
||||||
# public
|
|
||||||
|
|
||||||
# vuepress build output
|
|
||||||
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# vuepress v2.x temp and cache directory
|
|
||||||
|
|
||||||
.temp
|
|
||||||
|
|
||||||
# Docusaurus cache and generated files
|
|
||||||
|
|
||||||
.docusaurus
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# yarn v2
|
|
||||||
|
|
||||||
.yarn/cache
|
|
||||||
.yarn/unplugged
|
|
||||||
.yarn/build-state.yml
|
|
||||||
.yarn/install-state.gz
|
|
||||||
.pnp.*
|
|
||||||
|
|
||||||
# IntelliJ based IDEs
|
|
||||||
.idea
|
|
||||||
|
|
||||||
# Finder (MacOS) folder config
|
|
||||||
.DS_Store
|
|
|
@ -1,19 +0,0 @@
|
||||||
# sepa-precios-archiver
|
|
||||||
|
|
||||||
Archivador del dataset de precios de [Precios Claros - Base SEPA](https://datos.produccion.gob.ar/dataset/sepa-precios). Recomprime los archivos para utilizar ~8 veces menos espacio y los resube a un bucket mío de Backblaze B2.
|
|
||||||
|
|
||||||
## Instalación
|
|
||||||
|
|
||||||
Para instalar las dependencias:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bun install
|
|
||||||
```
|
|
||||||
|
|
||||||
Para ejecutarlo:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bun run index.ts
|
|
||||||
```
|
|
||||||
|
|
||||||
This project was created using `bun init` in bun v1.1.25. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
|
|
Binary file not shown.
|
@ -1,190 +0,0 @@
|
||||||
import { z } from "zod";
|
|
||||||
import { zDatasetInfo } from "./schemas";
|
|
||||||
import { mkdtemp, writeFile, readdir, mkdir, rm } from "fs/promises";
|
|
||||||
import { basename, extname, join } from "path";
|
|
||||||
import { $, write } from "bun";
|
|
||||||
import { S3Client, HeadObjectCommand } from "@aws-sdk/client-s3";
|
|
||||||
import { Upload } from "@aws-sdk/lib-storage";
|
|
||||||
|
|
||||||
/**
 * Reads a required environment variable and logs whether it is set.
 *
 * Terminates the process with exit code 1 when the variable is missing or
 * empty; otherwise returns its value.
 */
function checkEnvVariable(variableName: string) {
  const found = process.env[variableName];
  if (!found) {
    console.log(`❌ ${variableName} is not set`);
    process.exit(1);
  }
  console.log(`✅ ${variableName} is set`);
  return found;
}
|
|
||||||
|
|
||||||
const GITHUB_TOKEN = checkEnvVariable("GITHUB_TOKEN");
|
|
||||||
const B2_BUCKET_NAME = checkEnvVariable("B2_BUCKET_NAME");
|
|
||||||
const B2_BUCKET_KEY_ID = checkEnvVariable("B2_BUCKET_KEY_ID");
|
|
||||||
const B2_BUCKET_KEY = checkEnvVariable("B2_BUCKET_KEY");
|
|
||||||
|
|
||||||
const s3 = new S3Client({
|
|
||||||
endpoint: "https://s3.us-west-004.backblazeb2.com",
|
|
||||||
region: "us-west-004",
|
|
||||||
credentials: {
|
|
||||||
accessKeyId: B2_BUCKET_KEY_ID,
|
|
||||||
secretAccessKey: B2_BUCKET_KEY,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
/**
 * Downloads the `sepa-precios` package description from the
 * datos.produccion.gob.ar `package_show` endpoint and returns the parsed
 * JSON body without validating it.
 */
async function getRawDatasetInfo() {
  const res = await fetchWithRetry(
    "https://datos.produccion.gob.ar/api/3/action/package_show?id=sepa-precios",
  );
  return await res.json();
}
|
|
||||||
|
|
||||||
/**
 * Stores the dataset metadata as `dataset-info.json` in the
 * `catdevnull/sepa-precios-metadata` repository and pushes it to `main`.
 *
 * The repository is cloned into a fresh temporary directory, which is removed
 * again whether or not any step fails. A commit is only created when the
 * staged file actually differs.
 *
 * @param datasetInfo Any JSON-serializable value; it is written pretty-printed
 *   with two-space indentation.
 */
async function saveDatasetInfoIntoRepo(datasetInfo: unknown): Promise<void> {
  const dir = await mkdtemp("/tmp/sepa-precios-archiver-metadata-repo-");
  try {
    await $`git clone https://catdevnull:${GITHUB_TOKEN}@github.com/catdevnull/sepa-precios-metadata.git ${dir}`;
    await writeFile(
      join(dir, "dataset-info.json"),
      JSON.stringify(datasetInfo, null, 2),
    );
    await $`cd ${dir} && git add dataset-info.json`;
    await $`cd ${dir} && git config user.email "git@nulo.in" && git config user.name "github actions"`;
    // `git diff --staged --quiet` exits non-zero only when something is staged,
    // so the commit is skipped when the metadata did not change.
    await $`cd ${dir} && git diff --staged --quiet || git commit -m "Update dataset info"`;
    await $`cd ${dir} && git push origin main`;
  } finally {
    await $`rm -rf ${dir}`;
  }
  console.log(`✅ Saved dataset info into repo`);
}
|
|
||||||
|
|
||||||
/**
 * Checks whether an object with the given key already exists in the B2 bucket.
 *
 * @param fileName Object key to look up.
 * @returns `true` when the HEAD request succeeds, `false` when it fails with
 *   an error named `"NotFound"`.
 * @throws Any other error raised by the request is rethrown unchanged.
 */
async function checkFileExistsInB2(fileName: string): Promise<boolean> {
  try {
    await s3.send(
      new HeadObjectCommand({
        Bucket: B2_BUCKET_NAME,
        Key: fileName,
      }),
    );
    return true;
  } catch (error: unknown) {
    // Narrow the unknown error structurally instead of asserting `any`.
    const isNotFound =
      typeof error === "object" &&
      error !== null &&
      "name" in error &&
      error.name === "NotFound";
    if (isNotFound) return false;
    throw error;
  }
}
|
|
||||||
|
|
||||||
/**
 * Uploads `fileContent` to the B2 bucket under the key `fileName` and
 * resolves once the upload has completed.
 */
async function uploadToB2Bucket(
  fileName: string,
  fileContent: ReadableStream | Blob | string,
) {
  const params = {
    Bucket: B2_BUCKET_NAME,
    Key: fileName,
    Body: fileContent,
  };
  await new Upload({ client: s3, params }).done();
}
|
|
||||||
|
|
||||||
const rawDatasetInfo = await getRawDatasetInfo();
|
|
||||||
|
|
||||||
await saveDatasetInfoIntoRepo(rawDatasetInfo);
|
|
||||||
|
|
||||||
let errored = false;
|
|
||||||
|
|
||||||
/**
 * Fetches `url`, retrying on network errors, timeouts and non-OK responses.
 *
 * @param url Resource to request.
 * @param maxRetries Maximum number of attempts in total.
 * @param waitTime Per-attempt timeout in milliseconds.
 * @returns The first response whose `ok` flag is true.
 * @throws The error of the last failed attempt, or `Error("Max retries reached")`
 *   when `maxRetries` is below 1 and therefore no attempt is made.
 */
async function fetchWithRetry(
  url: string,
  maxRetries = 3,
  waitTime = 15000,
): Promise<Response> {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const response = await fetch(url, {
        signal: AbortSignal.timeout(waitTime),
      });
      if (response.ok) return response;
      // Best effort: discard the unread error body so the connection is
      // released before the next attempt; a failure here must not mask the
      // HTTP error reported below.
      await response.body?.cancel().catch(() => undefined);
      throw new Error(`HTTP error! status: ${response.status}`);
    } catch (error) {
      console.error(`Attempt ${attempt} failed: ${error}`);
      if (attempt >= maxRetries) {
        throw error;
      }
      // Linear backoff: 1s after the first failure, 2s after the second, ...
      await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
    }
  }
  throw new Error("Max retries reached");
}
|
|
||||||
|
|
||||||
/**
 * Type guard for a successfully downloaded response that carries a body.
 *
 * For a non-OK response it logs the failing URL, sets the module-level
 * `errored` flag and returns `false`. An OK response without a body is
 * treated as a hard error and throws.
 */
function checkRes(
  res: Response,
): res is Response & { body: ReadableStream<Uint8Array> } {
  if (res.ok) {
    if (res.body) return true;
    throw new Error(`❌ No body in response`);
  }
  console.error(`❌ Error downloading ${res.url}`);
  errored = true;
  return false;
}
|
|
||||||
|
|
||||||
await uploadToB2Bucket(
|
|
||||||
`timestamped-metadata/${new Date().toISOString()}.json`,
|
|
||||||
JSON.stringify(rawDatasetInfo, null, 2),
|
|
||||||
);
|
|
||||||
|
|
||||||
const datasetInfo = z.object({ result: zDatasetInfo }).parse(rawDatasetInfo);
|
|
||||||
for (const resource of datasetInfo.result.resources) {
|
|
||||||
if (extname(resource.url) === ".zip") {
|
|
||||||
const fileName = `${resource.id}-${basename(resource.url)}-repackaged.tar.zst`;
|
|
||||||
if (await checkFileExistsInB2(fileName)) continue;
|
|
||||||
console.log(`⬇️ Downloading, repackaging and uploading ${resource.url}`);
|
|
||||||
const dir = await mkdtemp("/tmp/sepa-precios-archiver-repackage-");
|
|
||||||
console.info(dir);
|
|
||||||
try {
|
|
||||||
const zip = join(dir, "zip");
|
|
||||||
await $`curl --retry 8 --retry-delay 5 --retry-all-errors -L -o ${zip} ${resource.url}`;
|
|
||||||
await $`unzip ${zip} -d ${dir}`;
|
|
||||||
await rm(zip);
|
|
||||||
|
|
||||||
for (const file of await readdir(dir)) {
|
|
||||||
const path = join(dir, file);
|
|
||||||
if (extname(file) !== ".zip") continue;
|
|
||||||
const extractDir = join(dir, basename(file, ".zip"));
|
|
||||||
await mkdir(extractDir, { recursive: true });
|
|
||||||
await $`cd ${dir} && unzip ${path} -d ${extractDir}`;
|
|
||||||
await rm(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
await writeFile(
|
|
||||||
join(dir, "dataset-info.json"),
|
|
||||||
JSON.stringify(rawDatasetInfo, null, 2),
|
|
||||||
);
|
|
||||||
|
|
||||||
const compressed =
|
|
||||||
await $`tar -c -C ${dir} . | zstd -15 --long -T0`.blob();
|
|
||||||
await uploadToB2Bucket(fileName, compressed);
|
|
||||||
} finally {
|
|
||||||
await $`rm -rf ${dir}`;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const fileName = `${resource.id}-${basename(resource.url)}`;
|
|
||||||
if (await checkFileExistsInB2(fileName)) continue;
|
|
||||||
console.log(`⬇️ Downloading and reuploading ${resource.url}`);
|
|
||||||
const response = await fetchWithRetry(resource.url, 3, 60 * 1000);
|
|
||||||
if (!checkRes(response)) continue;
|
|
||||||
|
|
||||||
await uploadToB2Bucket(fileName, response.body);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (errored) {
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
|
@ -1,16 +0,0 @@
|
||||||
{
|
|
||||||
"name": "sepa-precios-archiver",
|
|
||||||
"module": "index.ts",
|
|
||||||
"type": "module",
|
|
||||||
"devDependencies": {
|
|
||||||
"@types/bun": "latest"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"typescript": "^5.5.4"
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"@aws-sdk/client-s3": "^3.637.0",
|
|
||||||
"@aws-sdk/lib-storage": "^3.637.0",
|
|
||||||
"zod": "^3.23.8"
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,17 +0,0 @@
|
||||||
import { z } from "zod";
|
|
||||||
|
|
||||||
/** Schema for one entry of the dataset's `resources` array. */
const zResource = z.object({
  id: z.string(),
  size: z.number(),
  format: z.string(),
  created: z.coerce.date(),
  url: z.string(),
  modified: z.coerce.date().optional(),
  description: z.string(),
});

/**
 * Schema for the dataset description: creation/modification timestamps
 * (coerced to `Date`) plus the list of resources.
 */
export const zDatasetInfo = z.object({
  metadata_modified: z.coerce.date(),
  metadata_created: z.coerce.date(),
  resources: z.array(zResource),
});
|
|
|
@ -1,27 +0,0 @@
|
||||||
{
|
|
||||||
"compilerOptions": {
|
|
||||||
// Enable latest features
|
|
||||||
"lib": ["ESNext", "DOM"],
|
|
||||||
"target": "ESNext",
|
|
||||||
"module": "ESNext",
|
|
||||||
"moduleDetection": "force",
|
|
||||||
"jsx": "react-jsx",
|
|
||||||
"allowJs": true,
|
|
||||||
|
|
||||||
// Bundler mode
|
|
||||||
"moduleResolution": "bundler",
|
|
||||||
"allowImportingTsExtensions": true,
|
|
||||||
"verbatimModuleSyntax": true,
|
|
||||||
"noEmit": true,
|
|
||||||
|
|
||||||
// Best practices
|
|
||||||
"strict": true,
|
|
||||||
"skipLibCheck": true,
|
|
||||||
"noFallthroughCasesInSwitch": true,
|
|
||||||
|
|
||||||
// Some stricter flags (disabled by default)
|
|
||||||
"noUnusedLocals": false,
|
|
||||||
"noUnusedParameters": false,
|
|
||||||
"noPropertyAccessFromIndexSignature": false
|
|
||||||
}
|
|
||||||
}
|
|
175
sepa-precios-importer/.gitignore
vendored
175
sepa-precios-importer/.gitignore
vendored
|
@ -1,175 +0,0 @@
|
||||||
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
|
|
||||||
|
|
||||||
# Logs
|
|
||||||
|
|
||||||
logs
|
|
||||||
_.log
|
|
||||||
npm-debug.log_
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
lerna-debug.log*
|
|
||||||
.pnpm-debug.log*
|
|
||||||
|
|
||||||
# Caches
|
|
||||||
|
|
||||||
.cache
|
|
||||||
|
|
||||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
|
||||||
|
|
||||||
report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
|
|
||||||
pids
|
|
||||||
_.pid
|
|
||||||
_.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
|
|
||||||
coverage
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
|
|
||||||
node_modules/
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# Snowpack dependency directory (https://snowpack.dev/)
|
|
||||||
|
|
||||||
web_modules/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Optional stylelint cache
|
|
||||||
|
|
||||||
.stylelintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
|
|
||||||
*.tgz
|
|
||||||
|
|
||||||
# Yarn Integrity file
|
|
||||||
|
|
||||||
.yarn-integrity
|
|
||||||
|
|
||||||
# dotenv environment variable files
|
|
||||||
|
|
||||||
.env
|
|
||||||
.env.development.local
|
|
||||||
.env.test.local
|
|
||||||
.env.production.local
|
|
||||||
.env.local
|
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
|
||||||
|
|
||||||
.parcel-cache
|
|
||||||
|
|
||||||
# Next.js build output
|
|
||||||
|
|
||||||
.next
|
|
||||||
out
|
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
|
||||||
|
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
|
|
||||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
|
||||||
|
|
||||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
|
||||||
|
|
||||||
# public
|
|
||||||
|
|
||||||
# vuepress build output
|
|
||||||
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# vuepress v2.x temp and cache directory
|
|
||||||
|
|
||||||
.temp
|
|
||||||
|
|
||||||
# Docusaurus cache and generated files
|
|
||||||
|
|
||||||
.docusaurus
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# yarn v2
|
|
||||||
|
|
||||||
.yarn/cache
|
|
||||||
.yarn/unplugged
|
|
||||||
.yarn/build-state.yml
|
|
||||||
.yarn/install-state.gz
|
|
||||||
.pnp.*
|
|
||||||
|
|
||||||
# IntelliJ based IDEs
|
|
||||||
.idea
|
|
||||||
|
|
||||||
# Finder (MacOS) folder config
|
|
||||||
.DS_Store
|
|
|
@ -1,14 +0,0 @@
|
||||||
# sepa-precios-importer
|
|
||||||
|
|
||||||
Importador de [datasets de precios de SEPA](https://datos.produccion.gob.ar/dataset/sepa-precios/archivo/d076720f-a7f0-4af8-b1d6-1b99d5a90c14) a una base de datos PostgreSQL.
|
|
||||||
|
|
||||||
Véase [Errores en el formato de los datos SEPA](https://gist.github.com/catdevnull/587d5c63c4bab11b9798861c917db93b)
|
|
||||||
|
|
||||||
To install dependencies and run the importer:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
bun install
|
|
||||||
bun run index.ts ~/carpeta-con-datasets-descomprimidos
|
|
||||||
```
|
|
||||||
|
|
||||||
This project was created using `bun init` in bun v1.1.26. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
|
|
Binary file not shown.
|
@ -1,231 +0,0 @@
|
||||||
import { readFile } from "fs/promises";
|
|
||||||
import Papa from "papaparse";
|
|
||||||
import { basename, join, dirname } from "path";
|
|
||||||
import postgres from "postgres";
|
|
||||||
import { Readable } from "stream";
|
|
||||||
import { pipeline } from "node:stream/promises";
|
|
||||||
import { Glob } from "bun";
|
|
||||||
|
|
||||||
// Shared PostgreSQL client for the whole script; every statement below and
// every import goes through this connection to the "sepa-precios" database.
const sql = postgres({
  database: "sepa-precios",
});

// Uncomment to start from scratch (precios must be dropped before datasets
// because it references it):
// await sql`
// drop table if exists precios;`;
// await sql`
// drop table if exists datasets;`;

// One row per imported dataset directory. `name` is UNIQUE, so inserting the
// same directory name twice raises a unique violation.
await sql`
CREATE TABLE if not exists datasets (
id SERIAL PRIMARY KEY,
name TEXT UNIQUE,
date DATE
);`;

// Branches ("sucursales") as listed in each dataset's sucursales.csv.
// The composite UNIQUE key is the target of the foreign key in `precios`.
await sql`
CREATE TABLE if not exists sucursales (
id_dataset INTEGER REFERENCES datasets(id),
id_comercio INTEGER,
id_bandera INTEGER,
id_sucursal INTEGER,
sucursales_nombre TEXT,
sucursales_tipo TEXT,
sucursales_calle TEXT,
sucursales_numero TEXT,
sucursales_latitud NUMERIC,
sucursales_longitud NUMERIC,
sucursales_observaciones TEXT,
sucursales_barrio TEXT,
sucursales_codigo_postal TEXT,
sucursales_localidad TEXT,
sucursales_provincia TEXT,
sucursales_lunes_horario_atencion TEXT,
sucursales_martes_horario_atencion TEXT,
sucursales_miercoles_horario_atencion TEXT,
sucursales_jueves_horario_atencion TEXT,
sucursales_viernes_horario_atencion TEXT,
sucursales_sabado_horario_atencion TEXT,
sucursales_domingo_horario_atencion TEXT,
UNIQUE (id_dataset, id_comercio, id_bandera, id_sucursal)
);`;

// Product prices from each dataset's productos.csv; every row must point at
// an existing branch of the same dataset.
await sql`
CREATE TABLE if not exists precios (
id_dataset INTEGER REFERENCES datasets(id),
id_comercio INTEGER,
id_bandera INTEGER,
id_sucursal INTEGER,
id_producto BIGINT,
productos_ean INTEGER,
productos_descripcion TEXT,
productos_cantidad_presentacion NUMERIC(10, 2),
productos_unidad_medida_presentacion TEXT,
productos_marca TEXT,
productos_precio_lista NUMERIC(10, 2),
productos_precio_referencia NUMERIC(10, 2),
productos_cantidad_referencia NUMERIC(10, 2),
productos_unidad_medida_referencia TEXT,
productos_precio_unitario_promo1 NUMERIC(10, 2),
productos_leyenda_promo1 TEXT,
productos_precio_unitario_promo2 NUMERIC(10, 2),
productos_leyenda_promo2 TEXT,
FOREIGN KEY (id_dataset, id_comercio, id_bandera, id_sucursal) REFERENCES sucursales(id_dataset, id_comercio, id_bandera, id_sucursal)
);
`;
|
|
||||||
|
|
||||||
/**
 * Loads `sucursales.csv` from a dataset directory into the `sucursales` table.
 *
 * Rows missing any of `id_comercio`, `id_bandera` or `id_sucursal` are
 * dropped. The remaining rows are tagged with `datasetId` and streamed to
 * PostgreSQL through a tab-delimited `COPY ... FROM STDIN`.
 *
 * @param sql - postgres.js client (or transaction) used to run the COPY.
 * @param datasetId - `datasets.id` that the rows belong to.
 * @param dir - Directory that contains `sucursales.csv`.
 */
async function importSucursales(
  sql: postgres.Sql,
  datasetId: number,
  dir: string,
) {
  const sucursales: Papa.ParseResult<any> = Papa.parse(
    await readFile(join(dir, "sucursales.csv"), "utf-8"),
    {
      header: true,
    },
  );

  const objs = sucursales.data
    .filter((data) => data.id_comercio && data.id_bandera && data.id_sucursal)
    .map((data) => {
      // Megatone ships this column without the underscore after "domingo";
      // rename it so it matches the table column.
      if ("sucursales_domingohorario_atencion" in data) {
        data.sucursales_domingo_horario_atencion =
          data.sucursales_domingohorario_atencion;
        delete data.sucursales_domingohorario_atencion;
      }
      return {
        id_dataset: datasetId,
        ...data,
      };
    });

  // Without this guard `Object.keys(objs[0])` throws an opaque TypeError when
  // the CSV has no valid rows. Mirror importDataset: report and skip.
  if (!objs.length) {
    console.error(`No hay sucursales para el dataset ${dir}`);
    return;
  }

  // The column list is taken from the first row; all rows of one file are
  // expected to share the same header.
  const keys = Object.keys(objs[0]);
  const lines = Readable.from(
    objs.map((data) => keys.map((key) => (data as any)[key]).join("\t") + "\n"),
  );
  // QUOTE E'\b' picks a character that should never appear in the data, which
  // effectively disables CSV quoting for the tab-delimited stream.
  const writable =
    await sql`copy sucursales (${sql.unsafe(keys.join(", "))}) from stdin with CSV DELIMITER E'\t' QUOTE E'\b'`.writable();
  await pipeline(lines, writable);
}
|
|
||||||
|
|
||||||
/**
 * Imports one SEPA dataset directory (`comercio.csv`, `sucursales.csv`,
 * `productos.csv`) inside a single transaction.
 *
 * The dataset is registered in `datasets` using the directory's base name and
 * the `YYYY-MM-DD` date embedded in that name. A unique violation (SQLSTATE
 * 23505) raised anywhere in the transaction is reported as "already exists"
 * and swallowed; every other error is rethrown.
 *
 * @param dir - Path of the uncompressed dataset directory.
 * @throws Error if the directory name contains no `YYYY-MM-DD` date.
 */
async function importDataset(dir: string) {
  const name = basename(dir);
  const dateMatch = name.match(/(\d{4}-\d{2}-\d{2})/);
  if (!dateMatch) {
    // Previously a non-null assertion turned this into an opaque TypeError.
    throw new Error(
      `Cannot find a YYYY-MM-DD date in dataset directory name "${name}"`,
    );
  }
  const date = dateMatch[1];
  // TODO: parse "Ultima actualizacion" at the end of the CSV and insert it into the datasets table

  // {
  //   const res =
  //     await sql`select id from datasets where name = ${basename(dir)}`;
  //   await importSucursales(sql, res[0].id, dir);
  // }

  try {
    await sql.begin(async (sql) => {
      const res =
        await sql`insert into datasets (name, date) values (${name}, ${date}) returning id`;
      const datasetId: number = res[0].id;
      const datas: any[] = [];

      const comercios: Papa.ParseResult<{ comercio_cuit: string }> = Papa.parse(
        await readFile(join(dir, "comercio.csv"), "utf-8"),
        { header: true },
      );
      const comercioCuit = comercios.data[0].comercio_cuit;
      console.log(`dataset ${datasetId}, comercio ${comercioCuit}`);

      await importSucursales(sql, datasetId, dir);

      let file = await readFile(join(dir, "productos.csv"), "utf-8");
      // WALL OF SHAME: these vendors do not know how to produce correct CSVs
      if (comercioCuit === "30612929455") {
        // Libertad S.A.
        file = file.replaceAll("|RAPTOR 6X16X45", "/RAPTOR 6X16X45");
      } else if (comercioCuit === "30578411174") {
        // Alberdi S.A.
        file = file.replaceAll(";", "|");
      }
      if (
        ["33504047089", "30707429468", "30589621499"].includes(comercioCuit)
      ) {
        // TODO: they do have the values, but under other names; for example
        // productos_precio_lista would be precio_unitario_bulto_por_unidad_venta_con_iva.
        // I don't want to lie, so for now this is not imported.
        // Returning here still commits the dataset row (and its sucursales).
        console.error(
          `No voy a importar el dataset ${dir} porque el formato está mal. Pero se podría importar. Pero por ahora no lo voy a hacer. Véase https://gist.github.com/catdevnull/587d5c63c4bab11b9798861c917db93b`,
        );
        return;
      }
      console.time("parse");
      return await new Promise((resolve, reject) => {
        Papa.parse(file, {
          header: true,
          step: function (result: any) {
            const { data } = result;
            // Keep only rows that carry the full branch + product key.
            if (
              data.id_comercio &&
              data.id_bandera &&
              data.id_sucursal &&
              data.id_producto
            )
              datas.push(data);
          },
          complete: async function () {
            // The promise is settled exactly once: resolve on success or on
            // an empty dataset, reject on failure.
            try {
              console.timeEnd("parse");
              console.time("map");
              const objs = datas.map((data) => {
                // `precios` has no column for this field.
                delete data.id_dun_14;
                return {
                  id_dataset: datasetId,
                  ...data,
                  // Tabs would be read as column separators by the COPY below.
                  productos_descripcion: data.productos_descripcion.replaceAll(
                    "\t",
                    " ",
                  ),
                };
              });
              if (!objs.length) {
                console.error(`No hay datos para el dataset ${dir}`);
                resolve(void 0);
                return;
              }
              const keys = Object.keys(objs[0]);
              const lines = Readable.from(
                objs.map(
                  (data) => keys.map((key) => data[key]).join("\t") + "\n",
                ),
              );
              console.timeEnd("map");
              console.time("copy");
              const writable =
                await sql`copy precios (${sql.unsafe(keys.join(", "))}) from stdin with CSV DELIMITER E'\t' QUOTE E'\b'`.writable();
              await pipeline(lines, writable);
              console.timeEnd("copy");
              console.info(`saved ${objs.length} rows`);
              resolve(void 0);
            } catch (e) {
              reject(e);
            } finally {
              // Release the parsed rows before the next dataset is loaded.
              Bun.gc(true);
            }
          },
          skipEmptyLines: true,
        });
      });
    });
  } catch (e: unknown) {
    const code =
      typeof e === "object" && e !== null && "code" in e
        ? (e as { code?: unknown }).code
        : undefined;
    // 23505 = unique_violation.
    // NOTE(review): this also matches a duplicate branch in sucursales.csv
    // (UNIQUE on sucursales), which would be reported as "already exists" —
    // confirm whether that is intended.
    if (code === "23505") {
      console.log(`dataset ${basename(dir)} already exists`);
      return;
    }
    throw e;
  }
}
|
|
||||||
|
|
||||||
// Entry point: import every directory under the given root that contains a
// productos.csv, one dataset at a time, and always close the DB connection.
try {
  const root = process.argv[2];
  if (!root) {
    // Fail fast with a usage hint instead of letting `join(undefined, ...)`
    // throw a TypeError further down. Thrown inside the try so the finally
    // block still closes the connection opened by the schema setup.
    throw new Error(
      "Usage: bun run index.ts <carpeta-con-datasets-descomprimidos>",
    );
  }
  const glob = new Glob("**/productos.csv");
  for await (const file of glob.scan(root)) {
    // `file` is relative to the scanned root; the dataset directory is its parent.
    const dir = join(root, dirname(file));
    console.log(dir);
    await importDataset(dir);
  }
} finally {
  await sql.end();
}
|
|
|
@ -1,17 +0,0 @@
|
||||||
{
|
|
||||||
"name": "sepa-precios-importer",
|
|
||||||
"module": "index.ts",
|
|
||||||
"type": "module",
|
|
||||||
"devDependencies": {
|
|
||||||
"@types/bun": "^1.1.7",
|
|
||||||
"@types/papaparse": "^5.3.14"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"typescript": "^5.0.0"
|
|
||||||
},
|
|
||||||
"dependencies": {
|
|
||||||
"p-queue": "^8.0.1",
|
|
||||||
"papaparse": "^5.4.1",
|
|
||||||
"postgres": "^3.4.4"
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,27 +0,0 @@
|
||||||
{
|
|
||||||
"compilerOptions": {
|
|
||||||
// Enable latest features
|
|
||||||
"lib": ["ESNext", "DOM"],
|
|
||||||
"target": "ESNext",
|
|
||||||
"module": "ESNext",
|
|
||||||
"moduleDetection": "force",
|
|
||||||
"jsx": "react-jsx",
|
|
||||||
"allowJs": true,
|
|
||||||
|
|
||||||
// Bundler mode
|
|
||||||
"moduleResolution": "bundler",
|
|
||||||
"allowImportingTsExtensions": true,
|
|
||||||
"verbatimModuleSyntax": true,
|
|
||||||
"noEmit": true,
|
|
||||||
|
|
||||||
// Best practices
|
|
||||||
"strict": true,
|
|
||||||
"skipLibCheck": true,
|
|
||||||
"noFallthroughCasesInSwitch": true,
|
|
||||||
|
|
||||||
// Some stricter flags (disabled by default)
|
|
||||||
"noUnusedLocals": false,
|
|
||||||
"noUnusedParameters": false,
|
|
||||||
"noPropertyAccessFromIndexSignature": false
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,2 +0,0 @@
|
||||||
DB_PATH=../db.db
|
|
||||||
VITE_API_HOST=http://localhost:8000
|
|
2
sitio/.gitignore
vendored
2
sitio/.gitignore
vendored
|
@ -4,7 +4,7 @@ node_modules
|
||||||
/.svelte-kit
|
/.svelte-kit
|
||||||
/package
|
/package
|
||||||
.env
|
.env
|
||||||
*.local
|
.env.*
|
||||||
!.env.example
|
!.env.example
|
||||||
vite.config.js.timestamp-*
|
vite.config.js.timestamp-*
|
||||||
vite.config.ts.timestamp-*
|
vite.config.ts.timestamp-*
|
||||||
|
|
|
@ -40,7 +40,6 @@
|
||||||
"chartjs-adapter-dayjs-4": "^1.0.4",
|
"chartjs-adapter-dayjs-4": "^1.0.4",
|
||||||
"dayjs": "^1.11.10",
|
"dayjs": "^1.11.10",
|
||||||
"drizzle-orm": "^0.32.0",
|
"drizzle-orm": "^0.32.0",
|
||||||
"ky": "^1.5.0",
|
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
},
|
},
|
||||||
"packageManager": "pnpm@9.5.0+sha512.140036830124618d624a2187b50d04289d5a087f326c9edfc0ccd733d76c4f52c3a313d4fc148794a2a9d81553016004e6742e8cf850670268a7387fc220c903"
|
"packageManager": "pnpm@9.5.0+sha512.140036830124618d624a2187b50d04289d5a087f326c9edfc0ccd733d76c4f52c3a313d4fc148794a2a9d81553016004e6742e8cf850670268a7387fc220c903"
|
||||||
|
|
|
@ -1,9 +1,5 @@
|
||||||
<script lang="ts" context="module">
|
<script lang="ts" context="module">
|
||||||
export type Product = {
|
export type Product = { ean: string; name: string; imageUrl: string | null };
|
||||||
ean: string;
|
|
||||||
name: string | null;
|
|
||||||
image_url: string | null;
|
|
||||||
};
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
@ -11,9 +7,9 @@
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<a href={`/ean/${product.ean}`} class="flex gap-2">
|
<a href={`/ean/${product.ean}`} class="flex gap-2">
|
||||||
{#if product.image_url}
|
{#if product.imageUrl}
|
||||||
<img
|
<img
|
||||||
src={product.image_url}
|
src={product.imageUrl}
|
||||||
alt={product.name}
|
alt={product.name}
|
||||||
class="max-h-48"
|
class="max-h-48"
|
||||||
loading="lazy"
|
loading="lazy"
|
||||||
|
|
|
@ -1,2 +1 @@
|
||||||
// place files you want to import through the `$lib` alias in this folder.
|
// place files you want to import through the `$lib` alias in this folder.
|
||||||
export const API_HOST = import.meta.env.VITE_API_HOST;
|
|
||||||
|
|
2
sitio/src/lib/server/db.ts
Normal file
2
sitio/src/lib/server/db.ts
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
export { getDb } from "db-datos/db.js";
|
||||||
|
export * as schema from "db-datos/schema.js";
|
|
@ -1,17 +1,15 @@
|
||||||
|
import { countDistinct } from "drizzle-orm";
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
import { z } from "zod";
|
import { getDb, schema } from "$lib/server/db";
|
||||||
import ky from "ky";
|
const { precios } = schema;
|
||||||
import { API_HOST } from "$lib";
|
|
||||||
|
|
||||||
async function getInfo() {
|
|
||||||
return z
|
|
||||||
.object({
|
|
||||||
count: z.number(),
|
|
||||||
})
|
|
||||||
.parse(await ky.get(`${API_HOST}/api/0/info`).json());
|
|
||||||
}
|
|
||||||
|
|
||||||
export const load: PageServerLoad = async () => {
|
export const load: PageServerLoad = async () => {
|
||||||
const nProductos = (await getInfo()).count;
|
const db = await getDb();
|
||||||
|
const nProductosR = await db
|
||||||
|
.select({
|
||||||
|
count: countDistinct(precios.ean),
|
||||||
|
})
|
||||||
|
.from(precios);
|
||||||
|
const nProductos = nProductosR[0].count;
|
||||||
return { nProductos };
|
return { nProductos };
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,29 +1,68 @@
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
|
import { getDb, schema } from "$lib/server/db";
|
||||||
|
const { precios, bestSelling } = schema;
|
||||||
|
import { max, sql } from "drizzle-orm";
|
||||||
import z from "zod";
|
import z from "zod";
|
||||||
|
import type { Product } from "$lib/ProductPreview.svelte";
|
||||||
|
|
||||||
async function getBestSelling() {
|
type Data = {
|
||||||
const res = await fetch(
|
category: string;
|
||||||
`${import.meta.env.VITE_API_HOST}/api/0/best-selling-products`,
|
products: Product[];
|
||||||
|
}[];
|
||||||
|
|
||||||
|
let cache: Promise<{ key: Date; data: Data }> = doQuery();
|
||||||
|
|
||||||
|
async function doQuery() {
|
||||||
|
const db = await getDb();
|
||||||
|
|
||||||
|
const categories = await db
|
||||||
|
.select({
|
||||||
|
fetchedAt: bestSelling.fetchedAt,
|
||||||
|
category: bestSelling.category,
|
||||||
|
eansJson: bestSelling.eansJson,
|
||||||
|
})
|
||||||
|
.from(bestSelling)
|
||||||
|
.groupBy(bestSelling.category)
|
||||||
|
.having(max(bestSelling.fetchedAt));
|
||||||
|
|
||||||
|
const categoriesWithProducts = await Promise.all(
|
||||||
|
categories.map(async (category) => {
|
||||||
|
const eans = z.array(z.string()).parse(JSON.parse(category.eansJson));
|
||||||
|
|
||||||
|
const products = await db
|
||||||
|
.select({
|
||||||
|
ean: precios.ean,
|
||||||
|
name: precios.name,
|
||||||
|
imageUrl: precios.imageUrl,
|
||||||
|
})
|
||||||
|
.from(precios)
|
||||||
|
.where(sql`${precios.ean} in ${eans}`)
|
||||||
|
.groupBy(precios.ean)
|
||||||
|
.having(max(precios.fetchedAt));
|
||||||
|
|
||||||
|
return {
|
||||||
|
category: category.category,
|
||||||
|
products: eans
|
||||||
|
.map((ean) => products.find((p) => p.ean === ean))
|
||||||
|
.filter((x): x is Product => !!x && !!x.name),
|
||||||
|
};
|
||||||
|
}),
|
||||||
);
|
);
|
||||||
const json = await res.json();
|
|
||||||
return z
|
return { key: new Date(), data: categoriesWithProducts };
|
||||||
.array(
|
|
||||||
z.object({
|
|
||||||
category: z.string(),
|
|
||||||
products: z.array(
|
|
||||||
z.object({
|
|
||||||
ean: z.string(),
|
|
||||||
name: z.string().nullable(),
|
|
||||||
image_url: z.string().nullable(),
|
|
||||||
}),
|
|
||||||
),
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
.parse(json);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export const load: PageServerLoad = async ({ params }) => {
|
console.log("setting up interval");
|
||||||
return {
|
setInterval(
|
||||||
data: await getBestSelling(),
|
async () => {
|
||||||
};
|
const c = await doQuery();
|
||||||
|
cache = Promise.resolve(c);
|
||||||
|
},
|
||||||
|
4 * 60 * 60 * 1000,
|
||||||
|
);
|
||||||
|
|
||||||
|
export const load: PageServerLoad = async ({
|
||||||
|
params,
|
||||||
|
}): Promise<{ data: Data }> => {
|
||||||
|
return { data: (await cache).data };
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,23 +1,20 @@
|
||||||
import { error } from "@sveltejs/kit";
|
import { error } from "@sveltejs/kit";
|
||||||
|
import { eq } from "drizzle-orm";
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
import { z } from "zod";
|
import { getDb, schema } from "$lib/server/db";
|
||||||
import { zPrecio, type Precio } from "./common";
|
const { precios } = schema;
|
||||||
import { API_HOST } from "$lib";
|
|
||||||
|
|
||||||
async function getProductHistory(ean: string) {
|
|
||||||
const res = await fetch(`${API_HOST}/api/0/ean/${ean}/history`);
|
|
||||||
const json = await res.json();
|
|
||||||
return z.array(zPrecio).parse(json);
|
|
||||||
}
|
|
||||||
|
|
||||||
export const load: PageServerLoad = async ({ params }) => {
|
export const load: PageServerLoad = async ({ params }) => {
|
||||||
const res = await getProductHistory(params.ean);
|
const db = await getDb();
|
||||||
|
const q = db
|
||||||
|
.select()
|
||||||
|
.from(precios)
|
||||||
|
.where(eq(precios.ean, params.ean))
|
||||||
|
.orderBy(precios.fetchedAt);
|
||||||
|
const res = await q;
|
||||||
if (res.length === 0) return error(404, "Not Found");
|
if (res.length === 0) return error(404, "Not Found");
|
||||||
|
|
||||||
const meta = res.findLast(
|
const meta = res.findLast((p) => p.name);
|
||||||
(p): p is Precio & { name: string; image_url: string } =>
|
|
||||||
!!(p.name && p.image_url),
|
|
||||||
);
|
|
||||||
|
|
||||||
return { precios: res, meta };
|
return { precios: res, meta };
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,18 +1,18 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { Supermercado, hosts } from "db-datos/supermercado";
|
import { Supermercado, hosts } from "db-datos/supermercado";
|
||||||
|
import * as schema from "db-datos/schema";
|
||||||
import type { PageData } from "./$types";
|
import type { PageData } from "./$types";
|
||||||
import Chart from "./Chart.svelte";
|
import Chart from "./Chart.svelte";
|
||||||
import type { Precio } from "./common";
|
|
||||||
|
|
||||||
export let data: PageData;
|
export let data: PageData;
|
||||||
|
|
||||||
let urls: Map<Supermercado, Precio>;
|
let urls: Map<Supermercado, schema.Precio>;
|
||||||
$: urls = data.precios.reduce((prev, curr) => {
|
$: urls = data.precios.reduce((prev, curr) => {
|
||||||
const url = new URL(curr.url);
|
const url = new URL(curr.url);
|
||||||
const supermercado = hosts[url.hostname];
|
const supermercado = hosts[url.hostname];
|
||||||
prev.set(supermercado, curr);
|
prev.set(supermercado, curr);
|
||||||
return prev;
|
return prev;
|
||||||
}, new Map<Supermercado, Precio>());
|
}, new Map<Supermercado, schema.Precio>());
|
||||||
|
|
||||||
const classBySupermercado: { [supermercado in Supermercado]: string } = {
|
const classBySupermercado: { [supermercado in Supermercado]: string } = {
|
||||||
[Supermercado.Dia]: "bg-[#d52b1e] focus:ring-[#d52b1e]",
|
[Supermercado.Dia]: "bg-[#d52b1e] focus:ring-[#d52b1e]",
|
||||||
|
@ -30,18 +30,18 @@
|
||||||
|
|
||||||
{#if data.meta}
|
{#if data.meta}
|
||||||
<h1 class="text-3xl font-bold">{data.meta.name}</h1>
|
<h1 class="text-3xl font-bold">{data.meta.name}</h1>
|
||||||
<img src={data.meta.image_url} alt={data.meta.name} class="max-h-48" />
|
<img src={data.meta.imageUrl} alt={data.meta.name} class="max-h-48" />
|
||||||
<div class="flex gap-2">
|
<div class="flex gap-2">
|
||||||
{#each urls as [supermercado, { url, precio_centavos }]}
|
{#each urls as [supermercado, { url, precioCentavos }]}
|
||||||
<a
|
<a
|
||||||
href={url}
|
href={url}
|
||||||
rel="noreferrer noopener"
|
rel="noreferrer noopener"
|
||||||
target="_blank"
|
target="_blank"
|
||||||
class={`focus:shadow-outline inline-flex flex-col items-center justify-center rounded-md ${classBySupermercado[supermercado]} px-4 py-2 font-medium tracking-wide text-white transition-colors duration-200 hover:bg-opacity-80 focus:outline-none focus:ring-2 focus:ring-offset-2`}
|
class={`focus:shadow-outline inline-flex flex-col items-center justify-center rounded-md ${classBySupermercado[supermercado]} px-4 py-2 font-medium tracking-wide text-white transition-colors duration-200 hover:bg-opacity-80 focus:outline-none focus:ring-2 focus:ring-offset-2`}
|
||||||
>
|
>
|
||||||
{#if precio_centavos}
|
{#if precioCentavos}
|
||||||
<span class="text-lg font-bold"
|
<span class="text-lg font-bold"
|
||||||
>{formatter.format(precio_centavos / 100)}</span
|
>{formatter.format(precioCentavos / 100)}</span
|
||||||
>
|
>
|
||||||
{/if}
|
{/if}
|
||||||
<span class="text-sm">{supermercado}</span>
|
<span class="text-sm">{supermercado}</span>
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
import type { Precio } from "db-datos/schema";
|
||||||
// import dayjs from "dayjs";
|
// import dayjs from "dayjs";
|
||||||
import ChartJs from "./ChartJs.svelte";
|
import ChartJs from "./ChartJs.svelte";
|
||||||
import { hosts, colorBySupermercado } from "db-datos/supermercado";
|
import { hosts, colorBySupermercado } from "db-datos/supermercado";
|
||||||
import type { Precio } from "./common";
|
|
||||||
|
|
||||||
export let precios: Precio[];
|
export let precios: Precio[];
|
||||||
|
|
||||||
|
@ -15,15 +15,15 @@
|
||||||
const ps = precios
|
const ps = precios
|
||||||
.filter((p) => new URL(p.url!).hostname === host)
|
.filter((p) => new URL(p.url!).hostname === host)
|
||||||
.filter(
|
.filter(
|
||||||
(p): p is Precio & { precio_centavos: number } =>
|
(p): p is Precio & { precioCentavos: number } =>
|
||||||
p.precio_centavos !== null,
|
p.precioCentavos !== null,
|
||||||
);
|
);
|
||||||
return {
|
return {
|
||||||
label: supermercado,
|
label: supermercado,
|
||||||
data: [
|
data: [
|
||||||
...ps.map((p) => ({
|
...ps.map((p) => ({
|
||||||
x: p.fetched_at,
|
x: p.fetchedAt,
|
||||||
y: p.precio_centavos / 100,
|
y: p.precioCentavos / 100,
|
||||||
})),
|
})),
|
||||||
// lie
|
// lie
|
||||||
// ...ps.map((p) => ({
|
// ...ps.map((p) => ({
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
import { z } from "zod";
|
|
||||||
|
|
||||||
export const zPrecio = z.object({
|
|
||||||
ean: z.string(),
|
|
||||||
fetched_at: z.coerce.date(),
|
|
||||||
precio_centavos: z.number().nullable(),
|
|
||||||
in_stock: z.boolean().nullable(),
|
|
||||||
url: z.string(),
|
|
||||||
name: z.string().nullable(),
|
|
||||||
image_url: z.string().nullable(),
|
|
||||||
});
|
|
||||||
export type Precio = z.infer<typeof zPrecio>;
|
|
|
@ -1,29 +1,26 @@
|
||||||
import { z } from "zod";
|
import { sql } from "drizzle-orm";
|
||||||
import type { PageServerLoad } from "./$types";
|
import type { PageServerLoad } from "./$types";
|
||||||
import { API_HOST } from "$lib";
|
import { getDb } from "$lib/server/db";
|
||||||
import ky from "ky";
|
|
||||||
|
|
||||||
const zProductResult = z.object({
|
|
||||||
ean: z.string(),
|
|
||||||
name: z.string(),
|
|
||||||
image_url: z.string(),
|
|
||||||
});
|
|
||||||
|
|
||||||
async function search(query: string) {
|
|
||||||
return z
|
|
||||||
.array(zProductResult)
|
|
||||||
.parse(
|
|
||||||
await ky
|
|
||||||
.get(`${API_HOST}/api/0/search/${encodeURIComponent(query)}`)
|
|
||||||
.json(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
export const load: PageServerLoad = async ({ url }) => {
|
export const load: PageServerLoad = async ({ url }) => {
|
||||||
|
const db = await getDb();
|
||||||
const query = url.searchParams.get("q");
|
const query = url.searchParams.get("q");
|
||||||
let results: null | { ean: string; name: string; image_url: string }[] = query
|
let results: null | { ean: string; name: string; imageUrl: string }[] = null;
|
||||||
? await search(query)
|
if (query) {
|
||||||
: null;
|
const sQuery = query
|
||||||
|
.replaceAll(`"`, `""`)
|
||||||
|
.split(" ")
|
||||||
|
.map((s) => `"${s}"`)
|
||||||
|
.join(" ");
|
||||||
|
console.debug(sQuery);
|
||||||
|
const sqlQuery = sql`select p.ean, p.name, p.image_url as imageUrl from precios_fts f
|
||||||
|
join precios p on p.ean = f.ean
|
||||||
|
where f.name match ${sQuery}
|
||||||
|
group by p.ean
|
||||||
|
having max(p.fetched_at)
|
||||||
|
order by p.in_stock desc;`;
|
||||||
|
results = db.all(sqlQuery);
|
||||||
|
}
|
||||||
|
|
||||||
return { query, results };
|
return { query, results };
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue