mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 03:26:19 +00:00
Compare commits
3 commits
d495acfc9d
...
e64b993069
Author | SHA1 | Date | |
---|---|---|---|
e64b993069 | |||
1b55f47815 | |||
ccb5b2c2ef |
40 changed files with 2637 additions and 1696 deletions
36
.github/workflows/container.yml
vendored
36
.github/workflows/container.yml
vendored
|
@ -86,8 +86,40 @@ jobs:
|
|||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: "{{defaultContext}}:scraper-rs/"
|
||||
file: Dockerfile
|
||||
context: "{{defaultContext}}:rust/"
|
||||
file: scraper.Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||
cache-to: type=inline
|
||||
platforms: linux/amd64
|
||||
|
||||
build-and-push-api-amd64:
|
||||
name: "[amd64] oci:api"
|
||||
runs-on: ubicloud-standard-16
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/api
|
||||
- name: Build and push Docker image
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: "{{defaultContext}}:rust/"
|
||||
file: api.Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
|
1
db-datos/drizzle/0012_hard_red_wolf.sql
Normal file
1
db-datos/drizzle/0012_hard_red_wolf.sql
Normal file
|
@ -0,0 +1 @@
|
|||
CREATE INDEX `precios_fetched_at_idx` ON `precios` (`fetched_at`);
|
197
db-datos/drizzle/meta/0012_snapshot.json
Normal file
197
db-datos/drizzle/meta/0012_snapshot.json
Normal file
|
@ -0,0 +1,197 @@
|
|||
{
|
||||
"version": "5",
|
||||
"dialect": "sqlite",
|
||||
"id": "16046188-ab24-4bd4-bfb4-8a81f24c6f28",
|
||||
"prevId": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84",
|
||||
"tables": {
|
||||
"db_best_selling": {
|
||||
"name": "db_best_selling",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"category": {
|
||||
"name": "category",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"eans_json": {
|
||||
"name": "eans_json",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
},
|
||||
"precios": {
|
||||
"name": "precios",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"ean": {
|
||||
"name": "ean",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"fetched_at": {
|
||||
"name": "fetched_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"precio_centavos": {
|
||||
"name": "precio_centavos",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"in_stock": {
|
||||
"name": "in_stock",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"warc_record_id": {
|
||||
"name": "warc_record_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"parser_version": {
|
||||
"name": "parser_version",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"image_url": {
|
||||
"name": "image_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"precios_ean_idx": {
|
||||
"name": "precios_ean_idx",
|
||||
"columns": [
|
||||
"ean"
|
||||
],
|
||||
"isUnique": false
|
||||
},
|
||||
"precios_url_idx": {
|
||||
"name": "precios_url_idx",
|
||||
"columns": [
|
||||
"url"
|
||||
],
|
||||
"isUnique": false
|
||||
},
|
||||
"precios_fetched_at_idx": {
|
||||
"name": "precios_fetched_at_idx",
|
||||
"columns": [
|
||||
"fetched_at"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
},
|
||||
"producto_urls": {
|
||||
"name": "producto_urls",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": true
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"first_seen": {
|
||||
"name": "first_seen",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_seen": {
|
||||
"name": "last_seen",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"producto_urls_url_unique": {
|
||||
"name": "producto_urls_url_unique",
|
||||
"columns": [
|
||||
"url"
|
||||
],
|
||||
"isUnique": true
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {}
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
}
|
||||
}
|
|
@ -85,6 +85,13 @@
|
|||
"when": 1706628184254,
|
||||
"tag": "0011_huge_next_avengers",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 12,
|
||||
"version": "5",
|
||||
"when": 1719680946811,
|
||||
"tag": "0012_hard_red_wolf",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
|
@ -19,5 +19,6 @@
|
|||
"@types/better-sqlite3": "^7.6.9",
|
||||
"@types/node": "^20.12.7",
|
||||
"drizzle-kit": "^0.20.14"
|
||||
}
|
||||
},
|
||||
"packageManager": "pnpm@9.0.6+sha256.0624e30eff866cdeb363b15061bdb7fd9425b17bc1bb42c22f5f4efdea21f6b3"
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@ export const precios = sqliteTable(
|
|||
return {
|
||||
preciosEanIdx: index("precios_ean_idx").on(precios.ean),
|
||||
preciosUrlIdx: index("precios_url_idx").on(precios.url),
|
||||
preciosFetchedAtIdx: index("precios_fetched_at_idx").on(
|
||||
precios.fetchedAt
|
||||
),
|
||||
};
|
||||
}
|
||||
);
|
||||
|
|
3515
pnpm-lock.yaml
3515
pnpm-lock.yaml
File diff suppressed because it is too large
Load diff
3
rust/.dockerignore
Normal file
3
rust/.dockerignore
Normal file
|
@ -0,0 +1,3 @@
|
|||
.env
|
||||
target
|
||||
*.Dockerfile
|
1
rust/.env
Normal file
1
rust/.env
Normal file
|
@ -0,0 +1 @@
|
|||
DATABASE_URL=sqlite://../sqlite.db
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"db_name": "SQLite",
|
||||
"query": "SELECT count(id) as count FROM precios\n WHERE fetched_at > ?\n AND url LIKE ?",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
"name": "count",
|
||||
"ordinal": 0,
|
||||
"type_info": "Int"
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Right": 2
|
||||
},
|
||||
"nullable": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"hash": "71faba058f0a18e9aff6a12cc78353d3007dea8830088b07b67bfe86084a8ee2"
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"db_name": "SQLite",
|
||||
"query": "SELECT count(id) as count FROM db_best_selling\n WHERE fetched_at > ?",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
"name": "count",
|
||||
"ordinal": 0,
|
||||
"type_info": "Int"
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Right": 1
|
||||
},
|
||||
"nullable": [
|
||||
false
|
||||
]
|
||||
},
|
||||
"hash": "e683ce875cc7e84586de163cdfd8d0bca2a1e679aebce4644fe0b31d639a1be4"
|
||||
}
|
239
scraper-rs/Cargo.lock → rust/Cargo.lock
generated
239
scraper-rs/Cargo.lock → rust/Cargo.lock
generated
|
@ -41,6 +41,15 @@ dependencies = [
|
|||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
version = "2.0.4"
|
||||
|
@ -146,6 +155,17 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.66",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atoi"
|
||||
version = "2.0.0"
|
||||
|
@ -161,6 +181,61 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper-util",
|
||||
"itoa",
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_path_to_error",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper 0.1.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.73"
|
||||
|
@ -795,6 +870,12 @@ version = "1.9.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9"
|
||||
|
||||
[[package]]
|
||||
name = "httpdate"
|
||||
version = "1.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "1.3.1"
|
||||
|
@ -807,6 +888,7 @@ dependencies = [
|
|||
"http",
|
||||
"http-body",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
"pin-project-lite",
|
||||
"smallvec",
|
||||
|
@ -994,6 +1076,21 @@ version = "0.4.21"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
|
||||
dependencies = [
|
||||
"regex-automata 0.1.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
version = "0.10.6"
|
||||
|
@ -1283,6 +1380,36 @@ version = "0.2.17"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "preciazo"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"again",
|
||||
"anyhow",
|
||||
"axum",
|
||||
"base64 0.21.7",
|
||||
"chrono",
|
||||
"clap",
|
||||
"cron",
|
||||
"futures",
|
||||
"html-escape",
|
||||
"itertools",
|
||||
"nanoid",
|
||||
"quick-xml",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simple-error",
|
||||
"sqlx",
|
||||
"thiserror",
|
||||
"tl",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.85"
|
||||
|
@ -1455,6 +1582,50 @@ dependencies = [
|
|||
"bitflags 2.5.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata 0.4.7",
|
||||
"regex-syntax 0.8.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
dependencies = [
|
||||
"regex-syntax 0.6.29",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax 0.8.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.5"
|
||||
|
@ -1486,7 +1657,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tokio-socks",
|
||||
|
@ -1615,6 +1786,12 @@ dependencies = [
|
|||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.18"
|
||||
|
@ -1627,35 +1804,6 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scraper-rs"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"again",
|
||||
"anyhow",
|
||||
"base64 0.21.7",
|
||||
"chrono",
|
||||
"clap",
|
||||
"cron",
|
||||
"futures",
|
||||
"html-escape",
|
||||
"itertools",
|
||||
"nanoid",
|
||||
"quick-xml",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simple-error",
|
||||
"sqlx",
|
||||
"thiserror",
|
||||
"tl",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.203"
|
||||
|
@ -1687,6 +1835,16 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_path_to_error"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_urlencoded"
|
||||
version = "0.7.1"
|
||||
|
@ -1838,6 +1996,7 @@ dependencies = [
|
|||
"atoi",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"crc",
|
||||
"crossbeam-queue",
|
||||
"either",
|
||||
|
@ -1898,6 +2057,7 @@ dependencies = [
|
|||
"sha2",
|
||||
"sqlx-core",
|
||||
"sqlx-mysql",
|
||||
"sqlx-postgres",
|
||||
"sqlx-sqlite",
|
||||
"syn 1.0.109",
|
||||
"tempfile",
|
||||
|
@ -1916,6 +2076,7 @@ dependencies = [
|
|||
"bitflags 2.5.0",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"crc",
|
||||
"digest",
|
||||
"dotenvy",
|
||||
|
@ -1957,6 +2118,7 @@ dependencies = [
|
|||
"base64 0.21.7",
|
||||
"bitflags 2.5.0",
|
||||
"byteorder",
|
||||
"chrono",
|
||||
"crc",
|
||||
"dotenvy",
|
||||
"etcetera",
|
||||
|
@ -1992,6 +2154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
|
||||
dependencies = [
|
||||
"atoi",
|
||||
"chrono",
|
||||
"flume",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -2053,6 +2216,12 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "1.0.1"
|
||||
|
@ -2118,8 +2287,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
|||
|
||||
[[package]]
|
||||
name = "tl"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/evertedsphere/tl?branch=patch-1#56711166588fa6c7729a08e5740dca2526436316"
|
||||
version = "0.7.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b130bd8a58c163224b44e217b4239ca7b927d82bf6cc2fea1fc561d15056e3f7"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
|
@ -2211,6 +2381,7 @@ dependencies = [
|
|||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2275,10 +2446,14 @@ version = "0.3.18"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
]
|
|
@ -1,5 +1,5 @@
|
|||
[package]
|
||||
name = "scraper-rs"
|
||||
name = "preciazo"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
|
@ -9,10 +9,10 @@ edition = "2021"
|
|||
again = "0.1.2"
|
||||
anyhow = "1.0.79"
|
||||
base64 = "0.21.7"
|
||||
chrono = "0.4.32"
|
||||
chrono = "0.4"
|
||||
clap = { version = "4.4.15", features = ["derive"] }
|
||||
cron = "0.12.0"
|
||||
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite" ] }
|
||||
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono" ] }
|
||||
futures = "0.3.30"
|
||||
html-escape = "0.2.13"
|
||||
itertools = "0.12.0"
|
||||
|
@ -27,11 +27,24 @@ reqwest = { version = "0.12", default-features = false, features = [
|
|||
"json",
|
||||
] }
|
||||
rusqlite = "0.30.0"
|
||||
serde = { version = "1.0.193", features = ["derive"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0.109"
|
||||
simple-error = "0.3.0"
|
||||
thiserror = "1.0.56"
|
||||
tl = { git = "https://github.com/evertedsphere/tl", branch = "patch-1" }
|
||||
tokio = { version = "1.35.1", features = ["full"] }
|
||||
tl = "0.7.8"
|
||||
tokio = { version = "1.35", features = ["full"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.3"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
axum = "0.7.5"
|
||||
|
||||
#[dependencies.rocket_db_pools]
|
||||
#version = "0.2.0"
|
||||
#features = ["sqlx_sqlite"]
|
||||
|
||||
[[bin]]
|
||||
name = "api"
|
||||
path = "src/api/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "scraper"
|
||||
path = "src/scraper/main.rs"
|
3
rust/Rocket.toml
Normal file
3
rust/Rocket.toml
Normal file
|
@ -0,0 +1,3 @@
|
|||
[default.databases.precios]
|
||||
url = "../sqlite.db"
|
||||
|
25
rust/api.Dockerfile
Normal file
25
rust/api.Dockerfile
Normal file
|
@ -0,0 +1,25 @@
|
|||
FROM cgr.dev/chainguard/wolfi-base AS base
|
||||
WORKDIR /usr/src/app
|
||||
RUN apk add --no-cache libgcc
|
||||
|
||||
FROM docker.io/rust:1 AS rs-build
|
||||
# RUN apt-get update && apt-get install -y openssl-dev libsqlite3-dev && rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
COPY . .
|
||||
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
|
||||
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \
|
||||
--mount=type=cache,sharing=locked,target=/usr/src/app/target \
|
||||
cargo install --bin api --locked --path .
|
||||
|
||||
FROM base
|
||||
RUN apk add --no-cache sqlite sqlite-libs tini
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
|
||||
# api
|
||||
COPY --from=rs-build /usr/local/cargo/bin/api /usr/local/bin/api
|
||||
|
||||
ENV DB_PATH=/db/db.db
|
||||
|
||||
EXPOSE 8000
|
||||
CMD ["api"]
|
|
@ -10,14 +10,14 @@ COPY . .
|
|||
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
|
||||
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \
|
||||
--mount=type=cache,sharing=locked,target=/usr/src/app/target \
|
||||
cargo install --locked --path .
|
||||
cargo install --bin scraper --locked --path .
|
||||
|
||||
FROM base
|
||||
RUN apk add --no-cache sqlite sqlite-libs
|
||||
|
||||
# Scraper
|
||||
COPY --from=rs-build /usr/local/cargo/bin/scraper-rs /usr/local/bin/scraper-rs
|
||||
COPY --from=rs-build /usr/local/cargo/bin/scraper /usr/local/bin/scraper
|
||||
|
||||
ENV DB_PATH=/db/db.db
|
||||
|
||||
CMD ["scraper-rs", "cron"]
|
||||
CMD ["scraper", "cron"]
|
126
rust/src/api/main.rs
Normal file
126
rust/src/api/main.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
|
||||
use clap::ValueEnum;
|
||||
use futures::future::join_all;
|
||||
use itertools::Itertools;
|
||||
use preciazo::supermercado::Supermercado;
|
||||
use sqlx::{
|
||||
sqlite::{SqliteConnectOptions, SqlitePoolOptions},
|
||||
SqlitePool,
|
||||
};
|
||||
use std::{env, str::FromStr, time::Duration};
|
||||
|
||||
/// Handler for `GET /`: responds with a fixed greeting that points at the project repository.
async fn index() -> &'static str {
    // Kept as a named constant so the handler body is just "return the landing text".
    const LANDING_TEXT: &str =
        "Hello, world! <a href=https://github.com/catdevnull/preciazo>catdevnull/preciazo</a>";
    LANDING_TEXT
}
|
||||
async fn healthcheck(State(pool): State<SqlitePool>) -> impl IntoResponse {
|
||||
let one_day_ago = chrono::Utc::now() - chrono::Duration::hours(25);
|
||||
let timestamp = one_day_ago.timestamp();
|
||||
|
||||
let supermercados_checks =
|
||||
join_all(Supermercado::value_variants().iter().map(|supermercado| {
|
||||
let value = pool.clone();
|
||||
async move {
|
||||
let url_query = format!("%{}%", supermercado.host());
|
||||
let count = sqlx::query!(
|
||||
"SELECT count(id) as count FROM precios
|
||||
WHERE fetched_at > ?
|
||||
AND url LIKE ?",
|
||||
timestamp,
|
||||
url_query
|
||||
)
|
||||
.fetch_one(&value)
|
||||
.await
|
||||
.unwrap()
|
||||
.count;
|
||||
let expected_count = match *supermercado {
|
||||
Supermercado::Carrefour => 45000,
|
||||
Supermercado::Coto => 32000,
|
||||
Supermercado::Jumbo => 20000,
|
||||
Supermercado::Farmacity => 8000,
|
||||
Supermercado::Dia => 4000,
|
||||
};
|
||||
if count < expected_count {
|
||||
Err(format!(
|
||||
"[{:?}] last 25h: expected at least {}, got {}",
|
||||
supermercado, expected_count, count
|
||||
))
|
||||
} else {
|
||||
Ok(format!("[{:?}] last 25h: {} precios", supermercado, count))
|
||||
}
|
||||
}
|
||||
}))
|
||||
.await
|
||||
.into_iter()
|
||||
.collect_vec();
|
||||
|
||||
let best_selling_check = {
|
||||
let record = sqlx::query!(
|
||||
"SELECT count(id) as count FROM db_best_selling
|
||||
WHERE fetched_at > ?",
|
||||
timestamp,
|
||||
)
|
||||
.fetch_one(&pool)
|
||||
.await
|
||||
.unwrap();
|
||||
let count = record.count;
|
||||
let expected_count = 3;
|
||||
if count < expected_count {
|
||||
Err(format!(
|
||||
"[best_selling] last 25h: expected at least {}, got {}",
|
||||
expected_count, count
|
||||
))
|
||||
} else {
|
||||
Ok(format!("[best_selling] last 25h: {}", count))
|
||||
}
|
||||
};
|
||||
|
||||
let list = format!(
|
||||
"{}\n- {:?}",
|
||||
supermercados_checks
|
||||
.clone()
|
||||
.into_iter()
|
||||
.map(|c| format!("- {:?}", c))
|
||||
.join("\n"),
|
||||
best_selling_check
|
||||
);
|
||||
|
||||
if supermercados_checks.into_iter().all(|r| r.is_ok()) && best_selling_check.is_ok() {
|
||||
(StatusCode::OK, format!("all is ok\n{}", list))
|
||||
} else {
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
format!("errors:\n{}", list),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let pool = SqlitePoolOptions::new()
|
||||
.max_connections(1)
|
||||
.connect_with(
|
||||
SqliteConnectOptions::from_str(&format!(
|
||||
"sqlite://{}",
|
||||
env::var("DB_PATH").unwrap_or("../sqlite.db".to_string())
|
||||
))
|
||||
.unwrap()
|
||||
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
|
||||
.synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
|
||||
.busy_timeout(Duration::from_secs(15))
|
||||
.pragma("cache_size", "1000000000")
|
||||
.optimize_on_close(true, None),
|
||||
)
|
||||
.await
|
||||
.expect("can't connect to database");
|
||||
|
||||
let app = Router::new()
|
||||
.route("/", get(index))
|
||||
.route("/api/healthcheck", get(healthcheck))
|
||||
.with_state(pool);
|
||||
|
||||
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();
|
||||
tracing::debug!("listening on {}", listener.local_addr().unwrap());
|
||||
axum::serve(listener, app).await.unwrap();
|
||||
}
|
1
rust/src/lib.rs
Normal file
1
rust/src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
|||
pub mod supermercado;
|
|
@ -1,11 +1,11 @@
|
|||
use super::now_sec;
|
||||
use super::supermercado::Supermercado;
|
||||
use super::AutoArgs;
|
||||
use super::AutoTelegram;
|
||||
use crate::best_selling;
|
||||
use crate::db::Db;
|
||||
use crate::scraper::Scraper;
|
||||
use futures::Future;
|
||||
use preciazo::supermercado::Supermercado;
|
||||
use reqwest::Url;
|
||||
|
||||
#[derive(Clone)]
|
|
@ -1,10 +1,11 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use crate::{build_client, db::Db, sites::vtex, supermercado::Supermercado};
|
||||
use crate::{build_client, db::Db, sites::vtex};
|
||||
use chrono::{DateTime, Utc};
|
||||
use clap::ValueEnum;
|
||||
use futures::{stream, FutureExt, StreamExt};
|
||||
use itertools::Itertools;
|
||||
use preciazo::supermercado::Supermercado;
|
||||
use simple_error::SimpleError;
|
||||
use tracing::warn;
|
||||
|
|
@ -15,8 +15,7 @@ use std::{
|
|||
};
|
||||
use thiserror::Error;
|
||||
|
||||
mod supermercado;
|
||||
use supermercado::Supermercado;
|
||||
use preciazo::supermercado::Supermercado;
|
||||
mod auto;
|
||||
use auto::Auto;
|
||||
mod proxy_client;
|
||||
|
@ -58,7 +57,7 @@ struct AutoArgs {
|
|||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> () {
|
||||
async fn main() {
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
match Args::parse() {
|
|
@ -7,9 +7,9 @@ use tokio::fs;
|
|||
|
||||
use crate::{
|
||||
anyhow_retry_if_wasnt_not_found, build_client, db::Db, get_fetch_retry_policy,
|
||||
get_parse_retry_policy, proxy_client::ProxyClient, sites, supermercado::Supermercado, Counters,
|
||||
PrecioPoint,
|
||||
get_parse_retry_policy, proxy_client::ProxyClient, sites, Counters, PrecioPoint,
|
||||
};
|
||||
use preciazo::supermercado::Supermercado;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Scraper {
|
|
@ -229,7 +229,7 @@ pub async fn get_best_selling_by_category(
|
|||
let json = &serde_json::from_str::<serde_json::Value>(&body)?;
|
||||
if let Some(errors_array) = json.pointer("/errors") {
|
||||
if let Some(error_messages) = errors_array.as_array().map(|a| {
|
||||
a.into_iter()
|
||||
a.iter()
|
||||
.map(|obj| obj.get("message").and_then(|v| v.as_str()))
|
||||
.collect_vec()
|
||||
}) {
|
50
rust/src/supermercado.rs
Normal file
50
rust/src/supermercado.rs
Normal file
|
@ -0,0 +1,50 @@
|
|||
use clap::ValueEnum;
|
||||
use reqwest::Url;
|
||||
|
||||
const SUPERMERCADOS_HOSTS: [(Supermercado, &str); 5] = [
|
||||
(Supermercado::Dia, "diaonline.supermercadosdia.com.ar"),
|
||||
(Supermercado::Carrefour, "www.carrefour.com.ar"),
|
||||
(Supermercado::Coto, "www.cotodigital3.com.ar"),
|
||||
(Supermercado::Jumbo, "www.jumbo.com.ar"),
|
||||
(Supermercado::Farmacity, "www.farmacity.com"),
|
||||
];
|
||||
|
||||
#[derive(ValueEnum, Clone, Debug, Copy, PartialEq)]
|
||||
pub enum Supermercado {
|
||||
Dia,
|
||||
Jumbo,
|
||||
Carrefour,
|
||||
Coto,
|
||||
Farmacity,
|
||||
}
|
||||
impl Supermercado {
|
||||
pub fn host(&self) -> &'static str {
|
||||
SUPERMERCADOS_HOSTS
|
||||
.into_iter()
|
||||
.find(|(supermercado, _host)| self == supermercado)
|
||||
.map(|(_, host)| host)
|
||||
.unwrap()
|
||||
}
|
||||
pub fn from_url(url: &Url) -> Option<Self> {
|
||||
SUPERMERCADOS_HOSTS
|
||||
.into_iter()
|
||||
.find(|(_supermercado, host)| *host == url.host_str().unwrap())
|
||||
.map(|(supermercado, _host)| supermercado)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Supermercado;
    use clap::ValueEnum;

    /// A product URL on a known host resolves to the matching variant.
    #[test]
    fn host_to_supermercado() {
        let supermercado = Supermercado::from_url(&reqwest::Url::parse("https://diaonline.supermercadosdia.com.ar/repelente-para-mosquitos-off--family-aerosol-165-cc-6338/p").unwrap());
        assert_eq!(supermercado, Some(Supermercado::Dia))
    }

    /// A variant resolves to its hostname.
    #[test]
    fn supermercado_to_host() {
        let host = Supermercado::Coto.host();
        assert_eq!(host, "www.cotodigital3.com.ar")
    }

    /// Guards the host table: every variant must have an entry (otherwise `host()`
    /// panics) and that entry must map back to the same variant.
    #[test]
    fn every_variant_round_trips_through_its_host() {
        for supermercado in Supermercado::value_variants() {
            let url = reqwest::Url::parse(&format!("https://{}/", supermercado.host())).unwrap();
            assert_eq!(Supermercado::from_url(&url), Some(*supermercado));
        }
    }

    /// A URL on a host that is not in the table yields `None`.
    #[test]
    fn unknown_host_is_none() {
        let url = reqwest::Url::parse("https://example.com/some-product/p").unwrap();
        assert_eq!(Supermercado::from_url(&url), None);
    }
}
|
|
@ -1,2 +0,0 @@
|
|||
.env
|
||||
target
|
|
@ -1,32 +0,0 @@
|
|||
use clap::ValueEnum;
|
||||
use reqwest::Url;
|
||||
|
||||
#[derive(ValueEnum, Clone, Debug, Copy)]
|
||||
pub enum Supermercado {
|
||||
Dia,
|
||||
Jumbo,
|
||||
Carrefour,
|
||||
Coto,
|
||||
Farmacity,
|
||||
}
|
||||
impl Supermercado {
|
||||
pub fn host(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Dia => "diaonline.supermercadosdia.com.ar",
|
||||
Self::Carrefour => "www.carrefour.com.ar",
|
||||
Self::Coto => "www.cotodigital3.com.ar",
|
||||
Self::Jumbo => "www.jumbo.com.ar",
|
||||
Self::Farmacity => "www.farmacity.com",
|
||||
}
|
||||
}
|
||||
pub fn from_url(url: &Url) -> Option<Self> {
|
||||
match url.host_str().unwrap() {
|
||||
"www.carrefour.com.ar" => Some(Self::Carrefour),
|
||||
"diaonline.supermercadosdia.com.ar" => Some(Self::Dia),
|
||||
"www.cotodigital3.com.ar" => Some(Self::Coto),
|
||||
"www.jumbo.com.ar" => Some(Self::Jumbo),
|
||||
"www.farmacity.com" => Some(Self::Farmacity),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue