mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 22:26:19 +00:00
Compare commits
3 commits
d495acfc9d
...
e64b993069
Author | SHA1 | Date | |
---|---|---|---|
e64b993069 | |||
1b55f47815 | |||
ccb5b2c2ef |
40 changed files with 2637 additions and 1696 deletions
36
.github/workflows/container.yml
vendored
36
.github/workflows/container.yml
vendored
|
@ -86,8 +86,40 @@ jobs:
|
||||||
- name: Build and push Docker image
|
- name: Build and push Docker image
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: "{{defaultContext}}:scraper-rs/"
|
context: "{{defaultContext}}:rust/"
|
||||||
file: Dockerfile
|
file: scraper.Dockerfile
|
||||||
|
push: true
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||||
|
cache-to: type=inline
|
||||||
|
platforms: linux/amd64
|
||||||
|
|
||||||
|
build-and-push-api-amd64:
|
||||||
|
name: "[amd64] oci:api"
|
||||||
|
runs-on: ubicloud-standard-16
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
steps:
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Log in to the Container registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
- name: Extract metadata (tags, labels) for Docker
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/api
|
||||||
|
- name: Build and push Docker image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: "{{defaultContext}}:rust/"
|
||||||
|
file: api.Dockerfile
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
|
1
db-datos/drizzle/0012_hard_red_wolf.sql
Normal file
1
db-datos/drizzle/0012_hard_red_wolf.sql
Normal file
|
@ -0,0 +1 @@
|
||||||
|
CREATE INDEX `precios_fetched_at_idx` ON `precios` (`fetched_at`);
|
197
db-datos/drizzle/meta/0012_snapshot.json
Normal file
197
db-datos/drizzle/meta/0012_snapshot.json
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
{
|
||||||
|
"version": "5",
|
||||||
|
"dialect": "sqlite",
|
||||||
|
"id": "16046188-ab24-4bd4-bfb4-8a81f24c6f28",
|
||||||
|
"prevId": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84",
|
||||||
|
"tables": {
|
||||||
|
"db_best_selling": {
|
||||||
|
"name": "db_best_selling",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"fetched_at": {
|
||||||
|
"name": "fetched_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"category": {
|
||||||
|
"name": "category",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"eans_json": {
|
||||||
|
"name": "eans_json",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {}
|
||||||
|
},
|
||||||
|
"precios": {
|
||||||
|
"name": "precios",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"ean": {
|
||||||
|
"name": "ean",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"fetched_at": {
|
||||||
|
"name": "fetched_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"precio_centavos": {
|
||||||
|
"name": "precio_centavos",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"in_stock": {
|
||||||
|
"name": "in_stock",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"name": "url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"warc_record_id": {
|
||||||
|
"name": "warc_record_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"parser_version": {
|
||||||
|
"name": "parser_version",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"name": "name",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"image_url": {
|
||||||
|
"name": "image_url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {
|
||||||
|
"precios_ean_idx": {
|
||||||
|
"name": "precios_ean_idx",
|
||||||
|
"columns": [
|
||||||
|
"ean"
|
||||||
|
],
|
||||||
|
"isUnique": false
|
||||||
|
},
|
||||||
|
"precios_url_idx": {
|
||||||
|
"name": "precios_url_idx",
|
||||||
|
"columns": [
|
||||||
|
"url"
|
||||||
|
],
|
||||||
|
"isUnique": false
|
||||||
|
},
|
||||||
|
"precios_fetched_at_idx": {
|
||||||
|
"name": "precios_fetched_at_idx",
|
||||||
|
"columns": [
|
||||||
|
"fetched_at"
|
||||||
|
],
|
||||||
|
"isUnique": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {}
|
||||||
|
},
|
||||||
|
"producto_urls": {
|
||||||
|
"name": "producto_urls",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": true
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"name": "url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"first_seen": {
|
||||||
|
"name": "first_seen",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"last_seen": {
|
||||||
|
"name": "last_seen",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {
|
||||||
|
"producto_urls_url_unique": {
|
||||||
|
"name": "producto_urls_url_unique",
|
||||||
|
"columns": [
|
||||||
|
"url"
|
||||||
|
],
|
||||||
|
"isUnique": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"enums": {},
|
||||||
|
"_meta": {
|
||||||
|
"schemas": {},
|
||||||
|
"tables": {},
|
||||||
|
"columns": {}
|
||||||
|
}
|
||||||
|
}
|
|
@ -85,6 +85,13 @@
|
||||||
"when": 1706628184254,
|
"when": 1706628184254,
|
||||||
"tag": "0011_huge_next_avengers",
|
"tag": "0011_huge_next_avengers",
|
||||||
"breakpoints": true
|
"breakpoints": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 12,
|
||||||
|
"version": "5",
|
||||||
|
"when": 1719680946811,
|
||||||
|
"tag": "0012_hard_red_wolf",
|
||||||
|
"breakpoints": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
|
@ -19,5 +19,6 @@
|
||||||
"@types/better-sqlite3": "^7.6.9",
|
"@types/better-sqlite3": "^7.6.9",
|
||||||
"@types/node": "^20.12.7",
|
"@types/node": "^20.12.7",
|
||||||
"drizzle-kit": "^0.20.14"
|
"drizzle-kit": "^0.20.14"
|
||||||
}
|
},
|
||||||
|
"packageManager": "pnpm@9.0.6+sha256.0624e30eff866cdeb363b15061bdb7fd9425b17bc1bb42c22f5f4efdea21f6b3"
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,9 @@ export const precios = sqliteTable(
|
||||||
return {
|
return {
|
||||||
preciosEanIdx: index("precios_ean_idx").on(precios.ean),
|
preciosEanIdx: index("precios_ean_idx").on(precios.ean),
|
||||||
preciosUrlIdx: index("precios_url_idx").on(precios.url),
|
preciosUrlIdx: index("precios_url_idx").on(precios.url),
|
||||||
|
preciosFetchedAtIdx: index("precios_fetched_at_idx").on(
|
||||||
|
precios.fetchedAt
|
||||||
|
),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
3515
pnpm-lock.yaml
3515
pnpm-lock.yaml
File diff suppressed because it is too large
Load diff
3
rust/.dockerignore
Normal file
3
rust/.dockerignore
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
.env
|
||||||
|
target
|
||||||
|
*.Dockerfile
|
1
rust/.env
Normal file
1
rust/.env
Normal file
|
@ -0,0 +1 @@
|
||||||
|
DATABASE_URL=sqlite://../sqlite.db
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"db_name": "SQLite",
|
||||||
|
"query": "SELECT count(id) as count FROM precios\n WHERE fetched_at > ?\n AND url LIKE ?",
|
||||||
|
"describe": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"name": "count",
|
||||||
|
"ordinal": 0,
|
||||||
|
"type_info": "Int"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": {
|
||||||
|
"Right": 2
|
||||||
|
},
|
||||||
|
"nullable": [
|
||||||
|
false
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"hash": "71faba058f0a18e9aff6a12cc78353d3007dea8830088b07b67bfe86084a8ee2"
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"db_name": "SQLite",
|
||||||
|
"query": "SELECT count(id) as count FROM db_best_selling\n WHERE fetched_at > ?",
|
||||||
|
"describe": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"name": "count",
|
||||||
|
"ordinal": 0,
|
||||||
|
"type_info": "Int"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": {
|
||||||
|
"Right": 1
|
||||||
|
},
|
||||||
|
"nullable": [
|
||||||
|
false
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"hash": "e683ce875cc7e84586de163cdfd8d0bca2a1e679aebce4644fe0b31d639a1be4"
|
||||||
|
}
|
239
scraper-rs/Cargo.lock → rust/Cargo.lock
generated
239
scraper-rs/Cargo.lock → rust/Cargo.lock
generated
|
@ -41,6 +41,15 @@ dependencies = [
|
||||||
"zerocopy",
|
"zerocopy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alloc-no-stdlib"
|
name = "alloc-no-stdlib"
|
||||||
version = "2.0.4"
|
version = "2.0.4"
|
||||||
|
@ -146,6 +155,17 @@ dependencies = [
|
||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-trait"
|
||||||
|
version = "0.1.80"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.66",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atoi"
|
name = "atoi"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
|
@ -161,6 +181,61 @@ version = "1.3.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "axum"
|
||||||
|
version = "0.7.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
|
||||||
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
|
"axum-core",
|
||||||
|
"bytes",
|
||||||
|
"futures-util",
|
||||||
|
"http",
|
||||||
|
"http-body",
|
||||||
|
"http-body-util",
|
||||||
|
"hyper",
|
||||||
|
"hyper-util",
|
||||||
|
"itoa",
|
||||||
|
"matchit",
|
||||||
|
"memchr",
|
||||||
|
"mime",
|
||||||
|
"percent-encoding",
|
||||||
|
"pin-project-lite",
|
||||||
|
"rustversion",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"serde_path_to_error",
|
||||||
|
"serde_urlencoded",
|
||||||
|
"sync_wrapper 1.0.1",
|
||||||
|
"tokio",
|
||||||
|
"tower",
|
||||||
|
"tower-layer",
|
||||||
|
"tower-service",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "axum-core"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
|
||||||
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
|
"bytes",
|
||||||
|
"futures-util",
|
||||||
|
"http",
|
||||||
|
"http-body",
|
||||||
|
"http-body-util",
|
||||||
|
"mime",
|
||||||
|
"pin-project-lite",
|
||||||
|
"rustversion",
|
||||||
|
"sync_wrapper 0.1.2",
|
||||||
|
"tower-layer",
|
||||||
|
"tower-service",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "backtrace"
|
name = "backtrace"
|
||||||
version = "0.3.73"
|
version = "0.3.73"
|
||||||
|
@ -795,6 +870,12 @@ version = "1.9.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9"
|
checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "httpdate"
|
||||||
|
version = "1.0.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hyper"
|
name = "hyper"
|
||||||
version = "1.3.1"
|
version = "1.3.1"
|
||||||
|
@ -807,6 +888,7 @@ dependencies = [
|
||||||
"http",
|
"http",
|
||||||
"http-body",
|
"http-body",
|
||||||
"httparse",
|
"httparse",
|
||||||
|
"httpdate",
|
||||||
"itoa",
|
"itoa",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
|
@ -994,6 +1076,21 @@ version = "0.4.21"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "matchers"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
|
||||||
|
dependencies = [
|
||||||
|
"regex-automata 0.1.10",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "matchit"
|
||||||
|
version = "0.7.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "md-5"
|
name = "md-5"
|
||||||
version = "0.10.6"
|
version = "0.10.6"
|
||||||
|
@ -1283,6 +1380,36 @@ version = "0.2.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "preciazo"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"again",
|
||||||
|
"anyhow",
|
||||||
|
"axum",
|
||||||
|
"base64 0.21.7",
|
||||||
|
"chrono",
|
||||||
|
"clap",
|
||||||
|
"cron",
|
||||||
|
"futures",
|
||||||
|
"html-escape",
|
||||||
|
"itertools",
|
||||||
|
"nanoid",
|
||||||
|
"quick-xml",
|
||||||
|
"rand 0.8.5",
|
||||||
|
"reqwest",
|
||||||
|
"rusqlite",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"simple-error",
|
||||||
|
"sqlx",
|
||||||
|
"thiserror",
|
||||||
|
"tl",
|
||||||
|
"tokio",
|
||||||
|
"tracing",
|
||||||
|
"tracing-subscriber",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.85"
|
version = "1.0.85"
|
||||||
|
@ -1455,6 +1582,50 @@ dependencies = [
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.10.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata 0.4.7",
|
||||||
|
"regex-syntax 0.8.4",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.1.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||||
|
dependencies = [
|
||||||
|
"regex-syntax 0.6.29",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax 0.8.4",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.29"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest"
|
name = "reqwest"
|
||||||
version = "0.12.5"
|
version = "0.12.5"
|
||||||
|
@ -1486,7 +1657,7 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_urlencoded",
|
"serde_urlencoded",
|
||||||
"sync_wrapper",
|
"sync_wrapper 1.0.1",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-rustls",
|
"tokio-rustls",
|
||||||
"tokio-socks",
|
"tokio-socks",
|
||||||
|
@ -1615,6 +1786,12 @@ dependencies = [
|
||||||
"untrusted",
|
"untrusted",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustversion"
|
||||||
|
version = "1.0.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ryu"
|
name = "ryu"
|
||||||
version = "1.0.18"
|
version = "1.0.18"
|
||||||
|
@ -1627,35 +1804,6 @@ version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "scraper-rs"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"again",
|
|
||||||
"anyhow",
|
|
||||||
"base64 0.21.7",
|
|
||||||
"chrono",
|
|
||||||
"clap",
|
|
||||||
"cron",
|
|
||||||
"futures",
|
|
||||||
"html-escape",
|
|
||||||
"itertools",
|
|
||||||
"nanoid",
|
|
||||||
"quick-xml",
|
|
||||||
"rand 0.8.5",
|
|
||||||
"reqwest",
|
|
||||||
"rusqlite",
|
|
||||||
"serde",
|
|
||||||
"serde_json",
|
|
||||||
"simple-error",
|
|
||||||
"sqlx",
|
|
||||||
"thiserror",
|
|
||||||
"tl",
|
|
||||||
"tokio",
|
|
||||||
"tracing",
|
|
||||||
"tracing-subscriber",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde"
|
name = "serde"
|
||||||
version = "1.0.203"
|
version = "1.0.203"
|
||||||
|
@ -1687,6 +1835,16 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_path_to_error"
|
||||||
|
version = "0.1.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_urlencoded"
|
name = "serde_urlencoded"
|
||||||
version = "0.7.1"
|
version = "0.7.1"
|
||||||
|
@ -1838,6 +1996,7 @@ dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
"chrono",
|
||||||
"crc",
|
"crc",
|
||||||
"crossbeam-queue",
|
"crossbeam-queue",
|
||||||
"either",
|
"either",
|
||||||
|
@ -1898,6 +2057,7 @@ dependencies = [
|
||||||
"sha2",
|
"sha2",
|
||||||
"sqlx-core",
|
"sqlx-core",
|
||||||
"sqlx-mysql",
|
"sqlx-mysql",
|
||||||
|
"sqlx-postgres",
|
||||||
"sqlx-sqlite",
|
"sqlx-sqlite",
|
||||||
"syn 1.0.109",
|
"syn 1.0.109",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
@ -1916,6 +2076,7 @@ dependencies = [
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
"chrono",
|
||||||
"crc",
|
"crc",
|
||||||
"digest",
|
"digest",
|
||||||
"dotenvy",
|
"dotenvy",
|
||||||
|
@ -1957,6 +2118,7 @@ dependencies = [
|
||||||
"base64 0.21.7",
|
"base64 0.21.7",
|
||||||
"bitflags 2.5.0",
|
"bitflags 2.5.0",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
|
"chrono",
|
||||||
"crc",
|
"crc",
|
||||||
"dotenvy",
|
"dotenvy",
|
||||||
"etcetera",
|
"etcetera",
|
||||||
|
@ -1992,6 +2154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
|
checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"atoi",
|
"atoi",
|
||||||
|
"chrono",
|
||||||
"flume",
|
"flume",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
@ -2053,6 +2216,12 @@ dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sync_wrapper"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sync_wrapper"
|
name = "sync_wrapper"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
|
@ -2118,8 +2287,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tl"
|
name = "tl"
|
||||||
version = "0.7.7"
|
version = "0.7.8"
|
||||||
source = "git+https://github.com/evertedsphere/tl?branch=patch-1#56711166588fa6c7729a08e5740dca2526436316"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b130bd8a58c163224b44e217b4239ca7b927d82bf6cc2fea1fc561d15056e3f7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
|
@ -2211,6 +2381,7 @@ dependencies = [
|
||||||
"tokio",
|
"tokio",
|
||||||
"tower-layer",
|
"tower-layer",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2275,10 +2446,14 @@ version = "0.3.18"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"matchers",
|
||||||
"nu-ansi-term",
|
"nu-ansi-term",
|
||||||
|
"once_cell",
|
||||||
|
"regex",
|
||||||
"sharded-slab",
|
"sharded-slab",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"thread_local",
|
"thread_local",
|
||||||
|
"tracing",
|
||||||
"tracing-core",
|
"tracing-core",
|
||||||
"tracing-log",
|
"tracing-log",
|
||||||
]
|
]
|
|
@ -1,5 +1,5 @@
|
||||||
[package]
|
[package]
|
||||||
name = "scraper-rs"
|
name = "preciazo"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
@ -9,10 +9,10 @@ edition = "2021"
|
||||||
again = "0.1.2"
|
again = "0.1.2"
|
||||||
anyhow = "1.0.79"
|
anyhow = "1.0.79"
|
||||||
base64 = "0.21.7"
|
base64 = "0.21.7"
|
||||||
chrono = "0.4.32"
|
chrono = "0.4"
|
||||||
clap = { version = "4.4.15", features = ["derive"] }
|
clap = { version = "4.4.15", features = ["derive"] }
|
||||||
cron = "0.12.0"
|
cron = "0.12.0"
|
||||||
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite" ] }
|
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono" ] }
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
html-escape = "0.2.13"
|
html-escape = "0.2.13"
|
||||||
itertools = "0.12.0"
|
itertools = "0.12.0"
|
||||||
|
@ -27,11 +27,24 @@ reqwest = { version = "0.12", default-features = false, features = [
|
||||||
"json",
|
"json",
|
||||||
] }
|
] }
|
||||||
rusqlite = "0.30.0"
|
rusqlite = "0.30.0"
|
||||||
serde = { version = "1.0.193", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_json = "1.0.109"
|
serde_json = "1.0.109"
|
||||||
simple-error = "0.3.0"
|
simple-error = "0.3.0"
|
||||||
thiserror = "1.0.56"
|
thiserror = "1.0.56"
|
||||||
tl = { git = "https://github.com/evertedsphere/tl", branch = "patch-1" }
|
tl = "0.7.8"
|
||||||
tokio = { version = "1.35.1", features = ["full"] }
|
tokio = { version = "1.35", features = ["full"] }
|
||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-subscriber = "0.3"
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
|
axum = "0.7.5"
|
||||||
|
|
||||||
|
#[dependencies.rocket_db_pools]
|
||||||
|
#version = "0.2.0"
|
||||||
|
#features = ["sqlx_sqlite"]
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "api"
|
||||||
|
path = "src/api/main.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "scraper"
|
||||||
|
path = "src/scraper/main.rs"
|
3
rust/Rocket.toml
Normal file
3
rust/Rocket.toml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[default.databases.precios]
|
||||||
|
url = "../sqlite.db"
|
||||||
|
|
25
rust/api.Dockerfile
Normal file
25
rust/api.Dockerfile
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
FROM cgr.dev/chainguard/wolfi-base AS base
|
||||||
|
WORKDIR /usr/src/app
|
||||||
|
RUN apk add --no-cache libgcc
|
||||||
|
|
||||||
|
FROM docker.io/rust:1 AS rs-build
|
||||||
|
# RUN apt-get update && apt-get install -y openssl-dev libsqlite3-dev && rm -rf /var/lib/apt/lists/*
|
||||||
|
WORKDIR /usr/src/app
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
|
||||||
|
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \
|
||||||
|
--mount=type=cache,sharing=locked,target=/usr/src/app/target \
|
||||||
|
cargo install --bin api --locked --path .
|
||||||
|
|
||||||
|
FROM base
|
||||||
|
RUN apk add --no-cache sqlite sqlite-libs tini
|
||||||
|
ENTRYPOINT ["tini", "--"]
|
||||||
|
|
||||||
|
# api
|
||||||
|
COPY --from=rs-build /usr/local/cargo/bin/api /usr/local/bin/api
|
||||||
|
|
||||||
|
ENV DB_PATH=/db/db.db
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
CMD ["api"]
|
|
@ -10,14 +10,14 @@ COPY . .
|
||||||
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
|
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
|
||||||
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \
|
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \
|
||||||
--mount=type=cache,sharing=locked,target=/usr/src/app/target \
|
--mount=type=cache,sharing=locked,target=/usr/src/app/target \
|
||||||
cargo install --locked --path .
|
cargo install --bin scraper --locked --path .
|
||||||
|
|
||||||
FROM base
|
FROM base
|
||||||
RUN apk add --no-cache sqlite sqlite-libs
|
RUN apk add --no-cache sqlite sqlite-libs
|
||||||
|
|
||||||
# Scraper
|
# Scraper
|
||||||
COPY --from=rs-build /usr/local/cargo/bin/scraper-rs /usr/local/bin/scraper-rs
|
COPY --from=rs-build /usr/local/cargo/bin/scraper /usr/local/bin/scraper
|
||||||
|
|
||||||
ENV DB_PATH=/db/db.db
|
ENV DB_PATH=/db/db.db
|
||||||
|
|
||||||
CMD ["scraper-rs", "cron"]
|
CMD ["scraper", "cron"]
|
126
rust/src/api/main.rs
Normal file
126
rust/src/api/main.rs
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
|
||||||
|
use clap::ValueEnum;
|
||||||
|
use futures::future::join_all;
|
||||||
|
use itertools::Itertools;
|
||||||
|
use preciazo::supermercado::Supermercado;
|
||||||
|
use sqlx::{
|
||||||
|
sqlite::{SqliteConnectOptions, SqlitePoolOptions},
|
||||||
|
SqlitePool,
|
||||||
|
};
|
||||||
|
use std::{env, str::FromStr, time::Duration};
|
||||||
|
|
||||||
|
/// Landing route: replies with a short greeting that points at the project
/// repository.
///
/// NOTE(review): a `&'static str` body is presumably served by axum as
/// `text/plain`, so the anchor markup would be displayed literally rather than
/// rendered as a link — confirm whether that is intended.
async fn index() -> &'static str {
    const GREETING: &str =
        "Hello, world! <a href=https://github.com/catdevnull/preciazo>catdevnull/preciazo</a>";
    GREETING
}
|
||||||
|
async fn healthcheck(State(pool): State<SqlitePool>) -> impl IntoResponse {
|
||||||
|
let one_day_ago = chrono::Utc::now() - chrono::Duration::hours(25);
|
||||||
|
let timestamp = one_day_ago.timestamp();
|
||||||
|
|
||||||
|
let supermercados_checks =
|
||||||
|
join_all(Supermercado::value_variants().iter().map(|supermercado| {
|
||||||
|
let value = pool.clone();
|
||||||
|
async move {
|
||||||
|
let url_query = format!("%{}%", supermercado.host());
|
||||||
|
let count = sqlx::query!(
|
||||||
|
"SELECT count(id) as count FROM precios
|
||||||
|
WHERE fetched_at > ?
|
||||||
|
AND url LIKE ?",
|
||||||
|
timestamp,
|
||||||
|
url_query
|
||||||
|
)
|
||||||
|
.fetch_one(&value)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.count;
|
||||||
|
let expected_count = match *supermercado {
|
||||||
|
Supermercado::Carrefour => 45000,
|
||||||
|
Supermercado::Coto => 32000,
|
||||||
|
Supermercado::Jumbo => 20000,
|
||||||
|
Supermercado::Farmacity => 8000,
|
||||||
|
Supermercado::Dia => 4000,
|
||||||
|
};
|
||||||
|
if count < expected_count {
|
||||||
|
Err(format!(
|
||||||
|
"[{:?}] last 25h: expected at least {}, got {}",
|
||||||
|
supermercado, expected_count, count
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(format!("[{:?}] last 25h: {} precios", supermercado, count))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
.await
|
||||||
|
.into_iter()
|
||||||
|
.collect_vec();
|
||||||
|
|
||||||
|
let best_selling_check = {
|
||||||
|
let record = sqlx::query!(
|
||||||
|
"SELECT count(id) as count FROM db_best_selling
|
||||||
|
WHERE fetched_at > ?",
|
||||||
|
timestamp,
|
||||||
|
)
|
||||||
|
.fetch_one(&pool)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let count = record.count;
|
||||||
|
let expected_count = 3;
|
||||||
|
if count < expected_count {
|
||||||
|
Err(format!(
|
||||||
|
"[best_selling] last 25h: expected at least {}, got {}",
|
||||||
|
expected_count, count
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(format!("[best_selling] last 25h: {}", count))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let list = format!(
|
||||||
|
"{}\n- {:?}",
|
||||||
|
supermercados_checks
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(|c| format!("- {:?}", c))
|
||||||
|
.join("\n"),
|
||||||
|
best_selling_check
|
||||||
|
);
|
||||||
|
|
||||||
|
if supermercados_checks.into_iter().all(|r| r.is_ok()) && best_selling_check.is_ok() {
|
||||||
|
(StatusCode::OK, format!("all is ok\n{}", list))
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
|
format!("errors:\n{}", list),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() {
|
||||||
|
tracing_subscriber::fmt::init();
|
||||||
|
|
||||||
|
let pool = SqlitePoolOptions::new()
|
||||||
|
.max_connections(1)
|
||||||
|
.connect_with(
|
||||||
|
SqliteConnectOptions::from_str(&format!(
|
||||||
|
"sqlite://{}",
|
||||||
|
env::var("DB_PATH").unwrap_or("../sqlite.db".to_string())
|
||||||
|
))
|
||||||
|
.unwrap()
|
||||||
|
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
|
||||||
|
.synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
|
||||||
|
.busy_timeout(Duration::from_secs(15))
|
||||||
|
.pragma("cache_size", "1000000000")
|
||||||
|
.optimize_on_close(true, None),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("can't connect to database");
|
||||||
|
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/", get(index))
|
||||||
|
.route("/api/healthcheck", get(healthcheck))
|
||||||
|
.with_state(pool);
|
||||||
|
|
||||||
|
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();
|
||||||
|
tracing::debug!("listening on {}", listener.local_addr().unwrap());
|
||||||
|
axum::serve(listener, app).await.unwrap();
|
||||||
|
}
|
1
rust/src/lib.rs
Normal file
1
rust/src/lib.rs
Normal file
|
@ -0,0 +1 @@
|
||||||
|
pub mod supermercado;
|
|
@ -1,11 +1,11 @@
|
||||||
use super::now_sec;
|
use super::now_sec;
|
||||||
use super::supermercado::Supermercado;
|
|
||||||
use super::AutoArgs;
|
use super::AutoArgs;
|
||||||
use super::AutoTelegram;
|
use super::AutoTelegram;
|
||||||
use crate::best_selling;
|
use crate::best_selling;
|
||||||
use crate::db::Db;
|
use crate::db::Db;
|
||||||
use crate::scraper::Scraper;
|
use crate::scraper::Scraper;
|
||||||
use futures::Future;
|
use futures::Future;
|
||||||
|
use preciazo::supermercado::Supermercado;
|
||||||
use reqwest::Url;
|
use reqwest::Url;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
|
@ -1,10 +1,11 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::{build_client, db::Db, sites::vtex, supermercado::Supermercado};
|
use crate::{build_client, db::Db, sites::vtex};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use clap::ValueEnum;
|
use clap::ValueEnum;
|
||||||
use futures::{stream, FutureExt, StreamExt};
|
use futures::{stream, FutureExt, StreamExt};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
use preciazo::supermercado::Supermercado;
|
||||||
use simple_error::SimpleError;
|
use simple_error::SimpleError;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
|
@ -15,8 +15,7 @@ use std::{
|
||||||
};
|
};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
mod supermercado;
|
use preciazo::supermercado::Supermercado;
|
||||||
use supermercado::Supermercado;
|
|
||||||
mod auto;
|
mod auto;
|
||||||
use auto::Auto;
|
use auto::Auto;
|
||||||
mod proxy_client;
|
mod proxy_client;
|
||||||
|
@ -58,7 +57,7 @@ struct AutoArgs {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> () {
|
async fn main() {
|
||||||
tracing_subscriber::fmt::init();
|
tracing_subscriber::fmt::init();
|
||||||
|
|
||||||
match Args::parse() {
|
match Args::parse() {
|
|
@ -7,9 +7,9 @@ use tokio::fs;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
anyhow_retry_if_wasnt_not_found, build_client, db::Db, get_fetch_retry_policy,
|
anyhow_retry_if_wasnt_not_found, build_client, db::Db, get_fetch_retry_policy,
|
||||||
get_parse_retry_policy, proxy_client::ProxyClient, sites, supermercado::Supermercado, Counters,
|
get_parse_retry_policy, proxy_client::ProxyClient, sites, Counters, PrecioPoint,
|
||||||
PrecioPoint,
|
|
||||||
};
|
};
|
||||||
|
use preciazo::supermercado::Supermercado;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Scraper {
|
pub struct Scraper {
|
|
@ -229,7 +229,7 @@ pub async fn get_best_selling_by_category(
|
||||||
let json = &serde_json::from_str::<serde_json::Value>(&body)?;
|
let json = &serde_json::from_str::<serde_json::Value>(&body)?;
|
||||||
if let Some(errors_array) = json.pointer("/errors") {
|
if let Some(errors_array) = json.pointer("/errors") {
|
||||||
if let Some(error_messages) = errors_array.as_array().map(|a| {
|
if let Some(error_messages) = errors_array.as_array().map(|a| {
|
||||||
a.into_iter()
|
a.iter()
|
||||||
.map(|obj| obj.get("message").and_then(|v| v.as_str()))
|
.map(|obj| obj.get("message").and_then(|v| v.as_str()))
|
||||||
.collect_vec()
|
.collect_vec()
|
||||||
}) {
|
}) {
|
50
rust/src/supermercado.rs
Normal file
50
rust/src/supermercado.rs
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
use clap::ValueEnum;
|
||||||
|
use reqwest::Url;
|
||||||
|
|
||||||
|
const SUPERMERCADOS_HOSTS: [(Supermercado, &str); 5] = [
|
||||||
|
(Supermercado::Dia, "diaonline.supermercadosdia.com.ar"),
|
||||||
|
(Supermercado::Carrefour, "www.carrefour.com.ar"),
|
||||||
|
(Supermercado::Coto, "www.cotodigital3.com.ar"),
|
||||||
|
(Supermercado::Jumbo, "www.jumbo.com.ar"),
|
||||||
|
(Supermercado::Farmacity, "www.farmacity.com"),
|
||||||
|
];
|
||||||
|
|
||||||
|
#[derive(ValueEnum, Clone, Debug, Copy, PartialEq)]
|
||||||
|
pub enum Supermercado {
|
||||||
|
Dia,
|
||||||
|
Jumbo,
|
||||||
|
Carrefour,
|
||||||
|
Coto,
|
||||||
|
Farmacity,
|
||||||
|
}
|
||||||
|
impl Supermercado {
|
||||||
|
pub fn host(&self) -> &'static str {
|
||||||
|
SUPERMERCADOS_HOSTS
|
||||||
|
.into_iter()
|
||||||
|
.find(|(supermercado, _host)| self == supermercado)
|
||||||
|
.map(|(_, host)| host)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
pub fn from_url(url: &Url) -> Option<Self> {
|
||||||
|
SUPERMERCADOS_HOSTS
|
||||||
|
.into_iter()
|
||||||
|
.find(|(_supermercado, host)| *host == url.host_str().unwrap())
|
||||||
|
.map(|(supermercado, _host)| supermercado)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::Supermercado;

    /// A product URL on Dia's host resolves to the `Dia` variant.
    #[test]
    fn host_to_supermercado() {
        let product_url = reqwest::Url::parse("https://diaonline.supermercadosdia.com.ar/repelente-para-mosquitos-off--family-aerosol-165-cc-6338/p").unwrap();
        assert_eq!(
            Supermercado::from_url(&product_url),
            Some(Supermercado::Dia)
        )
    }

    /// The `Coto` variant maps back to its host name.
    #[test]
    fn supermercado_to_host() {
        assert_eq!(Supermercado::Coto.host(), "www.cotodigital3.com.ar")
    }
}
|
|
@ -1,2 +0,0 @@
|
||||||
.env
|
|
||||||
target
|
|
|
@ -1,32 +0,0 @@
|
||||||
use clap::ValueEnum;
|
|
||||||
use reqwest::Url;
|
|
||||||
|
|
||||||
#[derive(ValueEnum, Clone, Debug, Copy)]
|
|
||||||
pub enum Supermercado {
|
|
||||||
Dia,
|
|
||||||
Jumbo,
|
|
||||||
Carrefour,
|
|
||||||
Coto,
|
|
||||||
Farmacity,
|
|
||||||
}
|
|
||||||
impl Supermercado {
|
|
||||||
pub fn host(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
Self::Dia => "diaonline.supermercadosdia.com.ar",
|
|
||||||
Self::Carrefour => "www.carrefour.com.ar",
|
|
||||||
Self::Coto => "www.cotodigital3.com.ar",
|
|
||||||
Self::Jumbo => "www.jumbo.com.ar",
|
|
||||||
Self::Farmacity => "www.farmacity.com",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn from_url(url: &Url) -> Option<Self> {
|
|
||||||
match url.host_str().unwrap() {
|
|
||||||
"www.carrefour.com.ar" => Some(Self::Carrefour),
|
|
||||||
"diaonline.supermercadosdia.com.ar" => Some(Self::Dia),
|
|
||||||
"www.cotodigital3.com.ar" => Some(Self::Coto),
|
|
||||||
"www.jumbo.com.ar" => Some(Self::Jumbo),
|
|
||||||
"www.farmacity.com" => Some(Self::Farmacity),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in a new issue