Compare commits

..

3 commits

Author SHA1 Message Date
e64b993069 basic rust api 2024-06-29 21:04:59 -03:00
1b55f47815 index fetched_at 2024-06-29 19:16:23 -03:00
ccb5b2c2ef mejorar supermercado 2024-06-23 21:48:29 -03:00
40 changed files with 2637 additions and 1696 deletions

View file

@ -86,8 +86,40 @@ jobs:
- name: Build and push Docker image - name: Build and push Docker image
uses: docker/build-push-action@v5 uses: docker/build-push-action@v5
with: with:
context: "{{defaultContext}}:scraper-rs/" context: "{{defaultContext}}:rust/"
file: Dockerfile file: scraper.Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
cache-to: type=inline
platforms: linux/amd64
build-and-push-api-amd64:
name: "[amd64] oci:api"
runs-on: ubicloud-standard-16
permissions:
contents: read
packages: write
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/api
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: "{{defaultContext}}:rust/"
file: api.Dockerfile
push: true push: true
tags: ${{ steps.meta.outputs.tags }} tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}

View file

@ -0,0 +1 @@
CREATE INDEX `precios_fetched_at_idx` ON `precios` (`fetched_at`);

View file

@ -0,0 +1,197 @@
{
"version": "5",
"dialect": "sqlite",
"id": "16046188-ab24-4bd4-bfb4-8a81f24c6f28",
"prevId": "8b4921b5-6ecd-4d69-ba64-9b0bfb53db84",
"tables": {
"db_best_selling": {
"name": "db_best_selling",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"category": {
"name": "category",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"eans_json": {
"name": "eans_json",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"precios": {
"name": "precios",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"ean": {
"name": "ean",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"fetched_at": {
"name": "fetched_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"precio_centavos": {
"name": "precio_centavos",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"in_stock": {
"name": "in_stock",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"warc_record_id": {
"name": "warc_record_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"parser_version": {
"name": "parser_version",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"name": {
"name": "name",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"image_url": {
"name": "image_url",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
}
},
"indexes": {
"precios_ean_idx": {
"name": "precios_ean_idx",
"columns": [
"ean"
],
"isUnique": false
},
"precios_url_idx": {
"name": "precios_url_idx",
"columns": [
"url"
],
"isUnique": false
},
"precios_fetched_at_idx": {
"name": "precios_fetched_at_idx",
"columns": [
"fetched_at"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
},
"producto_urls": {
"name": "producto_urls",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"autoincrement": true
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"first_seen": {
"name": "first_seen",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"last_seen": {
"name": "last_seen",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"producto_urls_url_unique": {
"name": "producto_urls_url_unique",
"columns": [
"url"
],
"isUnique": true
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {}
}
},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
}
}

View file

@ -85,6 +85,13 @@
"when": 1706628184254, "when": 1706628184254,
"tag": "0011_huge_next_avengers", "tag": "0011_huge_next_avengers",
"breakpoints": true "breakpoints": true
},
{
"idx": 12,
"version": "5",
"when": 1719680946811,
"tag": "0012_hard_red_wolf",
"breakpoints": true
} }
] ]
} }

View file

@ -19,5 +19,6 @@
"@types/better-sqlite3": "^7.6.9", "@types/better-sqlite3": "^7.6.9",
"@types/node": "^20.12.7", "@types/node": "^20.12.7",
"drizzle-kit": "^0.20.14" "drizzle-kit": "^0.20.14"
} },
"packageManager": "pnpm@9.0.6+sha256.0624e30eff866cdeb363b15061bdb7fd9425b17bc1bb42c22f5f4efdea21f6b3"
} }

View file

@ -19,6 +19,9 @@ export const precios = sqliteTable(
return { return {
preciosEanIdx: index("precios_ean_idx").on(precios.ean), preciosEanIdx: index("precios_ean_idx").on(precios.ean),
preciosUrlIdx: index("precios_url_idx").on(precios.url), preciosUrlIdx: index("precios_url_idx").on(precios.url),
preciosFetchedAtIdx: index("precios_fetched_at_idx").on(
precios.fetchedAt
),
}; };
} }
); );

File diff suppressed because it is too large Load diff

3
rust/.dockerignore Normal file
View file

@ -0,0 +1,3 @@
.env
target
*.Dockerfile

1
rust/.env Normal file
View file

@ -0,0 +1 @@
DATABASE_URL=sqlite://../sqlite.db

View file

@ -0,0 +1,20 @@
{
"db_name": "SQLite",
"query": "SELECT count(id) as count FROM precios\n WHERE fetched_at > ?\n AND url LIKE ?",
"describe": {
"columns": [
{
"name": "count",
"ordinal": 0,
"type_info": "Int"
}
],
"parameters": {
"Right": 2
},
"nullable": [
false
]
},
"hash": "71faba058f0a18e9aff6a12cc78353d3007dea8830088b07b67bfe86084a8ee2"
}

View file

@ -0,0 +1,20 @@
{
"db_name": "SQLite",
"query": "SELECT count(id) as count FROM db_best_selling\n WHERE fetched_at > ?",
"describe": {
"columns": [
{
"name": "count",
"ordinal": 0,
"type_info": "Int"
}
],
"parameters": {
"Right": 1
},
"nullable": [
false
]
},
"hash": "e683ce875cc7e84586de163cdfd8d0bca2a1e679aebce4644fe0b31d639a1be4"
}

View file

@ -41,6 +41,15 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "alloc-no-stdlib" name = "alloc-no-stdlib"
version = "2.0.4" version = "2.0.4"
@ -146,6 +155,17 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "async-trait"
version = "0.1.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.66",
]
[[package]] [[package]]
name = "atoi" name = "atoi"
version = "2.0.0" version = "2.0.0"
@ -161,6 +181,61 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
[[package]]
name = "axum"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
dependencies = [
"async-trait",
"axum-core",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"itoa",
"matchit",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper 1.0.1",
"tokio",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper 0.1.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.73" version = "0.3.73"
@ -795,6 +870,12 @@ version = "1.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9"
[[package]]
name = "httpdate"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]] [[package]]
name = "hyper" name = "hyper"
version = "1.3.1" version = "1.3.1"
@ -807,6 +888,7 @@ dependencies = [
"http", "http",
"http-body", "http-body",
"httparse", "httparse",
"httpdate",
"itoa", "itoa",
"pin-project-lite", "pin-project-lite",
"smallvec", "smallvec",
@ -994,6 +1076,21 @@ version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]] [[package]]
name = "md-5" name = "md-5"
version = "0.10.6" version = "0.10.6"
@ -1283,6 +1380,36 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "preciazo"
version = "0.1.0"
dependencies = [
"again",
"anyhow",
"axum",
"base64 0.21.7",
"chrono",
"clap",
"cron",
"futures",
"html-escape",
"itertools",
"nanoid",
"quick-xml",
"rand 0.8.5",
"reqwest",
"rusqlite",
"serde",
"serde_json",
"simple-error",
"sqlx",
"thiserror",
"tl",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.85" version = "1.0.85"
@ -1455,6 +1582,50 @@ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
] ]
[[package]]
name = "regex"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata 0.4.7",
"regex-syntax 0.8.4",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.4",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.12.5" version = "0.12.5"
@ -1486,7 +1657,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
"sync_wrapper", "sync_wrapper 1.0.1",
"tokio", "tokio",
"tokio-rustls", "tokio-rustls",
"tokio-socks", "tokio-socks",
@ -1615,6 +1786,12 @@ dependencies = [
"untrusted", "untrusted",
] ]
[[package]]
name = "rustversion"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.18" version = "1.0.18"
@ -1627,35 +1804,6 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper-rs"
version = "0.1.0"
dependencies = [
"again",
"anyhow",
"base64 0.21.7",
"chrono",
"clap",
"cron",
"futures",
"html-escape",
"itertools",
"nanoid",
"quick-xml",
"rand 0.8.5",
"reqwest",
"rusqlite",
"serde",
"serde_json",
"simple-error",
"sqlx",
"thiserror",
"tl",
"tokio",
"tracing",
"tracing-subscriber",
]
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.203" version = "1.0.203"
@ -1687,6 +1835,16 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "serde_path_to_error"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
dependencies = [
"itoa",
"serde",
]
[[package]] [[package]]
name = "serde_urlencoded" name = "serde_urlencoded"
version = "0.7.1" version = "0.7.1"
@ -1838,6 +1996,7 @@ dependencies = [
"atoi", "atoi",
"byteorder", "byteorder",
"bytes", "bytes",
"chrono",
"crc", "crc",
"crossbeam-queue", "crossbeam-queue",
"either", "either",
@ -1898,6 +2057,7 @@ dependencies = [
"sha2", "sha2",
"sqlx-core", "sqlx-core",
"sqlx-mysql", "sqlx-mysql",
"sqlx-postgres",
"sqlx-sqlite", "sqlx-sqlite",
"syn 1.0.109", "syn 1.0.109",
"tempfile", "tempfile",
@ -1916,6 +2076,7 @@ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"byteorder", "byteorder",
"bytes", "bytes",
"chrono",
"crc", "crc",
"digest", "digest",
"dotenvy", "dotenvy",
@ -1957,6 +2118,7 @@ dependencies = [
"base64 0.21.7", "base64 0.21.7",
"bitflags 2.5.0", "bitflags 2.5.0",
"byteorder", "byteorder",
"chrono",
"crc", "crc",
"dotenvy", "dotenvy",
"etcetera", "etcetera",
@ -1992,6 +2154,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa" checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa"
dependencies = [ dependencies = [
"atoi", "atoi",
"chrono",
"flume", "flume",
"futures-channel", "futures-channel",
"futures-core", "futures-core",
@ -2053,6 +2216,12 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "sync_wrapper"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]] [[package]]
name = "sync_wrapper" name = "sync_wrapper"
version = "1.0.1" version = "1.0.1"
@ -2118,8 +2287,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]] [[package]]
name = "tl" name = "tl"
version = "0.7.7" version = "0.7.8"
source = "git+https://github.com/evertedsphere/tl?branch=patch-1#56711166588fa6c7729a08e5740dca2526436316" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b130bd8a58c163224b44e217b4239ca7b927d82bf6cc2fea1fc561d15056e3f7"
[[package]] [[package]]
name = "tokio" name = "tokio"
@ -2211,6 +2381,7 @@ dependencies = [
"tokio", "tokio",
"tower-layer", "tower-layer",
"tower-service", "tower-service",
"tracing",
] ]
[[package]] [[package]]
@ -2275,10 +2446,14 @@ version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [ dependencies = [
"matchers",
"nu-ansi-term", "nu-ansi-term",
"once_cell",
"regex",
"sharded-slab", "sharded-slab",
"smallvec", "smallvec",
"thread_local", "thread_local",
"tracing",
"tracing-core", "tracing-core",
"tracing-log", "tracing-log",
] ]

View file

@ -1,5 +1,5 @@
[package] [package]
name = "scraper-rs" name = "preciazo"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
@ -9,10 +9,10 @@ edition = "2021"
again = "0.1.2" again = "0.1.2"
anyhow = "1.0.79" anyhow = "1.0.79"
base64 = "0.21.7" base64 = "0.21.7"
chrono = "0.4.32" chrono = "0.4"
clap = { version = "4.4.15", features = ["derive"] } clap = { version = "4.4.15", features = ["derive"] }
cron = "0.12.0" cron = "0.12.0"
sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite" ] } sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono" ] }
futures = "0.3.30" futures = "0.3.30"
html-escape = "0.2.13" html-escape = "0.2.13"
itertools = "0.12.0" itertools = "0.12.0"
@ -27,11 +27,24 @@ reqwest = { version = "0.12", default-features = false, features = [
"json", "json",
] } ] }
rusqlite = "0.30.0" rusqlite = "0.30.0"
serde = { version = "1.0.193", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.109" serde_json = "1.0.109"
simple-error = "0.3.0" simple-error = "0.3.0"
thiserror = "1.0.56" thiserror = "1.0.56"
tl = { git = "https://github.com/evertedsphere/tl", branch = "patch-1" } tl = "0.7.8"
tokio = { version = "1.35.1", features = ["full"] } tokio = { version = "1.35", features = ["full"] }
tracing = "0.1" tracing = "0.1"
tracing-subscriber = "0.3" tracing-subscriber = { version = "0.3", features = ["env-filter"] }
axum = "0.7.5"
#[dependencies.rocket_db_pools]
#version = "0.2.0"
#features = ["sqlx_sqlite"]
[[bin]]
name = "api"
path = "src/api/main.rs"
[[bin]]
name = "scraper"
path = "src/scraper/main.rs"

3
rust/Rocket.toml Normal file
View file

@ -0,0 +1,3 @@
[default.databases.precios]
url = "../sqlite.db"

25
rust/api.Dockerfile Normal file
View file

@ -0,0 +1,25 @@
# Multi-stage build for the `api` binary: a shared runtime base, a Rust build stage, and the
# final image that combines the base with the compiled binary.

# Runtime base stage. libgcc is installed here, so the final stage (FROM base) inherits it.
# NOTE(review): presumably needed because the Rust binary links libgcc at runtime — confirm.
FROM cgr.dev/chainguard/wolfi-base AS base
WORKDIR /usr/src/app
RUN apk add --no-cache libgcc
# Build stage: compiles only the `api` bin target, honouring Cargo.lock (--locked).
FROM docker.io/rust:1 AS rs-build
# RUN apt-get update && apt-get install -y openssl-dev libsqlite3-dev && rm -rf /var/lib/apt/lists/*
WORKDIR /usr/src/app
COPY . .
# The cache mounts cover cargo's git checkouts, its registry and the target directory.
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \
--mount=type=cache,sharing=locked,target=/usr/src/app/target \
cargo install --bin api --locked --path .
# Final image: runtime base plus the sqlite packages and tini.
FROM base
RUN apk add --no-cache sqlite sqlite-libs tini
# tini is the entrypoint; the CMD at the bottom is passed to it as the command to run.
ENTRYPOINT ["tini", "--"]
# api
COPY --from=rs-build /usr/local/cargo/bin/api /usr/local/bin/api
# Database file location; the api binary reads DB_PATH and falls back to ../sqlite.db if unset.
ENV DB_PATH=/db/db.db
# The api binary binds 0.0.0.0:8000.
EXPOSE 8000
CMD ["api"]

View file

@ -10,14 +10,14 @@ COPY . .
RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \ RUN --mount=type=cache,sharing=locked,target=/root/.cargo/git \
--mount=type=cache,sharing=locked,target=/root/.cargo/registry \ --mount=type=cache,sharing=locked,target=/root/.cargo/registry \
--mount=type=cache,sharing=locked,target=/usr/src/app/target \ --mount=type=cache,sharing=locked,target=/usr/src/app/target \
cargo install --locked --path . cargo install --bin scraper --locked --path .
FROM base FROM base
RUN apk add --no-cache sqlite sqlite-libs RUN apk add --no-cache sqlite sqlite-libs
# Scraper # Scraper
COPY --from=rs-build /usr/local/cargo/bin/scraper-rs /usr/local/bin/scraper-rs COPY --from=rs-build /usr/local/cargo/bin/scraper /usr/local/bin/scraper
ENV DB_PATH=/db/db.db ENV DB_PATH=/db/db.db
CMD ["scraper-rs", "cron"] CMD ["scraper", "cron"]

126
rust/src/api/main.rs Normal file
View file

@ -0,0 +1,126 @@
use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router};
use clap::ValueEnum;
use futures::future::join_all;
use itertools::Itertools;
use preciazo::supermercado::Supermercado;
use sqlx::{
sqlite::{SqliteConnectOptions, SqlitePoolOptions},
SqlitePool,
};
use std::{env, str::FromStr, time::Duration};
/// Landing page handler: replies with a fixed greeting that points at the project repository.
///
/// NOTE(review): the body is returned as a plain `&str`, so axum presumably serves it as
/// `text/plain` and the `<a>` markup is shown literally — confirm whether `Html` was intended.
async fn index() -> &'static str {
    const GREETING: &str =
        "Hello, world! <a href=https://github.com/catdevnull/preciazo>catdevnull/preciazo</a>";
    GREETING
}
/// Reports whether enough fresh data has been stored during the last 25 hours.
///
/// For every [`Supermercado`] variant it counts the `precios` rows fetched after the cutoff
/// whose `url` contains that supermarket's host, and compares the count with a hard-coded
/// minimum. It also expects at least 3 `db_best_selling` rows in the same window.
///
/// Responds with `200 OK` and `"all is ok"` followed by one line per check when every check
/// passes, and with `500 Internal Server Error` and `"errors:"` followed by the same list
/// otherwise. A failed database query is reported as a failed check (so the endpoint still
/// answers with a 500) instead of panicking inside the handler.
async fn healthcheck(State(pool): State<SqlitePool>) -> impl IntoResponse {
    // NOTE(review): 25h rather than 24h is presumably slack around a daily scrape — confirm.
    let cutoff = (chrono::Utc::now() - chrono::Duration::hours(25)).timestamp();

    // NOTE(review): the SQL literals below are kept byte-for-byte; sqlx's offline metadata is
    // looked up by query text, so re-run `cargo sqlx prepare` if they ever change.
    let supermercados_checks: Vec<Result<String, String>> =
        join_all(Supermercado::value_variants().iter().map(|supermercado| {
            let pool = pool.clone();
            async move {
                let url_query = format!("%{}%", supermercado.host());
                let queried = sqlx::query!(
                    "SELECT count(id) as count FROM precios
WHERE fetched_at > ?
AND url LIKE ?",
                    cutoff,
                    url_query
                )
                .fetch_one(&pool)
                .await;
                let count = match queried {
                    Ok(record) => record.count,
                    Err(err) => {
                        return Err(format!("[{:?}] query failed: {}", supermercado, err));
                    }
                };

                // Minimum number of rows expected per supermarket in the window.
                let expected_count = match *supermercado {
                    Supermercado::Carrefour => 45000,
                    Supermercado::Coto => 32000,
                    Supermercado::Jumbo => 20000,
                    Supermercado::Farmacity => 8000,
                    Supermercado::Dia => 4000,
                };
                if count < expected_count {
                    Err(format!(
                        "[{:?}] last 25h: expected at least {}, got {}",
                        supermercado, expected_count, count
                    ))
                } else {
                    Ok(format!("[{:?}] last 25h: {} precios", supermercado, count))
                }
            }
        }))
        .await;

    let best_selling_check: Result<String, String> = match sqlx::query!(
        "SELECT count(id) as count FROM db_best_selling
WHERE fetched_at > ?",
        cutoff,
    )
    .fetch_one(&pool)
    .await
    {
        Err(err) => Err(format!("[best_selling] query failed: {}", err)),
        Ok(record) => {
            let count = record.count;
            let expected_count = 3;
            if count < expected_count {
                Err(format!(
                    "[best_selling] last 25h: expected at least {}, got {}",
                    expected_count, count
                ))
            } else {
                Ok(format!("[best_selling] last 25h: {}", count))
            }
        }
    };

    // One "- <Debug of the Result>" line per check; borrowing avoids cloning the results.
    let list = format!(
        "{}\n- {:?}",
        supermercados_checks
            .iter()
            .map(|check| format!("- {:?}", check))
            .join("\n"),
        best_selling_check
    );

    let all_ok = supermercados_checks.iter().all(Result::is_ok) && best_selling_check.is_ok();
    if all_ok {
        (StatusCode::OK, format!("all is ok\n{}", list))
    } else {
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("errors:\n{}", list),
        )
    }
}
/// Entry point of the API binary.
///
/// Opens the SQLite database named by the `DB_PATH` environment variable (falling back to
/// `../sqlite.db`), registers the `/` and `/api/healthcheck` routes and serves them on
/// `0.0.0.0:8000`. Any failure during start-up panics.
#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();

    let db_path = env::var("DB_PATH").unwrap_or_else(|_| "../sqlite.db".to_string());
    let connect_options = SqliteConnectOptions::from_str(&format!("sqlite://{}", db_path))
        .unwrap()
        .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
        .synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
        .busy_timeout(Duration::from_secs(15))
        .pragma("cache_size", "1000000000")
        .optimize_on_close(true, None);

    // The pool is capped at a single connection.
    let pool = SqlitePoolOptions::new()
        .max_connections(1)
        .connect_with(connect_options)
        .await
        .expect("can't connect to database");

    let router = Router::new()
        .route("/", get(index))
        .route("/api/healthcheck", get(healthcheck))
        .with_state(pool);

    let tcp_listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();
    tracing::debug!("listening on {}", tcp_listener.local_addr().unwrap());
    axum::serve(tcp_listener, router).await.unwrap();
}

1
rust/src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod supermercado;

View file

@ -1,11 +1,11 @@
use super::now_sec; use super::now_sec;
use super::supermercado::Supermercado;
use super::AutoArgs; use super::AutoArgs;
use super::AutoTelegram; use super::AutoTelegram;
use crate::best_selling; use crate::best_selling;
use crate::db::Db; use crate::db::Db;
use crate::scraper::Scraper; use crate::scraper::Scraper;
use futures::Future; use futures::Future;
use preciazo::supermercado::Supermercado;
use reqwest::Url; use reqwest::Url;
#[derive(Clone)] #[derive(Clone)]

View file

@ -1,10 +1,11 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::{build_client, db::Db, sites::vtex, supermercado::Supermercado}; use crate::{build_client, db::Db, sites::vtex};
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use clap::ValueEnum; use clap::ValueEnum;
use futures::{stream, FutureExt, StreamExt}; use futures::{stream, FutureExt, StreamExt};
use itertools::Itertools; use itertools::Itertools;
use preciazo::supermercado::Supermercado;
use simple_error::SimpleError; use simple_error::SimpleError;
use tracing::warn; use tracing::warn;

View file

@ -15,8 +15,7 @@ use std::{
}; };
use thiserror::Error; use thiserror::Error;
mod supermercado; use preciazo::supermercado::Supermercado;
use supermercado::Supermercado;
mod auto; mod auto;
use auto::Auto; use auto::Auto;
mod proxy_client; mod proxy_client;
@ -58,7 +57,7 @@ struct AutoArgs {
} }
#[tokio::main] #[tokio::main]
async fn main() -> () { async fn main() {
tracing_subscriber::fmt::init(); tracing_subscriber::fmt::init();
match Args::parse() { match Args::parse() {

View file

@ -7,9 +7,9 @@ use tokio::fs;
use crate::{ use crate::{
anyhow_retry_if_wasnt_not_found, build_client, db::Db, get_fetch_retry_policy, anyhow_retry_if_wasnt_not_found, build_client, db::Db, get_fetch_retry_policy,
get_parse_retry_policy, proxy_client::ProxyClient, sites, supermercado::Supermercado, Counters, get_parse_retry_policy, proxy_client::ProxyClient, sites, Counters, PrecioPoint,
PrecioPoint,
}; };
use preciazo::supermercado::Supermercado;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Scraper { pub struct Scraper {

View file

@ -229,7 +229,7 @@ pub async fn get_best_selling_by_category(
let json = &serde_json::from_str::<serde_json::Value>(&body)?; let json = &serde_json::from_str::<serde_json::Value>(&body)?;
if let Some(errors_array) = json.pointer("/errors") { if let Some(errors_array) = json.pointer("/errors") {
if let Some(error_messages) = errors_array.as_array().map(|a| { if let Some(error_messages) = errors_array.as_array().map(|a| {
a.into_iter() a.iter()
.map(|obj| obj.get("message").and_then(|v| v.as_str())) .map(|obj| obj.get("message").and_then(|v| v.as_str()))
.collect_vec() .collect_vec()
}) { }) {

50
rust/src/supermercado.rs Normal file
View file

@ -0,0 +1,50 @@
use clap::ValueEnum;
use reqwest::Url;
/// Lookup table pairing each [`Supermercado`] with the hostname of its online store.
///
/// Listed in the enum's declaration order. Every variant and every hostname appears exactly
/// once, so lookups in either direction do not depend on the order of the entries.
const SUPERMERCADOS_HOSTS: [(Supermercado, &str); 5] = [
    (Supermercado::Dia, "diaonline.supermercadosdia.com.ar"),
    (Supermercado::Jumbo, "www.jumbo.com.ar"),
    (Supermercado::Carrefour, "www.carrefour.com.ar"),
    (Supermercado::Coto, "www.cotodigital3.com.ar"),
    (Supermercado::Farmacity, "www.farmacity.com"),
];
/// The supermarket chains known to this crate.
///
/// `Eq` is derived next to `PartialEq` because equality on a field-less enum is total.
// Variants deliberately carry no `///` docs: clap's `ValueEnum` derive would surface them as
// CLI help text for the corresponding value.
#[derive(ValueEnum, Clone, Debug, Copy, PartialEq, Eq)]
pub enum Supermercado {
    Dia,
    Jumbo,
    Carrefour,
    Coto,
    Farmacity,
}
impl Supermercado {
    /// Returns the hostname of this supermarket's online store.
    ///
    /// # Panics
    ///
    /// Panics if the variant has no entry in `SUPERMERCADOS_HOSTS`, which would be a bug in
    /// that table.
    pub fn host(&self) -> &'static str {
        SUPERMERCADOS_HOSTS
            .iter()
            .find(|(supermercado, _)| supermercado == self)
            .map(|(_, host)| *host)
            .expect("every Supermercado variant must have an entry in SUPERMERCADOS_HOSTS")
    }

    /// Returns the supermarket whose store is served from `url`'s host.
    ///
    /// Returns `None` when the host is not a known store, and also when the URL has no host
    /// at all (e.g. `mailto:` or `data:` URLs) instead of panicking on it.
    pub fn from_url(url: &Url) -> Option<Self> {
        let url_host = url.host_str()?;
        SUPERMERCADOS_HOSTS
            .iter()
            .find(|(_, host)| *host == url_host)
            .map(|(supermercado, _)| *supermercado)
    }
}
#[cfg(test)]
mod tests {
    use super::Supermercado;

    // A product URL on Dia's store must resolve to `Supermercado::Dia`.
    #[test]
    fn host_to_supermercado() {
        let product_url = reqwest::Url::parse("https://diaonline.supermercadosdia.com.ar/repelente-para-mosquitos-off--family-aerosol-165-cc-6338/p").unwrap();
        let resolved = Supermercado::from_url(&product_url);
        assert_eq!(resolved, Some(Supermercado::Dia));
    }

    // The reverse direction: a variant maps to its store hostname.
    #[test]
    fn supermercado_to_host() {
        assert_eq!(Supermercado::Coto.host(), "www.cotodigital3.com.ar");
    }
}

View file

@ -1,2 +0,0 @@
.env
target

View file

@ -1,32 +0,0 @@
use clap::ValueEnum;
use reqwest::Url;
/// The supermarket chains known to the scraper.
// Variants deliberately carry no `///` docs: clap's `ValueEnum` derive would surface them as
// CLI help text for the corresponding value.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum Supermercado {
    Dia,
    Jumbo,
    Carrefour,
    Coto,
    Farmacity,
}
impl Supermercado {
    /// Returns the hostname of this supermarket's online store.
    pub fn host(&self) -> &'static str {
        match self {
            Self::Dia => "diaonline.supermercadosdia.com.ar",
            Self::Carrefour => "www.carrefour.com.ar",
            Self::Coto => "www.cotodigital3.com.ar",
            Self::Jumbo => "www.jumbo.com.ar",
            Self::Farmacity => "www.farmacity.com",
        }
    }

    /// Returns the supermarket whose store is served from `url`'s host.
    ///
    /// Returns `None` when the host is not a known store, and also when the URL has no host
    /// at all (e.g. `mailto:` or `data:` URLs) instead of panicking on it.
    pub fn from_url(url: &Url) -> Option<Self> {
        match url.host_str()? {
            "www.carrefour.com.ar" => Some(Self::Carrefour),
            "diaonline.supermercadosdia.com.ar" => Some(Self::Dia),
            "www.cotodigital3.com.ar" => Some(Self::Coto),
            "www.jumbo.com.ar" => Some(Self::Jumbo),
            "www.farmacity.com" => Some(Self::Farmacity),
            _ => None,
        }
    }
}