From ce0708738fc4685f0b485588db3f11a26aac1dfc Mon Sep 17 00:00:00 2001 From: Nulo Date: Tue, 30 Jan 2024 10:52:31 -0300 Subject: [PATCH] sqlx --- scraper-rs/.env | 1 + ...535425e5c2f39cf98735b5f67cb91d01937ce.json | 12 + ...7f6c0a78e7e025cf152a8c176b9fd1de241da.json | 12 + ...df4e257f3c9b0efa62c8342d077d69d826a69.json | 20 + ...8c67b47ec8a575d2a14a487b3294e0faec438.json | 12 + ...9744a8d5f70c83ed9ddddfd55009136088a52.json | 20 + scraper-rs/Cargo.lock | 755 +++++++++++++++++- scraper-rs/Cargo.toml | 3 +- scraper-rs/src/best_selling.rs | 27 +- scraper-rs/src/db.rs | 109 +++ scraper-rs/src/main.rs | 154 +--- scraper-rs/src/sites/common.rs | 4 +- scraper-rs/src/sites/coto.rs | 2 +- 13 files changed, 948 insertions(+), 183 deletions(-) create mode 100644 scraper-rs/.env create mode 100644 scraper-rs/.sqlx/query-08d55fc80c8a6ad73d311e8b1cd535425e5c2f39cf98735b5f67cb91d01937ce.json create mode 100644 scraper-rs/.sqlx/query-144f4622ac9a937aa4885ceb5a67f6c0a78e7e025cf152a8c176b9fd1de241da.json create mode 100644 scraper-rs/.sqlx/query-aa5c2a04aec149d88f6e25a9bd7df4e257f3c9b0efa62c8342d077d69d826a69.json create mode 100644 scraper-rs/.sqlx/query-d0c3a557a81f6685b242ed0be8e8c67b47ec8a575d2a14a487b3294e0faec438.json create mode 100644 scraper-rs/.sqlx/query-f249765f2fb013a81a4157a6ce19744a8d5f70c83ed9ddddfd55009136088a52.json create mode 100644 scraper-rs/src/db.rs diff --git a/scraper-rs/.env b/scraper-rs/.env new file mode 100644 index 0000000..83d2c4d --- /dev/null +++ b/scraper-rs/.env @@ -0,0 +1 @@ +DATABASE_URL=sqlite:../sqlite.db diff --git a/scraper-rs/.sqlx/query-08d55fc80c8a6ad73d311e8b1cd535425e5c2f39cf98735b5f67cb91d01937ce.json b/scraper-rs/.sqlx/query-08d55fc80c8a6ad73d311e8b1cd535425e5c2f39cf98735b5f67cb91d01937ce.json new file mode 100644 index 0000000..9f6eb81 --- /dev/null +++ b/scraper-rs/.sqlx/query-08d55fc80c8a6ad73d311e8b1cd535425e5c2f39cf98735b5f67cb91d01937ce.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT INTO producto_urls(url, first_seen, last_seen)\n VALUES (?1, ?2, ?2)\n ON CONFLICT(url) DO UPDATE SET last_seen=?2;", + "describe": { + "columns": [], + "parameters": { + "Right": 2 + }, + "nullable": [] + }, + "hash": "08d55fc80c8a6ad73d311e8b1cd535425e5c2f39cf98735b5f67cb91d01937ce" +} diff --git a/scraper-rs/.sqlx/query-144f4622ac9a937aa4885ceb5a67f6c0a78e7e025cf152a8c176b9fd1de241da.json b/scraper-rs/.sqlx/query-144f4622ac9a937aa4885ceb5a67f6c0a78e7e025cf152a8c176b9fd1de241da.json new file mode 100644 index 0000000..8bf6e78 --- /dev/null +++ b/scraper-rs/.sqlx/query-144f4622ac9a937aa4885ceb5a67f6c0a78e7e025cf152a8c176b9fd1de241da.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT INTO db_best_selling(fetched_at, category, eans_json)\n VALUES (?1, ?2, ?3);", + "describe": { + "columns": [], + "parameters": { + "Right": 3 + }, + "nullable": [] + }, + "hash": "144f4622ac9a937aa4885ceb5a67f6c0a78e7e025cf152a8c176b9fd1de241da" +} diff --git a/scraper-rs/.sqlx/query-aa5c2a04aec149d88f6e25a9bd7df4e257f3c9b0efa62c8342d077d69d826a69.json b/scraper-rs/.sqlx/query-aa5c2a04aec149d88f6e25a9bd7df4e257f3c9b0efa62c8342d077d69d826a69.json new file mode 100644 index 0000000..08c07a1 --- /dev/null +++ b/scraper-rs/.sqlx/query-aa5c2a04aec149d88f6e25a9bd7df4e257f3c9b0efa62c8342d077d69d826a69.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "SELECT url FROM producto_urls WHERE url LIKE ?1;", + "describe": { + "columns": [ + { + "name": "url", + "ordinal": 0, + "type_info": "Text" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "aa5c2a04aec149d88f6e25a9bd7df4e257f3c9b0efa62c8342d077d69d826a69" +} diff --git a/scraper-rs/.sqlx/query-d0c3a557a81f6685b242ed0be8e8c67b47ec8a575d2a14a487b3294e0faec438.json b/scraper-rs/.sqlx/query-d0c3a557a81f6685b242ed0be8e8c67b47ec8a575d2a14a487b3294e0faec438.json new file mode 100644 index 0000000..b870e21 --- /dev/null +++ b/scraper-rs/.sqlx/query-d0c3a557a81f6685b242ed0be8e8c67b47ec8a575d2a14a487b3294e0faec438.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);", + "describe": { + "columns": [], + "parameters": { + "Right": 9 + }, + "nullable": [] + }, + "hash": "d0c3a557a81f6685b242ed0be8e8c67b47ec8a575d2a14a487b3294e0faec438" +} diff --git a/scraper-rs/.sqlx/query-f249765f2fb013a81a4157a6ce19744a8d5f70c83ed9ddddfd55009136088a52.json b/scraper-rs/.sqlx/query-f249765f2fb013a81a4157a6ce19744a8d5f70c83ed9ddddfd55009136088a52.json new file mode 100644 index 0000000..eeb8a74 --- /dev/null +++ b/scraper-rs/.sqlx/query-f249765f2fb013a81a4157a6ce19744a8d5f70c83ed9ddddfd55009136088a52.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "SELECT ean FROM precios WHERE url = ?1;", + "describe": { + "columns": [ + { + "name": "ean", + "ordinal": 0, + "type_info": "Text" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "f249765f2fb013a81a4157a6ce19744a8d5f70c83ed9ddddfd55009136088a52" +} diff --git a/scraper-rs/Cargo.lock b/scraper-rs/Cargo.lock index 30ea007..fc60e87 100644 --- a/scraper-rs/Cargo.lock +++ b/scraper-rs/Cargo.lock @@ -35,6 +35,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", + "getrandom 0.2.11", "once_cell", "version_check", "zerocopy", @@ -145,14 +146,22 @@ dependencies = [ ] [[package]] -name = "async-trait" -version = "0.1.77" +name = "atoi" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" dependencies = [ - "proc-macro2", - "quote", - "syn", + "num-traits", +] + +[[package]] +name = "atomic-write-file" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edcdbedc2236483ab103a53415653d6b4442ea6141baf1ffa85df29635e88436" +dependencies = [ + "nix", + "rand 0.8.5", ] [[package]] @@ -182,6 +191,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + [[package]] name = "bitflags" version = "1.3.2" @@ -193,6 +208,18 @@ name = "bitflags" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +dependencies = [ + "serde", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] [[package]] name = "brotli" @@ -221,6 +248,12 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.5.0" @@ -287,7 +320,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -302,6 +335,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "core-foundation" version = "0.9.4" @@ -318,6 +357,30 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.3.2" @@ -339,51 +402,67 @@ dependencies = [ ] [[package]] -name = "deadpool" -version = "0.10.0" +name = "crossbeam-queue" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb84100978c1c7b37f09ed3ce3e5f843af02c2a2c431bae5b19230dad2c1b490" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" dependencies = [ - "async-trait", - "deadpool-runtime", - "num_cpus", - "tokio", + "crossbeam-utils", ] [[package]] -name = "deadpool-runtime" -version = "0.1.3" +name = "crossbeam-utils" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63dfa964fe2a66f3fde91fc70b267fe193d822c7e603e2a675a49a7f46ad3f49" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ - "tokio", + "generic-array", + "typenum", ] [[package]] -name = "deadpool-sqlite" -version = "0.7.0" +name = "der" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8010e36e12f3be22543a5e478b4af20aeead9a700dd69581a5e050a070fc22c" +checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" dependencies = [ - "deadpool", - "deadpool-sync", - "rusqlite", + "const-oid", + "pem-rfc7468", + "zeroize", ] [[package]] -name = "deadpool-sync" -version = "0.1.2" +name = "digest" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8db70494c13cae4ce67b4b4dafdaf828cf0df7237ab5b9e2fcabee4965d0a0a" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "deadpool-runtime", + "block-buffer", + "const-oid", + "crypto-common", + "subtle", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + [[package]] name = "either" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +dependencies = [ + "serde", +] [[package]] name = "encoding_rs" @@ -400,6 +479,33 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -412,6 +518,18 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "finl_unicode" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" + [[package]] name = "flate2" version = "1.0.28" @@ -422,6 +540,17 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "futures-core", + "futures-sink", + "spin 0.9.8", +] + [[package]] name = "fnv" version = "1.0.7" @@ -479,6 +608,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot 0.12.1", +] + [[package]] name = "futures-io" version = "0.3.30" @@ -493,7 +633,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -526,6 +666,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -597,6 +747,9 @@ name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "hermit-abi" @@ -604,6 +757,39 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "html-escape" version = "0.2.13" @@ -772,6 +958,9 @@ name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +dependencies = [ + "spin 0.5.2", +] [[package]] name = "libc" @@ -779,16 +968,29 @@ version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libsqlite3-sys" version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716" dependencies = [ + "cc", "pkg-config", "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + [[package]] name = "lock_api" version = "0.4.11" @@ -805,6 +1007,16 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.1" @@ -852,6 +1064,17 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.1", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -872,6 +1095,44 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -879,6 +1140,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -960,6 +1222,21 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -978,6 +1255,27 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.28" @@ -1158,11 +1456,31 @@ dependencies = [ "cc", "getrandom 0.2.11", "libc", - "spin", + "spin 0.9.8", "untrusted", "windows-sys 0.48.0", ] +[[package]] +name = "rsa" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rusqlite" version = "0.30.0" @@ -1183,6 +1501,19 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustix" +version = "0.38.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "rustls" version = "0.21.10" @@ -1236,8 +1567,6 @@ dependencies = [ "chrono", "clap", "cron", - "deadpool", - "deadpool-sqlite", "futures", "html-escape", "itertools", @@ -1249,6 +1578,7 @@ dependencies = [ "serde", "serde_json", "simple-error", + "sqlx", "thiserror", "tl", "tokio", @@ -1283,7 +1613,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -1309,6 +1639,28 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1327,6 +1679,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simple-error" version = "0.3.0" @@ -1358,11 +1720,248 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlformat" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dba03c279da73694ef99763320dea58b51095dfe87d001b1d4b5fe78ba8763cf" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d84b0a3c3739e220d94b3239fd69fb1f74bc36e16643423bd99de3b43c21bfbd" +dependencies = [ + "ahash", + "atoi", + "byteorder", + "bytes", + "crc", + "crossbeam-queue", + "dotenvy", + "either", + "event-listener", + "futures-channel", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashlink", + "hex", + "indexmap", + "log", + "memchr", + "once_cell", + "paste", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlformat", + "thiserror", + "tokio", + "tokio-stream", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89961c00dc4d7dffb7aee214964b065072bff69e36ddb9e2c107541f75e4f2a5" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 1.0.109", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0bd4519486723648186a08785143599760f7cc81c52334a55d6a83ea1e20841" +dependencies = [ + "atomic-write-file", + "dotenvy", + "either", + "heck", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-sqlite", + "syn 1.0.109", + "tempfile", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e37195395df71fd068f6e2082247891bc11e3289624bbc776a0cdfa1ca7f1ea4" +dependencies = [ + "atoi", + "base64", + "bitflags 2.4.1", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6ac0ac3b7ccd10cc96c7ab29791a7dd236bd94021f31eec7ba3d46a74aa1c24" +dependencies = [ + "atoi", + "base64", + "bitflags 2.4.1", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "210976b7d948c7ba9fced8ca835b11cbb2d677c59c79de41ac0d397e14547490" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "sqlx-core", + "tracing", + "url", + "urlencoding", +] + +[[package]] +name = "stringprep" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6" +dependencies = [ + "finl_unicode", + "unicode-bidi", + "unicode-normalization", +] [[package]] name = "strsim" @@ -1370,6 +1969,23 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.48" @@ -1402,6 +2018,19 @@ dependencies = [ "libc", ] +[[package]] +name = "tempfile" +version = "3.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall 0.4.1", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "thiserror" version = "1.0.56" @@ -1419,7 +2048,7 @@ checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -1479,7 +2108,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -1504,6 +2133,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.10" @@ -1530,6 +2170,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -1543,7 +2184,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] [[package]] @@ -1587,6 +2228,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicode-bidi" version = "0.3.14" @@ -1608,6 +2255,18 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "untrusted" version = "0.9.0" @@ -1625,6 +2284,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8-width" version = "0.1.7" @@ -1697,7 +2362,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.48", "wasm-bindgen-shared", ] @@ -1731,7 +2396,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1773,6 +2438,12 @@ version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" +[[package]] +name = "whoami" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22fc3756b8a9133049b26c7f61ab35416c130e8c09b660f5b3958b446f52cc50" + [[package]] name = "winapi" version = "0.3.9" @@ -1963,5 +2634,11 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.48", ] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/scraper-rs/Cargo.toml b/scraper-rs/Cargo.toml index e8b1a0a..809f941 100644 --- a/scraper-rs/Cargo.toml +++ b/scraper-rs/Cargo.toml @@ -12,8 +12,7 @@ base64 = "0.21.7" chrono = "0.4.32" clap = { version = "4.4.15", features = ["derive"] } cron = "0.12.0" -deadpool = "0.10.0" -deadpool-sqlite = "0.7.0" +sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite" ] } futures = "0.3.30" html-escape = "0.2.13" itertools = "0.12.0" diff --git a/scraper-rs/src/best_selling.rs b/scraper-rs/src/best_selling.rs index 2f3e586..23f5cd9 100644 --- a/scraper-rs/src/best_selling.rs +++ b/scraper-rs/src/best_selling.rs @@ -1,9 +1,8 @@ use std::collections::HashMap; -use crate::{build_client, sites::vtex, supermercado::Supermercado}; +use crate::{build_client, db::Db, sites::vtex, supermercado::Supermercado}; use chrono::{DateTime, Utc}; use clap::ValueEnum; -use deadpool_sqlite::Pool; use futures::{stream, FutureExt, StreamExt, TryStreamExt}; use itertools::Itertools; use tracing::warn; @@ -49,21 +48,11 @@ pub struct BestSellingRecord { pub eans: Vec, } -async fn get_best_selling_eans(pool: &Pool, urls: Vec) -> anyhow::Result> { +async fn get_best_selling_eans(db: &Db, urls: Vec) -> anyhow::Result> { let mut eans: Vec = Vec::new(); for url in urls { - let q = url.clone(); - let ean = pool - .get() - .await? - .interact(move |conn| { - conn.prepare(r#"SELECT ean FROM precios WHERE url = ?1;"#)? - .query_map(rusqlite::params![q], |r| r.get::<_, String>(0)) - .map(|r| r.map(|r| r.unwrap()).next()) - }) - .await - .unwrap()?; + let ean = db.get_ean_by_url(&url).await?; match ean { Some(e) => eans.push(e), None => warn!("No encontrĂ© EAN para {}", url), @@ -75,13 +64,13 @@ async fn get_best_selling_eans(pool: &Pool, urls: Vec) -> anyhow::Result async fn try_get_best_selling_eans( client: reqwest::Client, - pool: Pool, + db: Db, supermercado: &Supermercado, category: &Category, ) -> anyhow::Result>> { if let Some(query) = category.query(supermercado) { let urls = vtex::get_best_selling_by_category(&client, supermercado.host(), query).await?; - let eans = get_best_selling_eans(&pool, urls).await?; + let eans = get_best_selling_eans(&db, urls).await?; Ok(Some(eans)) } else { Ok(None) @@ -107,18 +96,18 @@ fn rank_eans(eans: Vec>) -> Vec { .collect_vec() } -pub async fn get_all_best_selling(pool: &Pool) -> anyhow::Result> { +pub async fn get_all_best_selling(db: &Db) -> anyhow::Result> { let client = &build_client(); stream::iter(Category::value_variants()) .map(|category| { stream::iter(Supermercado::value_variants()) .map(|supermercado| { - let pool = pool.clone(); + let db = db.clone(); let client = client.clone(); tokio::spawn(try_get_best_selling_eans( client, - pool, + db, supermercado, category, )) diff --git a/scraper-rs/src/db.rs b/scraper-rs/src/db.rs new file mode 100644 index 0000000..2385f04 --- /dev/null +++ b/scraper-rs/src/db.rs @@ -0,0 +1,109 @@ +use std::{ + env, + str::FromStr, + time::{SystemTime, UNIX_EPOCH}, +}; + +use sqlx::{sqlite::SqliteConnectOptions, SqlitePool}; + +use crate::{best_selling::BestSellingRecord, PrecioPoint}; + +#[derive(Clone)] +pub struct Db { + pool: SqlitePool, +} + +impl Db { + pub async fn connect() -> anyhow::Result { + let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string()); + let pool = sqlx::pool::PoolOptions::new() + .max_connections(1) + .connect_with( + SqliteConnectOptions::from_str(&db_path)? + .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal) + .synchronous(sqlx::sqlite::SqliteSynchronous::Normal) + .optimize_on_close(true, None), + ) + .await?; + Ok(Self { pool }) + } + + pub async fn insert_precio(&self, point: PrecioPoint) -> anyhow::Result<()> { + sqlx::query!("INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);", + point.ean, + point.fetched_at, + point.precio_centavos, + point.in_stock, + point.url, + None::, + point.parser_version, + point.name, + point.image_url, + ).execute(&self.pool).await?; + Ok(()) + } + + pub async fn get_ean_by_url(&self, url: &str) -> anyhow::Result> { + Ok(sqlx::query!("SELECT ean FROM precios WHERE url = ?1;", url) + .fetch_optional(&self.pool) + .await? + .map(|r| r.ean)) + } + + pub async fn get_urls_by_domain(&self, domain: &str) -> anyhow::Result> { + let query = format!("%{}%", domain); + Ok( + sqlx::query!("SELECT url FROM producto_urls WHERE url LIKE ?1;", query) + .fetch_all(&self.pool) + .await? + .into_iter() + .map(|r| r.url) + .collect(), + ) + } + + pub async fn save_producto_urls(&self, urls: Vec) -> anyhow::Result<()> { + let now: i64 = now_ms().try_into()?; + let mut tx = self.pool.begin().await?; + for url in urls { + sqlx::query!( + r#"INSERT INTO producto_urls(url, first_seen, last_seen) + VALUES (?1, ?2, ?2) + ON CONFLICT(url) DO UPDATE SET last_seen=?2;"#, + url, + now + ) + .execute(&mut *tx) + .await?; + } + tx.commit().await?; + Ok(()) + } + + pub async fn save_best_selling(&self, records: Vec) -> anyhow::Result<()> { + let mut tx = self.pool.begin().await?; + for record in records { + let fetched_at = record.fetched_at.timestamp_millis(); + let category = record.category.id(); + let eans_json = serde_json::Value::from(record.eans).to_string(); + sqlx::query!( + r#"INSERT INTO db_best_selling(fetched_at, category, eans_json) + VALUES (?1, ?2, ?3);"#, + fetched_at, + category, + eans_json + ) + .execute(&mut *tx) + .await?; + } + tx.commit().await?; + Ok(()) + } +} + +fn now_ms() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_millis() +} diff --git a/scraper-rs/src/main.rs b/scraper-rs/src/main.rs index 308f0eb..3968b12 100644 --- a/scraper-rs/src/main.rs +++ b/scraper-rs/src/main.rs @@ -1,8 +1,7 @@ use again::RetryPolicy; -use best_selling::BestSellingRecord; use clap::{Parser, ValueEnum}; use cron::Schedule; -use deadpool_sqlite::Pool; +use db::Db; use futures::{future, stream, Future, StreamExt}; use nanoid::nanoid; use reqwest::{header::HeaderMap, StatusCode, Url}; @@ -73,7 +72,7 @@ async fn scrap_url_cli(url: String) -> anyhow::Result<()> { } mod best_selling; async fn scrap_best_selling_cli() -> anyhow::Result<()> { - let db = connect_db(); + let db = Db::connect().await?; let res = best_selling::get_all_best_selling(&db).await; println!("Result: {:#?}", res); @@ -89,14 +88,14 @@ async fn fetch_list_cli(links_list_path: String) -> anyhow::Result<()> { .map(|s| s.to_owned()) .collect::>(); - let pool = connect_db(); - let counters = fetch_list(&pool, links).await; + let db = Db::connect().await?; + let counters = fetch_list(&db, links).await; println!("Finished: {:?}", counters); Ok(()) } -async fn fetch_list(pool: &Pool, links: Vec) -> Counters { +async fn fetch_list(db: &Db, links: Vec) -> Counters { let n_coroutines = env::var("N_COROUTINES") .map_or(Ok(24), |s| s.parse::()) .expect("N_COROUTINES no es un nĂºmero"); @@ -105,9 +104,9 @@ async fn fetch_list(pool: &Pool, links: Vec) -> Counters { stream::iter(links) .map(|url| { - let pool = pool.clone(); + let db = db.clone(); let client = client.clone(); - tokio::spawn(fetch_and_save(client, url, pool)) + tokio::spawn(fetch_and_save(client, url, db)) }) .buffer_unordered(n_coroutines) .fold(Counters::default(), move |x, y| { @@ -121,11 +120,7 @@ async fn fetch_list(pool: &Pool, links: Vec) -> Counters { .await } -fn connect_db() -> Pool { - let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string()); - let cfg = deadpool_sqlite::Config::new(db_path); - cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap() -} +mod db; #[derive(Default, Debug)] struct Counters { @@ -134,26 +129,13 @@ struct Counters { skipped: u64, } -async fn fetch_and_save(client: reqwest::Client, url: String, pool: Pool) -> Counters { +async fn fetch_and_save(client: reqwest::Client, url: String, db: Db) -> Counters { let res = fetch_and_parse(&client, url.clone()).await; let mut counters = Counters::default(); match res { Ok(res) => { counters.success += 1; - pool.get().await.unwrap().interact(move |conn| conn.execute( - "INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);", - rusqlite::params![ - res.ean, - res.fetched_at, - res.precio_centavos, - res.in_stock, - res.url, - None::, - res.parser_version, - res.name, - res.image_url, - ] - )).await.unwrap().unwrap(); + db.insert_precio(res).await.unwrap(); } Err(err) => { match err.downcast_ref::() { @@ -301,7 +283,7 @@ struct AutoTelegram { #[derive(Clone)] struct Auto { - pool: Pool, + db: Db, telegram: Option, } impl Auto { @@ -316,24 +298,7 @@ impl Auto { )) .await; } - let links: Vec = { - let search = format!("%{}%", supermercado.host()); - self.pool - .get() - .await? - .interact(move |conn| -> anyhow::Result> { - Ok(conn - .prepare( - r#"SELECT url FROM producto_urls - WHERE url LIKE ?1;"#, - )? - .query_map(rusqlite::params![search], |r| r.get::<_, String>(0))? - .map(|r| r.unwrap()) - .collect()) - }) - .await - .unwrap()? - }; + let links: Vec = self.db.get_urls_by_domain(supermercado.host()).await?; // { // let debug_path = PathBuf::from("debug/"); // tokio::fs::create_dir_all(&debug_path).await.unwrap(); @@ -345,7 +310,7 @@ impl Auto { // } { let t0 = now_sec(); - let counters = fetch_list(&self.pool, links).await; + let counters = fetch_list(&self.db, links).await; self.inform(&format!( "Downloaded {:?}: {:?} (took {})", &supermercado, @@ -368,56 +333,7 @@ impl Auto { async fn get_and_save_urls(&self, supermercado: &Supermercado) -> anyhow::Result<()> { let urls = get_urls(supermercado).await?; - self.pool - .get() - .await? - .interact(|conn| -> Result<(), anyhow::Error> { - let tx = conn.transaction()?; - { - let mut stmt = tx.prepare( - r#"INSERT INTO producto_urls(url, first_seen, last_seen) - VALUES (?1, ?2, ?2) - ON CONFLICT(url) DO UPDATE SET last_seen=?2;"#, - )?; - let now: u64 = now_ms().try_into()?; - for url in urls { - stmt.execute(rusqlite::params![url, now])?; - } - } - tx.commit()?; - Ok(()) - }) - .await - .unwrap()?; - Ok(()) - } - - async fn save_best_selling(&self, best_selling: Vec) -> anyhow::Result<()> { - self.pool - .get() - .await? - .interact(move |conn| -> Result<(), anyhow::Error> { - let tx = conn.transaction()?; - { - let mut stmt = tx.prepare( - r#"INSERT INTO db_best_selling(fetched_at, category, eans_json) - VALUES (?1, ?2, ?3);"#, - )?; - for record in best_selling { - let eans_json = serde_json::Value::from(record.eans).to_string(); - let fetched_at = record.fetched_at.timestamp_millis(); - stmt.execute(rusqlite::params![ - fetched_at, - record.category.id(), - eans_json - ])?; - } - } - tx.commit()?; - Ok(()) - }) - .await - .unwrap()?; + self.db.save_producto_urls(urls).await?; Ok(()) } @@ -438,20 +354,22 @@ impl Auto { } async fn auto_cli() -> anyhow::Result<()> { - let db = connect_db(); - let telegram = { - match ( - env::var("TELEGRAM_BOT_TOKEN"), - env::var("TELEGRAM_BOT_CHAT_ID"), - ) { - (Ok(token), Ok(chat_id)) => Some(AutoTelegram { token, chat_id }), - _ => { - tracing::warn!("No token or chat_id for telegram"); - None + let auto = { + let db = Db::connect().await?; + let telegram = { + match ( + env::var("TELEGRAM_BOT_TOKEN"), + env::var("TELEGRAM_BOT_CHAT_ID"), + ) { + (Ok(token), Ok(chat_id)) => Some(AutoTelegram { token, chat_id }), + _ => { + tracing::warn!("No token or chat_id for telegram"); + None + } } - } + }; + Auto { db, telegram } }; - let auto = Auto { pool: db, telegram }; auto.inform("[auto] Empezando scrap").await; let handles: Vec<_> = Supermercado::value_variants() .iter() @@ -462,10 +380,10 @@ async fn auto_cli() -> anyhow::Result<()> { let best_selling = auto .inform_time( "Downloaded best selling", - best_selling::get_all_best_selling(&auto.pool), + best_selling::get_all_best_selling(&auto.db), ) .await?; - auto.save_best_selling(best_selling).await?; + auto.db.save_best_selling(best_selling).await?; Ok(()) } @@ -494,8 +412,8 @@ mod sites; struct PrecioPoint { ean: String, // unix - fetched_at: u64, - precio_centavos: Option, + fetched_at: i64, + precio_centavos: Option, in_stock: Option, url: String, parser_version: u16, @@ -503,13 +421,9 @@ struct PrecioPoint { image_url: Option, } -fn now_sec() -> u64 { - since_the_epoch().as_secs() +fn now_sec() -> i64 { + since_the_epoch().as_secs().try_into().unwrap() } -fn now_ms() -> u128 { - since_the_epoch().as_millis() -} - fn since_the_epoch() -> Duration { SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/scraper-rs/src/sites/common.rs b/scraper-rs/src/sites/common.rs index d75b03e..badafb3 100644 --- a/scraper-rs/src/sites/common.rs +++ b/scraper-rs/src/sites/common.rs @@ -11,9 +11,9 @@ pub fn get_meta_content<'a>(dom: &'a VDom<'a>, prop: &str) -> Option) -> Result, anyhow::Error> { +pub fn price_from_meta(dom: &tl::VDom<'_>) -> Result, anyhow::Error> { let precio_centavos = get_meta_content(dom, "product:price:amount") - .map(|s| s.parse::().map(|f| (f * 100.0) as u64)) + .map(|s| s.parse::().map(|f| (f * 100.0) as i64)) .transpose()?; Ok(precio_centavos) } diff --git a/scraper-rs/src/sites/coto.rs b/scraper-rs/src/sites/coto.rs index 2e89664..3f1e4d8 100644 --- a/scraper-rs/src/sites/coto.rs +++ b/scraper-rs/src/sites/coto.rs @@ -37,7 +37,7 @@ pub fn parse(url: String, dom: &tl::VDom) -> Result }) .transpose() .context("Parseando precio")? - .map(|f| (f * 100.0) as u64); + .map(|f| (f * 100.0) as i64); let in_stock = Some( dom.query_selector(".product_not_available")