From 613efc3111aef08b798c216a12dfbc3f6f6d3fbe Mon Sep 17 00:00:00 2001 From: Nulo Date: Wed, 10 Jan 2024 21:44:35 -0300 Subject: [PATCH] warcificator: restructuracion masiva --- .gitignore | 3 +- warcificator/Cargo.lock | 340 ++++++++++++++++++++++++++++++--------- warcificator/Cargo.toml | 11 +- warcificator/src/main.rs | 123 +++++++++----- 4 files changed, 354 insertions(+), 123 deletions(-) diff --git a/.gitignore b/.gitignore index d3a88c9..7c32af8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ target/ .env.* */flamegraph.svg -*/perf.data* \ No newline at end of file +*/perf.data* +warcificator/debug/ \ No newline at end of file diff --git a/warcificator/Cargo.lock b/warcificator/Cargo.lock index bac9aa5..2a6c63e 100644 --- a/warcificator/Cargo.lock +++ b/warcificator/Cargo.lock @@ -17,6 +17,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "again" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05802a5ad4d172eaf796f7047b42d0af9db513585d16d4169660a21613d34b93" +dependencies = [ + "log", + "rand 0.7.3", + "wasm-timer", +] + [[package]] name = "ahash" version = "0.8.7" @@ -198,16 +209,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82a9b73a36529d9c47029b9fb3a6f0ea3cc916a261195352ba19e770fc1748b2" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.18" @@ -218,13 +219,10 @@ dependencies = [ ] [[package]] -name = "deranged" -version = "0.3.11" +name = "either" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", -] +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encoding_rs" @@ -299,6 +297,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -306,6 +319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -314,6 +328,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -332,10 +374,27 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", ] [[package]] @@ -346,7 +405,7 @@ checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", ] [[package]] @@ -491,6 +550,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + [[package]] name = "ipnet" version = "2.9.0" @@ -578,10 +646,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys", ] +[[package]] +name = "nanoid" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ffa00dec017b5b1a8b7cf5e2c008bfda1aa7e0697ac1508b491fdf2622fb4d8" +dependencies = [ + "rand 0.8.5", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -629,6 +706,17 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + [[package]] name = "parking_lot" version = "0.12.1" @@ -636,7 +724,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core", + "parking_lot_core 0.9.9", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall 0.2.16", + "smallvec", + "winapi", ] [[package]] @@ -647,7 +749,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.4.1", "smallvec", "windows-targets", ] @@ -677,29 +779,109 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" [[package]] -name = "powerfmt" -version = "0.2.0" +name = "ppv-lite86" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.71" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.11", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -741,6 +923,7 @@ dependencies = [ "system-configuration", "tokio", "tokio-rustls", + "tokio-socks", "tokio-util", "tower-service", "url", @@ -758,7 +941,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", - "getrandom", + "getrandom 0.2.11", "libc", "spin", "untrusted", @@ -899,6 +1082,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simple-error" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8542b68b8800c3cda649d2c72d688b6907b30f1580043135d61669d4aad1c175" + [[package]] name = "slab" version = "0.4.9" @@ -932,9 +1121,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "syn" -version = "2.0.43" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -964,18 +1153,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.55" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e3de26b0965292219b4287ff031fcba86837900fe9cd2b34ea8ad893c0953d2" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.55" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "268026685b2be38d7103e9e507c938a1fcb3d7e6eb15e87870b617bf37b6d581" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", @@ -992,35 +1181,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "time" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" -dependencies = [ - "deranged", - "itoa", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - -[[package]] -name = "time-macros" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" -dependencies = [ - "time-core", -] - [[package]] name = "tinyvec" version = "1.6.0" @@ -1052,7 +1212,7 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", @@ -1081,6 +1241,18 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-socks" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51165dfa029d2a65969413a6cc96f354b86b464498702f174a4efa13608fd8c0" +dependencies = [ + "either", + "futures-util", + "thiserror", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.10" @@ -1107,24 +1279,11 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "log", "pin-project-lite", "tracing-attributes", "tracing-core", ] -[[package]] -name = "tracing-appender" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" -dependencies = [ - "crossbeam-channel", - "thiserror", - "time", - "tracing-subscriber", -] - [[package]] name = "tracing-attributes" version = "0.1.27" @@ -1246,18 +1405,28 @@ dependencies = [ name = "warcificator" version = "0.1.0" dependencies = [ + "again", "async-channel", + "nanoid", + "rand 0.8.5", "reqwest", "rusqlite", "serde", "serde_json", + "simple-error", + "thiserror", "tl", "tokio", "tracing", - "tracing-appender", "tracing-subscriber", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1330,6 +1499,21 @@ version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +[[package]] +name = "wasm-timer" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0ecb0db480561e9a7642b5d3e4187c128914e58aa84330b9493e3eb68c5e7f" +dependencies = [ + "futures", + "js-sys", + "parking_lot 0.11.2", + "pin-utils", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.66" diff --git a/warcificator/Cargo.toml b/warcificator/Cargo.toml index a9f7076..2f53a73 100644 --- a/warcificator/Cargo.toml +++ b/warcificator/Cargo.toml @@ -6,19 +6,24 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +again = "0.1.2" async-channel = "2.1.1" +nanoid = "0.4.0" +rand = "0.8.5" # lol_html = "1.2.0" reqwest = { version = "0.11.23", default-features = false, features = [ "rustls-tls", "gzip", "brotli", + "socks", ] } rusqlite = "0.30.0" # scraper = "0.18.1" serde = { version = "1.0.193", features = ["derive"] } serde_json = "1.0.109" +simple-error = "0.3.0" +thiserror = "1.0.56" tl = { git = "https://github.com/evertedsphere/tl", branch = "patch-1", features = ["simd"] } tokio = { version = "1.35.1", features = ["full"] } -tracing = { version = "0.1", features = ["log"] } -tracing-appender = "0.2.3" -tracing-subscriber = "0.3.18" +tracing = "0.1" +tracing-subscriber = "0.3" diff --git a/warcificator/src/main.rs b/warcificator/src/main.rs index 77070cb..f329daa 100644 --- a/warcificator/src/main.rs +++ b/warcificator/src/main.rs @@ -1,11 +1,18 @@ +use again::RetryPolicy; use async_channel::{Receiver, Sender}; +use nanoid::nanoid; +use rand::seq::SliceRandom; +use reqwest::Url; use rusqlite::Connection; +use simple_error::{bail, SimpleError}; use std::{ borrow::Cow, env::{self, args}, fs, - time::{SystemTime, UNIX_EPOCH}, + path::PathBuf, + time::{Duration, SystemTime, UNIX_EPOCH}, }; +use thiserror::Error; use tl::VDom; use tokio::io::{stderr, AsyncWriteExt}; @@ -95,14 +102,16 @@ struct PrecioPoint { // } #[tokio::main] -async fn main() { +async fn main() -> Result<(), Box> { + tracing_subscriber::fmt::init(); + let mut args = args().skip(1); - let links_list_path = args.next().unwrap(); + let links_list_path = args.next().expect("Falta arg para path de lista de urls"); let links_str = fs::read_to_string(links_list_path).unwrap(); let links = links_str - .split("\n") + .split('\n') .map(|s| s.trim()) - .filter(|s| s.len() > 0) + .filter(|s| !s.is_empty()) .map(|s| s.to_owned()) .collect::>(); @@ -112,8 +121,8 @@ async fn main() { let mut handles = Vec::new(); for _ in 1..env::var("N_COROUTINES") - .map_or(Ok(32), |s| s.parse::()) - .unwrap() + .map_or(Ok(128), |s| s.parse::()) + .expect("N_COROUTINES no es un nĂºmero") { let rx = receiver.clone(); let tx = res_sender.clone(); @@ -134,6 +143,7 @@ async fn main() { db_writer_handle }; handle.await.unwrap(); + Ok(()) } async fn worker(rx: Receiver, tx: Sender) { @@ -145,46 +155,68 @@ async fn worker(rx: Receiver, tx: Sender) { tx.send(ex).await.unwrap(); } Err(err) => { - stderr() - .write_all(format!("Failed to fetch {}: {:?}\n", url.as_str(), err).as_bytes()) - .await - .unwrap(); + tracing::error!(error=%err, url=url); } } } } -#[derive(Debug)] +#[derive(Debug, Error)] enum FetchError { - HttpError(reqwest::Error), - ParseError(&'static str), + #[error("reqwest error")] + Http(#[from] reqwest::Error), + #[error("http status: {0}")] + HttpStatus(reqwest::StatusCode), + #[error("parse error")] + Parse(#[from] SimpleError), + #[error("tl error")] + Tl(#[from] tl::ParseError), } +#[tracing::instrument(skip(client))] async fn fetch_and_parse(client: &reqwest::Client, url: String) -> Result { - let request = client.get(url.as_str()).build().unwrap(); - let response = client - .execute(request) - .await - .map_err(|e| FetchError::HttpError(e))?; - let body = response - .text() - .await - .map_err(|e| FetchError::HttpError(e))?; + let policy = RetryPolicy::exponential(Duration::from_millis(300)) + .with_max_retries(10) + .with_jitter(true); - let dom = tl::parse(&body, tl::ParserOptions::default()).unwrap(); - // let parser = dom.parser(); + let response = policy + .retry(|| { + let request = client.get(url.as_str()).build().unwrap(); + client.execute(request) + }) + .await + .map_err(FetchError::Http)?; + if !response.status().is_success() { + return Err(FetchError::HttpStatus(response.status())); + } + let body = response.text().await.map_err(FetchError::Http)?; - let point = parse_carrefour(url, &dom)?; + let maybe_point = { + let dom = tl::parse(&body, tl::ParserOptions::default()).map_err(FetchError::Tl)?; + parse_carrefour(url, &dom) + }; + + let point = match maybe_point { + Ok(p) => Ok(p), + Err(err) => { + let debug_path = PathBuf::from("debug/"); + tokio::fs::create_dir_all(&debug_path).await.unwrap(); + let file_path = debug_path.join(format!("{}.html", nanoid!())); + tokio::fs::write(&file_path, &body).await.unwrap(); + tracing::debug!(error=%err, "Failed to parse, saved body at {}",file_path.display()); + Err(err) + } + }?; Ok(point) } -fn parse_carrefour(url: String, dom: &tl::VDom) -> Result { +fn parse_carrefour(url: String, dom: &tl::VDom) -> Result { let precio_centavos = { get_meta_content(dom, "product:price:amount")? .map(|s| { s.parse::() - .map_err(|_| FetchError::ParseError("Failed to parse number")) + .map_err(|_| SimpleError::new("Failed to parse number")) }) .transpose() .map(|f| f.map(|f| (f * 100.0) as u64)) @@ -195,7 +227,7 @@ fn parse_carrefour(url: String, dom: &tl::VDom) -> Result match s.as_ref() { "oos" => Some(false), "instock" => Some(true), - _ => return Err(FetchError::ParseError("Not a valid product:availability")), + _ => return Err(SimpleError::new("Not a valid product:availability")), }, None => None, }; @@ -204,7 +236,10 @@ fn parse_carrefour(url: String, dom: &tl::VDom) -> Result Result Result Result(dom: &'a VDom<'a>, prop: &str) -> Result>, FetchError> { +fn get_meta_content<'a>( + dom: &'a VDom<'a>, + prop: &str, +) -> Result>, SimpleError> { let tag = &dom .query_selector(&format!("meta[property=\"{}\"]", prop)) .and_then(|mut iter| iter.next()) @@ -259,14 +297,14 @@ fn get_meta_content<'a>(dom: &'a VDom<'a>, prop: &str) -> Result Ok(None), } } -fn parse_script_json(dom: &VDom, varname: &str) -> Result { +fn parse_script_json(dom: &VDom, varname: &str) -> Result { let parser = dom.parser(); let inner_html = &dom .query_selector(&format!( @@ -282,11 +320,11 @@ fn parse_script_json(dom: &VDom, varname: &str) -> Result u64 { @@ -300,7 +338,10 @@ fn now_sec() -> u64 { async fn db_writer(rx: Receiver) { // let conn = Connection::open("../scraper/sqlite.db").unwrap(); // let mut stmt = conn.prepare("SELECT id, name, data FROM person")?; + let mut n = 0; while let Ok(res) = rx.recv().await { - println!("{:?}", res) + n += 1; + println!("{}", n); + // println!("{:?}", res) } }