diff --git a/scraper-rs/src/main.rs b/scraper-rs/src/main.rs
index 0e46c11..565b0fd 100644
--- a/scraper-rs/src/main.rs
+++ b/scraper-rs/src/main.rs
@@ -23,7 +23,7 @@ enum Supermercado {
     Coto,
 }
 impl Supermercado {
-    fn host(self: &Self) -> &'static str {
+    fn host(&self) -> &'static str {
         match self {
             Self::Dia => "diaonline.supermercadosdia.com.ar",
             Self::Carrefour => "www.carrefour.com.ar",
@@ -128,8 +128,7 @@ async fn fetch_list(pool: &Pool, links: Vec<String>) -> Counters {
 
 fn connect_db() -> Pool {
     let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
     let cfg = deadpool_sqlite::Config::new(db_path);
-    let pool = cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap();
-    pool
+    cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap()
 }
 
@@ -283,13 +282,13 @@ async fn scrap_url(
     let url_p = Url::parse(&url).unwrap();
     match url_p.host_str().unwrap() {
         "www.carrefour.com.ar" => {
-            sites::carrefour::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::carrefour::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "diaonline.supermercadosdia.com.ar" => {
-            sites::dia::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::dia::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "www.cotodigital3.com.ar" => {
-            sites::coto::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::coto::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "www.jumbo.com.ar" => sites::jumbo::scrap(client, url, body).await,
         s => bail!("Unknown host {}", s),
@@ -308,7 +307,7 @@ struct Auto {
     telegram: Option<AutoTelegram>,
 }
 impl Auto {
-    async fn download_supermercado(self: Self, supermercado: Supermercado) -> anyhow::Result<()> {
+    async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
         {
             let t0 = now_sec();
             self.get_and_save_urls(&supermercado).await?;
@@ -360,7 +359,7 @@ impl Auto {
         Ok(())
     }
 
-    async fn get_and_save_urls(self: &Self, supermercado: &Supermercado) -> anyhow::Result<()> {
+    async fn get_and_save_urls(&self, supermercado: &Supermercado) -> anyhow::Result<()> {
         let urls = get_urls(supermercado).await?;
         self.pool
             .get()
@@ -386,7 +385,7 @@ impl Auto {
         Ok(())
     }
 
-    async fn inform(self: &Self, msg: &str) {
+    async fn inform(&self, msg: &str) {
         println!("{}", msg);
         if let Some(telegram) = &self.telegram {
             let u = Url::parse_with_params(
diff --git a/scraper-rs/src/sites/vtex.rs b/scraper-rs/src/sites/vtex.rs
index b2a013c..77242a3 100644
--- a/scraper-rs/src/sites/vtex.rs
+++ b/scraper-rs/src/sites/vtex.rs
@@ -118,25 +118,6 @@ pub fn parse_urls_from_sitemap(sitemap: &str) -> anyhow::Result<Vec<String>> {
         .try_collect()
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_decode_url() -> anyhow::Result<()> {
-        let links = parse_urls_from_sitemap(
-            r#"<?xml version="1.0" encoding="UTF-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-<url>
-<loc>https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g​-684952/p</loc>
-<lastmod>2024-01-12T10:41:25.962Z</lastmod>
-</url></urlset>"#,
-        )?;
-        assert_eq!(links[0], "https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g\u{200b}-684952/p");
-        Ok(())
-    }
-}
-
 pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
     let mut total: Vec<String> = vec![];
     let client = build_client();
@@ -146,7 +127,6 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
     for url in sitemaps {
-        let client = client.clone();
         let text = client.get(url).send().await?.text().await?;
         let mut urls = parse_urls_from_sitemap(&text)?;
         total.append(&mut urls);
     }
 
@@ -158,3 +138,22 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
 
     Ok(total)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_decode_url() -> anyhow::Result<()> {
+        let links = parse_urls_from_sitemap(
+            r#"<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+<url>
+<loc>https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g​-684952/p</loc>
+<lastmod>2024-01-12T10:41:25.962Z</lastmod>
+</url></urlset>"#,
+        )?;
+        assert_eq!(links[0], "https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g\u{200b}-684952/p");
+        Ok(())
+    }
+}