follow commander clippy

Cat /dev/Nulo 2024-01-25 17:12:32 -03:00
parent cce34571f1
commit c687ea1484
2 changed files with 27 additions and 29 deletions
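The diff below applies suggestions from clippy, Rust's linter: the shorthand `&self` receiver instead of the spelled-out `self: &Self`, returning an expression directly instead of binding it to a variable first (the `clippy::let_and_return` lint), and dropping a borrow where the callee already receives a reference (likely a needless-borrow warning, depending on the type of `body`). As a rough, hedged sketch of those patterns outside this repository (the `Thing` and `make_thing` names are placeholders, not identifiers from this project):

// Minimal sketch of the clippy patterns this commit follows; not code from the repo.
struct Thing;

impl Thing {
    // Before: fn host(self: &Self) -> &'static str { ... }
    // After: the shorthand receiver below is equivalent and is what clippy suggests.
    fn host(&self) -> &'static str {
        "example.invalid"
    }
}

// Before (clippy::let_and_return):
//     let thing = Thing;
//     thing
// After: return the expression directly instead of binding it first.
fn make_thing() -> Thing {
    Thing
}

fn main() {
    let t = make_thing();
    println!("{}", t.host());
}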

@@ -23,7 +23,7 @@ enum Supermercado {
     Coto,
 }
 impl Supermercado {
-    fn host(self: &Self) -> &'static str {
+    fn host(&self) -> &'static str {
         match self {
             Self::Dia => "diaonline.supermercadosdia.com.ar",
             Self::Carrefour => "www.carrefour.com.ar",
@@ -128,8 +128,7 @@ async fn fetch_list(pool: &Pool, links: Vec<String>) -> Counters {
 fn connect_db() -> Pool {
     let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
     let cfg = deadpool_sqlite::Config::new(db_path);
-    let pool = cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap();
-    pool
+    cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap()
 }
 
 fn build_client() -> reqwest::Client {
@@ -283,13 +282,13 @@ async fn scrap_url(
     let url_p = Url::parse(&url).unwrap();
     match url_p.host_str().unwrap() {
         "www.carrefour.com.ar" => {
-            sites::carrefour::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::carrefour::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "diaonline.supermercadosdia.com.ar" => {
-            sites::dia::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::dia::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "www.cotodigital3.com.ar" => {
-            sites::coto::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::coto::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "www.jumbo.com.ar" => sites::jumbo::scrap(client, url, body).await,
         s => bail!("Unknown host {}", s),
@@ -308,7 +307,7 @@ struct Auto {
     telegram: Option<AutoTelegram>,
 }
 impl Auto {
-    async fn download_supermercado(self: Self, supermercado: Supermercado) -> anyhow::Result<()> {
+    async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
         {
             let t0 = now_sec();
             self.get_and_save_urls(&supermercado).await?;
@@ -360,7 +359,7 @@ impl Auto {
         Ok(())
     }
 
-    async fn get_and_save_urls(self: &Self, supermercado: &Supermercado) -> anyhow::Result<()> {
+    async fn get_and_save_urls(&self, supermercado: &Supermercado) -> anyhow::Result<()> {
         let urls = get_urls(supermercado).await?;
         self.pool
             .get()
@@ -386,7 +385,7 @@ impl Auto {
         Ok(())
     }
 
-    async fn inform(self: &Self, msg: &str) {
+    async fn inform(&self, msg: &str) {
         println!("{}", msg);
         if let Some(telegram) = &self.telegram {
             let u = Url::parse_with_params(

@@ -118,25 +118,6 @@ pub fn parse_urls_from_sitemap(sitemap: &str) -> anyhow::Result<Vec<String>> {
         .try_collect()
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_decode_url() -> anyhow::Result<()> {
-        let links = parse_urls_from_sitemap(
-            r#"
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
-<url>
-<loc>https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g&#x200B;-684952/p</loc>
-<lastmod>2024-01-12T10:41:25.962Z</lastmod>
-</url>"#,
-        )?;
-        assert_eq!(links[0], "https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g\u{200b}-684952/p");
-        Ok(())
-    }
-}
-
 pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
     let mut total: Vec<String> = vec![];
     let client = build_client();
@@ -146,7 +127,6 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
             let url = url.to_string();
             async move {
                 let client = client;
-                let url = url;
                 let text = get_retry_policy()
                     .retry_if(|| do_request(&client, &url), retry_if_wasnt_not_found)
                     .await?
@@ -165,3 +145,22 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
     }
     Ok(total.into_iter().unique().collect())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_decode_url() -> anyhow::Result<()> {
+        let links = parse_urls_from_sitemap(
+            r#"
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
+<url>
+<loc>https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g&#x200B;-684952/p</loc>
+<lastmod>2024-01-12T10:41:25.962Z</lastmod>
+</url>"#,
+        )?;
+        assert_eq!(links[0], "https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g\u{200b}-684952/p");
+        Ok(())
+    }
+}