Mirror of https://github.com/catdevnull/preciazo.git, synced 2024-11-29 13:06:19 +00:00
seguir al comandante clippy ("follow commander clippy")
This commit is contained in:
parent cce34571f1
commit c687ea1484

2 changed files with 27 additions and 29 deletions
@@ -23,7 +23,7 @@ enum Supermercado {
     Coto,
 }
 impl Supermercado {
-    fn host(self: &Self) -> &'static str {
+    fn host(&self) -> &'static str {
         match self {
             Self::Dia => "diaonline.supermercadosdia.com.ar",
             Self::Carrefour => "www.carrefour.com.ar",
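
The signature rewrite above, and the matching ones on download_supermercado, get_and_save_urls and inform further down, is the fix clippy suggests for its needless_arbitrary_self_type lint: self: &Self and self: Self are just the long-hand spellings of &self and self. A minimal self-contained sketch of the same change, with the enum trimmed to two variants for brevity:

    enum Supermercado {
        Dia,
        Carrefour,
    }

    impl Supermercado {
        // Before: fn host(self: &Self) -> &'static str
        // clippy::needless_arbitrary_self_type asks for the shorthand receiver instead.
        fn host(&self) -> &'static str {
            match self {
                Self::Dia => "diaonline.supermercadosdia.com.ar",
                Self::Carrefour => "www.carrefour.com.ar",
            }
        }
    }

    fn main() {
        println!("{}", Supermercado::Dia.host());
    }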
@@ -128,8 +128,7 @@ async fn fetch_list(pool: &Pool, links: Vec<String>) -> Counters {
 fn connect_db() -> Pool {
     let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
     let cfg = deadpool_sqlite::Config::new(db_path);
-    let pool = cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap();
-    pool
+    cfg.create_pool(deadpool_sqlite::Runtime::Tokio1).unwrap()
 }

 fn build_client() -> reqwest::Client {
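
Dropping the pool binding above is the clippy::let_and_return pattern: binding a value to a local only to return it on the next line is equivalent to returning the expression directly. A standalone sketch of the same idea, using a plain String instead of the deadpool_sqlite pool so it compiles on its own (the function is illustrative, not from the repo):

    // Flagged by clippy::let_and_return:
    //     let greeting = format!("hola {}", name);
    //     greeting
    fn make_greeting(name: &str) -> String {
        format!("hola {}", name)
    }

    fn main() {
        println!("{}", make_greeting("clippy"));
    }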
@@ -283,13 +282,13 @@ async fn scrap_url(
     let url_p = Url::parse(&url).unwrap();
     match url_p.host_str().unwrap() {
         "www.carrefour.com.ar" => {
-            sites::carrefour::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::carrefour::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "diaonline.supermercadosdia.com.ar" => {
-            sites::dia::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::dia::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "www.cotodigital3.com.ar" => {
-            sites::coto::parse(url, &tl::parse(&body, tl::ParserOptions::default())?)
+            sites::coto::parse(url, &tl::parse(body, tl::ParserOptions::default())?)
         }
         "www.jumbo.com.ar" => sites::jumbo::scrap(client, url, body).await,
         s => bail!("Unknown host {}", s),
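
Removing the & in tl::parse(&body, ...) addresses clippy's needless_borrow lint: assuming body already arrives here as a &str, borrowing it again only creates a &&str that the compiler immediately dereferences. The same pattern in isolation (function and variable names are illustrative):

    fn parse_document(html: &str) -> usize {
        html.len()
    }

    fn main() {
        let body: &str = "<html></html>";
        // clippy::needless_borrow would flag parse_document(&body):
        // &body is a &&str that gets auto-dereferenced right away.
        let length = parse_document(body);
        println!("{}", length);
    }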
@@ -308,7 +307,7 @@ struct Auto {
     telegram: Option<AutoTelegram>,
 }
 impl Auto {
-    async fn download_supermercado(self: Self, supermercado: Supermercado) -> anyhow::Result<()> {
+    async fn download_supermercado(self, supermercado: Supermercado) -> anyhow::Result<()> {
         {
             let t0 = now_sec();
             self.get_and_save_urls(&supermercado).await?;
@@ -360,7 +359,7 @@ impl Auto {
         Ok(())
     }

-    async fn get_and_save_urls(self: &Self, supermercado: &Supermercado) -> anyhow::Result<()> {
+    async fn get_and_save_urls(&self, supermercado: &Supermercado) -> anyhow::Result<()> {
         let urls = get_urls(supermercado).await?;
         self.pool
             .get()
@@ -386,7 +385,7 @@ impl Auto {
         Ok(())
     }

-    async fn inform(self: &Self, msg: &str) {
+    async fn inform(&self, msg: &str) {
         println!("{}", msg);
         if let Some(telegram) = &self.telegram {
             let u = Url::parse_with_params(
@@ -118,25 +118,6 @@ pub fn parse_urls_from_sitemap(sitemap: &str) -> anyhow::Result<Vec<String>> {
         .try_collect()
 }

-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_decode_url() -> anyhow::Result<()> {
-        let links = parse_urls_from_sitemap(
-            r#"
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
-<url>
-<loc>https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g​-684952/p</loc>
-<lastmod>2024-01-12T10:41:25.962Z</lastmod>
-</url>"#,
-        )?;
-        assert_eq!(links[0], "https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g\u{200b}-684952/p");
-        Ok(())
-    }
-}
-
 pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
     let mut total: Vec<String> = vec![];
     let client = build_client();
@@ -146,7 +127,6 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
             let url = url.to_string();
             async move {
                 let client = client;
-                let url = url;
                 let text = get_retry_policy()
                     .retry_if(|| do_request(&client, &url), retry_if_wasnt_not_found)
                     .await?
@@ -165,3 +145,22 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
     }
     Ok(total.into_iter().unique().collect())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_decode_url() -> anyhow::Result<()> {
+        let links = parse_urls_from_sitemap(
+            r#"
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
+<url>
+<loc>https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g​-684952/p</loc>
+<lastmod>2024-01-12T10:41:25.962Z</lastmod>
+</url>"#,
+        )?;
+        assert_eq!(links[0], "https://www.carrefour.com.ar/postre-danette-mousse-dulce-de-leche-80-g\u{200b}-684952/p");
+        Ok(())
+    }
+}
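
The second changed file is mostly a move: the #[cfg(test)] mod tests block, zero-width-space URL test included, goes from the middle of the file to the very end, and the no-op rebinding let url = url; inside the async move block is dropped (the kind of thing clippy's redundant_locals lint reports). Keeping the test module as the last item is what clippy's items_after_test_module lint asks for; a minimal sketch of that layout, with illustrative names rather than the repo's:

    fn normalize(url: &str) -> String {
        url.trim().to_string()
    }

    // Last item in the file: nothing follows the test module,
    // so clippy::items_after_test_module stays quiet.
    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn trims_whitespace() {
            assert_eq!(normalize(" https://example.com "), "https://example.com");
        }
    }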