diff --git a/scraper-rs/src/main.rs b/scraper-rs/src/main.rs index 415dabd..411be05 100644 --- a/scraper-rs/src/main.rs +++ b/scraper-rs/src/main.rs @@ -178,13 +178,17 @@ pub fn get_retry_policy() -> again::RetryPolicy { .with_jitter(true) } +pub fn retry_if_wasnt_not_found(err: &reqwest::Error) -> bool { + !err.status().is_some_and(|s| s == StatusCode::NOT_FOUND) +} + #[tracing::instrument(skip(client))] async fn fetch_and_parse( client: &reqwest::Client, url: String, ) -> Result { let body = get_retry_policy() - .retry(|| do_request(client, &url)) + .retry_if(|| do_request(client, &url), retry_if_wasnt_not_found) .await? .text() .await diff --git a/scraper-rs/src/sites/coto.rs b/scraper-rs/src/sites/coto.rs index f6a9e04..c3f19d8 100644 --- a/scraper-rs/src/sites/coto.rs +++ b/scraper-rs/src/sites/coto.rs @@ -3,7 +3,7 @@ use futures::{stream, StreamExt, TryFutureExt, TryStreamExt}; use itertools::Itertools; use reqwest::Url; -use crate::{build_client, do_request, get_retry_policy, PrecioPoint}; +use crate::{build_client, do_request, get_retry_policy, retry_if_wasnt_not_found, PrecioPoint}; pub fn parse(url: String, dom: &tl::VDom) -> Result { let ean = dom @@ -90,7 +90,10 @@ pub async fn get_urls() -> anyhow::Result> { let client = &client; async move { let text = get_retry_policy() - .retry(|| do_request(client, u.as_str()).and_then(|r| r.text())) + .retry_if( + || do_request(client, u.as_str()).and_then(|r| r.text()), + retry_if_wasnt_not_found, + ) .await?; let dom = tl::parse(&text, tl::ParserOptions::default())?; diff --git a/scraper-rs/src/sites/vtex.rs b/scraper-rs/src/sites/vtex.rs index fc50304..7c9becc 100644 --- a/scraper-rs/src/sites/vtex.rs +++ b/scraper-rs/src/sites/vtex.rs @@ -5,7 +5,7 @@ use serde::Deserialize; use simple_error::SimpleError; use tl::VDom; -use crate::{build_client, do_request, get_retry_policy}; +use crate::{build_client, do_request, get_retry_policy, retry_if_wasnt_not_found}; use super::common; @@ -128,7 +128,7 @@ pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result