From 17c7e5701aca446058c8f1f0bf5c267727093b8a Mon Sep 17 00:00:00 2001 From: Nulo Date: Sat, 6 Jul 2024 08:46:04 -0300 Subject: [PATCH] ignore forbidden errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit some Carrefour pages return forbidden for a few products 2024-07-06T11:32:47.059230Z ERROR scraper::scraper: error=HTTP status client error (403 Forbidden) for url (https://www.carrefour.com.ar/apple-macbook-pro-14--m1-pro-chip-10%E2%80%91core-cpu-16%E2%80%91core-gpu---1tb-ssd---silver--1656560/p) url="https://www.carrefour.com.ar/apple-macbook-pro-14--m1-pro-chip-10‑core-cpu-16‑core-gpu---1tb-ssd---silver--1656560/p" 2024-07-06T11:33:07.881310Z ERROR scraper::scraper: error=HTTP status client error (403 Forbidden) for url (https://www.carrefour.com.ar/apple-macbook-pro-14--m1-pro-chip-10%E2%80%91core-cpu-16%E2%80%91core-gpu---1tb-ssd---space-grey-1657067/p) url="https://www.carrefour.com.ar/apple-macbook-pro-14--m1-pro-chip-10‑core-cpu-16‑core-gpu---1tb-ssd---space-grey-1657067/p" 2024-07-06T11:33:56.483980Z ERROR scraper::scraper: error=HTTP status client error (403 Forbidden) for url (https://www.carrefour.com.ar/apple-macbook-pro-16--m1-pro-chip-10%E2%80%91core-cpu-16%E2%80%91core-gpu---1tb-ssd---silver--1656993/p) url="https://www.carrefour.com.ar/apple-macbook-pro-16--m1-pro-chip-10‑core-cpu-16‑core-gpu---1tb-ssd---silver--1656993/p" 2024-07-06T11:34:16.992339Z ERROR scraper::scraper: error=HTTP status client error (403 Forbidden) for url (https://www.carrefour.com.ar/apple-macbook-pro-16--m1-pro-chip-10%E2%80%91core-cpu-16%E2%80%91core-gpu---1tb-ssd---space-grey-1656986/p) url="https://www.carrefour.com.ar/apple-macbook-pro-16--m1-pro-chip-10‑core-cpu-16‑core-gpu---1tb-ssd---space-grey-1656986/p" 2024-07-06T11:41:00.997947Z ERROR scraper::scraper: error=HTTP status client error (403 Forbidden) for url (https://www.carrefour.com.ar/leche-de-coco-coco-do-vale-reducida-en-calor%E2%88%9A-as-500-cc/p) 
url="https://www.carrefour.com.ar/leche-de-coco-coco-do-vale-reducida-en-calor√-as-500-cc/p" --- rust/src/scraper/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/src/scraper/main.rs b/rust/src/scraper/main.rs index 2979e4c..bb29ca4 100644 --- a/rust/src/scraper/main.rs +++ b/rust/src/scraper/main.rs @@ -163,7 +163,8 @@ pub fn get_parse_retry_policy() -> again::RetryPolicy { } pub fn retry_if_wasnt_not_found(err: &reqwest::Error) -> bool { - !err.status().is_some_and(|s| s == StatusCode::NOT_FOUND) + !err.status() + .is_some_and(|s| s == StatusCode::NOT_FOUND || s == StatusCode::FORBIDDEN) } pub fn anyhow_retry_if_wasnt_not_found(err: &anyhow::Error) -> bool { match err.downcast_ref::<reqwest::Error>() {