From c0c50662843d8b1879713af947e9065c39931899 Mon Sep 17 00:00:00 2001
From: Nulo
Date: Sun, 23 Jun 2024 13:53:04 -0300
Subject: [PATCH] fix: print errors in best selling + refactor

---
 scraper-rs/src/best_selling.rs | 44 ++++++++++++++++++++--------------
 scraper-rs/src/sites/vtex.rs   | 31 ++++++++++++++++--------
 2 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/scraper-rs/src/best_selling.rs b/scraper-rs/src/best_selling.rs
index 23f5cd9..420d087 100644
--- a/scraper-rs/src/best_selling.rs
+++ b/scraper-rs/src/best_selling.rs
@@ -3,8 +3,9 @@ use std::collections::HashMap;
 use crate::{build_client, db::Db, sites::vtex, supermercado::Supermercado};
 use chrono::{DateTime, Utc};
 use clap::ValueEnum;
-use futures::{stream, FutureExt, StreamExt, TryStreamExt};
+use futures::{stream, FutureExt, StreamExt};
 use itertools::Itertools;
+use simple_error::SimpleError;
 use tracing::warn;
 
 #[derive(ValueEnum, Clone, Debug)]
@@ -77,10 +78,6 @@ async fn try_get_best_selling_eans(
     }
 }
 
-async fn noop<T>(t: T) -> anyhow::Result<T> {
-    Ok(t)
-}
-
 fn rank_eans(eans: Vec<Vec<String>>) -> Vec<String> {
     let mut map: HashMap<String, usize> = HashMap::new();
     for eans in eans {
@@ -98,34 +95,45 @@ fn rank_eans(eans: Vec<Vec<String>>) -> Vec<String> {
 pub async fn get_all_best_selling(db: &Db) -> anyhow::Result<Vec<BestSellingRecord>> {
     let client = &build_client();
-
-    stream::iter(Category::value_variants())
+    let records = stream::iter(Category::value_variants())
         .map(|category| {
             stream::iter(Supermercado::value_variants())
                 .map(|supermercado| {
-                    let db = db.clone();
-                    let client = client.clone();
                     tokio::spawn(try_get_best_selling_eans(
-                        client,
-                        db,
+                        client.clone(),
+                        db.clone(),
                         supermercado,
                         category,
                     ))
                 })
                 .buffer_unordered(5)
                 .map(|f| f.unwrap())
-                .try_filter_map(noop)
-                .try_collect::<Vec<Vec<String>>>()
+                .filter_map(|r| async {
+                    match r {
+                        Err(err) => {
+                            tracing::error!("Error getting best selling: {}", err);
+                            None
+                        }
+                        Ok(v) => v,
+                    }
+                })
+                .collect::<Vec<Vec<String>>>()
                 .map(|r| {
-                    r.map(rank_eans).map(|eans| BestSellingRecord {
+                    let ranked = rank_eans(r);
+                    BestSellingRecord {
                         fetched_at: Utc::now(),
                         category: category.clone(),
-                        eans,
-                    })
+                        eans: ranked,
+                    }
                 })
         })
         .buffer_unordered(5)
         .boxed()
-        .try_collect()
-        .await
+        .collect::<Vec<BestSellingRecord>>()
+        .await;
+    if records.len() < 10 {
+        Err(SimpleError::new("Too few BestSellingRecords").into())
+    } else {
+        Ok(records)
+    }
 }
diff --git a/scraper-rs/src/sites/vtex.rs b/scraper-rs/src/sites/vtex.rs
index 7204151..edb36cc 100644
--- a/scraper-rs/src/sites/vtex.rs
+++ b/scraper-rs/src/sites/vtex.rs
@@ -225,19 +225,30 @@ pub async fn get_best_selling_by_category(
         url
     };
     let body = fetch_body(client, url.as_str()).await?;
-    let urls: Vec<String> = serde_json::from_str::<serde_json::Value>(&body)?
+    tracing::debug!("best selling body: {}", body);
+    let json = &serde_json::from_str::<serde_json::Value>(&body)?;
+    if let Some(errors_array) = json.pointer("/errors") {
+        if let Some(error_messages) = errors_array.as_array().map(|a| {
+            a.into_iter()
+                .map(|obj| obj.get("message").and_then(|v| v.as_str()))
+                .collect_vec()
+        }) {
+            bail!("Errors from API: {:?}", error_messages);
+        } else {
+            bail!("Unknown error from API")
+        }
+    }
+    let urls: Vec<String> = json
        .pointer("/data/productSearch/products")
        .and_then(|v| v.as_array())
-        .map(|a| {
-            a.iter()
-                .filter_map(|p| {
-                    p.get("link")
-                        .and_then(|v| v.as_str())
-                        .map(|s| format!("https://{}{}", domain, s))
-                })
-                .collect()
-        })
-        .ok_or(SimpleError::new("failed to get best selling product urls"))?;
+        .ok_or(SimpleError::new("failed to get best selling product urls"))?
+        .iter()
+        .filter_map(|p| {
+            p.get("link")
+                .and_then(|v| v.as_str())
+                .map(|s| format!("https://{}{}", domain, s))
+        })
+        .collect();
     if urls.len() < 2 {
         bail!("Too few best selling");
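
Note on the best_selling.rs refactor: the old try_filter_map/try_collect combination short-circuited on the first Err, so a single failing supermercado/category pair made get_all_best_selling return Err and discard everything already fetched. The new filter_map logs the error and drops only that item; the records.len() < 10 guard presumably keeps a softer version of the old all-or-nothing behavior. A minimal, self-contained sketch of that pattern, assuming the tokio, futures, and anyhow crates, with fetch_eans as a hypothetical stand-in for try_get_best_selling_eans:

use futures::{stream, StreamExt};

// Hypothetical stand-in for try_get_best_selling_eans: one id fails,
// one returns no data, the rest succeed.
async fn fetch_eans(id: u32) -> anyhow::Result<Option<Vec<String>>> {
    match id {
        3 => Err(anyhow::anyhow!("simulated fetch error for id {id}")),
        5 => Ok(None),
        _ => Ok(Some(vec![format!("ean-{id}")])),
    }
}

#[tokio::main]
async fn main() {
    let results: Vec<Vec<String>> = stream::iter(1..=6u32)
        .map(|id| tokio::spawn(fetch_eans(id)))
        .buffer_unordered(5)
        // JoinHandle yields Result<_, JoinError>; unwrap propagates panics,
        // matching the patch's .map(|f| f.unwrap()).
        .map(|join_result| join_result.unwrap())
        .filter_map(|r| async {
            match r {
                // Log and drop the failed item instead of aborting the
                // whole stream, as the old try_filter_map/try_collect did.
                Err(err) => {
                    eprintln!("Error getting best selling: {}", err);
                    None
                }
                // Ok(None) is filtered out too; Ok(Some(v)) yields v.
                Ok(v) => v,
            }
        })
        .collect()
        .await;
    // 6 ids, minus one error and one Ok(None): 4 result sets survive.
    println!("collected {} result sets", results.len());
}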
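Note on the vtex.rs check: GraphQL-style endpoints can return a body whose top-level "errors" array signals failure even when the request itself succeeds, so the patch bails before looking for products. A runnable sketch of the same check against sample payloads, assuming serde_json and anyhow; std's collect::<Vec<_>>() stands in here for itertools' collect_vec, and the payload strings are invented examples:

use anyhow::bail;
use serde_json::Value;

// Same shape as the check added to get_best_selling_by_category: a response
// with a top-level "errors" array becomes an Err instead of being parsed
// for products.
fn check_api_errors(body: &str) -> anyhow::Result<()> {
    let json: Value = serde_json::from_str(body)?;
    if let Some(errors_array) = json.pointer("/errors") {
        if let Some(error_messages) = errors_array.as_array().map(|a| {
            a.iter()
                .map(|obj| obj.get("message").and_then(|v| v.as_str()))
                .collect::<Vec<_>>()
        }) {
            bail!("Errors from API: {:?}", error_messages);
        } else {
            bail!("Unknown error from API");
        }
    }
    Ok(())
}

fn main() {
    // Hypothetical error payload: bails with the collected messages.
    let failing = r#"{"errors":[{"message":"product search quota exceeded"}]}"#;
    println!("{:?}", check_api_errors(failing));

    // Normal payload: no "errors" key, so the check passes.
    let ok = r#"{"data":{"productSearch":{"products":[]}}}"#;
    println!("{:?}", check_api_errors(ok));
}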