From 1118bcf75d4f596392d466c0a69f73b95eaaab5f Mon Sep 17 00:00:00 2001 From: Nulo Date: Tue, 6 Feb 2024 23:20:04 -0300 Subject: [PATCH] bringup farmacity --- db-datos/supermercado.ts | 6 +++ scraper-rs/src/main.rs | 4 ++ scraper-rs/src/sites/farmacity.rs | 50 +++++++++++++++++++++++++ scraper-rs/src/sites/mod.rs | 1 + scraper-rs/src/supermercado.rs | 2 + sitio/src/routes/ean/[ean]/+page.svelte | 1 + 6 files changed, 64 insertions(+) create mode 100644 scraper-rs/src/sites/farmacity.rs diff --git a/db-datos/supermercado.ts b/db-datos/supermercado.ts index ec4c8c3..992e1bf 100644 --- a/db-datos/supermercado.ts +++ b/db-datos/supermercado.ts @@ -3,25 +3,31 @@ export enum Supermercado { Carrefour = "Carrefour", Coto = "Coto", Jumbo = "Jumbo", + Farmacity = "Farmacity", } export const supermercados: Supermercado[] = [ Supermercado.Carrefour, Supermercado.Coto, Supermercado.Dia, Supermercado.Jumbo, + Supermercado.Farmacity, ]; export const hosts: { [host: string]: Supermercado } = { "diaonline.supermercadosdia.com.ar": Supermercado.Dia, "www.carrefour.com.ar": Supermercado.Carrefour, "www.cotodigital3.com.ar": Supermercado.Coto, "www.jumbo.com.ar": Supermercado.Jumbo, + "www.farmacity.com": Supermercado.Farmacity, }; export const hostBySupermercado = Object.fromEntries( Object.entries(hosts).map(([a, b]) => [b, a]) ) as Record<Supermercado, string>; + +// también actualizar en sitio/src/routes/ean/[ean]/+page.svelte export const colorBySupermercado: { [supermercado in Supermercado]: string } = { [Supermercado.Dia]: "#d52b1e", [Supermercado.Carrefour]: "#19549d", [Supermercado.Coto]: "#e20025", [Supermercado.Jumbo]: "#2dc850", + [Supermercado.Farmacity]: "#EF7603", }; diff --git a/scraper-rs/src/main.rs b/scraper-rs/src/main.rs index b5d75b9..0f9efbf 100644 --- a/scraper-rs/src/main.rs +++ b/scraper-rs/src/main.rs @@ -285,6 +285,7 @@ async fn get_urls(supermercado: &Supermercado) -> Result<Vec<String>, anyhow::Er Supermercado::Jumbo => sites::jumbo::get_urls().await?, Supermercado::Carrefour => 
sites::carrefour::get_urls().await?, Supermercado::Coto => sites::coto::get_urls().await?, + Supermercado::Farmacity => sites::farmacity::get_urls().await?, }) } @@ -305,6 +306,9 @@ async fn scrap_url( sites::coto::parse(url, &tl::parse(body, tl::ParserOptions::default())?) } "www.jumbo.com.ar" => sites::jumbo::scrap(client, url, body).await, + "www.farmacity.com" => { + sites::farmacity::parse(url, &tl::parse(body, tl::ParserOptions::default())?) + } s => bail!("Unknown host {}", s), } } diff --git a/scraper-rs/src/sites/farmacity.rs b/scraper-rs/src/sites/farmacity.rs new file mode 100644 index 0000000..5e9cc11 --- /dev/null +++ b/scraper-rs/src/sites/farmacity.rs @@ -0,0 +1,50 @@ +use anyhow::Context; +use simple_error::bail; + +use crate::sites::common; +use crate::PrecioPoint; + +use super::vtex; +use super::vtex::find_product_ld; +use super::vtex::AvailabilityLd; + +pub fn parse(url: String, dom: &tl::VDom) -> Result<PrecioPoint, anyhow::Error> { + let ean = common::get_meta_content(dom, "product:retailer_item_id") + .context("Parsing EAN")? 
+ .to_string(); + let precio_centavos = common::price_from_meta(dom)?; + + let (name, image_url, in_stock) = match find_product_ld(dom) { + Some(pm) => { + let p = pm?; + ( + Some(p.name), + Some(p.image), + Some( + p.offers.offers.first().context("No offer")?.availability + == AvailabilityLd::InStock, + ), + ) + } + None => bail!("No JSON/LD"), + }; + + Ok(PrecioPoint { + ean, + fetched_at: crate::now_sec(), + in_stock, + name, + image_url, + parser_version: 5, + precio_centavos, + url, + }) +} + +pub async fn get_urls() -> anyhow::Result<Vec<String>> { + let urls = vec![ + "https://www.farmacity.com/sitemap/product-0.xml", + "https://www.farmacity.com/sitemap/product-1.xml", + ]; + vtex::get_urls_from_sitemap(urls).await +} diff --git a/scraper-rs/src/sites/mod.rs b/scraper-rs/src/sites/mod.rs index e305f94..d96e38e 100644 --- a/scraper-rs/src/sites/mod.rs +++ b/scraper-rs/src/sites/mod.rs @@ -2,5 +2,6 @@ pub mod carrefour; mod common; pub mod coto; pub mod dia; +pub mod farmacity; pub mod jumbo; pub mod vtex; diff --git a/scraper-rs/src/supermercado.rs b/scraper-rs/src/supermercado.rs index d7cdbc0..19bbecb 100644 --- a/scraper-rs/src/supermercado.rs +++ b/scraper-rs/src/supermercado.rs @@ -6,6 +6,7 @@ pub enum Supermercado { Jumbo, Carrefour, Coto, + Farmacity, } impl Supermercado { pub fn host(&self) -> &'static str { @@ -14,6 +15,7 @@ impl Supermercado { Self::Carrefour => "www.carrefour.com.ar", Self::Coto => "www.cotodigital3.com.ar", Self::Jumbo => "www.jumbo.com.ar", + Self::Farmacity => "www.farmacity.com", } } } diff --git a/sitio/src/routes/ean/[ean]/+page.svelte b/sitio/src/routes/ean/[ean]/+page.svelte index d3ce771..78cc6a8 100644 --- a/sitio/src/routes/ean/[ean]/+page.svelte +++ b/sitio/src/routes/ean/[ean]/+page.svelte @@ -18,6 +18,7 @@ [Supermercado.Carrefour]: "bg-[#19549d] focus:ring-[#19549d]", [Supermercado.Coto]: "bg-[#e20025] focus:ring-[#e20025]", [Supermercado.Jumbo]: "bg-[#2dc850] focus:ring-[#2dc850]", + [Supermercado.Farmacity]: "bg-[#EF7603] 
focus:ring-[#EF7603]", };