mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 11:36:20 +00:00
scraper-rs: simplificar y parsear json ld
This commit is contained in:
parent
348d054b7b
commit
27aee01c1a
7 changed files with 218 additions and 216 deletions
7
scraper-rs/Cargo.lock
generated
7
scraper-rs/Cargo.lock
generated
|
@ -61,6 +61,12 @@ version = "0.2.16"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.79"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-channel"
|
name = "async-channel"
|
||||||
version = "2.1.1"
|
version = "2.1.1"
|
||||||
|
@ -1016,6 +1022,7 @@ name = "scraper-rs"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"again",
|
"again",
|
||||||
|
"anyhow",
|
||||||
"async-channel",
|
"async-channel",
|
||||||
"nanoid",
|
"nanoid",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
|
|
|
@ -7,6 +7,7 @@ edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
again = "0.1.2"
|
again = "0.1.2"
|
||||||
|
anyhow = "1.0.79"
|
||||||
async-channel = "2.1.1"
|
async-channel = "2.1.1"
|
||||||
nanoid = "0.4.0"
|
nanoid = "0.4.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
|
|
|
@ -1,105 +1,15 @@
|
||||||
use again::RetryPolicy;
|
use again::RetryPolicy;
|
||||||
use async_channel::{Receiver, Sender};
|
use async_channel::{Receiver, Sender};
|
||||||
use nanoid::nanoid;
|
use nanoid::nanoid;
|
||||||
use rand::seq::SliceRandom;
|
|
||||||
use reqwest::Url;
|
|
||||||
use rusqlite::Connection;
|
use rusqlite::Connection;
|
||||||
use simple_error::{bail, SimpleError};
|
use simple_error::{bail, SimpleError};
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Cow,
|
|
||||||
env::{self, args},
|
env::{self, args},
|
||||||
fs,
|
fs,
|
||||||
path::PathBuf,
|
path::PathBuf,
|
||||||
time::{Duration, SystemTime, UNIX_EPOCH},
|
time::Duration,
|
||||||
};
|
};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tl::VDom;
|
|
||||||
use tokio::io::{stderr, AsyncWriteExt};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct PrecioPoint {
|
|
||||||
ean: String,
|
|
||||||
// unix
|
|
||||||
fetched_at: u64,
|
|
||||||
precio_centavos: Option<u64>,
|
|
||||||
in_stock: Option<bool>,
|
|
||||||
url: String,
|
|
||||||
parser_version: u16,
|
|
||||||
name: Option<String>,
|
|
||||||
image_url: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// fn main() {
|
|
||||||
// let arg = args().skip(1).next().unwrap();
|
|
||||||
|
|
||||||
// let file_iter = fs::read_dir(arg)
|
|
||||||
// .unwrap()
|
|
||||||
// .filter(|pr| {
|
|
||||||
// if let Ok(p) = pr {
|
|
||||||
// !p.file_name().to_str().unwrap().ends_with(".link")
|
|
||||||
// } else {
|
|
||||||
// false
|
|
||||||
// }
|
|
||||||
// })
|
|
||||||
// .take(1000)
|
|
||||||
// .map(|f| fs::read(f.unwrap().path()).unwrap());
|
|
||||||
|
|
||||||
// let mut i = 0;
|
|
||||||
// for item in file_iter {
|
|
||||||
// i = i + 1;
|
|
||||||
// {
|
|
||||||
// // let mut text: Option<String> = None;
|
|
||||||
// // let mut price_str: Option<String> = None;
|
|
||||||
// // let mut rewriter = HtmlRewriter::new(
|
|
||||||
// // Settings {
|
|
||||||
// // element_content_handlers: vec![
|
|
||||||
// // // Rewrite insecure hyperlinks
|
|
||||||
// // element!("a[href]", |el| {
|
|
||||||
// // let href = el.get_attribute("href").unwrap().replace("http:", "https:");
|
|
||||||
|
|
||||||
// // el.set_attribute("href", &href).unwrap();
|
|
||||||
|
|
||||||
// // Ok(())
|
|
||||||
// // }),
|
|
||||||
// // (
|
|
||||||
// // Cow::Owned("a".parse().unwrap()),
|
|
||||||
// // ElementContentHandlers::default().text(extract_first_text(&mut text)),
|
|
||||||
// // ),
|
|
||||||
// // element!(
|
|
||||||
// // "meta[property=\"product:price:amount\"]",
|
|
||||||
// // extract_first_attr(&mut price_str, "content")
|
|
||||||
// // ),
|
|
||||||
// // ],
|
|
||||||
// // memory_settings: lol_html::MemorySettings {
|
|
||||||
// // preallocated_parsing_buffer_size: 1024 * 16,
|
|
||||||
// // max_allowed_memory_usage: std::usize::MAX,
|
|
||||||
// // },
|
|
||||||
// // ..Settings::default()
|
|
||||||
// // },
|
|
||||||
// // |_: &[u8]| {},
|
|
||||||
// // );
|
|
||||||
|
|
||||||
// // rewriter.write(&item).unwrap();
|
|
||||||
// // rewriter.end().unwrap();
|
|
||||||
// // println!("{:#?}", price_str);
|
|
||||||
|
|
||||||
// // let html = scraper::Html::parse_document(&String::from_utf8(item).unwrap());
|
|
||||||
|
|
||||||
// let html = String::from_utf8(item).unwrap();
|
|
||||||
// let dom = tl::parse(&html, tl::ParserOptions::default()).unwrap();
|
|
||||||
|
|
||||||
// match parse_carrefour("".into(), &dom) {
|
|
||||||
// Ok(point) => {
|
|
||||||
// // println!("{:?}", point);
|
|
||||||
// }
|
|
||||||
// Err(err) => {
|
|
||||||
// // println!("Error {:#?}: {}", err, html);
|
|
||||||
// }
|
|
||||||
// };
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// println!("n={}", i);
|
|
||||||
// }
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
@ -174,7 +84,10 @@ enum FetchError {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip(client))]
|
#[tracing::instrument(skip(client))]
|
||||||
async fn fetch_and_parse(client: &reqwest::Client, url: String) -> Result<PrecioPoint, FetchError> {
|
async fn fetch_and_parse(
|
||||||
|
client: &reqwest::Client,
|
||||||
|
url: String,
|
||||||
|
) -> Result<PrecioPoint, anyhow::Error> {
|
||||||
let policy = RetryPolicy::exponential(Duration::from_millis(300))
|
let policy = RetryPolicy::exponential(Duration::from_millis(300))
|
||||||
.with_max_retries(10)
|
.with_max_retries(10)
|
||||||
.with_jitter(true);
|
.with_jitter(true);
|
||||||
|
@ -187,13 +100,13 @@ async fn fetch_and_parse(client: &reqwest::Client, url: String) -> Result<Precio
|
||||||
.await
|
.await
|
||||||
.map_err(FetchError::Http)?;
|
.map_err(FetchError::Http)?;
|
||||||
if !response.status().is_success() {
|
if !response.status().is_success() {
|
||||||
return Err(FetchError::HttpStatus(response.status()));
|
bail!(FetchError::HttpStatus(response.status()));
|
||||||
}
|
}
|
||||||
let body = response.text().await.map_err(FetchError::Http)?;
|
let body = response.text().await.map_err(FetchError::Http)?;
|
||||||
|
|
||||||
let maybe_point = {
|
let maybe_point = {
|
||||||
let dom = tl::parse(&body, tl::ParserOptions::default()).map_err(FetchError::Tl)?;
|
let dom = tl::parse(&body, tl::ParserOptions::default()).map_err(FetchError::Tl)?;
|
||||||
parse_carrefour(url, &dom)
|
sites::carrefour::parse(url, &dom)
|
||||||
};
|
};
|
||||||
|
|
||||||
let point = match maybe_point {
|
let point = match maybe_point {
|
||||||
|
@ -211,120 +124,32 @@ async fn fetch_and_parse(client: &reqwest::Client, url: String) -> Result<Precio
|
||||||
Ok(point)
|
Ok(point)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_carrefour(url: String, dom: &tl::VDom) -> Result<PrecioPoint, SimpleError> {
|
async fn db_writer(rx: Receiver<PrecioPoint>) {
|
||||||
let precio_centavos = {
|
// let conn = Connection::open("../scraper/sqlite.db").unwrap();
|
||||||
get_meta_content(dom, "product:price:amount")?
|
// let mut stmt = conn.prepare("SELECT id, name, data FROM person")?;
|
||||||
.map(|s| {
|
let mut n = 0;
|
||||||
s.parse::<f64>()
|
while let Ok(res) = rx.recv().await {
|
||||||
.map_err(|_| SimpleError::new("Failed to parse number"))
|
n += 1;
|
||||||
})
|
println!("{}", n);
|
||||||
.transpose()
|
println!("{:?}", res)
|
||||||
.map(|f| f.map(|f| (f * 100.0) as u64))
|
|
||||||
}?;
|
|
||||||
|
|
||||||
let in_stock_meta = get_meta_content(dom, "product:availability")?.map(|s| s.into_owned());
|
|
||||||
let in_stock = match in_stock_meta {
|
|
||||||
Some(s) => match s.as_ref() {
|
|
||||||
"oos" => Some(false),
|
|
||||||
"instock" => Some(true),
|
|
||||||
_ => return Err(SimpleError::new("Not a valid product:availability")),
|
|
||||||
},
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let ean = {
|
|
||||||
let json = &parse_script_json(dom, "__STATE__")?;
|
|
||||||
let state = json
|
|
||||||
.as_object()
|
|
||||||
.ok_or(SimpleError::new("Seed state not an object"))?;
|
|
||||||
if state.is_empty() {
|
|
||||||
bail!("Seed state is an empty object")
|
|
||||||
}
|
|
||||||
let (_, product_json) = state
|
|
||||||
.into_iter()
|
|
||||||
.find(|(key, val)| {
|
|
||||||
key.starts_with("Product:")
|
|
||||||
&& val
|
|
||||||
.as_object()
|
|
||||||
.and_then(|val| val.get("__typename"))
|
|
||||||
.map_or(false, |typename| typename == "Product")
|
|
||||||
})
|
|
||||||
.ok_or(SimpleError::new("No product in seed state"))?;
|
|
||||||
let cache_id = product_json
|
|
||||||
.get("cacheId")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.ok_or(SimpleError::new("No cacheId in seed state"))?;
|
|
||||||
let (_, product_sku_json) = state
|
|
||||||
.iter()
|
|
||||||
.find(|(key, val)| {
|
|
||||||
key.starts_with(&format!("Product:{}", cache_id))
|
|
||||||
&& val.as_object().map_or(false, |obj| {
|
|
||||||
obj.get("__typename")
|
|
||||||
.map_or(false, |typename| typename == "SKU")
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.ok_or(SimpleError::new("No Product:cacheId* found"))?;
|
|
||||||
product_sku_json
|
|
||||||
.get("ean")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.ok_or(SimpleError::new("No product SKU in seed state"))?
|
|
||||||
.to_string()
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(PrecioPoint {
|
|
||||||
ean,
|
|
||||||
fetched_at: now_sec(),
|
|
||||||
in_stock,
|
|
||||||
name: None,
|
|
||||||
image_url: None,
|
|
||||||
parser_version: 5,
|
|
||||||
precio_centavos,
|
|
||||||
url,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_meta_content<'a>(
|
|
||||||
dom: &'a VDom<'a>,
|
|
||||||
prop: &str,
|
|
||||||
) -> Result<Option<Cow<'a, str>>, SimpleError> {
|
|
||||||
let tag = &dom
|
|
||||||
.query_selector(&format!("meta[property=\"{}\"]", prop))
|
|
||||||
.and_then(|mut iter| iter.next())
|
|
||||||
.and_then(|h| h.get(dom.parser()))
|
|
||||||
.and_then(|n| n.as_tag());
|
|
||||||
match tag {
|
|
||||||
Some(tag) => Ok(Some(
|
|
||||||
tag.attributes()
|
|
||||||
.get("content")
|
|
||||||
.flatten()
|
|
||||||
.ok_or(SimpleError::new("Failed to get content attr"))?
|
|
||||||
.as_utf8_str(),
|
|
||||||
)),
|
|
||||||
None => Ok(None),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_script_json(dom: &VDom, varname: &str) -> Result<serde_json::Value, SimpleError> {
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
let parser = dom.parser();
|
|
||||||
let inner_html = &dom
|
mod sites;
|
||||||
.query_selector(&format!(
|
|
||||||
"template[data-type=\"json\"][data-varname=\"{}\"]",
|
#[derive(Debug)]
|
||||||
varname
|
struct PrecioPoint {
|
||||||
))
|
ean: String,
|
||||||
.and_then(|mut iter| iter.next())
|
// unix
|
||||||
.and_then(|h| h.get(parser))
|
fetched_at: u64,
|
||||||
.and_then(|n| n.as_tag())
|
precio_centavos: Option<u64>,
|
||||||
.and_then(|t| {
|
in_stock: Option<bool>,
|
||||||
t.children()
|
url: String,
|
||||||
.all(parser)
|
parser_version: u16,
|
||||||
.iter()
|
name: Option<String>,
|
||||||
.find(|n| n.as_tag().is_some())
|
image_url: Option<String>,
|
||||||
})
|
|
||||||
.ok_or(SimpleError::new("Failed to get script tag"))?
|
|
||||||
.inner_html(parser);
|
|
||||||
inner_html
|
|
||||||
.parse()
|
|
||||||
.map_err(|_| SimpleError::new("Couldn't parse JSON in script"))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn now_sec() -> u64 {
|
fn now_sec() -> u64 {
|
||||||
|
@ -334,14 +159,3 @@ fn now_sec() -> u64 {
|
||||||
.expect("Time went backwards");
|
.expect("Time went backwards");
|
||||||
since_the_epoch.as_secs()
|
since_the_epoch.as_secs()
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn db_writer(rx: Receiver<PrecioPoint>) {
|
|
||||||
// let conn = Connection::open("../scraper/sqlite.db").unwrap();
|
|
||||||
// let mut stmt = conn.prepare("SELECT id, name, data FROM person")?;
|
|
||||||
let mut n = 0;
|
|
||||||
while let Ok(res) = rx.recv().await {
|
|
||||||
n += 1;
|
|
||||||
println!("{}", n);
|
|
||||||
// println!("{:?}", res)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
77
scraper-rs/src/sites/carrefour.rs
Normal file
77
scraper-rs/src/sites/carrefour.rs
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
use simple_error::bail;
|
||||||
|
use simple_error::SimpleError;
|
||||||
|
|
||||||
|
use crate::sites::common;
|
||||||
|
use crate::sites::vtex;
|
||||||
|
use crate::PrecioPoint;
|
||||||
|
|
||||||
|
use super::vtex::find_product_ld;
|
||||||
|
|
||||||
|
pub fn parse(url: String, dom: &tl::VDom) -> Result<PrecioPoint, anyhow::Error> {
|
||||||
|
let precio_centavos = common::get_meta_content(dom, "product:price:amount")
|
||||||
|
.map(|s| s.parse::<f64>().map(|f| (f * 100.0) as u64))
|
||||||
|
.transpose()?;
|
||||||
|
|
||||||
|
let in_stock = match common::get_meta_content(dom, "product:availability") {
|
||||||
|
Some(s) => match s.as_ref() {
|
||||||
|
"oos" => false,
|
||||||
|
"instock" => true,
|
||||||
|
_ => bail!("Not a valid product:availability"),
|
||||||
|
},
|
||||||
|
None => bail!("No product:availability in carrefour"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let ean = {
|
||||||
|
let json = &vtex::parse_script_json(dom, "__STATE__")?;
|
||||||
|
let state = json
|
||||||
|
.as_object()
|
||||||
|
.ok_or(SimpleError::new("Seed state not an object"))?;
|
||||||
|
if state.is_empty() {
|
||||||
|
bail!("Seed state is an empty object")
|
||||||
|
}
|
||||||
|
let (_, product_json) = state
|
||||||
|
.iter()
|
||||||
|
.find(|(key, val)| {
|
||||||
|
key.starts_with("Product:") && val.get("__typename").is_some_and(|t| t == "Product")
|
||||||
|
})
|
||||||
|
.ok_or(SimpleError::new("No product in seed state"))?;
|
||||||
|
let cache_id = product_json
|
||||||
|
.get("cacheId")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.ok_or(SimpleError::new("No cacheId in seed state"))?;
|
||||||
|
let (_, product_sku_json) = state
|
||||||
|
.iter()
|
||||||
|
.find(|(key, val)| {
|
||||||
|
key.starts_with(&format!("Product:{}", cache_id))
|
||||||
|
&& val.get("__typename").is_some_and(|t| t == "SKU")
|
||||||
|
})
|
||||||
|
.ok_or(SimpleError::new("No Product:cacheId* found"))?;
|
||||||
|
product_sku_json
|
||||||
|
.get("ean")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.ok_or(SimpleError::new("No product SKU in seed state"))?
|
||||||
|
.to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
let (name, image_url) = match find_product_ld(dom) {
|
||||||
|
Some(pm) => {
|
||||||
|
let p = pm?;
|
||||||
|
(Some(p.name), Some(p.image))
|
||||||
|
}
|
||||||
|
None => match in_stock {
|
||||||
|
true => bail!("No JSONLD product in in stock product"),
|
||||||
|
false => (None, None),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(PrecioPoint {
|
||||||
|
ean,
|
||||||
|
fetched_at: crate::now_sec(),
|
||||||
|
in_stock: Some(in_stock),
|
||||||
|
name,
|
||||||
|
image_url,
|
||||||
|
parser_version: 5,
|
||||||
|
precio_centavos,
|
||||||
|
url,
|
||||||
|
})
|
||||||
|
}
|
12
scraper-rs/src/sites/common.rs
Normal file
12
scraper-rs/src/sites/common.rs
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use tl::VDom;
|
||||||
|
|
||||||
|
pub fn get_meta_content<'a>(dom: &'a VDom<'a>, prop: &str) -> Option<Cow<'a, str>> {
|
||||||
|
dom.query_selector(&format!("meta[property=\"{}\"]", prop))
|
||||||
|
.and_then(|mut iter| iter.next())
|
||||||
|
.and_then(|h| h.get(dom.parser()))
|
||||||
|
.and_then(|n| n.as_tag())
|
||||||
|
.and_then(|tag| tag.attributes().get("content").flatten())
|
||||||
|
.map(|s| s.as_utf8_str())
|
||||||
|
}
|
3
scraper-rs/src/sites/mod.rs
Normal file
3
scraper-rs/src/sites/mod.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
pub mod carrefour;
|
||||||
|
mod common;
|
||||||
|
mod vtex;
|
88
scraper-rs/src/sites/vtex.rs
Normal file
88
scraper-rs/src/sites/vtex.rs
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
use anyhow::Context;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use simple_error::SimpleError;
|
||||||
|
use tl::VDom;
|
||||||
|
|
||||||
|
pub fn parse_script_json(dom: &VDom, varname: &str) -> Result<serde_json::Value, anyhow::Error> {
|
||||||
|
let inner_html = &dom
|
||||||
|
.query_selector("template[data-type=\"json\"]")
|
||||||
|
.unwrap()
|
||||||
|
.filter_map(|h| h.get(dom.parser()).and_then(|n| n.as_tag()))
|
||||||
|
.find(|t| {
|
||||||
|
t.attributes()
|
||||||
|
.get("data-varname")
|
||||||
|
.flatten()
|
||||||
|
.map_or(false, |v| v.as_utf8_str() == varname)
|
||||||
|
})
|
||||||
|
.ok_or(SimpleError::new("Failed to get template tag"))?
|
||||||
|
.query_selector(dom.parser(), "script")
|
||||||
|
.and_then(|mut it| it.next())
|
||||||
|
.and_then(|h| h.get(dom.parser()))
|
||||||
|
.ok_or(SimpleError::new("Failed to get script tag"))?
|
||||||
|
.inner_html(dom.parser());
|
||||||
|
inner_html.parse().context("Couldn't parse JSON in script")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_json_lds<'a>(
|
||||||
|
dom: &'a VDom,
|
||||||
|
) -> impl Iterator<Item = std::result::Result<serde_json::Value, serde_json::Error>> + 'a {
|
||||||
|
dom.query_selector("script[type=\"application/ld+json\"]")
|
||||||
|
.unwrap()
|
||||||
|
.filter_map(|h| h.get(dom.parser()))
|
||||||
|
.filter_map(|n| n.as_tag())
|
||||||
|
.map(|t| serde_json::from_str(&t.inner_html(dom.parser())))
|
||||||
|
}
|
||||||
|
#[tracing::instrument]
|
||||||
|
pub fn find_json_ld(dom: &VDom, typ: &str) -> Option<Result<Ld, serde_json::Error>> {
|
||||||
|
get_json_lds(dom)
|
||||||
|
.filter_map(|v| v.ok())
|
||||||
|
.find(|v| v.get("@type").is_some_and(|t| t == typ))
|
||||||
|
.map(serde_json::from_value)
|
||||||
|
}
|
||||||
|
pub fn find_product_ld(dom: &VDom) -> Option<Result<ProductLd, serde_json::Error>> {
|
||||||
|
find_json_ld(dom, "Product").map(|l| {
|
||||||
|
l.map(|l| match l {
|
||||||
|
Ld::Product(p) => p,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(tag = "@type")]
|
||||||
|
pub enum Ld {
|
||||||
|
Product(ProductLd),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ProductLd {
|
||||||
|
pub name: String,
|
||||||
|
pub image: String,
|
||||||
|
pub sku: Option<String>,
|
||||||
|
pub offers: OffersLd,
|
||||||
|
}
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct OffersLd {
|
||||||
|
pub offers: Vec<OfferLd>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct OfferLd {
|
||||||
|
#[serde(rename = "@type")]
|
||||||
|
_type: OfferTypeLd,
|
||||||
|
pub price: f64,
|
||||||
|
pub price_currency: String,
|
||||||
|
pub availability: AvailabilityLd,
|
||||||
|
}
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub enum OfferTypeLd {
|
||||||
|
Offer,
|
||||||
|
}
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub enum AvailabilityLd {
|
||||||
|
#[serde(rename = "http://schema.org/InStock")]
|
||||||
|
InStock,
|
||||||
|
#[serde(rename = "http://schema.org/OutOfStock")]
|
||||||
|
OutOfStock,
|
||||||
|
}
|
Loading…
Reference in a new issue