mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-25 19:16:19 +00:00
coto: decodear html entities
This commit is contained in:
parent
856dfcb1a4
commit
f7bc0a9db8
3 changed files with 19 additions and 1 deletions
16
scraper-rs/Cargo.lock
generated
16
scraper-rs/Cargo.lock
generated
|
@ -604,6 +604,15 @@ version = "0.3.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
|
||||
|
||||
[[package]]
|
||||
name = "html-escape"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
|
||||
dependencies = [
|
||||
"utf8-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.11"
|
||||
|
@ -1229,6 +1238,7 @@ dependencies = [
|
|||
"deadpool",
|
||||
"deadpool-sqlite",
|
||||
"futures",
|
||||
"html-escape",
|
||||
"itertools",
|
||||
"nanoid",
|
||||
"quick-xml",
|
||||
|
@ -1614,6 +1624,12 @@ dependencies = [
|
|||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-width"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.1"
|
||||
|
|
|
@ -14,6 +14,7 @@ cron = "0.12.0"
|
|||
deadpool = "0.10.0"
|
||||
deadpool-sqlite = "0.7.0"
|
||||
futures = "0.3.30"
|
||||
html-escape = "0.2.13"
|
||||
itertools = "0.12.0"
|
||||
nanoid = "0.4.0"
|
||||
quick-xml = "0.31.0"
|
||||
|
|
|
@ -53,7 +53,8 @@ pub fn parse(url: String, dom: &tl::VDom) -> Result<PrecioPoint, anyhow::Error>
|
|||
.filter_map(|h| h.get(dom.parser()))
|
||||
.find_map(|n| n.as_tag())
|
||||
.map(|t| t.inner_text(dom.parser()))
|
||||
.map(|s| s.trim().to_string());
|
||||
// https://github.com/catdevnull/preciazo/issues/24
|
||||
.map(|s| html_escape::decode_html_entities(s.trim()).to_string());
|
||||
|
||||
let image_url = dom
|
||||
.query_selector(".zoomImage1")
|
||||
|
|
Loading…
Reference in a new issue