mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 03:26:19 +00:00
coto: decodear html entities
This commit is contained in:
parent
856dfcb1a4
commit
f7bc0a9db8
3 changed files with 19 additions and 1 deletions
16
scraper-rs/Cargo.lock
generated
16
scraper-rs/Cargo.lock
generated
|
@ -604,6 +604,15 @@ version = "0.3.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
|
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "html-escape"
|
||||||
|
version = "0.2.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
|
||||||
|
dependencies = [
|
||||||
|
"utf8-width",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http"
|
name = "http"
|
||||||
version = "0.2.11"
|
version = "0.2.11"
|
||||||
|
@ -1229,6 +1238,7 @@ dependencies = [
|
||||||
"deadpool",
|
"deadpool",
|
||||||
"deadpool-sqlite",
|
"deadpool-sqlite",
|
||||||
"futures",
|
"futures",
|
||||||
|
"html-escape",
|
||||||
"itertools",
|
"itertools",
|
||||||
"nanoid",
|
"nanoid",
|
||||||
"quick-xml",
|
"quick-xml",
|
||||||
|
@ -1614,6 +1624,12 @@ dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8-width"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8parse"
|
name = "utf8parse"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
|
|
@ -14,6 +14,7 @@ cron = "0.12.0"
|
||||||
deadpool = "0.10.0"
|
deadpool = "0.10.0"
|
||||||
deadpool-sqlite = "0.7.0"
|
deadpool-sqlite = "0.7.0"
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
|
html-escape = "0.2.13"
|
||||||
itertools = "0.12.0"
|
itertools = "0.12.0"
|
||||||
nanoid = "0.4.0"
|
nanoid = "0.4.0"
|
||||||
quick-xml = "0.31.0"
|
quick-xml = "0.31.0"
|
||||||
|
|
|
@ -53,7 +53,8 @@ pub fn parse(url: String, dom: &tl::VDom) -> Result<PrecioPoint, anyhow::Error>
|
||||||
.filter_map(|h| h.get(dom.parser()))
|
.filter_map(|h| h.get(dom.parser()))
|
||||||
.find_map(|n| n.as_tag())
|
.find_map(|n| n.as_tag())
|
||||||
.map(|t| t.inner_text(dom.parser()))
|
.map(|t| t.inner_text(dom.parser()))
|
||||||
.map(|s| s.trim().to_string());
|
// https://github.com/catdevnull/preciazo/issues/24
|
||||||
|
.map(|s| html_escape::decode_html_entities(s.trim()).to_string());
|
||||||
|
|
||||||
let image_url = dom
|
let image_url = dom
|
||||||
.query_selector(".zoomImage1")
|
.query_selector(".zoomImage1")
|
||||||
|
|
Loading…
Reference in a new issue