mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 22:26:19 +00:00
xoxo
This commit is contained in:
parent
405502877c
commit
3cf723cc3d
3 changed files with 617 additions and 341 deletions
684
warcificator/Cargo.lock
generated
684
warcificator/Cargo.lock
generated
|
@ -18,10 +18,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "adler32"
|
||||
version = "1.2.0"
|
||||
name = "ahash"
|
||||
version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
|
||||
checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
|
@ -39,25 +46,10 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "android-tzdata"
|
||||
version = "0.1.1"
|
||||
name = "allocator-api2"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
||||
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
|
@ -119,6 +111,12 @@ version = "1.3.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "3.4.0"
|
||||
|
@ -146,6 +144,12 @@ version = "3.14.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.5.0"
|
||||
|
@ -167,20 +171,6 @@ version = "1.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
|
||||
dependencies = [
|
||||
"android-tzdata",
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"wasm-bindgen",
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "concurrent-queue"
|
||||
version = "2.4.0"
|
||||
|
@ -224,6 +214,61 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser"
|
||||
version = "0.31.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
|
||||
dependencies = [
|
||||
"cssparser-macros",
|
||||
"dtoa-short",
|
||||
"itoa",
|
||||
"phf 0.11.2",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cssparser-macros"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_more"
|
||||
version = "0.99.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa-short"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74"
|
||||
dependencies = [
|
||||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ego-tree"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.33"
|
||||
|
@ -260,6 +305,18 @@ dependencies = [
|
|||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-streaming-iterator"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.28"
|
||||
|
@ -285,6 +342,16 @@ dependencies = [
|
|||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||
dependencies = [
|
||||
"mac",
|
||||
"new_debug_unreachable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.30"
|
||||
|
@ -324,6 +391,24 @@ dependencies = [
|
|||
"pin-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.11"
|
||||
|
@ -365,6 +450,19 @@ name = "hashbrown"
|
|||
version = "0.14.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"allocator-api2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashlink"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
|
@ -372,6 +470,20 @@ version = "0.3.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.11"
|
||||
|
@ -444,29 +556,6 @@ dependencies = [
|
|||
"tokio-rustls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539"
|
||||
dependencies = [
|
||||
"android_system_properties",
|
||||
"core-foundation-sys",
|
||||
"iana-time-zone-haiku",
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
"windows-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone-haiku"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.5.0"
|
||||
|
@ -508,19 +597,6 @@ dependencies = [
|
|||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lexical-core"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"ryu",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.151"
|
||||
|
@ -528,23 +604,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
|
||||
|
||||
[[package]]
|
||||
name = "libflate"
|
||||
version = "1.4.0"
|
||||
name = "libsqlite3-sys"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ff4ae71b685bbad2f2f391fe74f6b7659a34871c08b210fdc039e43bee07d18"
|
||||
checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716"
|
||||
dependencies = [
|
||||
"adler32",
|
||||
"crc32fast",
|
||||
"libflate_lz77",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libflate_lz77"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a52d3a8bfc85f250440e4424db7d857e241a3aebbbe301f3eb606ab15c39acbf"
|
||||
dependencies = [
|
||||
"rle-decode-fast",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -563,6 +629,26 @@ version = "0.4.20"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
||||
|
||||
[[package]]
|
||||
name = "mac"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf 0.10.1",
|
||||
"phf_codegen",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.1"
|
||||
|
@ -596,24 +682,10 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "5.1.3"
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b"
|
||||
dependencies = [
|
||||
"lexical-core",
|
||||
"memchr",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
|
@ -666,7 +738,7 @@ dependencies = [
|
|||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -675,6 +747,86 @@ version = "2.3.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
|
||||
dependencies = [
|
||||
"phf_macros",
|
||||
"phf_shared 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
|
||||
dependencies = [
|
||||
"phf_shared 0.10.0",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
|
||||
dependencies = [
|
||||
"phf_shared 0.11.2",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_macros"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
|
||||
dependencies = [
|
||||
"phf_generator 0.11.2",
|
||||
"phf_shared 0.11.2",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.13"
|
||||
|
@ -687,6 +839,24 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "precomputed-hash"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.71"
|
||||
|
@ -705,13 +875,43 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -771,10 +971,18 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "rle-decode-fast"
|
||||
version = "1.0.3"
|
||||
name = "rusqlite"
|
||||
version = "0.30.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
|
||||
checksum = "a78046161564f5e7cd9008aff3b2990b3850dc8e0349119b98e8f251e099f24d"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"fallible-iterator",
|
||||
"fallible-streaming-iterator",
|
||||
"hashlink",
|
||||
"libsqlite3-sys",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
|
@ -825,6 +1033,22 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scraper"
|
||||
version = "0.18.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "585480e3719b311b78a573db1c9d9c4c1f8010c2dee4cc59c2efe58ea4dbc3e1"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"cssparser",
|
||||
"ego-tree",
|
||||
"getopts",
|
||||
"html5ever",
|
||||
"once_cell",
|
||||
"selectors",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sct"
|
||||
version = "0.7.1"
|
||||
|
@ -835,6 +1059,25 @@ dependencies = [
|
|||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "selectors"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"cssparser",
|
||||
"derive_more",
|
||||
"fxhash",
|
||||
"log",
|
||||
"new_debug_unreachable",
|
||||
"phf 0.10.1",
|
||||
"phf_codegen",
|
||||
"precomputed-hash",
|
||||
"servo_arc",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.193"
|
||||
|
@ -852,14 +1095,14 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.108"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
|
||||
checksum = "cb0652c533506ad7a2e353cce269330d6afd8bdfb6d75e0ace5b35aacbd7b9e9"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -878,6 +1121,15 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "servo_arc"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.1"
|
||||
|
@ -887,6 +1139,12 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.9"
|
||||
|
@ -919,10 +1177,47 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "string_cache"
|
||||
version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
|
||||
dependencies = [
|
||||
"new_debug_unreachable",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"phf_shared 0.10.0",
|
||||
"precomputed-hash",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "string_cache_codegen"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
|
||||
dependencies = [
|
||||
"phf_generator 0.10.0",
|
||||
"phf_shared 0.10.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
|
@ -941,7 +1236,7 @@ version = "0.5.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bitflags 1.3.2",
|
||||
"core-foundation",
|
||||
"system-configuration-sys",
|
||||
]
|
||||
|
@ -956,6 +1251,17 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tendril"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||
dependencies = [
|
||||
"futf",
|
||||
"mac",
|
||||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinyvec"
|
||||
version = "1.6.0"
|
||||
|
@ -998,7 +1304,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1077,6 +1383,12 @@ dependencies = [
|
|||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
|
@ -1095,13 +1407,16 @@ dependencies = [
|
|||
]
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "0.8.2"
|
||||
name = "utf-8"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
|
@ -1118,28 +1433,17 @@ dependencies = [
|
|||
"try-lock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "warc"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e44585619c04f9a4bcfdfbbbefff9e7c4e277a693424162b30c88aaf3abd785e"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"libflate",
|
||||
"nom",
|
||||
"url",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "warcificator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-channel",
|
||||
"http",
|
||||
"reqwest",
|
||||
"rusqlite",
|
||||
"scraper",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"warc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1169,7 +1473,7 @@ dependencies = [
|
|||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.43",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
|
@ -1203,7 +1507,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.43",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
@ -1230,22 +1534,13 @@ version = "0.25.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1254,28 +1549,13 @@ version = "0.48.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.48.5",
|
||||
"windows_aarch64_msvc 0.48.5",
|
||||
"windows_i686_gnu 0.48.5",
|
||||
"windows_i686_msvc 0.48.5",
|
||||
"windows_x86_64_gnu 0.48.5",
|
||||
"windows_x86_64_gnullvm 0.48.5",
|
||||
"windows_x86_64_msvc 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.52.0",
|
||||
"windows_aarch64_msvc 0.52.0",
|
||||
"windows_i686_gnu 0.52.0",
|
||||
"windows_i686_msvc 0.52.0",
|
||||
"windows_x86_64_gnu 0.52.0",
|
||||
"windows_x86_64_gnullvm 0.52.0",
|
||||
"windows_x86_64_msvc 0.52.0",
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1284,84 +1564,42 @@ version = "0.48.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.50.0"
|
||||
|
@ -1371,3 +1609,23 @@ dependencies = [
|
|||
"cfg-if",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.43",
|
||||
]
|
||||
|
|
|
@ -7,11 +7,13 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
async-channel = "2.1.1"
|
||||
http = "0.2.11"
|
||||
reqwest = { version = "0.11.23", default-features = false, features = [
|
||||
"rustls-tls",
|
||||
"gzip",
|
||||
"brotli",
|
||||
] }
|
||||
rusqlite = "0.30.0"
|
||||
scraper = "0.18.1"
|
||||
serde = { version = "1.0.193", features = ["derive"] }
|
||||
serde_json = "1.0.109"
|
||||
tokio = { version = "1.35.1", features = ["full"] }
|
||||
warc = "0.3.1"
|
||||
|
|
|
@ -1,24 +1,30 @@
|
|||
use async_channel::{Receiver, Sender};
|
||||
use rusqlite::Connection;
|
||||
use scraper::{Element, Html, Selector};
|
||||
use std::{
|
||||
env::args,
|
||||
fs,
|
||||
net::SocketAddr,
|
||||
process::{Command, Stdio},
|
||||
time::{SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
use tokio::io::{stderr, AsyncWriteExt};
|
||||
use warc::{RecordBuilder, WarcHeader, WarcWriter};
|
||||
|
||||
struct FullExchange {
|
||||
socket_addr: Option<SocketAddr>,
|
||||
request: http::Request<&'static str>,
|
||||
response: http::Response<Vec<u8>>,
|
||||
#[derive(Debug)]
|
||||
struct PrecioPoint {
|
||||
ean: String,
|
||||
// unix
|
||||
fetched_at: u64,
|
||||
precio_centavos: Option<u64>,
|
||||
in_stock: Option<bool>,
|
||||
url: String,
|
||||
parser_version: u16,
|
||||
name: Option<String>,
|
||||
image_url: Option<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
let mut args = args().skip(1);
|
||||
let links_list_path = args.next().unwrap();
|
||||
let output_zstd_path = args.next().unwrap();
|
||||
let links_str = fs::read_to_string(links_list_path).unwrap();
|
||||
let links = links_str
|
||||
.split("\n")
|
||||
|
@ -29,7 +35,7 @@ async fn main() {
|
|||
|
||||
let handle = {
|
||||
let (sender, receiver) = async_channel::bounded::<String>(1);
|
||||
let (res_sender, res_receiver) = async_channel::unbounded::<FullExchange>();
|
||||
let (res_sender, res_receiver) = async_channel::unbounded::<PrecioPoint>();
|
||||
|
||||
let mut handles = Vec::new();
|
||||
for _ in 1..16 {
|
||||
|
@ -38,7 +44,7 @@ async fn main() {
|
|||
handles.push(tokio::spawn(worker(rx, tx)));
|
||||
}
|
||||
|
||||
let warc_writer_handle = tokio::spawn(warc_writer(res_receiver, output_zstd_path));
|
||||
let db_writer_handle = tokio::spawn(db_writer(res_receiver));
|
||||
|
||||
for link in links {
|
||||
sender.send_blocking(link).unwrap();
|
||||
|
@ -49,22 +55,22 @@ async fn main() {
|
|||
handle.await.unwrap();
|
||||
}
|
||||
|
||||
warc_writer_handle
|
||||
db_writer_handle
|
||||
};
|
||||
handle.await.unwrap();
|
||||
}
|
||||
|
||||
async fn worker(rx: Receiver<String>, tx: Sender<FullExchange>) {
|
||||
async fn worker(rx: Receiver<String>, tx: Sender<PrecioPoint>) {
|
||||
let client = reqwest::ClientBuilder::default().build().unwrap();
|
||||
while let Ok(url) = rx.recv().await {
|
||||
let res = fetch(&client, url.clone()).await;
|
||||
let res = fetch_and_parse(&client, url.clone()).await;
|
||||
match res {
|
||||
Ok(ex) => {
|
||||
tx.send(ex).await.unwrap();
|
||||
}
|
||||
Err(err) => {
|
||||
stderr()
|
||||
.write_all(format!("Failed to fetch {}: {:#?}", url.as_str(), err).as_bytes())
|
||||
.write_all(format!("Failed to fetch {}: {:#?}\n", url.as_str(), err).as_bytes())
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
@ -72,128 +78,138 @@ async fn worker(rx: Receiver<String>, tx: Sender<FullExchange>) {
|
|||
}
|
||||
}
|
||||
|
||||
async fn fetch(client: &reqwest::Client, url: String) -> Result<FullExchange, reqwest::Error> {
|
||||
let request = client.get(url).build().unwrap();
|
||||
let mut http_request_builder = http::Request::builder()
|
||||
.method(request.method())
|
||||
.uri(request.url().as_str());
|
||||
for (key, val) in request.headers() {
|
||||
http_request_builder = http_request_builder.header(key, val);
|
||||
}
|
||||
let response = client.execute(request).await?;
|
||||
#[derive(Debug)]
|
||||
enum FetchError {
|
||||
HttpError(reqwest::Error),
|
||||
NoPriceMetaEl,
|
||||
NoMetaContent,
|
||||
NotANumber,
|
||||
NoStockMetaEl,
|
||||
NoValidStockMeta,
|
||||
NoSeedState,
|
||||
NoProductInSeedState,
|
||||
NoProductSkuInSeedState,
|
||||
}
|
||||
|
||||
let ip_address = response.remote_addr();
|
||||
async fn fetch_and_parse(client: &reqwest::Client, url: String) -> Result<PrecioPoint, FetchError> {
|
||||
let request = client.get(url.as_str()).build().unwrap();
|
||||
let response = client
|
||||
.execute(request)
|
||||
.await
|
||||
.map_err(|e| FetchError::HttpError(e))?;
|
||||
let body = response
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| FetchError::HttpError(e))?;
|
||||
|
||||
let http_request = {
|
||||
http_request_builder
|
||||
.version(response.version())
|
||||
.body("")
|
||||
.unwrap()
|
||||
let html = Html::parse_document(&body);
|
||||
|
||||
let point = parse_carrefour(url, html)?;
|
||||
|
||||
Ok(point)
|
||||
}
|
||||
fn parse_carrefour(url: String, html: Html) -> Result<PrecioPoint, FetchError> {
|
||||
let meta_price_sel = Selector::parse("meta[property=\"product:price:amount\"]").unwrap();
|
||||
let precio_centavos = match html.select(&meta_price_sel).next() {
|
||||
Some(el) => match el.attr("content") {
|
||||
Some(attr) => match attr.parse::<f64>() {
|
||||
Ok(f) => Ok((f * 100.0) as u64),
|
||||
Err(_) => Err(FetchError::NotANumber),
|
||||
},
|
||||
None => Err(FetchError::NoMetaContent),
|
||||
},
|
||||
None => Err(FetchError::NoPriceMetaEl),
|
||||
}?;
|
||||
|
||||
let meta_stock_el = Selector::parse("meta[property=\"product:availability\"]").unwrap();
|
||||
let in_stock = match html.select(&meta_stock_el).next() {
|
||||
Some(el) => match el.attr("content") {
|
||||
Some(attr) => match attr {
|
||||
"oos" => Ok(Some(false)),
|
||||
"instock" => Ok(Some(true)),
|
||||
_ => Err(FetchError::NoValidStockMeta),
|
||||
},
|
||||
None => Err(FetchError::NoMetaContent),
|
||||
},
|
||||
None => Err(FetchError::NoStockMetaEl),
|
||||
}?;
|
||||
|
||||
let ean = {
|
||||
let state = parse_script_json(&html, "__STATE__").ok_or(FetchError::NoSeedState)?;
|
||||
let seed_state = &state.as_object().ok_or(FetchError::NoSeedState)?;
|
||||
let (_, product_json) = seed_state
|
||||
.into_iter()
|
||||
.find(|(key, val)| {
|
||||
key.starts_with("Product:")
|
||||
&& val.as_object().map_or(false, |val| {
|
||||
val.get("__typename")
|
||||
.map_or(false, |typename| typename == "Product")
|
||||
})
|
||||
})
|
||||
.ok_or(FetchError::NoProductInSeedState)?;
|
||||
let cache_id = product_json
|
||||
.get("cacheId")
|
||||
.ok_or(FetchError::NoProductInSeedState)?;
|
||||
let (_, product_sku_json) = seed_state
|
||||
.into_iter()
|
||||
.filter_map(|(key, val)| val.as_object().map_or(None, |o| Some((key, o))))
|
||||
.find(|(key, val)| {
|
||||
key.starts_with(&format!("Product:{}", cache_id))
|
||||
&& val
|
||||
.get("__typename")
|
||||
.map_or(false, |typename| typename == "SKU")
|
||||
})
|
||||
.ok_or(FetchError::NoProductSkuInSeedState)?;
|
||||
product_sku_json
|
||||
.get("ean")
|
||||
.ok_or(FetchError::NoProductSkuInSeedState)?
|
||||
.as_str()
|
||||
.ok_or(FetchError::NoProductSkuInSeedState)?
|
||||
.to_string()
|
||||
};
|
||||
|
||||
let http_response = {
|
||||
let mut http_response_builder = http::Response::<()>::builder()
|
||||
.status(response.status())
|
||||
.version(response.version());
|
||||
for (key, val) in response.headers() {
|
||||
http_response_builder = http_response_builder.header(key, val);
|
||||
}
|
||||
let body = response.bytes().await?;
|
||||
http_response_builder.body(body.to_vec()).unwrap()
|
||||
};
|
||||
|
||||
Ok(FullExchange {
|
||||
socket_addr: ip_address,
|
||||
request: http_request,
|
||||
response: http_response,
|
||||
Ok(PrecioPoint {
|
||||
ean: ean,
|
||||
fetched_at: now_sec(),
|
||||
in_stock: in_stock,
|
||||
name: None,
|
||||
image_url: None,
|
||||
parser_version: 5,
|
||||
precio_centavos: Some(precio_centavos),
|
||||
url: url,
|
||||
})
|
||||
}
|
||||
|
||||
async fn warc_writer(rx: Receiver<FullExchange>, output_zstd_path: String) {
|
||||
let zstd_proc = Command::new("zstd")
|
||||
.args(&["-T0", "-15", "--long", "-o", &output_zstd_path])
|
||||
.stdin(Stdio::piped())
|
||||
.stderr(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.spawn()
|
||||
.unwrap();
|
||||
|
||||
let mut writer = WarcWriter::new(zstd_proc.stdin.unwrap());
|
||||
writer
|
||||
.write(
|
||||
&RecordBuilder::default()
|
||||
.version("1.0".to_owned())
|
||||
.warc_type(warc::RecordType::WarcInfo)
|
||||
.header(WarcHeader::ContentType, "application/warc-fields")
|
||||
.body(format!("software: preciazo-warcificator/0.0.0\nformat: WARC file version 1.0\nconformsTo: http://www.archive.org/documents/WarcFileFormat-1.0.html").into())
|
||||
.build()
|
||||
.unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
while let Ok(res) = rx.recv().await {
|
||||
let uri = res.request.uri().to_string();
|
||||
let req_record = {
|
||||
let mut builder = RecordBuilder::default()
|
||||
.version("1.0".to_owned())
|
||||
.warc_type(warc::RecordType::Request)
|
||||
.header(WarcHeader::TargetURI, uri.clone())
|
||||
.header(WarcHeader::ContentType, "application/http;msgtype=request")
|
||||
.header(
|
||||
WarcHeader::Unknown("X-Warcificator-Lying".to_string()),
|
||||
"the request contains other headers not included here",
|
||||
);
|
||||
if let Some(addr) = res.socket_addr {
|
||||
builder = builder.header(WarcHeader::IPAddress, addr.ip().to_string());
|
||||
}
|
||||
builder
|
||||
.body(format_http11_request(res.request).into_bytes())
|
||||
.build()
|
||||
.unwrap()
|
||||
};
|
||||
writer.write(&req_record).unwrap();
|
||||
writer
|
||||
.write(&{
|
||||
let mut builder = RecordBuilder::default()
|
||||
.version("1.0".to_owned())
|
||||
.warc_type(warc::RecordType::Response)
|
||||
.header(WarcHeader::TargetURI, uri)
|
||||
.header(WarcHeader::ConcurrentTo, req_record.warc_id())
|
||||
.header(WarcHeader::ContentType, "application/http;msgtype=response");
|
||||
if let Some(addr) = res.socket_addr {
|
||||
builder = builder.header(WarcHeader::IPAddress, addr.ip().to_string());
|
||||
}
|
||||
builder
|
||||
.body(format_http11_response(res.response))
|
||||
.build()
|
||||
.unwrap()
|
||||
})
|
||||
.unwrap();
|
||||
fn parse_script_json(html: &Html, varname: &str) -> Option<serde_json::Value> {
|
||||
let template_sel = Selector::parse(&format!(
|
||||
"template[data-type=\"json\"][data-varname=\"{}\"]",
|
||||
varname
|
||||
))
|
||||
.unwrap();
|
||||
match html.select(&template_sel).next() {
|
||||
Some(value) => match value.first_element_child() {
|
||||
Some(script) => match serde_json::from_str(&script.inner_html()) {
|
||||
Ok(val) => val,
|
||||
Err(_) => None,
|
||||
},
|
||||
None => None,
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn format_http11_request(req: http::Request<&'static str>) -> String {
|
||||
let start_line = format!("{} {} HTTP/1.1", req.method().as_str(), req.uri().path());
|
||||
let headers_str = req
|
||||
.headers()
|
||||
.iter()
|
||||
.map(|(key, val)| format!("{}: {}\r\n", key, val.to_str().unwrap()))
|
||||
.collect::<String>();
|
||||
|
||||
[start_line.as_str(), headers_str.as_str(), req.body()].join("\r\n")
|
||||
fn now_sec() -> u64 {
|
||||
let start = SystemTime::now();
|
||||
let since_the_epoch = start
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards");
|
||||
since_the_epoch.as_secs()
|
||||
}
|
||||
|
||||
fn format_http11_response(res: http::Response<Vec<u8>>) -> Vec<u8> {
|
||||
let start_line = format!(
|
||||
"HTTP/1.1 {} {}",
|
||||
res.status().as_str(),
|
||||
res.status().canonical_reason().unwrap_or("")
|
||||
);
|
||||
let headers_str = res
|
||||
.headers()
|
||||
.iter()
|
||||
.map(|(key, val)| format!("{}: {}\r\n", key, val.to_str().unwrap()))
|
||||
.collect::<String>();
|
||||
|
||||
let crlf: &[u8] = &[13, 10];
|
||||
[start_line.as_bytes(), headers_str.as_bytes(), res.body()].join(crlf)
|
||||
async fn db_writer(rx: Receiver<PrecioPoint>) {
|
||||
let conn = Connection::open("../scraper/sqlite.db").unwrap();
|
||||
// let mut stmt = conn.prepare("SELECT id, name, data FROM person")?;
|
||||
while let Ok(res) = rx.recv().await {
|
||||
println!("{:#?}", res)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue