mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 03:26:19 +00:00
wtf
This commit is contained in:
parent
0144a56158
commit
75b2297a07
5 changed files with 21 additions and 14 deletions
|
@ -1,7 +1,7 @@
|
|||
use again::RetryPolicy;
|
||||
use async_channel::Receiver;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use futures::{stream, StreamExt, TryStreamExt};
|
||||
use futures::future;
|
||||
use nanoid::nanoid;
|
||||
use r2d2::Pool;
|
||||
use r2d2_sqlite::SqliteConnectionManager;
|
||||
|
@ -129,7 +129,8 @@ async fn worker(rx: Receiver<String>, pool: Pool<SqliteConnectionManager>) -> Co
|
|||
|
||||
let mut counters = Counters::default();
|
||||
while let Ok(url) = rx.recv().await {
|
||||
let res = fetch_and_parse(&client, url.clone()).await;
|
||||
let client = &client;
|
||||
let res = fetch_and_parse(client, url.clone()).await;
|
||||
match res {
|
||||
Ok(res) => {
|
||||
counters.success += 1;
|
||||
|
@ -274,13 +275,14 @@ async fn scrap_url(
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct Auto {
|
||||
pool: Pool<SqliteConnectionManager>,
|
||||
telegram_token: String,
|
||||
telegram_chat_id: String,
|
||||
}
|
||||
impl Auto {
|
||||
async fn download_supermercado(self: &Self, supermercado: Supermercado) -> anyhow::Result<()> {
|
||||
async fn download_supermercado(self: Self, supermercado: Supermercado) -> anyhow::Result<()> {
|
||||
{
|
||||
let t0 = now_sec();
|
||||
self.get_and_save_urls(&supermercado).await?;
|
||||
|
@ -357,11 +359,11 @@ async fn auto_cli() -> anyhow::Result<()> {
|
|||
telegram_chat_id: env::var("TELEGRAM_BOT_CHAT_ID")?,
|
||||
};
|
||||
auto.inform("[auto] Empezando scrap").await;
|
||||
stream::iter(Supermercado::value_variants().iter())
|
||||
.map(|s| auto.download_supermercado(s.to_owned()))
|
||||
.buffer_unordered(64)
|
||||
.try_collect()
|
||||
.await?;
|
||||
let handles: Vec<_> = Supermercado::value_variants()
|
||||
.iter()
|
||||
.map(|s| tokio::spawn(auto.clone().download_supermercado(s.to_owned())))
|
||||
.collect();
|
||||
future::try_join_all(handles).await?;
|
||||
Ok(())
|
||||
}
|
||||
async fn cron_cli() -> anyhow::Result<()> {
|
||||
|
|
|
@ -80,5 +80,5 @@ pub async fn get_urls() -> anyhow::Result<Vec<String>> {
|
|||
"https://www.carrefour.com.ar/sitemap/product-8.xml",
|
||||
"https://www.carrefour.com.ar/sitemap/product-9.xml",
|
||||
];
|
||||
vtex::get_urls_from_sitemap(&urls).await
|
||||
vtex::get_urls_from_sitemap(urls).await
|
||||
}
|
||||
|
|
|
@ -49,5 +49,5 @@ pub async fn get_urls() -> anyhow::Result<Vec<String>> {
|
|||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-4.xml",
|
||||
"https://diaonline.supermercadosdia.com.ar/sitemap/product-5.xml",
|
||||
];
|
||||
vtex::get_urls_from_sitemap(&urls).await
|
||||
vtex::get_urls_from_sitemap(urls).await
|
||||
}
|
||||
|
|
|
@ -110,5 +110,5 @@ pub async fn get_urls() -> anyhow::Result<Vec<String>> {
|
|||
"https://www.jumbo.com.ar/sitemap/product-8.xml",
|
||||
"https://www.jumbo.com.ar/sitemap/product-9.xml",
|
||||
];
|
||||
vtex::get_urls_from_sitemap(&urls).await
|
||||
vtex::get_urls_from_sitemap(urls).await
|
||||
}
|
||||
|
|
|
@ -117,21 +117,26 @@ pub fn parse_urls_from_sitemap(sitemap: &str) -> anyhow::Result<Vec<String>> {
|
|||
.collect())
|
||||
}
|
||||
|
||||
pub async fn get_urls_from_sitemap<'a>(sitemaps: &[&str]) -> anyhow::Result<Vec<String>> {
|
||||
pub async fn get_urls_from_sitemap(sitemaps: Vec<&str>) -> anyhow::Result<Vec<String>> {
|
||||
let mut total: Vec<String> = vec![];
|
||||
let client = build_client();
|
||||
let handles = stream::iter(sitemaps)
|
||||
.map(|url| {
|
||||
let client = &client;
|
||||
let client = client.clone();
|
||||
let url = url.to_string();
|
||||
async move {
|
||||
let client = client;
|
||||
let url = url;
|
||||
let text = get_retry_policy()
|
||||
.retry(|| do_request(client, url))
|
||||
.retry(|| do_request(&client, &url))
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
parse_urls_from_sitemap(&text)
|
||||
}
|
||||
})
|
||||
// https://github.com/rust-lang/rust/issues/89976#issuecomment-1073115246
|
||||
.boxed()
|
||||
.buffer_unordered(8)
|
||||
.try_collect::<Vec<_>>()
|
||||
.await?;
|
||||
|
|
Loading…
Reference in a new issue