mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-10-29 23:19:56 +00:00
probemos así
This commit is contained in:
parent
75b2297a07
commit
6b315f9af2
1 changed files with 50 additions and 55 deletions
|
@ -1,7 +1,6 @@
|
||||||
use again::RetryPolicy;
|
use again::RetryPolicy;
|
||||||
use async_channel::Receiver;
|
|
||||||
use clap::{Parser, ValueEnum};
|
use clap::{Parser, ValueEnum};
|
||||||
use futures::future;
|
use futures::{future, stream, StreamExt};
|
||||||
use nanoid::nanoid;
|
use nanoid::nanoid;
|
||||||
use r2d2::Pool;
|
use r2d2::Pool;
|
||||||
use r2d2_sqlite::SqliteConnectionManager;
|
use r2d2_sqlite::SqliteConnectionManager;
|
||||||
|
@ -78,32 +77,30 @@ async fn fetch_list_cli(links_list_path: String) -> anyhow::Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_list(pool: &Pool<SqliteConnectionManager>, links: Vec<String>) -> Counters {
|
async fn fetch_list(pool: &Pool<SqliteConnectionManager>, links: Vec<String>) -> Counters {
|
||||||
let (sender, receiver) = async_channel::bounded::<String>(1);
|
|
||||||
|
|
||||||
let n_coroutines = env::var("N_COROUTINES")
|
let n_coroutines = env::var("N_COROUTINES")
|
||||||
.map_or(Ok(24), |s| s.parse::<usize>())
|
.map_or(Ok(24), |s| s.parse::<usize>())
|
||||||
.expect("N_COROUTINES no es un número");
|
.expect("N_COROUTINES no es un número");
|
||||||
let handles = (1..n_coroutines)
|
|
||||||
.map(|_| {
|
let client = build_client();
|
||||||
let rx = receiver.clone();
|
|
||||||
|
stream::iter(links)
|
||||||
|
.map(|url| {
|
||||||
let pool = pool.clone();
|
let pool = pool.clone();
|
||||||
tokio::spawn(worker(rx, pool))
|
let client = client.clone();
|
||||||
|
tokio::spawn(fetch_and_save(client, url, pool))
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
// https://github.com/rust-lang/rust/issues/89976#issuecomment-1073115246
|
||||||
|
.boxed()
|
||||||
for link in links {
|
.buffer_unordered(n_coroutines)
|
||||||
sender.send_blocking(link).unwrap();
|
.fold(Counters::default(), move |x, y| {
|
||||||
}
|
let ret = y.unwrap();
|
||||||
sender.close();
|
future::ready(Counters {
|
||||||
|
success: x.success + ret.success,
|
||||||
let mut counters = Counters::default();
|
errored: x.errored + ret.errored,
|
||||||
for handle in handles {
|
skipped: x.skipped + ret.skipped,
|
||||||
let c = handle.await.unwrap();
|
})
|
||||||
counters.success += c.success;
|
})
|
||||||
counters.errored += c.errored;
|
.await
|
||||||
counters.skipped += c.skipped;
|
|
||||||
}
|
|
||||||
counters
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn connect_db() -> Pool<SqliteConnectionManager> {
|
fn connect_db() -> Pool<SqliteConnectionManager> {
|
||||||
|
@ -124,42 +121,40 @@ struct Counters {
|
||||||
skipped: u64,
|
skipped: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn worker(rx: Receiver<String>, pool: Pool<SqliteConnectionManager>) -> Counters {
|
async fn fetch_and_save(
|
||||||
let client = build_client();
|
client: reqwest::Client,
|
||||||
|
url: String,
|
||||||
|
pool: Pool<SqliteConnectionManager>,
|
||||||
|
) -> Counters {
|
||||||
|
let res = fetch_and_parse(&client, url.clone()).await;
|
||||||
let mut counters = Counters::default();
|
let mut counters = Counters::default();
|
||||||
while let Ok(url) = rx.recv().await {
|
match res {
|
||||||
let client = &client;
|
Ok(res) => {
|
||||||
let res = fetch_and_parse(client, url.clone()).await;
|
counters.success += 1;
|
||||||
match res {
|
pool.get().unwrap().execute("INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);",rusqlite::params![
|
||||||
Ok(res) => {
|
res.ean,
|
||||||
counters.success += 1;
|
res.fetched_at,
|
||||||
pool.get().unwrap().execute("INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);",rusqlite::params![
|
res.precio_centavos,
|
||||||
res.ean,
|
res.in_stock,
|
||||||
res.fetched_at,
|
res.url,
|
||||||
res.precio_centavos,
|
None::<String>,
|
||||||
res.in_stock,
|
res.parser_version,
|
||||||
res.url,
|
res.name,
|
||||||
None::<String>,
|
res.image_url,
|
||||||
res.parser_version,
|
]).unwrap();
|
||||||
res.name,
|
}
|
||||||
res.image_url,
|
Err(err) => {
|
||||||
]).unwrap();
|
match err.downcast_ref::<FetchError>() {
|
||||||
}
|
Some(FetchError::Http(e)) => match e.status() {
|
||||||
Err(err) => {
|
Some(StatusCode::NOT_FOUND) => counters.skipped += 1,
|
||||||
match err.downcast_ref::<FetchError>() {
|
|
||||||
Some(FetchError::Http(e)) => match e.status() {
|
|
||||||
Some(StatusCode::NOT_FOUND) => counters.skipped += 1,
|
|
||||||
_ => counters.errored += 1,
|
|
||||||
},
|
|
||||||
_ => counters.errored += 1,
|
_ => counters.errored += 1,
|
||||||
}
|
},
|
||||||
|
_ => counters.errored += 1,
|
||||||
tracing::error!(error=%err, url=url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tracing::error!(error=%err, url=url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
counters
|
counters
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -370,8 +365,8 @@ async fn cron_cli() -> anyhow::Result<()> {
|
||||||
let mut interval = time::interval(std::time::Duration::from_secs(60 * 60 * 24));
|
let mut interval = time::interval(std::time::Duration::from_secs(60 * 60 * 24));
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
interval.tick().await;
|
|
||||||
auto_cli().await.unwrap();
|
auto_cli().await.unwrap();
|
||||||
|
interval.tick().await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue