From 0843a28f48b4bfd87b3811555514548350858a7e Mon Sep 17 00:00:00 2001
From: Nulo
Date: Sat, 13 Jul 2024 12:43:24 -0300
Subject: [PATCH] WIP: postgres

---
 rust/.env                                     |  3 +-
 rust/Cargo.lock                               |  1 +
 rust/Cargo.toml                               |  3 +-
 rust/build.rs                                 |  5 ++
 rust/docker-compose.yml                       | 11 +++
 ...240712112531_precios-and-producto-urls.sql | 21 ++++++
 .../20240712112948_create-best-selling.sql    |  7 ++
 rust/src/db.rs                                | 33 +++++++++
 rust/src/lib.rs                               |  1 +
 rust/src/scraper/db.rs                        | 67 ++++++++++---------
 rust/src/scraper/main.rs                      | 22 +++---
 11 files changed, 131 insertions(+), 43 deletions(-)
 create mode 100644 rust/build.rs
 create mode 100644 rust/docker-compose.yml
 create mode 100644 rust/migrations/20240712112531_precios-and-producto-urls.sql
 create mode 100644 rust/migrations/20240712112948_create-best-selling.sql
 create mode 100644 rust/src/db.rs

diff --git a/rust/.env b/rust/.env
index 4b6c948..09554f3 100644
--- a/rust/.env
+++ b/rust/.env
@@ -1 +1,2 @@
-DATABASE_URL=sqlite://../sqlite.db
+# DATABASE_URL=sqlite://../sqlite.db
+DATABASE_URL=postgres://preciazo:rezgVdLJik8HRX9heS9dDt5nRXyy9n@localhost:54325/preciazo
\ No newline at end of file
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 2670ea3..eb60aaf 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -1391,6 +1391,7 @@ dependencies = [
  "chrono",
  "clap",
  "cron",
+ "dotenvy",
  "futures",
  "html-escape",
  "itertools",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 511d09a..33d8008 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -12,7 +12,7 @@ base64 = "0.21.7"
 chrono = "0.4"
 clap = { version = "4.4.15", features = ["derive"] }
 cron = "0.12.0"
-sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono" ] }
+sqlx = { version = "0.7", features = [ "runtime-tokio", "sqlite", "chrono", "postgres" ] }
 futures = "0.3.30"
 html-escape = "0.2.13"
 itertools = "0.12.0"
@@ -36,6 +36,7 @@ tokio = { version = "1.35", features = ["full"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 axum = "0.7.5"
+dotenvy = "0.15.7"
 
 [[bin]]
 name = "api"
diff --git a/rust/build.rs b/rust/build.rs
new file mode 100644
index 0000000..7609593
--- /dev/null
+++ b/rust/build.rs
@@ -0,0 +1,5 @@
+// generated by `sqlx migrate build-script`
+fn main() {
+    // trigger recompilation when a new migration is added
+    println!("cargo:rerun-if-changed=migrations");
+}
\ No newline at end of file
diff --git a/rust/docker-compose.yml b/rust/docker-compose.yml
new file mode 100644
index 0000000..4fa0454
--- /dev/null
+++ b/rust/docker-compose.yml
@@ -0,0 +1,11 @@
+services:
+  postgres:
+    image: docker.io/postgres:16
+    restart: always
+    shm_size: 128mb
+    ports:
+      - 127.0.0.1:54325:5432
+    environment:
+      POSTGRES_USER: preciazo
+      POSTGRES_PASSWORD: rezgVdLJik8HRX9heS9dDt5nRXyy9n
+
diff --git a/rust/migrations/20240712112531_precios-and-producto-urls.sql b/rust/migrations/20240712112531_precios-and-producto-urls.sql
new file mode 100644
index 0000000..bc30bbc
--- /dev/null
+++ b/rust/migrations/20240712112531_precios-and-producto-urls.sql
@@ -0,0 +1,21 @@
+CREATE TABLE
+  precios (
+    id serial PRIMARY KEY NOT NULL,
+    ean text NOT NULL,
+    fetched_at integer NOT NULL,
+    precio_centavos integer,
+    in_stock integer,
+    url text NOT NULL,
+    warc_record_id text,
+    parser_version integer,
+    name text,
+    image_url text
+  );
+
+CREATE TABLE
+  producto_urls (
+    id serial PRIMARY KEY NOT NULL,
+    url text NOT NULL,
+    first_seen integer NOT NULL,
+    last_seen integer NOT NULL
+  );
\ No newline at end of file
diff --git a/rust/migrations/20240712112948_create-best-selling.sql b/rust/migrations/20240712112948_create-best-selling.sql
new file mode 100644
index 0000000..6ef8815
--- /dev/null
+++ b/rust/migrations/20240712112948_create-best-selling.sql
@@ -0,0 +1,7 @@
+CREATE TABLE
+  db_best_selling (
+    id serial PRIMARY KEY NOT NULL,
+    fetched_at integer NOT NULL,
+    category text NOT NULL,
+    eans_json text NOT NULL
+  );
\ No newline at end of file
diff --git a/rust/src/db.rs b/rust/src/db.rs
new file mode 100644
index 0000000..11238dd
--- /dev/null
+++ b/rust/src/db.rs
@@ -0,0 +1,33 @@
+use std::env;
+
+use sqlx::{postgres::PgPoolOptions, PgPool};
+
+pub async fn connect_db() -> anyhow::Result<PgPool> {
+    dotenvy::dotenv()?;
+
+    let pool = PgPoolOptions::new()
+        .max_connections(5)
+        .connect(&env::var("DATABASE_URL")?)
+        .await?;
+    sqlx::migrate!("./migrations/").run(&pool).await?;
+    Ok(pool)
+
+    // let pool = SqlitePoolOptions::new()
+    //     .max_connections(1)
+    //     .connect_with(
+    //         SqliteConnectOptions::from_str(&format!(
+    //             "sqlite://{}",
+    //             env::var("DB_PATH").unwrap_or("../sqlite.db".to_string())
+    //         ))
+    //         .unwrap()
+    //         .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
+    //         .pragma("journal_size_limit", "67108864")
+    //         .pragma("mmap_size", "134217728")
+    //         .synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
+    //         .busy_timeout(Duration::from_secs(15))
+    //         .pragma("cache_size", "2000")
+    //         .optimize_on_close(true, None),
+    //     )
+    //     .await
+    //     .expect("can't connect to database");
+}
diff --git a/rust/src/lib.rs b/rust/src/lib.rs
index fe54469..d14314f 100644
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@@ -1 +1,2 @@
+pub mod db;
 pub mod supermercado;
diff --git a/rust/src/scraper/db.rs b/rust/src/scraper/db.rs
index eb4be9c..05df00b 100644
--- a/rust/src/scraper/db.rs
+++ b/rust/src/scraper/db.rs
@@ -4,31 +4,36 @@ use std::{
     time::{Duration, SystemTime, UNIX_EPOCH},
 };
 
-use sqlx::{sqlite::SqliteConnectOptions, SqlitePool};
+use preciazo::db::connect_db;
+use sqlx::{sqlite::SqliteConnectOptions, PgPool, SqlitePool};
 use tracing::info;
 
 use crate::{best_selling::BestSellingRecord, PrecioPoint};
 
 #[derive(Clone)]
 pub struct Db {
-    read_pool: SqlitePool,
-    write_pool: SqlitePool,
+    pool: PgPool,
+    // read_pool: SqlitePool,
+    // write_pool: SqlitePool,
 }
 
 impl Db {
     pub async fn connect() -> anyhow::Result<Self> {
-        let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
-        info!("Opening DB at {}", db_path);
-        let read_pool = connect_to_db(&db_path, 32).await?;
-        let write_pool = connect_to_db(&db_path, 1).await?;
         Ok(Self {
-            read_pool,
-            write_pool,
+            pool: connect_db().await?,
         })
+        // let db_path = env::var("DB_PATH").unwrap_or("../sqlite.db".to_string());
+        // info!("Opening DB at {}", db_path);
+        // let read_pool = connect_to_db(&db_path, 32).await?;
+        // let write_pool = connect_to_db(&db_path, 1).await?;
+        // Ok(Self {
+        //     read_pool,
+        //     write_pool,
+        // })
     }
 
     pub async fn insert_precio(&self, point: PrecioPoint) -> anyhow::Result<()> {
-        sqlx::query!("INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9);",
+        sqlx::query!("INSERT INTO precios(ean, fetched_at, precio_centavos, in_stock, url, warc_record_id, parser_version, name, image_url) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9);",
             point.ean,
             point.fetched_at,
            point.precio_centavos,
@@ -38,7 +43,7 @@ impl Db {
             point.parser_version,
             point.name,
             point.image_url,
-        ).execute(&self.write_pool).await?;
+        ).execute(&self.pool).await?;
 
         Ok(())
     }
@@ -105,26 +110,26 @@ impl Db {
     }
 }
 
-async fn connect_to_db(
-    db_path: &str,
-    max_connections: u32,
-) -> Result<Pool<Sqlite>, anyhow::Error> {
-    Ok(sqlx::pool::PoolOptions::new()
-        .max_connections(max_connections)
-        .connect_with(
-            SqliteConnectOptions::from_str(&format!("sqlite://{}", db_path))?
-                // https://fractaledmind.github.io/2023/09/07/enhancing-rails-sqlite-fine-tuning/
-                .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
-                .pragma("journal_size_limit", "67108864")
-                .pragma("mmap_size", "134217728")
-                .synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
-                .busy_timeout(Duration::from_secs(15))
-                .pragma("cache_size", "2000")
-                .pragma("temp_store", "memory")
-                .optimize_on_close(true, None),
-        )
-        .await?)
-}
+// async fn connect_to_db(
+//     db_path: &str,
+//     max_connections: u32,
+// ) -> Result<Pool<Sqlite>, anyhow::Error> {
+//     Ok(sqlx::pool::PoolOptions::new()
+//         .max_connections(max_connections)
+//         .connect_with(
+//             SqliteConnectOptions::from_str(&format!("sqlite://{}", db_path))?
+//                 // https://fractaledmind.github.io/2023/09/07/enhancing-rails-sqlite-fine-tuning/
+//                 .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
+//                 .pragma("journal_size_limit", "67108864")
+//                 .pragma("mmap_size", "134217728")
+//                 .synchronous(sqlx::sqlite::SqliteSynchronous::Normal)
+//                 .busy_timeout(Duration::from_secs(15))
+//                 .pragma("cache_size", "2000")
+//                 .pragma("temp_store", "memory")
+//                 .optimize_on_close(true, None),
+//         )
+//         .await?)
+// }
 
 fn now_ms() -> u128 {
     now().as_millis()
diff --git a/rust/src/scraper/main.rs b/rust/src/scraper/main.rs
index bb29ca4..7d83d1b 100644
--- a/rust/src/scraper/main.rs
+++ b/rust/src/scraper/main.rs
@@ -60,16 +60,18 @@ struct AutoArgs {
 async fn main() {
     tracing_subscriber::fmt::init();
 
-    match Args::parse() {
-        Args::FetchList(a) => fetch_list_cli(a.list_path).await,
-        Args::ParseFile(a) => parse_file_cli(a.file_path).await,
-        Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await,
-        Args::ScrapUrl(a) => scrap_url_cli(a.url).await,
-        Args::ScrapBestSelling => scrap_best_selling_cli().await,
-        Args::Auto(a) => auto_cli(a).await,
-        Args::Cron(_) => cron_cli().await,
-    }
-    .unwrap()
+    preciazo::db::connect_db().await.unwrap();
+
+    // match Args::parse() {
+    //     Args::FetchList(a) => fetch_list_cli(a.list_path).await,
+    //     Args::ParseFile(a) => parse_file_cli(a.file_path).await,
+    //     Args::GetUrlList(a) => get_url_list_cli(a.supermercado).await,
+    //     Args::ScrapUrl(a) => scrap_url_cli(a.url).await,
+    //     Args::ScrapBestSelling => scrap_best_selling_cli().await,
+    //     Args::Auto(a) => auto_cli(a).await,
+    //     Args::Cron(_) => cron_cli().await,
+    // }
+    // .unwrap()
 }
 
 async fn scrap_url_cli(url: String) -> anyhow::Result<()> {
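As a rough usage sketch (not part of the patch itself): once the docker-compose Postgres service is running and DATABASE_URL in rust/.env points at it, a binary in the workspace could exercise the new preciazo::db::connect_db() helper roughly as below. The count query is only an illustrative example; it uses the runtime-checked sqlx::query_scalar instead of the compile-time sqlx::query! macro used in db.rs, so it compiles without a live database.

// Hedged sketch, assuming the docker-compose Postgres service above is up and
// rust/.env points DATABASE_URL at it; the query is illustrative, not from the patch.
use preciazo::db::connect_db;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // connect_db() loads .env via dotenvy, opens a PgPool and runs ./migrations/
    let pool = connect_db().await?;

    // Runtime-checked query so this snippet does not need DATABASE_URL at build time.
    let count: i64 = sqlx::query_scalar("SELECT count(*) FROM precios")
        .fetch_one(&pool)
        .await?;
    println!("precios rows: {count}");
    Ok(())
}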