Compare commits

..

No commits in common. "63ebf48da69bddf5290e0378d3a9a00faed21acf" and "a45e4f196a15799b87b68e6cf669e40c1fd20b4c" have entirely different histories.

6 changed files with 1 additions and 483 deletions

5
.gitignore vendored
View file

@ -1,10 +1,5 @@
node_modules/ node_modules/
input*/ input*/
keywords-no-compressed/
keywords-no-compressed.zip
*.sqlite3 *.sqlite3
*.sqlite3-shm *.sqlite3-shm
*.sqlite3-wal *.sqlite3-wal
rust/*.sqlite3
rust/*.sqlite3-shm
rust/*.sqlite3-wal

View file

@ -42,7 +42,7 @@ async function recurse(parent, strip) {
const brotli = createBrotliCompress({ const brotli = createBrotliCompress({
params: { params: {
[constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_TEXT, [constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_TEXT,
[constants.BROTLI_PARAM_QUALITY]: 11, [constants.BROTLI_PARAM_QUALITY]: constants.BROTLI_MIN_QUALITY,
[constants.BROTLI_PARAM_SIZE_HINT]: (await stat(realPath)).size, [constants.BROTLI_PARAM_SIZE_HINT]: (await stat(realPath)).size,
}, },
}); });

3
rust/.gitignore vendored
View file

@ -1,3 +0,0 @@
target/
perf.data*
flamegraph.svg

235
rust/Cargo.lock generated
View file

@ -1,235 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "alloc-no-stdlib"
version = "2.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
[[package]]
name = "alloc-stdlib"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
dependencies = [
"alloc-no-stdlib",
]
[[package]]
name = "bitflags"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c70beb79cbb5ce9c4f8e20849978f34225931f665bb49efa6982875a4d5facb3"
[[package]]
name = "brotli"
version = "3.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
"brotli-decompressor",
]
[[package]]
name = "brotli-decompressor"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "fallible-iterator"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "getrandom"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "hashlink"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa"
dependencies = [
"hashbrown",
]
[[package]]
name = "libc"
version = "0.2.142"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317"
[[package]]
name = "libsqlite3-sys"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "pkg-config"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
[[package]]
name = "rusqlite"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"smallvec",
]
[[package]]
name = "rust"
version = "0.1.0"
dependencies = [
"brotli",
"rusqlite",
"walkdir",
]
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "smallvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -1,15 +0,0 @@
[package]
name = "rust"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
brotli = "3.3.4"
# rayon = "1.7.0"
rusqlite = { version = "0.29.0", features = ["bundled", "unlock_notify"] }
# smol-potat = { version = "1.1.2", features = ["auto"] }
# sqlite-zstd = "0.3.2"
walkdir = "2.3.3"
# zstd = "0.12.3"

View file

@ -1,224 +0,0 @@
use std::{
error::Error,
fs::{read, remove_file, File},
path::{Path, PathBuf},
sync::mpsc::{self, Receiver, Sender},
thread::{self, JoinHandle},
};
use brotli::enc::{backward_references::BrotliEncoderMode, BrotliEncoderParams};
use rusqlite::Connection;
use walkdir::WalkDir;
/// Returns the location of the SQLite archive file, relative to the working directory.
fn db_path() -> &'static str {
    const ARCHIVE_FILE: &str = "./archiveee.sqlite3";
    ARCHIVE_FILE
}
/// Builds a fresh SQLite archive from the files found under `./input`.
///
/// Any existing database file is removed and the schema is recreated. The discovered paths are
/// then split across at most four worker threads, each of which sends one [`Entry`] per file
/// over a channel. A single collector thread inserts every received entry inside one
/// transaction.
///
/// # Errors
///
/// Returns an error if the database cannot be opened or the schema cannot be created.
///
/// # Panics
///
/// Panics if a worker thread or the collector thread panics; failures inside those threads are
/// unwrapped rather than propagated.
fn main() -> Result<(), Box<dyn Error>> {
    // Best effort: the result is ignored, so a missing database file is not an error.
    _ = remove_file(db_path());
    let conn = make_conn()?;
    make_db_schema(&conn)?;

    let input = Path::new("./input");
    let paths = iterator(input).collect::<Vec<PathBuf>>();

    // `slice::chunks` panics on a chunk size of zero, which a plain `len / 4` yields for fewer
    // than four files. Rounding up (with a floor of one) also caps the worker count at four.
    let chunk_size = ((paths.len() + 3) / 4).max(1);
    let chunks = paths.chunks(chunk_size);
    println!("{}", chunks.len());

    let (tx, rx): (Sender<Entry>, Receiver<Entry>) = mpsc::channel();
    let threads = chunks
        .map(|chunk| {
            // Each worker owns its share of the paths and its own sender.
            let worker_paths = chunk.to_vec();
            let worker_tx = tx.clone();
            thread::spawn(move || {
                let input = Path::new("./input");
                let params = file_compress_params();
                println!("thread");
                for path in worker_paths {
                    compress_file(&path, input, &params, &worker_tx);
                }
            })
        })
        .collect::<Vec<JoinHandle<()>>>();

    // Only the collector writes to the database; it opens its own connection.
    let collector = thread::spawn(move || {
        let mut conn = make_conn().unwrap();
        println!("thread");
        let trans = conn.transaction().unwrap();
        {
            let mut insert_stmt = trans
                .prepare_cached("insert into files(path, content, compressed) values(?, ?, ?)")
                .unwrap();
            // The loop ends once every sender has been dropped.
            for entry in rx {
                insert_stmt.execute(entry).unwrap();
            }
        }
        trans.commit().unwrap();
    });

    // Drop the original sender so the channel closes as soon as all workers are done.
    drop(tx);
    collector.join().unwrap();
    for thread in threads {
        thread.join().unwrap();
    }

    println!("Hello, world!");
    Ok(())
}
// fn insert_files(paths: &[PathBuf]) {
// let input = Path::new("./input");
// let params = file_compress_params();
// // let mut conn = make_conn().unwrap();
// println!("thread");
// // let trans = conn.transaction().unwrap();
// // {
// // let mut insert_stmt = trans
// // .prepare_cached("insert into files(path, content, compressed) values(?, ?, ?)")
// // .unwrap();
// for path in paths {
// compress_file(path, input, &params, tx);
// }
// // insert_stmt.execute((path_str, content, false)).unwrap();
// // }
// // trans.commit().unwrap();
// }
// fn insert_file(
// path: &Path,
// input: &Path,
// params: &BrotliEncoderParams,
// insert_stmt: &mut rusqlite::CachedStatement,
// ) {
// // let mut path_comp = zstd::bulk::Compressor::with_dictionary(0, &dict).unwrap();
// // println!("{:?}", path);
// // let path_compressed = path_comp
// // .compress(strip_input(&path, input).to_string_lossy().as_bytes())
// // .unwrap();
// let path_strip = strip_input(&path, input);
// let path_str = path_strip.to_string_lossy();
// if compressable(path) {
// let mut content_compressed = Vec::new();
// let mut file = File::open(path).unwrap();
// brotli::BrotliCompress(&mut file, &mut content_compressed, &params).unwrap();
// // let content_compressed = enc.compress(&content)?;
// insert_stmt
// .execute((path_str, content_compressed, true))
// .unwrap();
// } else {
// let content = read(path.clone()).unwrap();
// insert_stmt.execute((path_str, content, false)).unwrap();
// }
// }
/// Opens the archive database at [`db_path`] and configures it for writing.
///
/// The connection is returned with `journal_mode` set to `OFF` and `synchronous` set to `0`.
///
/// # Errors
///
/// Returns an error if the database cannot be opened or either pragma cannot be applied.
fn make_conn() -> Result<Connection, Box<dyn Error>> {
    let database = Connection::open(db_path())?;
    // NOTE(review): presumably trades crash safety for insert speed, since the archive is
    // rebuilt from scratch on every run — confirm before reusing elsewhere.
    database.pragma_update(None, "journal_mode", "OFF")?;
    database.pragma_update(None, "synchronous", 0)?;
    Ok(database)
}
/// One row sent from the compression workers to the collector thread: the path with the input
/// prefix stripped, the file content (Brotli-compressed or raw), and whether it was compressed.
/// The fields are bound in order to the `path`, `content`, and `compressed` columns of `files`.
type Entry = (String, Vec<u8>, bool);
/// Reads one file and sends its archive row over `tx`.
///
/// The row's path is `path` with the `input` prefix stripped, converted lossily to a string.
/// Files accepted by [`compressable`] are Brotli-compressed with `params`; every other file is
/// sent with its raw bytes. The final tuple element records which of the two happened.
///
/// # Panics
///
/// Panics if `path` is not under `input`, if the file cannot be opened or read, if compression
/// fails, or if the receiving end of the channel has been dropped.
fn compress_file(path: &Path, input: &Path, params: &BrotliEncoderParams, tx: &Sender<Entry>) {
    let relative = strip_input(path, input).to_string_lossy().into_owned();
    let is_compressed = compressable(path);
    let payload = if is_compressed {
        let mut encoded = Vec::new();
        let mut source = File::open(path).unwrap();
        brotli::BrotliCompress(&mut source, &mut encoded, params).unwrap();
        encoded
    } else {
        read(path).unwrap()
    };
    tx.send((relative, payload, is_compressed)).unwrap();
}
/// Creates the `files` table and a unique index on its `path` column.
///
/// # Errors
///
/// Returns the first error reported by the database, for example when the table already exists.
fn make_db_schema(conn: &Connection) -> Result<(), Box<dyn Error>> {
    let statements = [
        "create table files(path text, content blob, compressed bool)",
        "create unique index path on files(path)",
    ];
    // Executed in order: the index can only be created once the table exists.
    for statement in statements {
        conn.execute(statement, [])?;
    }
    Ok(())
}
// fn make_dict(conn: Connection, input: &Path) -> Result<(Vec<u8>, i64), Box<dyn Error>> {
// let mut insert_dict_stmt =
// conn.prepare_cached("insert into path_dictionaries(dictionary) values(?1)")?;
// let mut all_paths = Vec::new();
// let iter = iterator(input);
// for entry in iter {
// all_paths.push(entry.to_string_lossy().as_bytes().to_vec());
// }
// println!("path n {}", all_paths.len());
// let dict = zstd::dict::from_samples(&all_paths, 999999)?;
// println!("dict");
// let dict_id = insert_dict_stmt.insert(params![&dict])?;
// Ok((dict, dict_id))
// }
fn file_compress_params() -> BrotliEncoderParams {
let mut params = BrotliEncoderParams::default();
params.quality = 0;
params.mode = BrotliEncoderMode::BROTLI_MODE_TEXT;
params
}
/// Walks `input` recursively and yields the path of every regular file that is not already
/// compressed (see [`already_compressed`]).
///
/// Directory entries that cannot be read are skipped silently.
fn iterator(input: &Path) -> impl Iterator<Item = PathBuf> {
    WalkDir::new(input).into_iter().filter_map(|entry| {
        let entry = entry.ok()?;
        if !entry.file_type().is_file() {
            return None;
        }
        let path = entry.into_path();
        (!already_compressed(&path)).then_some(path)
    })
}
/// Returns `path` relative to `input` as an owned path.
///
/// # Panics
///
/// Panics if `path` does not start with `input`.
fn strip_input(path: &Path, input: &Path) -> PathBuf {
    let relative = path.strip_prefix(input).unwrap();
    relative.to_path_buf()
}
/// Reports whether `path` names a pre-compressed artifact, i.e. its lossy string form ends in
/// `.br` or `.gz`.
///
/// Takes `&Path` rather than `&PathBuf` so any path-like borrow can be checked; existing
/// `&PathBuf` arguments still work through deref coercion.
fn already_compressed(path: &Path) -> bool {
    let name = path.to_string_lossy();
    [".br", ".gz"].iter().any(|suffix| name.ends_with(*suffix))
}
/// Reports whether `path` should be compressed before storage, i.e. its lossy string form ends
/// in `.html`, `.css`, `.js`, `.json`, or `.svg`. The match is case-sensitive.
fn compressable(path: &Path) -> bool {
    const TEXT_SUFFIXES: [&str; 5] = [".html", ".css", ".js", ".json", ".svg"];
    let name = path.to_string_lossy();
    TEXT_SUFFIXES.iter().any(|suffix| name.ends_with(*suffix))
}