Compare commits
2 commits
a45e4f196a
...
63ebf48da6
Author | SHA1 | Date | |
---|---|---|---|
63ebf48da6 | |||
aaa5d81e96 |
6 changed files with 483 additions and 1 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -1,5 +1,10 @@
|
|||
node_modules/
|
||||
input*/
|
||||
keywords-no-compressed/
|
||||
keywords-no-compressed.zip
|
||||
*.sqlite3
|
||||
*.sqlite3-shm
|
||||
*.sqlite3-wal
|
||||
rust/*.sqlite3
|
||||
rust/*.sqlite3-shm
|
||||
rust/*.sqlite3-wal
|
||||
|
|
|
@ -42,7 +42,7 @@ async function recurse(parent, strip) {
|
|||
const brotli = createBrotliCompress({
|
||||
params: {
|
||||
[constants.BROTLI_PARAM_MODE]: constants.BROTLI_MODE_TEXT,
|
||||
[constants.BROTLI_PARAM_QUALITY]: constants.BROTLI_MIN_QUALITY,
|
||||
[constants.BROTLI_PARAM_QUALITY]: 11,
|
||||
[constants.BROTLI_PARAM_SIZE_HINT]: (await stat(realPath)).size,
|
||||
},
|
||||
});
|
||||
|
|
3
rust/.gitignore
vendored
Normal file
3
rust/.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
target/
|
||||
perf.data*
|
||||
flamegraph.svg
|
235
rust/Cargo.lock
generated
Normal file
235
rust/Cargo.lock
generated
Normal file
|
@ -0,0 +1,235 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
version = "2.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
|
||||
|
||||
[[package]]
|
||||
name = "alloc-stdlib"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c70beb79cbb5ce9c4f8e20849978f34225931f665bb49efa6982875a4d5facb3"
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "3.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
"brotli-decompressor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli-decompressor"
|
||||
version = "2.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
|
||||
|
||||
[[package]]
|
||||
name = "fallible-streaming-iterator"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashlink"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa"
|
||||
dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.142"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317"
|
||||
|
||||
[[package]]
|
||||
name = "libsqlite3-sys"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
||||
|
||||
[[package]]
|
||||
name = "rusqlite"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"fallible-iterator",
|
||||
"fallible-streaming-iterator",
|
||||
"hashlink",
|
||||
"libsqlite3-sys",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"brotli",
|
||||
"rusqlite",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
15
rust/Cargo.toml
Normal file
15
rust/Cargo.toml
Normal file
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "rust"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
brotli = "3.3.4"
|
||||
# rayon = "1.7.0"
|
||||
rusqlite = { version = "0.29.0", features = ["bundled", "unlock_notify"] }
|
||||
# smol-potat = { version = "1.1.2", features = ["auto"] }
|
||||
# sqlite-zstd = "0.3.2"
|
||||
walkdir = "2.3.3"
|
||||
# zstd = "0.12.3"
|
224
rust/src/main.rs
Normal file
224
rust/src/main.rs
Normal file
|
@ -0,0 +1,224 @@
|
|||
use std::{
|
||||
error::Error,
|
||||
fs::{read, remove_file, File},
|
||||
path::{Path, PathBuf},
|
||||
sync::mpsc::{self, Receiver, Sender},
|
||||
thread::{self, JoinHandle},
|
||||
};
|
||||
|
||||
use brotli::enc::{backward_references::BrotliEncoderMode, BrotliEncoderParams};
|
||||
use rusqlite::Connection;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Returns the relative path of the SQLite database file this program writes.
fn db_path() -> &'static str {
    const DB_PATH: &str = "./archiveee.sqlite3";
    DB_PATH
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
_ = remove_file(db_path());
|
||||
let conn = make_conn()?;
|
||||
make_db_schema(&conn)?;
|
||||
let input = Path::new("./input");
|
||||
|
||||
// let (dict, dict_id) = make_dict(conn, input)?;
|
||||
|
||||
// let mut enc = zstd::bulk::Compressor::new(5)?;
|
||||
let paths = iterator(input).collect::<Vec<PathBuf>>();
|
||||
|
||||
let chunks = paths.chunks(paths.len() / 4);
|
||||
println!("{}", chunks.len());
|
||||
|
||||
let (tx, rx): (Sender<Entry>, Receiver<Entry>) = mpsc::channel();
|
||||
|
||||
let threads = chunks
|
||||
.map(|chunk| {
|
||||
let chunkk = chunk.to_vec();
|
||||
let ttx = tx.clone();
|
||||
thread::spawn(move || {
|
||||
let input = Path::new("./input");
|
||||
let params = file_compress_params();
|
||||
// let mut conn = make_conn().unwrap();
|
||||
println!("thread");
|
||||
// let trans = conn.transaction().unwrap();
|
||||
// {
|
||||
// let mut insert_stmt = trans
|
||||
// .prepare_cached("insert into files(path, content, compressed) values(?, ?, ?)")
|
||||
// .unwrap();
|
||||
for path in chunkk {
|
||||
compress_file(&path, input, ¶ms, &ttx);
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect::<Vec<JoinHandle<()>>>();
|
||||
let collector = thread::spawn(|| {
|
||||
let mut conn = make_conn().unwrap();
|
||||
println!("thread");
|
||||
let trans = conn.transaction().unwrap();
|
||||
{
|
||||
let mut insert_stmt = trans
|
||||
.prepare_cached("insert into files(path, content, compressed) values(?, ?, ?)")
|
||||
.unwrap();
|
||||
for entry in rx {
|
||||
insert_stmt.execute(entry).unwrap();
|
||||
}
|
||||
}
|
||||
trans.commit().unwrap();
|
||||
});
|
||||
drop(tx);
|
||||
|
||||
collector.join().unwrap();
|
||||
for thread in threads {
|
||||
thread.join().unwrap();
|
||||
}
|
||||
// .map(|path| -> Option<()> {
|
||||
// insert_file(path, input, params, insert_stmt);
|
||||
// Some(())
|
||||
// })
|
||||
// .collect::<Vec<Option<()>>>();
|
||||
// if iter.iter().any(|o| o.is_none()) {
|
||||
// println!("Algo falló");
|
||||
// }
|
||||
|
||||
// recurse(dict, conn)?;
|
||||
|
||||
println!("Hello, world!");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// fn insert_files(paths: &[PathBuf]) {
|
||||
// let input = Path::new("./input");
|
||||
// let params = file_compress_params();
|
||||
// // let mut conn = make_conn().unwrap();
|
||||
// println!("thread");
|
||||
// // let trans = conn.transaction().unwrap();
|
||||
// // {
|
||||
// // let mut insert_stmt = trans
|
||||
// // .prepare_cached("insert into files(path, content, compressed) values(?, ?, ?)")
|
||||
// // .unwrap();
|
||||
// for path in paths {
|
||||
// compress_file(path, input, &params, tx);
|
||||
// }
|
||||
// // insert_stmt.execute((path_str, content, false)).unwrap();
|
||||
// // }
|
||||
// // trans.commit().unwrap();
|
||||
// }
|
||||
|
||||
// fn insert_file(
|
||||
// path: &Path,
|
||||
// input: &Path,
|
||||
// params: &BrotliEncoderParams,
|
||||
// insert_stmt: &mut rusqlite::CachedStatement,
|
||||
// ) {
|
||||
// // let mut path_comp = zstd::bulk::Compressor::with_dictionary(0, &dict).unwrap();
|
||||
|
||||
// // println!("{:?}", path);
|
||||
// // let path_compressed = path_comp
|
||||
// // .compress(strip_input(&path, input).to_string_lossy().as_bytes())
|
||||
// // .unwrap();
|
||||
// let path_strip = strip_input(&path, input);
|
||||
// let path_str = path_strip.to_string_lossy();
|
||||
// if compressable(path) {
|
||||
// let mut content_compressed = Vec::new();
|
||||
// let mut file = File::open(path).unwrap();
|
||||
// brotli::BrotliCompress(&mut file, &mut content_compressed, &params).unwrap();
|
||||
// // let content_compressed = enc.compress(&content)?;
|
||||
// insert_stmt
|
||||
// .execute((path_str, content_compressed, true))
|
||||
// .unwrap();
|
||||
// } else {
|
||||
// let content = read(path.clone()).unwrap();
|
||||
// insert_stmt.execute((path_str, content, false)).unwrap();
|
||||
// }
|
||||
// }
|
||||
|
||||
fn make_conn() -> Result<Connection, Box<dyn Error>> {
|
||||
let conn = Connection::open(db_path())?;
|
||||
conn.pragma_update(None, "journal_mode", "OFF")?;
|
||||
conn.pragma_update(None, "synchronous", 0)?;
|
||||
Ok(conn)
|
||||
}
|
||||
|
||||
/// One row destined for the `files` table, sent from `compress_file` to the
/// collector: `(path, content, compressed)`. The `bool` is `true` when
/// `content` holds Brotli-compressed bytes and `false` when it holds the raw
/// file bytes.
type Entry = (String, Vec<u8>, bool);
|
||||
|
||||
fn compress_file(path: &Path, input: &Path, params: &BrotliEncoderParams, tx: &Sender<Entry>) {
|
||||
let path_strip = strip_input(&path, input);
|
||||
let path_str = path_strip.to_string_lossy();
|
||||
if compressable(path) {
|
||||
let mut content_compressed = Vec::new();
|
||||
let mut file = File::open(path).unwrap();
|
||||
brotli::BrotliCompress(&mut file, &mut content_compressed, ¶ms).unwrap();
|
||||
// let content_compressed = enc.compress(&content)?;
|
||||
// insert_stmt
|
||||
// .execute((path_str, content_compressed, true))
|
||||
// .unwrap();
|
||||
tx.send((path_str.to_string(), content_compressed, true))
|
||||
.unwrap();
|
||||
} else {
|
||||
let content = read(path.clone()).unwrap();
|
||||
// insert_stmt.execute((path_str, content, false)).unwrap();
|
||||
tx.send((path_str.to_string(), content, false)).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn make_db_schema(conn: &Connection) -> Result<(), Box<dyn Error>> {
|
||||
conn.execute(
|
||||
"create table files(path text, content blob, compressed bool)",
|
||||
[],
|
||||
)?;
|
||||
// conn.execute(
|
||||
// "create table files(path blob, path_dictionary_id integer, content blob, compressed bool)",
|
||||
// [],
|
||||
// )?;
|
||||
// conn.execute("create table path_dictionaries(dictionary blob)", [])?;
|
||||
conn.execute("create unique index path on files(path)", [])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// fn make_dict(conn: Connection, input: &Path) -> Result<(Vec<u8>, i64), Box<dyn Error>> {
|
||||
// let mut insert_dict_stmt =
|
||||
// conn.prepare_cached("insert into path_dictionaries(dictionary) values(?1)")?;
|
||||
// let mut all_paths = Vec::new();
|
||||
// let iter = iterator(input);
|
||||
// for entry in iter {
|
||||
// all_paths.push(entry.to_string_lossy().as_bytes().to_vec());
|
||||
// }
|
||||
// println!("path n {}", all_paths.len());
|
||||
// let dict = zstd::dict::from_samples(&all_paths, 999999)?;
|
||||
// println!("dict");
|
||||
// let dict_id = insert_dict_stmt.insert(params![&dict])?;
|
||||
// Ok((dict, dict_id))
|
||||
// }
|
||||
|
||||
fn file_compress_params() -> BrotliEncoderParams {
|
||||
let mut params = BrotliEncoderParams::default();
|
||||
params.quality = 0;
|
||||
params.mode = BrotliEncoderMode::BROTLI_MODE_TEXT;
|
||||
params
|
||||
}
|
||||
|
||||
fn iterator(input: &Path) -> impl Iterator<Item = PathBuf> {
|
||||
WalkDir::new(input)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.file_type().is_file())
|
||||
.map(|e| e.path().to_owned())
|
||||
.filter(|e| !already_compressed(e))
|
||||
}
|
||||
|
||||
/// Returns `path` relative to the `input` directory as an owned path.
///
/// # Panics
/// Panics if `path` does not start with `input`.
fn strip_input(path: &Path, input: &Path) -> PathBuf {
    let relative = path.strip_prefix(input).unwrap();
    relative.to_path_buf()
}
|
||||
|
||||
/// Reports whether `path` ends in `.br` or `.gz`, i.e. looks like a file that
/// was compressed beforehand. The check is case-sensitive and is done on the
/// lossy UTF-8 form of the whole path.
fn already_compressed(path: &PathBuf) -> bool {
    const COMPRESSED_SUFFIXES: [&str; 2] = [".br", ".gz"];

    let name = path.to_string_lossy();
    COMPRESSED_SUFFIXES
        .iter()
        .any(|suffix| name.ends_with(*suffix))
}
|
||||
|
||||
/// Reports whether `path` names a file that should be Brotli-compressed:
/// one ending in `.html`, `.css`, `.js`, `.json` or `.svg`. The check is
/// case-sensitive and is done on the lossy UTF-8 form of the whole path.
fn compressable(path: &Path) -> bool {
    const TEXT_SUFFIXES: [&str; 5] = [".html", ".css", ".js", ".json", ".svg"];

    let name = path.to_string_lossy();
    TEXT_SUFFIXES.iter().any(|suffix| name.ends_with(*suffix))
}
|
Loading…
Reference in a new issue