diff --git a/bun.lockb b/bun.lockb index 280e648..8427362 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/readme.md b/readme.md index 1775378..18c1a50 100644 --- a/readme.md +++ b/readme.md @@ -8,29 +8,19 @@ scrapeo "masivo" de precios y datos en supermercados argentinos (no hace falta correrlos porque ya hay listas armadas en [data/](./data/)) -- [warcificator](./warcificator/) descarga las paginas de productos y genera un archivo [WARC](https://iipc.github.io/warc-specifications/specifications/warc-format/warc-1.0/) con ellas -- el [scraper](./scraper/) procesa estos WARCs, extrayendo varios datos y guardandolos en una base de datos SQLite (definida en [db-datos](./db-datos/schema.ts)) +- el [scraper](./scraper/) descarga todos los links, extrayendo varios datos y guardandolos en una base de datos SQLite (definida en [db-datos](./db-datos/schema.ts)) - el [sitio](./sitio/) renderiza páginas a partir de la base de datos y hace gráficos lindos ## setup hay que instalar [Bun](https://bun.sh/), que lo estoy usando porque hacía que el scraper corra más rápido. quizás en el futuro lo reemplace con good old Node.js. -aparte, se necesita zstd, que se usa para comprimir los WARCs eficientemente. seguro está disponible en las repos de tu distro favorita :) - -empezá descargando un WARC con 50 páginas de sample, y recomprimilo con zstd: - -``` -wget --no-verbose --tries=3 --delete-after --input-file ./data/samples/Dia.txt --warc-file=dia-sample -gzip -dc dia-sample.warc.gz | zstd --long -15 --no-sparse -o dia-sample.warc.zst -``` - -después, scrapealo a una BD: +después, escrapea un sample de productos de Carrefour a una BD: ``` cd scraper/ bun install -bun cli.ts scrap ../dia-sample.warc.zst +bun cli.ts scrap ./data/samples/Carrefour.50.txt ``` ahora miralo en el sitio: diff --git a/scraper/Containerfile b/scraper/Containerfile index 7df71d2..8fbf68e 100644 --- a/scraper/Containerfile +++ b/scraper/Containerfile @@ -8,27 +8,12 @@ RUN bun install --frozen-lockfile \ && bun build scraper/cli.ts --target=bun --outfile=/tmp/cli.build.js \ && rm -rf node_modules/ -# https://dev.to/deciduously/use-multi-stage-docker-builds-for-statically-linked-rust-binaries-3jgd -FROM docker.io/rust:1.74 AS warcificator-builder -WORKDIR /usr/src/ -RUN rustup target add x86_64-unknown-linux-musl -RUN apt-get update && apt-get install -y musl-tools musl-dev - -RUN USER=root cargo new warcificator -WORKDIR /usr/src/warcificator -COPY ./warcificator/Cargo.toml ./warcificator/Cargo.lock ./ -RUN cargo build --release - -COPY ./warcificator/src ./src -RUN cargo install --target x86_64-unknown-linux-musl --path . - FROM base RUN apk add --no-cache wget zstd tini RUN printf "#!/bin/sh\nexec bun /bin/scraper auto\n" > /etc/periodic/daily/scraper \ && chmod +x /etc/periodic/daily/scraper COPY --from=builder /tmp/cli.build.js /bin/scraper -COPY --from=warcificator-builder /usr/local/cargo/bin/warcificator /bin/ COPY --from=builder /usr/src/app/db-datos/drizzle /bin/drizzle COPY --from=builder /usr/src/app/data /listas WORKDIR /app diff --git a/scraper/auto.ts b/scraper/auto.ts index 582cfd1..b4bd8d5 100644 --- a/scraper/auto.ts +++ b/scraper/auto.ts @@ -1,14 +1,10 @@ -import { mkdtemp, access, writeFile } from "node:fs/promises"; +import { mkdtemp, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; -import { join, resolve } from "node:path"; -import { spawn } from "node:child_process"; +import { join } from "node:path"; import { Supermercado, hosts } from "db-datos/supermercado.js"; import PQueue from "p-queue"; -import { format, formatDuration, intervalToDuration } from "date-fns"; -import { parseWarc } from "./scrap.js"; -import { S3Client } from "@aws-sdk/client-s3"; -import { Upload } from "@aws-sdk/lib-storage"; -import { BunFile } from "bun"; +import { formatDuration, intervalToDuration } from "date-fns"; +import { downloadList } from "./scrap.js"; import { db } from "db-datos/db.js"; import { like } from "drizzle-orm"; import { productoUrls } from "db-datos/schema.js"; @@ -23,7 +19,7 @@ const supermercados: Supermercado[] = [ ]; // hacemos una cola para el scrapeo para no tener varios writers a la BD y no sobrecargar la CPU -const scrapQueue = new PQueue({ concurrency: 1 }); +const scrapQueue = new PQueue({ concurrency: 4 }); export async function auto() { const a = new Auto(); @@ -31,35 +27,9 @@ export async function auto() { } class Auto { - s3Config?: { s3: S3Client; bucketName: string }; telegramConfig?: { token: string; chatId: string }; constructor() { - if ( - !process.env.S3_ACCESS_KEY_ID || - !process.env.S3_SECRET_ACCESS_KEY || - !process.env.S3_BUCKET_NAME - ) { - if (process.env.NODE_ENV === "development") { - console.warn("faltan creds de s3, no voy a subir a s3"); - } else { - throw new Error("faltan creds de s3"); - } - } else { - this.s3Config = { - // https://www.backblaze.com/docs/cloud-storage-use-the-aws-sdk-for-javascript-v3-with-backblaze-b2 - s3: new S3Client({ - endpoint: "https://s3.us-west-004.backblazeb2.com", - region: "us-west-004", - credentials: { - accessKeyId: process.env.S3_ACCESS_KEY_ID, - secretAccessKey: process.env.S3_SECRET_ACCESS_KEY, - }, - }), - bucketName: process.env.S3_BUCKET_NAME, - }; - } - if (!process.env.TELEGRAM_BOT_TOKEN) console.warn("no hay TELEGRAM_BOT_TOKEN, no voy a loggear por allá"); else if (!process.env.TELEGRAM_BOT_CHAT_ID) @@ -107,93 +77,29 @@ class Auto { const urls = results.map((r) => r.url); await writeFile(listPath, urls.join("\n") + "\n"); - const date = new Date(); - const zstdWarcName = `${supermercado}-${format( - date, - "yyyy-MM-dd-HH:mm" - )}.warc.zst`; - const zstdWarcPath = join(ctxPath, zstdWarcName); - const subproc = Bun.spawn({ - cmd: ["warcificator", listPath, zstdWarcPath], - stderr: "ignore", - stdout: "ignore", - cwd: ctxPath, - }); - const t0 = performance.now(); - await subproc.exited; - this.inform( - `[downloader] ${zstdWarcName} tardó ${formatMs(performance.now() - t0)}` - ); - - if (!(await fileExists(zstdWarcPath))) { - const err = this.report(`no encontré el ${zstdWarcPath}`); - throw err; - } - - this.scrapAndInform({ zstdWarcPath, zstdWarcName }); - - try { - await this.uploadToBucket({ - fileName: zstdWarcName, - file: Bun.file(zstdWarcPath), - }); - } catch (error) { - this.inform(`Falló subir ${zstdWarcName} a S3; ${error}`); - console.error(error); - } - + this.scrapAndInform({ listPath }); // TODO: borrar archivos temporales } - async scrapAndInform({ - zstdWarcPath, - zstdWarcName, - }: { - zstdWarcPath: string; - zstdWarcName: string; - }) { + async scrapAndInform({ listPath }: { listPath: string }) { const res = await scrapQueue.add(async () => { const t0 = performance.now(); - const progress = await parseWarc(zstdWarcPath); + const progress = await downloadList(listPath); return { took: performance.now() - t0, progress }; }); if (res) { const { took, progress } = res; this.inform( - `Procesado ${zstdWarcName} (${progress.done} ok, ${ - progress.errors.length - } errores) (tardó ${formatMs(took)})` + `Procesado ${listPath} (${progress.done} ok, ${ + progress.skipped + } skipped, ${progress.errors.length} errores) (tardó ${formatMs(took)})` ); } else { - this.inform(`Algo falló en ${zstdWarcName}`); + this.inform(`Algo falló en ${listPath}`); } } - async uploadToBucket({ - fileName, - file, - }: { - fileName: string; - file: BunFile; - }) { - if (!this.s3Config) { - this.inform( - `[s3] Se intentó subir ${fileName} pero no tenemos creds de S3` - ); - return; - } - const parallelUploads3 = new Upload({ - client: this.s3Config.s3, - params: { - Bucket: this.s3Config.bucketName, - Key: fileName, - Body: file, - }, - }); - await parallelUploads3.done(); - } - inform(msg: string) { this.sendTelegramMsg(msg); console.info(msg); @@ -216,16 +122,6 @@ class Auto { } } -// no se llama exists porque bun tiene un bug en el que usa fs.exists por mas que exista una funcion llamada exists -async function fileExists(path: string) { - try { - access(path); - return true; - } catch { - return false; - } -} - function formatMs(ms: number) { return formatDuration(intervalToDuration({ start: 0, end: Math.round(ms) })); } diff --git a/scraper/cli.ts b/scraper/cli.ts index 9507090..3e8af80 100644 --- a/scraper/cli.ts +++ b/scraper/cli.ts @@ -2,7 +2,7 @@ import { scrapCarrefourProducts } from "../carrefour-link-scraper/index.js"; import { scrapCotoProducts } from "../coto-link-scraper/index.js"; import { scrapDiaProducts } from "../dia-link-scraper/index.js"; import { auto } from "./auto.js"; -import { parseWarc } from "./scrap.js"; +import { downloadList } from "./scrap.js"; if (process.argv[2] === "auto") { await auto(); @@ -13,16 +13,16 @@ if (process.argv[2] === "auto") { } else if (process.argv[2] === "scrap-coto-links") { await scrapCotoProducts(); } else if (process.argv[2] === "scrap") { - const warcPaths = process.argv.slice(3); - if (warcPaths.length > 0) { - for (const path of warcPaths) { - const res = await parseWarc(path); + const urlLists = process.argv.slice(3); + if (urlLists.length > 0) { + for (const path of urlLists) { + const res = await downloadList(path); console.info("======================================="); console.info(path, res); console.info("======================================="); } } else { - console.error("Especificá WARCs para scrapear."); + console.error("Especificá listas de urls para scrapear."); process.exit(1); } } else { diff --git a/scraper/package.json b/scraper/package.json index 1ca6dd7..edaf0ca 100644 --- a/scraper/package.json +++ b/scraper/package.json @@ -19,8 +19,8 @@ "drizzle-orm": "=0.29.1", "linkedom": "^0.16.5", "nanoid": "^5.0.4", + "p-map": "^7.0.1", "p-queue": "^8.0.1", - "warcio": "^2.2.1", "zod": "^3.22.4" }, "devDependencies": { diff --git a/scraper/scrap.ts b/scraper/scrap.ts index 0698ffe..07a473f 100644 --- a/scraper/scrap.ts +++ b/scraper/scrap.ts @@ -1,68 +1,52 @@ import * as schema from "db-datos/schema.js"; -import { WARCParser } from "warcio"; import { writeFile } from "fs/promises"; import { createHash } from "crypto"; import { getCarrefourProduct } from "./parsers/carrefour.js"; import { getDiaProduct } from "./parsers/dia.js"; import { getCotoProduct } from "./parsers/coto.js"; import { join } from "path"; -import { and, eq, sql } from "drizzle-orm"; import { db } from "db-datos/db.js"; +import pMap from "p-map"; const DEBUG = false; const PARSER_VERSION = 4; -const getPrevPrecio = db - .select({ id: schema.precios.id }) - .from(schema.precios) - .where( - and( - eq(schema.precios.warcRecordId, sql.placeholder("warcRecordId")), - eq(schema.precios.parserVersion, PARSER_VERSION) - ) - ) - .limit(1) - .prepare(); - export type Precio = typeof schema.precios.$inferInsert; export type Precioish = Omit< Precio, "fetchedAt" | "url" | "id" | "warcRecordId" | "parserVersion" >; -export async function parseWarc(path: string) { - // const warc = createReadStream(path); +export async function downloadList(path: string) { let progress: { done: number; - errors: { error: any; warcRecordId: string; path: string }[]; - } = { done: 0, errors: [] }; + skipped: number; + errors: { error: any; url: string; path: string }[]; + } = { done: 0, skipped: 0, errors: [] }; - const proc = Bun.spawn(["zstdcat", "-d", path], {}); - const warc = proc.stdout; - // TODO: tirar error si falla zstd + let list = (await Bun.file(path).text()) + .split("\n") + .filter((s) => s.length > 0); - const parser = new WARCParser(warc); - for await (const record of parser) { - if (record.warcType === "response") { - if (!record.warcTargetURI) continue; - const warcRecordId = record.warcHeader("WARC-Record-ID"); - if (!warcRecordId) throw new Error("No tiene WARC-Record-ID"); - - if (getPrevPrecio.get({ warcRecordId })) { - console.debug(`skipped ${warcRecordId}`); - continue; + await pMap( + list, + async (urlS) => { + let url; + try { + url = new URL(urlS); + } catch (err) { + console.error("error parseando", urlS); + return; } - if (record.httpHeaders?.statusCode !== 200) { - console.debug( - `skipped ${warcRecordId} because status=${record.httpHeaders?.statusCode} (!=200)` - ); - continue; + const res = await fetch(url); + if (!res.ok) { + console.debug(`skipped ${urlS} because status=${res.status} (!=200)`); + progress.skipped++; + return; } - // TODO: sobreescribir si existe el mismo record-id pero con version mas bajo? - const html = await record.contentText(); + const html = await res.text(); - const url = new URL(record.warcTargetURI); try { let ish: Precioish | undefined = undefined; if (url.hostname === "www.carrefour.com.ar") @@ -75,9 +59,8 @@ export async function parseWarc(path: string) { const p: Precio = { ...ish, - fetchedAt: new Date(record.warcDate!), - url: record.warcTargetURI, - warcRecordId, + fetchedAt: new Date(), + url: urlS, parserVersion: PARSER_VERSION, }; @@ -85,28 +68,23 @@ export async function parseWarc(path: string) { progress.done++; } catch (error) { - console.error({ path, warcRecordId, error }); + console.error({ path, urlS, error }); progress.errors.push({ path, - warcRecordId, + url: urlS, error, }); if (DEBUG) { - const urlHash = createHash("md5") - .update(record.warcTargetURI!) - .digest("hex"); + const urlHash = createHash("md5").update(urlS).digest("hex"); const output = join("debug", `${urlHash}.html`); await writeFile(output, html); console.error(`wrote html to ${output}`); } } - } - } - - if ((await proc.exited) !== 0) { - throw new Error("zstd tiró un error"); - } + }, + { concurrency: 32 } + ); return progress; } diff --git a/scraper/warc.ts b/scraper/warc.ts deleted file mode 100644 index 4e719c6..0000000 --- a/scraper/warc.ts +++ /dev/null @@ -1,157 +0,0 @@ -const crlf = "\r\n"; -const crlfB = Buffer.from(crlf, "utf-8"); -const crlfcrlf = crlf + crlf; -const crlfcrlfB = Buffer.from(crlfcrlf, "utf-8"); -const warc10B = Buffer.from("WARC/1.0", "utf-8"); -const emptyBuffer = Buffer.from("", "utf-8"); - -export async function* parseWARC(path: string) { - const warc = Bun.spawn(["zstd", "-do", "/dev/stdout", path], { - stderr: "ignore", - }).stdout; - - // const warc = Bun.stdin.stream(1024 * 1024 * 128); - - // let buffer: Uint8Array[] = []; - // const transform = new TransformStream({ - // transform(chunk, controller) { - // buffer.push(chunk); - // if ( - // buffer.reduce((prev, curr) => prev + curr.length, 0) > - // 1024 * 1024 * 64 - // ) { - // controller.enqueue(Buffer.concat(buffer)); - // buffer = []; - // } - // }, - // flush(controller) { - // controller.enqueue(Buffer.concat(buffer)); - // }, - // }); - - // warc.pipeTo(transform.writable); - - const reader = warc.getReader(); - // const reader = transform.readable.getReader(); - - // const warc = process.stdin; - - let arrays: Buffer[] = []; - let done = false; - while (!done) { - const r = await reader.readMany(); - if (r.done) { - done = true; - } else { - arrays = arrays.concat(r.value.map((x) => Buffer.from(x))); - if ( - arrays.reduce((prev, curr) => prev + curr.length, 0) < - 1024 * 1024 * 10 - ) - continue; - } - let buf: Buffer; - while ( - ((buf = arrays.length === 1 ? arrays[0] : Buffer.concat(arrays)), - buf.subarray(warc10B.length).includes(warc10B)) - ) { - const until = buf.indexOf(crlfcrlfB); - const header = buf.subarray(0, until); - - const lines = splitBuffer(header, crlfB); - let i = 0; - const nextLine = () => { - const line = lines[i]; - i++; - return line ? line : emptyBuffer; - }; - let line: Buffer; - if (!(line = nextLine()).equals(warc10B)) { - throw new Error(`No WARC 1.0 header in '${line}'`); - } - - let field; - let fields = new Map(); - while ( - ((line = nextLine()), - (field = parseField(line.toString("utf8"))), - line.length !== 0) - ) { - fields.set(field[0], field[1]); - } - const length = parseInt(fields.get("Content-Length")!); - - const rawHttp = buf.subarray( - until + crlfcrlfB.length, - until + crlfcrlfB.length + length - ); - const rawHttpHeaders = rawHttp - .subarray( - rawHttp.indexOf(crlfB) + crlfB.length, - rawHttp.indexOf(crlfcrlfB) + crlfcrlfB.length - ) - .toString(); - - let httpHeaders = new Map(); - rawHttpHeaders.split(crlf).forEach((line) => { - if (!line.length) return; - const [key, val] = line.split(": "); - httpHeaders.set(key, val); - }); - - let content = rawHttp.subarray( - rawHttp.indexOf(crlfcrlfB) + crlfcrlfB.length - ); - - if (httpHeaders.get("Transfer-Encoding") === "chunked") { - content = dechunk(content); - } - - // console.debug(fields.get("WARC-Date"), content.length); - - yield { - fields, - content, - }; - - arrays = [ - buf.subarray(until + crlfcrlfB.length + length + crlfcrlfB.length), - ]; - if (!arrays[0].length) break; - } - } -} - -function splitBuffer(buffer: Buffer, val: Buffer): Buffer[] { - let bufs = []; - let rest = buffer; - let i; - while (((i = rest.indexOf(val)), i !== -1)) { - bufs.push(rest.subarray(0, i)); - rest = rest.subarray(i + val.length); - } - bufs.push(rest); - return bufs; -} - -function parseField(line: string): [string, string] { - const [key, val] = line.split(": "); - return [key, val]; -} - -function dechunk(content: Buffer): Buffer { - let actualContent = []; - - while (true) { - let until = content.indexOf(crlf); - const hexLen = content.subarray(0, until).toString(); - if (hexLen.length === 0) break; - const len = parseInt(hexLen, 16); - actualContent.push( - content.subarray(until + crlfB.length, until + crlfB.length + len) - ); - content = content.subarray(until + crlfB.length + len + crlfB.length); - } - - return Buffer.concat(actualContent); -} diff --git a/warcificator/Cargo.lock b/warcificator/Cargo.lock deleted file mode 100644 index 4aaf9e8..0000000 --- a/warcificator/Cargo.lock +++ /dev/null @@ -1,1373 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "addr2line" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "adler32" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - -[[package]] -name = "async-channel" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ca33f4bc4ed1babef42cad36cc1f51fa88be00420404e5b1e80ab1b18f7678c" -dependencies = [ - "concurrent-queue", - "event-listener", - "event-listener-strategy", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-compression" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5" -dependencies = [ - "brotli", - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "backtrace" -version = "0.3.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "base64" -version = "0.21.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "brotli" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bumpalo" -version = "3.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" - -[[package]] -name = "bytes" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" - -[[package]] -name = "cc" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" -dependencies = [ - "libc", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-targets 0.48.5", -] - -[[package]] -name = "concurrent-queue" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "encoding_rs" -version = "0.8.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "event-listener" -version = "4.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84f2cdcf274580f2d63697192d744727b3198894b1bf02923643bf59e2c26712" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener-strategy" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3" -dependencies = [ - "event-listener", - "pin-project-lite", -] - -[[package]] -name = "flate2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-core", - "futures-task", - "pin-project-lite", - "pin-utils", -] - -[[package]] -name = "getrandom" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" - -[[package]] -name = "h2" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6250322ef6e60f93f9a2162799302cd6f68f79f6e5d85c8c16f14d1d958178" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "hashbrown" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" - -[[package]] -name = "hermit-abi" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" - -[[package]] -name = "http" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "hyper" -version = "0.14.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper-rustls" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" -dependencies = [ - "futures-util", - "http", - "hyper", - "rustls", - "tokio", - "tokio-rustls", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "ipnet" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - -[[package]] -name = "itoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" - -[[package]] -name = "js-sys" -version = "0.3.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lexical-core" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" -dependencies = [ - "arrayvec", - "bitflags", - "cfg-if", - "ryu", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.151" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" - -[[package]] -name = "libflate" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ff4ae71b685bbad2f2f391fe74f6b7659a34871c08b210fdc039e43bee07d18" -dependencies = [ - "adler32", - "crc32fast", - "libflate_lz77", -] - -[[package]] -name = "libflate_lz77" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a52d3a8bfc85f250440e4424db7d857e241a3aebbbe301f3eb606ab15c39acbf" -dependencies = [ - "rle-decode-fast", -] - -[[package]] -name = "lock_api" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" - -[[package]] -name = "memchr" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "miniz_oxide" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" -dependencies = [ - "adler", -] - -[[package]] -name = "mio" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" -dependencies = [ - "libc", - "wasi", - "windows-sys", -] - -[[package]] -name = "nom" -version = "5.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" -dependencies = [ - "lexical-core", - "memchr", - "version_check", -] - -[[package]] -name = "num-traits" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "object" -version = "0.32.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "parking" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.48.5", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "pin-project-lite" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "proc-macro2" -version = "1.0.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags", -] - -[[package]] -name = "reqwest" -version = "0.11.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" -dependencies = [ - "async-compression", - "base64", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-rustls", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "system-configuration", - "tokio", - "tokio-rustls", - "tokio-util", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "webpki-roots", - "winreg", -] - -[[package]] -name = "ring" -version = "0.17.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" -dependencies = [ - "cc", - "getrandom", - "libc", - "spin", - "untrusted", - "windows-sys", -] - -[[package]] -name = "rle-decode-fast" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustls" -version = "0.21.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" -dependencies = [ - "log", - "ring", - "rustls-webpki", - "sct", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64", -] - -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "ryu" -version = "1.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - -[[package]] -name = "serde" -version = "1.0.193" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.193" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" - -[[package]] -name = "socket2" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.35.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "parking_lot", - "pin-project-lite", - "signal-hook-registry", - "socket2", - "tokio-macros", - "windows-sys", -] - -[[package]] -name = "tokio-macros" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", - "tracing", -] - -[[package]] -name = "tower-service" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" - -[[package]] -name = "tracing" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" -dependencies = [ - "pin-project-lite", - "tracing-core", -] - -[[package]] -name = "tracing-core" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" -dependencies = [ - "once_cell", -] - -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - -[[package]] -name = "unicode-bidi" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - -[[package]] -name = "url" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - -[[package]] -name = "warc" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e44585619c04f9a4bcfdfbbbefff9e7c4e277a693424162b30c88aaf3abd785e" -dependencies = [ - "chrono", - "libflate", - "nom", - "url", - "uuid", -] - -[[package]] -name = "warcificator" -version = "0.1.0" -dependencies = [ - "async-channel", - "http", - "reqwest", - "tokio", - "warc", -] - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" - -[[package]] -name = "web-sys" -version = "0.3.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "webpki-roots" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.0", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" -dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" - -[[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys", -] diff --git a/warcificator/Cargo.toml b/warcificator/Cargo.toml deleted file mode 100644 index 723f0dc..0000000 --- a/warcificator/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "warcificator" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -async-channel = "2.1.1" -http = "0.2.11" -reqwest = { version = "0.11.23", default-features = false, features = [ - "rustls-tls", - "gzip", - "brotli", -] } -tokio = { version = "1.35.1", features = ["full"] } -warc = "0.3.1" diff --git a/warcificator/src/main.rs b/warcificator/src/main.rs deleted file mode 100644 index 8743907..0000000 --- a/warcificator/src/main.rs +++ /dev/null @@ -1,199 +0,0 @@ -use async_channel::{Receiver, Sender}; -use std::{ - env::args, - fs, - net::SocketAddr, - process::{Command, Stdio}, -}; -use tokio::io::{stderr, AsyncWriteExt}; -use warc::{RecordBuilder, WarcHeader, WarcWriter}; - -struct FullExchange { - socket_addr: Option, - request: http::Request<&'static str>, - response: http::Response>, -} - -#[tokio::main] -async fn main() { - let mut args = args().skip(1); - let links_list_path = args.next().unwrap(); - let output_zstd_path = args.next().unwrap(); - let links_str = fs::read_to_string(links_list_path).unwrap(); - let links = links_str - .split("\n") - .map(|s| s.trim()) - .filter(|s| s.len() > 0) - .map(|s| s.to_owned()) - .collect::>(); - - let handle = { - let (sender, receiver) = async_channel::bounded::(1); - let (res_sender, res_receiver) = async_channel::unbounded::(); - - let mut handles = Vec::new(); - for _ in 1..16 { - let rx = receiver.clone(); - let tx = res_sender.clone(); - handles.push(tokio::spawn(worker(rx, tx))); - } - - let warc_writer_handle = tokio::spawn(warc_writer(res_receiver, output_zstd_path)); - - for link in links { - sender.send_blocking(link).unwrap(); - } - sender.close(); - - for handle in handles { - handle.await.unwrap(); - } - - warc_writer_handle - }; - handle.await.unwrap(); -} - -async fn worker(rx: Receiver, tx: Sender) { - let client = reqwest::ClientBuilder::default().build().unwrap(); - while let Ok(url) = rx.recv().await { - let res = fetch(&client, url.clone()).await; - match res { - Ok(ex) => { - tx.send(ex).await.unwrap(); - } - Err(err) => { - stderr() - .write_all(format!("Failed to fetch {}: {:#?}", url.as_str(), err).as_bytes()) - .await - .unwrap(); - } - } - } -} - -async fn fetch(client: &reqwest::Client, url: String) -> Result { - let request = client.get(url).build().unwrap(); - let mut http_request_builder = http::Request::builder() - .method(request.method()) - .uri(request.url().as_str()); - for (key, val) in request.headers() { - http_request_builder = http_request_builder.header(key, val); - } - let response = client.execute(request).await?; - - let ip_address = response.remote_addr(); - - let http_request = { - http_request_builder - .version(response.version()) - .body("") - .unwrap() - }; - - let http_response = { - let mut http_response_builder = http::Response::<()>::builder() - .status(response.status()) - .version(response.version()); - for (key, val) in response.headers() { - http_response_builder = http_response_builder.header(key, val); - } - let body = response.bytes().await?; - http_response_builder.body(body.to_vec()).unwrap() - }; - - Ok(FullExchange { - socket_addr: ip_address, - request: http_request, - response: http_response, - }) -} - -async fn warc_writer(rx: Receiver, output_zstd_path: String) { - let zstd_proc = Command::new("zstd") - .args(&["-T0", "-15", "--long", "-o", &output_zstd_path]) - .stdin(Stdio::piped()) - .stderr(Stdio::null()) - .stdout(Stdio::null()) - .spawn() - .unwrap(); - - let mut writer = WarcWriter::new(zstd_proc.stdin.unwrap()); - writer - .write( - &RecordBuilder::default() - .version("1.0".to_owned()) - .warc_type(warc::RecordType::WarcInfo) - .header(WarcHeader::ContentType, "application/warc-fields") - .body(format!("software: preciazo-warcificator/0.0.0\nformat: WARC file version 1.0\nconformsTo: http://www.archive.org/documents/WarcFileFormat-1.0.html").into()) - .build() - .unwrap(), - ) - .unwrap(); - while let Ok(res) = rx.recv().await { - let uri = res.request.uri().to_string(); - let req_record = { - let mut builder = RecordBuilder::default() - .version("1.0".to_owned()) - .warc_type(warc::RecordType::Request) - .header(WarcHeader::TargetURI, uri.clone()) - .header(WarcHeader::ContentType, "application/http;msgtype=request") - .header( - WarcHeader::Unknown("X-Warcificator-Lying".to_string()), - "the request contains other headers not included here", - ); - if let Some(addr) = res.socket_addr { - builder = builder.header(WarcHeader::IPAddress, addr.ip().to_string()); - } - builder - .body(format_http11_request(res.request).into_bytes()) - .build() - .unwrap() - }; - writer.write(&req_record).unwrap(); - writer - .write(&{ - let mut builder = RecordBuilder::default() - .version("1.0".to_owned()) - .warc_type(warc::RecordType::Response) - .header(WarcHeader::TargetURI, uri) - .header(WarcHeader::ConcurrentTo, req_record.warc_id()) - .header(WarcHeader::ContentType, "application/http;msgtype=response"); - if let Some(addr) = res.socket_addr { - builder = builder.header(WarcHeader::IPAddress, addr.ip().to_string()); - } - builder - .body(format_http11_response(res.response)) - .build() - .unwrap() - }) - .unwrap(); - } -} - -fn format_http11_request(req: http::Request<&'static str>) -> String { - let start_line = format!("{} {} HTTP/1.1", req.method().as_str(), req.uri().path()); - let headers_str = req - .headers() - .iter() - .map(|(key, val)| format!("{}: {}\r\n", key, val.to_str().unwrap())) - .collect::(); - - [start_line.as_str(), headers_str.as_str(), req.body()].join("\r\n") -} - -fn format_http11_response(res: http::Response>) -> Vec { - let start_line = format!( - "HTTP/1.1 {} {}", - res.status().as_str(), - res.status().canonical_reason().unwrap_or("") - ); - let headers_str = res - .headers() - .iter() - .map(|(key, val)| format!("{}: {}\r\n", key, val.to_str().unwrap())) - .collect::(); - - let crlf: &[u8] = &[13, 10]; - [start_line.as_bytes(), headers_str.as_bytes(), res.body()].join(crlf) -}