mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-26 11:36:20 +00:00
no crear db en scraper
This commit is contained in:
parent
e65328d025
commit
ea4026855e
1 changed file with 5 additions and 16 deletions
|
@ -1,11 +1,9 @@
|
||||||
/// <reference lib="dom" />
|
/// <reference lib="dom" />
|
||||||
/// <reference lib="dom.iterable" />
|
/// <reference lib="dom.iterable" />
|
||||||
/// <reference types="node" />
|
|
||||||
import { Database } from "bun:sqlite";
|
import { Database } from "bun:sqlite";
|
||||||
import { drizzle } from "drizzle-orm/bun-sqlite";
|
import { drizzle } from "drizzle-orm/bun-sqlite";
|
||||||
import { precios } from "db-datos/schema.js";
|
import * as schema from "db-datos/schema.js";
|
||||||
import { WARCParser } from "warcio";
|
import { WARCParser } from "warcio";
|
||||||
import { createReadStream, createWriteStream } from "fs";
|
|
||||||
import { writeFile } from "fs/promises";
|
import { writeFile } from "fs/promises";
|
||||||
import { createHash } from "crypto";
|
import { createHash } from "crypto";
|
||||||
import { getCarrefourProduct } from "./carrefour.js";
|
import { getCarrefourProduct } from "./carrefour.js";
|
||||||
|
@ -15,35 +13,26 @@ import { join } from "path";
|
||||||
import pMap from "p-map";
|
import pMap from "p-map";
|
||||||
|
|
||||||
const DEBUG = false;
|
const DEBUG = false;
|
||||||
|
const PARSER_VERSION = 1;
|
||||||
|
|
||||||
const sqlite = new Database("sqlite.db");
|
const sqlite = new Database("sqlite.db");
|
||||||
const db = drizzle(sqlite);
|
const db = drizzle(sqlite, { schema });
|
||||||
|
|
||||||
sqlite.run(`
|
sqlite.run(`
|
||||||
pragma journal_mode = WAL;
|
pragma journal_mode = WAL;
|
||||||
PRAGMA synchronous = NORMAL;
|
PRAGMA synchronous = NORMAL;
|
||||||
`);
|
`);
|
||||||
sqlite.run(`
|
|
||||||
create table if not exists precios(
|
|
||||||
id integer primary key autoincrement,
|
|
||||||
ean text not null,
|
|
||||||
fetched_at text not null,
|
|
||||||
precio_centavos integer,
|
|
||||||
in_stock integer,
|
|
||||||
url text
|
|
||||||
);
|
|
||||||
`);
|
|
||||||
|
|
||||||
let progress = { done: 0, errors: 0 };
|
let progress = { done: 0, errors: 0 };
|
||||||
await pMap(process.argv.slice(2), (path) => parseWarc(path), {
|
await pMap(process.argv.slice(2), (path) => parseWarc(path), {
|
||||||
concurrency: 40,
|
concurrency: 40,
|
||||||
});
|
});
|
||||||
|
|
||||||
export type Precio = typeof precios.$inferInsert;
|
export type Precio = typeof schema.precios.$inferInsert;
|
||||||
export type Precioish = Omit<Precio, "fetchedAt" | "url" | "id">;
|
export type Precioish = Omit<Precio, "fetchedAt" | "url" | "id">;
|
||||||
|
|
||||||
async function storePrecioPoint(point: Precio) {
|
async function storePrecioPoint(point: Precio) {
|
||||||
await db.insert(precios).values(point);
|
await db.insert(schema.precios).values(point);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function parseWarc(path: string) {
|
async function parseWarc(path: string) {
|
||||||
|
|
Loading…
Reference in a new issue