Compare commits

..

No commits in common. "8ccd69de1bb46ec014137b623b7f2458b581134a" and "b66cb6782cd8e3c0998f42172f1c05ee71952346" have entirely different histories.

14 changed files with 2237 additions and 349 deletions

File diff suppressed because it is too large Load diff

View file

@ -11,12 +11,12 @@
"author": "",
"license": "ISC",
"dependencies": {
"drizzle-orm": "^0.29.1"
},
"devDependencies": {
"better-sqlite3": "^9.2.2",
"drizzle-kit": "^0.20.7",
"tsx": "^4.7.0",
"drizzle-orm": "^0.29.1",
"tsx": "^4.7.0"
},
"devDependencies": {
"@types/better-sqlite3": "^7.6.8"
}
}

View file

@ -103,13 +103,16 @@ importers:
dayjs:
specifier: ^1.11.10
version: 1.11.10
db-datos:
specifier: workspace:^
version: link:../db-datos
drizzle-orm:
specifier: ^0.29.1
version: 0.29.1(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2)
devDependencies:
'@sveltejs/adapter-node':
specifier: ^2.0.2
version: 2.0.2(@sveltejs/kit@2.0.6)
'@sveltejs/adapter-auto':
specifier: ^3.0.0
version: 3.0.1(@sveltejs/kit@2.0.6)
'@sveltejs/kit':
specifier: ^2.0.0
version: 2.0.6(@sveltejs/vite-plugin-svelte@3.0.1)(svelte@4.2.8)(vite@5.0.10)
@ -122,9 +125,6 @@ importers:
autoprefixer:
specifier: ^10.4.16
version: 10.4.16(postcss@8.4.32)
db-datos:
specifier: workspace:^
version: link:../db-datos
postcss:
specifier: ^8.4.32
version: 8.4.32
@ -938,70 +938,6 @@ packages:
resolution: {integrity: sha512-2LuNTFBIO0m7kKIQvvPHN6UE63VjpmL9rnEEaOOaiSPbZK+zUOYIzBAWcED+3XYzhYsd/0mD57VdxAEqqV52CQ==}
dev: true
/@rollup/plugin-commonjs@25.0.7(rollup@4.9.1):
resolution: {integrity: sha512-nEvcR+LRjEjsaSsc4x3XZfCCvZIaSMenZu/OiwOKGN2UhQpAYI7ru7czFvyWbErlpoGjnSX3D5Ch5FcMA3kRWQ==}
engines: {node: '>=14.0.0'}
peerDependencies:
rollup: ^2.68.0||^3.0.0||^4.0.0
peerDependenciesMeta:
rollup:
optional: true
dependencies:
'@rollup/pluginutils': 5.1.0(rollup@4.9.1)
commondir: 1.0.1
estree-walker: 2.0.2
glob: 8.1.0
is-reference: 1.2.1
magic-string: 0.30.5
rollup: 4.9.1
dev: true
/@rollup/plugin-json@6.1.0(rollup@4.9.1):
resolution: {integrity: sha512-EGI2te5ENk1coGeADSIwZ7G2Q8CJS2sF120T7jLw4xFw9n7wIOXHo+kIYRAoVpJAN+kmqZSoO3Fp4JtoNF4ReA==}
engines: {node: '>=14.0.0'}
peerDependencies:
rollup: ^1.20.0||^2.0.0||^3.0.0||^4.0.0
peerDependenciesMeta:
rollup:
optional: true
dependencies:
'@rollup/pluginutils': 5.1.0(rollup@4.9.1)
rollup: 4.9.1
dev: true
/@rollup/plugin-node-resolve@15.2.3(rollup@4.9.1):
resolution: {integrity: sha512-j/lym8nf5E21LwBT4Df1VD6hRO2L2iwUeUmP7litikRsVp1H6NWx20NEp0Y7su+7XGc476GnXXc4kFeZNGmaSQ==}
engines: {node: '>=14.0.0'}
peerDependencies:
rollup: ^2.78.0||^3.0.0||^4.0.0
peerDependenciesMeta:
rollup:
optional: true
dependencies:
'@rollup/pluginutils': 5.1.0(rollup@4.9.1)
'@types/resolve': 1.20.2
deepmerge: 4.3.1
is-builtin-module: 3.2.1
is-module: 1.0.0
resolve: 1.22.8
rollup: 4.9.1
dev: true
/@rollup/pluginutils@5.1.0(rollup@4.9.1):
resolution: {integrity: sha512-XTIWOPPcpvyKI6L1NHo0lFlCyznUEyPmPY1mc3KpPVDYulHSTvyeLNVW00QTLIAFNhR3kYnJTQHeGqU4M3n09g==}
engines: {node: '>=14.0.0'}
peerDependencies:
rollup: ^1.20.0||^2.0.0||^3.0.0||^4.0.0
peerDependenciesMeta:
rollup:
optional: true
dependencies:
'@types/estree': 1.0.5
estree-walker: 2.0.2
picomatch: 2.3.1
rollup: 4.9.1
dev: true
/@rollup/rollup-android-arm-eabi@4.9.1:
resolution: {integrity: sha512-6vMdBZqtq1dVQ4CWdhFwhKZL6E4L1dV6jUjuBvsavvNJSppzi6dLBbuV+3+IyUREaj9ZFvQefnQm28v4OCXlig==}
cpu: [arm]
@ -1106,16 +1042,13 @@ packages:
dev: true
optional: true
/@sveltejs/adapter-node@2.0.2(@sveltejs/kit@2.0.6):
resolution: {integrity: sha512-iboANjLIB7Af74+og5IEQVSPsIfaO+o9zldU/7ljCGCrBj0t2gQlINtuwUhjvwhLgw9vHgICYWWZFxLM2C0zrg==}
/@sveltejs/adapter-auto@3.0.1(@sveltejs/kit@2.0.6):
resolution: {integrity: sha512-OpilmvRN136lUgOa9F0zpSI6g+PouOmk+YvJQrB+/hAtllLghjjYuoyfUsrF7U6oJ52cxCtAJTPXgZdyyCffrQ==}
peerDependencies:
'@sveltejs/kit': ^2.0.0
dependencies:
'@rollup/plugin-commonjs': 25.0.7(rollup@4.9.1)
'@rollup/plugin-json': 6.1.0(rollup@4.9.1)
'@rollup/plugin-node-resolve': 15.2.3(rollup@4.9.1)
'@sveltejs/kit': 2.0.6(@sveltejs/vite-plugin-svelte@3.0.1)(svelte@4.2.8)(vite@5.0.10)
rollup: 4.9.1
import-meta-resolve: 4.0.0
dev: true
/@sveltejs/kit@2.0.6(@sveltejs/vite-plugin-svelte@3.0.1)(svelte@4.2.8)(vite@5.0.10):
@ -1214,10 +1147,6 @@ packages:
resolution: {integrity: sha512-Sk/uYFOBAB7mb74XcpizmH0KOR2Pv3D2Hmrh1Dmy5BmK3MpdSa5kqZcg6EKBdklU0bFXX9gCfzvpnyUehrPIuA==}
dev: true
/@types/resolve@1.20.2:
resolution: {integrity: sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==}
dev: true
/acorn-walk@8.3.1:
resolution: {integrity: sha512-TgUZgYvqZprrl7YldZNoa9OciCAyZR+Ejm9eXzKCmjsF5IKp/wgQ7Z/ZpjpGTIUPwrHQIcYeI8qDh4PsEwxMbw==}
engines: {node: '>=0.4.0'}
@ -1389,11 +1318,6 @@ packages:
ieee754: 1.2.1
dev: false
/builtin-modules@3.3.0:
resolution: {integrity: sha512-zhaCDicdLuWN5UbN5IMnFqNMhNfo919sH85y2/ea+5Yg9TsTkeZxpL+JLbp6cgYFS4sRLp3YV4S6yDuqVWHYOw==}
engines: {node: '>=6'}
dev: true
/bun-types@1.0.18:
resolution: {integrity: sha512-1XZ7AxOF8oO8FZtw1xj006JAKxEjulK3dUhsktZVN95vXBlsf4NIjQxfistVdpt24v3H2I9BwHp+UU+gXSSpAw==}
dev: true
@ -1516,10 +1440,6 @@ packages:
engines: {node: ^12.20.0 || >=14}
dev: false
/commondir@1.0.1:
resolution: {integrity: sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg==}
dev: true
/concat-map@0.0.1:
resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
dev: true
@ -1972,10 +1892,6 @@ packages:
resolution: {integrity: sha512-SqmZANLWS0mnatqbSfRP5g8OXZC12Fgg1IwNtLsyHDzJizORW4khDfjPqJZsemPWBB2uqykUah5YpQ6epsqC/w==}
dev: false
/estree-walker@2.0.2:
resolution: {integrity: sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==}
dev: true
/estree-walker@3.0.3:
resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==}
dependencies:
@ -2137,6 +2053,7 @@ packages:
inherits: 2.0.4
minimatch: 5.1.6
once: 1.4.0
dev: false
/globalyzer@0.1.0:
resolution: {integrity: sha512-40oNTM9UfG6aBmuKxk/giHn5nQ8RVz/SS4Ir6zgzOv9/qC3kKZ9v4etGTcJbEl/NyVQH7FGU7d+X1egr57Md2Q==}
@ -2197,6 +2114,10 @@ packages:
resolve-from: 4.0.0
dev: true
/import-meta-resolve@4.0.0:
resolution: {integrity: sha512-okYUR7ZQPH+efeuMJGlq4f8ubUgO50kByRPyt/Cy1Io4PSRsPjxME+YlVaCOx+NIToW7hCsZNFJyTPFFKepRSA==}
dev: true
/inflight@1.0.6:
resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==}
dependencies:
@ -2216,13 +2137,6 @@ packages:
dependencies:
binary-extensions: 2.2.0
/is-builtin-module@3.2.1:
resolution: {integrity: sha512-BSLE3HnV2syZ0FK0iMA/yUGplUeMmNz4AW5fnTunbCIqZi4vG3WjJT9FHMy5D69xmAYBHXQhJdALdpwVxV501A==}
engines: {node: '>=6'}
dependencies:
builtin-modules: 3.3.0
dev: true
/is-core-module@2.13.1:
resolution: {integrity: sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw==}
dependencies:
@ -2243,10 +2157,6 @@ packages:
dependencies:
is-extglob: 2.1.1
/is-module@1.0.0:
resolution: {integrity: sha512-51ypPSPCoTEIN9dy5Oy+h4pShgJmPCygKfyRCISBI+JoWT/2oJvK8QPxmwv7b/p239jXrm9M1mlQbyKJ5A152g==}
dev: true
/is-number@7.0.0:
resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
engines: {node: '>=0.12.0'}
@ -2255,12 +2165,6 @@ packages:
resolution: {integrity: sha512-+lP4/6lKUBfQjZ2pdxThZvLUAafmZb8OAxFb8XXtiQmS35INgr85hdOGoEs124ez1FCnZJt6jau/T+alh58QFQ==}
dev: false
/is-reference@1.2.1:
resolution: {integrity: sha512-U82MsXXiFIrjCK4otLT+o2NA2Cd2g5MLoOVXUZjIOhLurrRxpEXzI8O0KZHr3IjLvlAH1kTPYSuqer5T9ZVBKQ==}
dependencies:
'@types/estree': 1.0.5
dev: true
/is-reference@3.0.2:
resolution: {integrity: sha512-v3rht/LgVcsdZa3O2Nqs+NMowLOxeOm7Ay9+/ARQ2F+qEoANRcqrjAZKGN0v8ymUetZGgkp26LTnGT7H0Qo9Pg==}
dependencies:
@ -2452,6 +2356,7 @@ packages:
engines: {node: '>=10'}
dependencies:
brace-expansion: 2.0.1
dev: false
/minimatch@7.4.6:
resolution: {integrity: sha512-sBz8G/YjVniEz6lKPNpKxXwazJe4c19fEfV2GDMX6AjFz+MX9uDWIZW8XreVhkFW3fkIdTv/gxWr/Kks5FFAVw==}

1150
scraper/pnpm-lock.yaml Normal file

File diff suppressed because it is too large Load diff

View file

@ -11,7 +11,6 @@ import { getDiaProduct } from "./dia.js";
import { getCotoProduct } from "./coto.js";
import { join } from "path";
import pMap from "p-map";
import { and, eq, sql } from "drizzle-orm";
const DEBUG = false;
const PARSER_VERSION = 1;
@ -23,17 +22,6 @@ sqlite.run(`
pragma journal_mode = WAL;
PRAGMA synchronous = NORMAL;
`);
const getPrevPrecio = db
.select({ id: schema.precios.id })
.from(schema.precios)
.where(
and(
eq(schema.precios.warcRecordId, sql.placeholder("warcRecordId")),
eq(schema.precios.parserVersion, PARSER_VERSION)
)
)
.limit(1)
.prepare();
let progress = { done: 0, errors: 0 };
await pMap(process.argv.slice(2), (path) => parseWarc(path), {
@ -61,15 +49,6 @@ async function parseWarc(path: string) {
for await (const record of parser) {
if (record.warcType === "response") {
if (!record.warcTargetURI) continue;
const warcRecordId = record.warcHeader("WARC-Record-ID");
if (!warcRecordId) throw new Error("No tiene WARC-Record-ID");
if (getPrevPrecio.get({ warcRecordId })) {
console.debug(`skipped ${warcRecordId}`);
continue;
}
// TODO: sobreescribir si existe el mismo record-id pero con version mas bajo?
const html = await record.contentText();
const url = new URL(record.warcTargetURI);
@ -87,7 +66,7 @@ async function parseWarc(path: string) {
...ish,
fetchedAt: new Date(record.warcDate!),
url: record.warcTargetURI,
warcRecordId,
warcRecordId: record.warcHeader("WARC-Record-ID"),
parserVersion: PARSER_VERSION,
};

View file

@ -12,30 +12,22 @@
"format": "prettier --write ."
},
"devDependencies": {
"@sveltejs/adapter-node": "^2.0.2",
"@sveltejs/adapter-auto": "^3.0.0",
"@sveltejs/kit": "^2.0.0",
"@sveltejs/vite-plugin-svelte": "^3.0.0",
"@types/better-sqlite3": "^7.6.8",
"autoprefixer": "^10.4.16",
"db-datos": "workspace:^",
"postcss": "^8.4.32",
"postcss-load-config": "^5.0.2",
"prettier": "^3.1.1",
"prettier-plugin-svelte": "^3.1.2",
"prettier-plugin-tailwindcss": "^0.5.9",
"svelte": "^4.2.7",
"svelte-adapter-bun": "^0.5.1",
"svelte-check": "^3.6.0",
"tailwindcss": "^3.3.6",
"tslib": "^2.4.1",
"typescript": "^5.0.0",
"vite": "^5.0.3"
},
"type": "module",
"dependencies": {
"better-sqlite3": "^9.2.2",
"chart.js": "^4.4.1",
"chartjs-adapter-dayjs-4": "^1.0.4",
"dayjs": "^1.11.10",
"drizzle-orm": "^0.29.1"
}
"type": "module"
}

View file

@ -1,9 +0,0 @@
import Database from "better-sqlite3";
import { drizzle } from "drizzle-orm/better-sqlite3";
import * as schema from "db-datos/schema.js";
import { env } from "$env/dynamic/private";
// Path of the SQLite file: DB_PATH from the private environment when set,
// otherwise the scraper's database relative to the working directory.
const dbPath = env.DB_PATH ?? "../scraper/sqlite.db";
const sqlite = new Database(dbPath);

/** Drizzle handle over the SQLite connection, typed with the db-datos schema. */
export const db = drizzle(sqlite, { schema });
export * as schema from "db-datos/schema.js";

View file

@ -1,18 +0,0 @@
import { error } from "@sveltejs/kit";
import type { PageServerLoad } from "./$types";
import { db, schema } from "$lib/server/db";
import { ilike, like, sql } from "drizzle-orm";
/**
 * Server load for the index page.
 *
 * Selects up to 150 EANs, one row per EAN, in random order, restricted to
 * price rows whose URL starts with the Dia online store origin.
 *
 * @returns `{ precios }`, where each entry carries only the `ean` column.
 */
export const load: PageServerLoad = async ({ params }) => {
  const diaUrlPattern = `https://diaonline.supermercadosdia.com.ar%`;
  const { precios: preciosTable } = schema;

  const precios = await db
    .select({ ean: preciosTable.ean })
    .from(preciosTable)
    .where(like(preciosTable.url, diaUrlPattern))
    .groupBy(preciosTable.ean)
    .orderBy(sql`random()`)
    .limit(150);

  return { precios };
};

View file

@ -1,17 +1,4 @@
<script lang="ts">
import type { PageData } from "./$types";
export let data: PageData;
</script>
<h1 class="text-xl">WIP</h1>
<ul>
{#each data.precios as product}
<li>
<a href={`/ean/${product.ean}`}>
{product.ean}
</a>
</li>
{/each}
</ul>
<h1>Welcome to SvelteKit</h1>
<p>
Visit <a href="https://kit.svelte.dev">kit.svelte.dev</a> to read the documentation
</p>

View file

@ -1,13 +0,0 @@
import { error } from "@sveltejs/kit";
import { eq } from "drizzle-orm";
import type { PageServerLoad } from "./$types";
import { db, schema } from "$lib/server/db";
/**
 * Server load for the per-EAN page.
 *
 * Fetches every price row whose `ean` equals the route's `ean` parameter and
 * answers with a 404 error when there is none.
 *
 * @returns `{ precios }` with all matching rows.
 */
export const load: PageServerLoad = async ({ params }) => {
  const { ean } = params;
  const precios = await db.query.precios.findMany({
    where: eq(schema.precios.ean, ean),
  });

  if (precios.length > 0) return { precios };
  return error(404, "Not Found");
};

View file

@ -1,23 +0,0 @@
<script lang="ts">
import type { PageData } from "./$types";
import Chart from "./Chart.svelte";
export let data: PageData;
</script>
<!--
  One list item per price row: URL, then the price in pesos (centavos / 100),
  or the stock flag when no price was recorded, then the fetch timestamp.
  The price check compares against null explicitly (as Chart.svelte does) so
  that a recorded price of 0 centavos is still shown as a price.
-->
<ul>
  {#each data.precios as precio}
    <li>
      {precio.url}
      :
      {#if precio.precioCentavos !== null}
        {precio.precioCentavos / 100}
      {:else}
        {precio.inStock}
      {/if}
      ({precio.fetchedAt})
    </li>
  {/each}
</ul>
<Chart precios={data.precios} />

View file

@ -1,72 +0,0 @@
<script lang="ts">
import type { Precio } from "db-datos/schema";
// import dayjs from "dayjs";
import ChartJs from "./ChartJs.svelte";
export let precios: Precio[];
// Supermarket chains the chart knows about. Each member's string value is
// also what ends up as the dataset label.
enum Supermercado {
Dia = "Dia",
Carrefour = "Carrefour",
Coto = "Coto",
}
// Maps the hostname of a price row's URL to the chain it belongs to.
const hosts: { [host: string]: Supermercado } = {
"diaonline.supermercadosdia.com.ar": Supermercado.Dia,
"www.carrefour.com.ar": Supermercado.Carrefour,
"www.cotodigital3.com.ar": Supermercado.Coto,
};
// Line colour used for each chain's dataset.
const colorBySupermercado: { [supermercado in Supermercado]: string } = {
[Supermercado.Dia]: "#d52b1e",
[Supermercado.Carrefour]: "#19549d",
[Supermercado.Coto]: "#e20025",
};
// Reactive: rebuilt whenever `precios` changes.
// Produces one line dataset per distinct URL hostname found in `precios`.
$: datasets = precios
.map((p) => new URL(p.url!).hostname)
.filter(onlyUnique)
.map((host) => {
// NOTE(review): a hostname missing from `hosts` yields `undefined` here, and
// therefore an undefined label and border colour below — confirm whether
// such rows can occur.
const supermercado = hosts[host];
// Rows for this hostname, keeping only those that actually have a price.
const ps = precios
.filter((p) => new URL(p.url!).hostname === host)
.filter(
(p): p is Precio & { precioCentavos: number } =>
p.precioCentavos !== null,
);
return {
label: supermercado,
data: [
// x is the fetch time, y is the price converted from centavos.
...ps.map((p) => ({
x: p.fetchedAt,
y: p.precioCentavos / 100,
})),
// lie
// ...ps.map((p) => ({
// x: dayjs(p.fetchedAt).add(14, "day").toDate(),
// y: p.precioCentavos / 100 + 100,
// })),
],
fill: false,
borderColor: colorBySupermercado[supermercado],
tension: 0.1,
};
});
/**
 * Predicate for `Array.prototype.filter` that keeps only the first occurrence
 * of each value.
 *
 * @param value element currently being tested
 * @param index position of `value` in `self`
 * @param self the collection being filtered
 * @returns true when `index` is the first position at which `value` appears
 */
function onlyUnique(value: any, index: any, self: string | any[]) {
  const firstSeenAt = self.indexOf(value);
  return firstSeenAt === index;
}
</script>
<div class="h-[300px] w-full min-w-[500px]">
<ChartJs
type="line"
data={{ datasets }}
options={{
responsive: true,
scales: {
x: { type: "time" },
},
}}
/>
</div>

View file

@ -1,48 +0,0 @@
<script lang="ts">
import {
Chart,
LineController,
type ChartData,
type ChartOptions,
type ChartType,
type Point,
CategoryScale,
LinearScale,
PointElement,
LineElement,
Filler,
TimeScale,
Tooltip,
Legend,
} from "chart.js";
import "chartjs-adapter-dayjs-4/dist/chartjs-adapter-dayjs-4.esm";
import { onMount } from "svelte";
// Registers the controller, elements, scales and plugins imported above with
// Chart.js. This runs when the component script is evaluated, i.e. before the
// chart is constructed in onMount.
Chart.register(
LineController,
LineElement,
CategoryScale,
LinearScale,
TimeScale,
PointElement,
Filler,
Tooltip,
Legend,
);
// Chart.js chart type, handed to the Chart constructor.
export let type: ChartType;
// Chart data; every dataset is a list of { x: Date, y: number } points.
export let data: ChartData<typeof type, { x: Date; y: number }[]>;
// Chart.js options; defaults to an empty object when the parent passes none.
export let options: ChartOptions<typeof type> = {};
// Canvas element the chart draws on; bound in the markup below.
let canvasEl: HTMLCanvasElement;
// Live Chart.js instance. Kept at component scope (instead of inside onMount)
// so the reactive block below can push prop changes into it.
let chart: Chart<ChartType, { x: Date; y: number }[]> | undefined;

// Create the chart once the canvas exists and tear it down on unmount.
onMount(() => {
  chart = new Chart(canvasEl, {
    type,
    data,
    options,
  });
  return () => {
    chart?.destroy();
    chart = undefined;
  };
});

// Previously the chart only ever saw the props it was mounted with, so it went
// stale when the parent passed new `data` or `options` (for example when the
// same page component is reused for a different product). Forward later
// changes to the existing instance instead of leaving it out of date.
$: if (chart) {
  chart.data = data;
  chart.options = options;
  chart.update();
}
</script>
<canvas bind:this={canvasEl} />

View file

@ -1,12 +1,17 @@
import adapter from "@sveltejs/adapter-node";
// import adapter from "svelte-adapter-bun";
// import adapter from "@sveltejs/adapter-auto";
import adapter from "svelte-adapter-bun";
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
/** @type {import('@sveltejs/kit').Config} */
const config = {
// Consult https://kit.svelte.dev/docs/integrations#preprocessors
// for more information about preprocessors
preprocess: [vitePreprocess({})],
kit: {
// adapter-auto only supports some environments, see https://kit.svelte.dev/docs/adapter-auto for a list.
// If your environment is not supported or you settled on a specific environment, switch out the adapter.
// See https://kit.svelte.dev/docs/adapters for more information about adapters.
adapter: adapter(),
},
};