Compare commits

..

4 commits

Author SHA1 Message Date
9cb7c0e27e rm coto-crawlee 2024-11-13 09:49:44 -03:00
9a82a556f9 WIP: coto-crawlee 2024-11-13 09:49:35 -03:00
258346e3d6 WIP: metadata scrapped 2024-11-13 09:48:37 -03:00
544b0471b9 usar maplibre 2024-11-13 09:19:27 -03:00
9 changed files with 374 additions and 79 deletions

View file

@ -266,6 +266,44 @@ async fn search(State(pool): State<SqlitePool>, Path(query): Path<String>) -> im
Json(results) Json(results)
} }
/// One scraped metadata row for a product, as serialized by
/// `dump_latest_metadata`.
///
/// Field order is the order in which `Serialize` emits the keys.
#[derive(sqlx::FromRow, Debug, Serialize)]
struct Metadata {
/// Product EAN, copied verbatim from `precios.ean`.
ean: String,
/// When the row was fetched. `dump_latest_metadata` builds this from the
/// integer `precios.fetched_at` column, interpreted as whole seconds since
/// the Unix epoch.
fetched_at: chrono::DateTime<Utc>,
/// Price, if known. NOTE(review): presumably expressed in centavos, going
/// by the column name — confirm against the scraper.
precio_centavos: Option<i64>,
/// Stock flag, if known. The database stores an integer;
/// `dump_latest_metadata` maps `1` to `true` and anything else to `false`.
in_stock: Option<bool>,
/// URL stored alongside the row (`precios.url`).
url: String,
/// Product name (`precios.name`); nullable in the database.
name: Option<String>,
/// Product image URL (`precios.image_url`), if any.
image_url: Option<String>,
}
async fn dump_latest_metadata(State(pool): State<SqlitePool>) -> impl IntoResponse {
let precios = sqlx::query!("
SELECT p.id, p.ean, p.name, p.image_url, p.url, p.precio_centavos, p.in_stock, p.fetched_at
FROM precios p
INNER JOIN (
SELECT ean, MAX(fetched_at) as max_fetched_at
FROM precios
GROUP BY ean
) latest ON p.ean = latest.ean AND p.fetched_at = latest.max_fetched_at
WHERE p.name IS NOT NULL")
.fetch_all(&pool)
.await
.unwrap()
.into_iter()
.map(|r| Metadata {
ean: r.ean,
fetched_at: DateTime::from_timestamp(r.fetched_at, 0).unwrap(),
image_url: r.image_url,
name: r.name,
in_stock: r.in_stock.map(|x| x == 1),
precio_centavos: r.precio_centavos,
url: r.url,
})
.collect_vec();
Json(precios)
}
async fn get_info(State(pool): State<SqlitePool>) -> impl IntoResponse { async fn get_info(State(pool): State<SqlitePool>) -> impl IntoResponse {
#[derive(Serialize)] #[derive(Serialize)]
struct Info { struct Info {
@ -321,6 +359,7 @@ async fn main() {
.route("/api/0/ean/:ean/history", get(get_product_history)) .route("/api/0/ean/:ean/history", get(get_product_history))
.route("/api/0/info", get(get_info)) .route("/api/0/info", get(get_info))
.route("/api/0/search/:query", get(search)) .route("/api/0/search/:query", get(search))
.route("/api/0/internal/latest-metadata", get(dump_latest_metadata))
.with_state(pool); .with_state(pool);
let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap(); let listener = tokio::net::TcpListener::bind("0.0.0.0:8000").await.unwrap();

Binary file not shown.

View file

@ -11,6 +11,8 @@ import {
index, index,
pgMaterializedView, pgMaterializedView,
pgView, pgView,
timestamp,
boolean,
} from "drizzle-orm/pg-core"; } from "drizzle-orm/pg-core";
export const datasets = pgTable( export const datasets = pgTable(
@ -211,3 +213,22 @@ export const productos_descripcion_index = pgTable(
).using("gin", sql`to_tsvector('spanish', ${table.productos_descripcion})`), ).using("gin", sql`to_tsvector('spanish', ${table.productos_descripcion})`),
}) })
); );
// See scripts/refresh-scrapped-metadata.ts
// Table holding product metadata taken from the scraper's database.
export const productos_metadata_scrapped = pgTable(
"productos_metadata_scrapped",
{
// Product EAN. Declared without .notNull(), so the column is nullable.
ean: bigint("ean", { mode: "bigint" }),
// When the metadata was fetched; required.
fetchedAt: timestamp("fetched_at").notNull(),
// NOTE(review): presumably the price in centavos, going by the name — confirm.
precioCentavos: integer("precio_centavos"),
inStock: boolean("in_stock"),
// URL stored with the row; required.
url: text("url").notNull(),
name: text("name"),
imageUrl: text("image_url"),
},
(table) => ({
// Index on ean.
productos_metadata_scrapped_ean_idx: index(
"productos_metadata_scrapped_ean_idx"
).on(table.ean),
})
);

View file

@ -3,7 +3,8 @@
"private": true, "private": true,
"type": "module", "type": "module",
"devDependencies": { "devDependencies": {
"@types/bun": "^1.1.7", "@types/bun": "^1.1.11",
"bun-types": "^1.1.30",
"@types/papaparse": "^5.3.14", "@types/papaparse": "^5.3.14",
"drizzle-kit": "^0.24.2" "drizzle-kit": "^0.24.2"
}, },

View file

@ -0,0 +1,43 @@
/**
 * This script updates the "new" database from a database generated by
 * preciazo's "old" scraper, which scrapes the supermarkets' websites.
 *
 * Only the latest metadata of each product is selected.
 *
 * The scraper's SQLite database is passed as the first argument.
 *
 * NOTE: in its current state the script only prints every selected row;
 * nothing is written through the Postgres handle yet.
 */
import { drizzle } from "drizzle-orm/postgres-js";
import postgres from "postgres";
import * as schema from "../db/schema";
import { Database } from "bun:sqlite";

// Path of the scraper's SQLite file; bail out early when it is missing.
const scraperDbPath = process.argv[2];
if (!scraperDbPath) {
  console.error("falta pasar la base de datos del scraper como parametro");
  process.exit(1);
}

// Postgres handle for the "new" database (connection settings are left to
// the `postgres()` defaults).
const db = drizzle(postgres(), { schema, logger: true });

// Opened read-only; `using` disposes the handle when the module scope ends.
using scraperDb = new Database(scraperDbPath, { strict: true, readonly: true });

// Rows carrying the newest `fetched_at` of each EAN, limited to named products.
const latestMetadataQuery = scraperDb.query(`
SELECT p.id, p.ean, p.name, p.image_url, p.url, p.precio_centavos, p.in_stock, p.fetched_at
FROM precios p
INNER JOIN (
SELECT ean, MAX(fetched_at) as max_fetched_at
FROM precios
GROUP BY ean
) latest ON p.ean = latest.ean AND p.fetched_at = latest.max_fetched_at
WHERE p.name IS NOT NULL
`);

// @ts-expect-error bun 1.1.30 has outdated types, it's fixed in main branch
for (const row of latestMetadataQuery.iterate()) {
  console.log(row);
}

View file

@ -51,12 +51,14 @@
"d3-scale": "^4.0.2", "d3-scale": "^4.0.2",
"date-fns": "^4.1.0", "date-fns": "^4.1.0",
"drizzle-orm": "^0.33.0", "drizzle-orm": "^0.33.0",
"geojson": "^0.5.0",
"layerchart": "^0.44.0", "layerchart": "^0.44.0",
"leaflet": "^1.9.4", "leaflet": "^1.9.4",
"leaflet.markercluster": "^1.5.3", "leaflet.markercluster": "^1.5.3",
"lucide-svelte": "^0.441.0", "lucide-svelte": "^0.441.0",
"maplibre-gl": "^4.7.1", "maplibre-gl": "^4.7.1",
"postgres": "^3.4.4", "postgres": "^3.4.4",
"svelte-maplibre": "^0.9.14",
"tailwind-merge": "^2.5.2", "tailwind-merge": "^2.5.2",
"tailwind-variants": "^0.2.1" "tailwind-variants": "^0.2.1"
} }

View file

@ -16,8 +16,10 @@ const buttonVariants = tv({
size: { size: {
default: 'h-10 px-4 py-2', default: 'h-10 px-4 py-2',
sm: 'h-9 rounded-md px-3', sm: 'h-9 rounded-md px-3',
xs: 'h-8 rounded-md px-2',
lg: 'h-11 rounded-md px-8', lg: 'h-11 rounded-md px-8',
icon: 'h-10 w-10' icon: 'h-10 w-10',
icon_sm: 'h-8 w-8'
} }
}, },
defaultVariants: { defaultVariants: {

View file

@ -29,6 +29,13 @@ export const pesosFormatter = new Intl.NumberFormat('es-AR', {
currency: 'ARS' currency: 'ARS'
}); });
/**
 * Formats dates for the es-AR locale with a long weekday name, numeric year
 * and two-digit month and day.
 *
 * Constructed with `new`, like `pesosFormatter` in this module.
 *
 * NOTE(review): no `timeZone` is set, so dates are rendered in the runtime's
 * local zone. If callers pass date-only strings parsed as UTC midnight, the
 * displayed day may shift — confirm against the shape of the incoming dates.
 */
export const dateFormatter = new Intl.DateTimeFormat('es-AR', {
  year: 'numeric',
  month: '2-digit',
  day: '2-digit',
  weekday: 'long'
});
export function parseMarcas(marcas: readonly string[]) { export function parseMarcas(marcas: readonly string[]) {
const x = marcas const x = marcas
.map((m) => m.trim().replaceAll(/['`´]/g, '')) .map((m) => m.trim().replaceAll(/['`´]/g, ''))

View file

@ -1,15 +1,77 @@
<script lang="ts"> <script lang="ts">
import type { PageData } from './$types'; import type { PageData } from './$types';
import { ArrowLeft } from 'lucide-svelte'; import { ArrowLeft, ArrowRight, MapPin } from 'lucide-svelte';
import Map from '$lib/components/Map.svelte';
import Badge from '$lib/components/ui/badge/badge.svelte'; import Badge from '$lib/components/ui/badge/badge.svelte';
import { goto } from '$app/navigation'; import { goto } from '$app/navigation';
import { generateGoogleMapsLink, pesosFormatter, processBanderaNombre } from '$lib/sepa-utils'; import {
dateFormatter,
generateGoogleMapsLink,
pesosFormatter,
processBanderaNombre
} from '$lib/sepa-utils';
import { page } from '$app/stores'; import { page } from '$app/stores';
import {
DefaultMarker,
MapLibre,
Popup,
GeoJSON,
CircleLayer,
SymbolLayer,
HeatmapLayer
} from 'svelte-maplibre';
import style from '$lib/components/map_style.json';
import type { GeoJSON as GeoJSONType } from 'geojson';
import type { DataDrivenPropertyValueSpecification } from 'maplibre-gl';
import Button from '$lib/components/ui/button/button.svelte';
export let data: PageData; export let data: PageData;
$: id_producto = $page.params.id;
const query = $page.url.searchParams.get('query'); const query = $page.url.searchParams.get('query');
/**
 * Builds the GeoJSON FeatureCollection rendered on the map: one Point
 * feature per price row, positioned at the branch's longitude/latitude.
 *
 * Each feature's `properties` carry what the popup reads: price, branch and
 * product descriptions, the display name of the chain, the dataset date, the
 * ids used to link to the price history, and the raw street and number used
 * to build the Google Maps link.
 *
 * @param precios price rows to convert; the function reads only this
 *   argument, so it can be driven from a reactive statement.
 * @returns a FeatureCollection with one feature per entry of `precios`.
 */
function generateGeoJSON(precios: (typeof data)['precios']): GeoJSONType {
	return {
		type: 'FeatureCollection',
		features: precios.map((precio) => ({
			type: 'Feature',
			geometry: {
				type: 'Point',
				// GeoJSON positions are [longitude, latitude].
				coordinates: [precio.sucursales_longitud, precio.sucursales_latitud]
			},
			properties: {
				id: Math.random(),
				id_comercio: precio.id_comercio,
				id_sucursal: precio.id_sucursal,
				precio: precio.productos_precio_lista,
				nombre: precio.sucursales_nombre,
				descripcion: precio.productos_descripcion,
				direccion: `${precio.sucursales_calle} ${precio.sucursales_numero ?? ''}`,
				// The popup passes these two to generateGoogleMapsLink.
				sucursales_calle: precio.sucursales_calle,
				sucursales_numero: precio.sucursales_numero,
				comercio: processBanderaNombre(precio),
				fecha: precio.dataset_date
			}
		}))
	};
}
$: geoJSON = generateGeoJSON(data.precios);
/**
 * Builds a style expression that switches between two numbers depending on
 * the feature's `hover` feature-state.
 *
 * @param offValue value used while the `hover` state is false or unset.
 * @param onValue value used while the `hover` state is true.
 * @returns a `case` expression yielding `onValue` or `offValue`.
 */
function hoverStateFilter(
	offValue: number,
	onValue: number
): DataDrivenPropertyValueSpecification<number> {
	return [
		'case',
		// Condition: the `hover` feature-state, defaulting to false when unset.
		['boolean', ['feature-state', 'hover'], false],
		onValue,
		offValue
	];
}
</script> </script>
<svelte:head> <svelte:head>
@ -34,84 +96,202 @@
<Badge variant="outline">EAN {data.id_producto}</Badge> <Badge variant="outline">EAN {data.id_producto}</Badge>
</div> </div>
</div> </div>
<MapLibre
<Map style={style as any}
mapMap={(map, L) => { class="relative h-full max-h-full min-h-[50vh] w-full flex-1"
// var markers = L.MarkerClusterGroup(); standardControls
const myRenderer = L.canvas({ padding: 0.5 }); zoom={9}
const prices = data.precios.map((p) => p.productos_precio_lista); center={[-58.381944444444, -34.599722222222]}
const sortedPrices = prices.sort((a, b) => a - b); >
const q1Index = Math.floor(sortedPrices.length * 0.1); <!-- cluster={{
const q3Index = Math.floor(sortedPrices.length * 0.9); radius: 50,
const iqr = sortedPrices[q3Index] - sortedPrices[q1Index]; // maxZoom: 14,
const lowerBound = sortedPrices[q1Index] - 1.5 * iqr; maxZoom: 14,
const upperBound = sortedPrices[q3Index] + 1.5 * iqr; properties: {
const filteredPrices = sortedPrices.filter((p) => p >= lowerBound && p <= upperBound); total_precio: ['+', ['get', 'precio']],
const min = Math.min(...filteredPrices); precio_promedio: [
const max = Math.max(...filteredPrices); 'number',
console.log({ min, max, outliers: prices.length - filteredPrices.length }); ['/', ['+', ['number', ['get', 'precio']]], ['get', 'point_count']]
]
// For each row in data, create a marker and add it to the map
// For each row, columns `Latitude`, `Longitude`, and `Title` are required
for (const precio of data.precios) {
const normalizedPrice = (precio.productos_precio_lista - min) / (max - min);
// Safari doesn't support color-mix, so we'll use a fallback
const color = getSafeColor(normalizedPrice);
const createElement = () => {
const div = document.createElement('div');
[
`fecha del precio: ${precio.dataset_date}`,
`precio: ${pesosFormatter.format(precio.productos_precio_lista)}`,
`comercio: ${processBanderaNombre(precio)} (${precio.comercio_razon_social} CUIT ${precio.comercio_cuit})`,
`sucursal: ${precio.sucursales_nombre}`,
`dirección: ${precio.sucursales_calle} ${precio.sucursales_numero}`,
() => {
const a = document.createElement('a');
if (precio.sucursales_calle) {
a.href = generateGoogleMapsLink({
sucursales_calle: precio.sucursales_calle,
sucursales_numero: precio.sucursales_numero
});
}
a.target = '_blank';
a.append('ver en Google Maps');
return a;
},
`descripcion del producto segun el comercio: ${precio.productos_descripcion}`,
() => {
const a = document.createElement('a');
a.href = `/id_producto/${data.id_producto}/sucursal/${precio.id_comercio}/${precio.id_sucursal}`;
a.append('ver precios historicos');
return a;
}
].forEach((el) => {
div.append(typeof el === 'function' ? el() : el);
div.append(document.createElement('br'));
});
return div;
};
var marker = L.circleMarker([precio.sucursales_latitud, precio.sucursales_longitud], {
opacity: 1,
renderer: myRenderer,
color,
radius: 5
})
.bindPopup(createElement)
.addTo(map);
marker.on('click', function (this: L.CircleMarker) {
this.openPopup();
});
}
// Helper function to get a color that works in Safari
function getSafeColor(normalizedPrice: number) {
const r = Math.round(255 * normalizedPrice);
const g = Math.round(255 * (1 - normalizedPrice));
return `rgb(${r}, ${g}, 0)`;
} }
}} -->
<GeoJSON id="precios" data={geoJSON}>
<!-- <HeatmapLayer
paint={{
// Increase the heatmap weight based on price magnitude
'heatmap-weight': [
'interpolate',
['linear'],
['get', 'precio'], // Get precio from properties
Math.min(...data.precios.map((p) => p.productos_precio_lista)), // Start at 0 weight for minimum price
0,
Math.max(...data.precios.map((p) => p.productos_precio_lista)), // Adjust this max value based on your price range
1
],
// Increase the heatmap intensity by zoom level
'heatmap-intensity': ['interpolate', ['linear'], ['zoom'], 0, 1, 9, 3],
// Color ramp for heatmap. Domain is 0 (low) to 1 (high).
'heatmap-color': [
'interpolate',
['linear'],
['heatmap-density'],
0,
'rgba(0, 255, 0, 0)',
// 0.2,
// 'rgb(0, 204, 255)',
// 0.4,
// 'rgb(128, 255, 128)',
// 0.6,
// 'rgb(255, 255, 102)',
// 0.8,
// 'rgb(255, 128, 0)',
0.9,
'rgb(100, 255, 0)',
1,
'rgb(255, 0, 0)'
],
// Adjust the heatmap radius by zoom level
'heatmap-radius': [
'interpolate',
['linear'],
['get', 'precio'],
Math.min(...data.precios.map((p) => p.productos_precio_lista)),
2,
Math.max(...data.precios.map((p) => p.productos_precio_lista)),
10
],
'heatmap-opacity': 0.8
}} }}
/> /> -->
<!-- <CircleLayer
id="cluster_circles"
applyToClusters
hoverCursor="pointer"
paint={{
'circle-color': [
'step',
['get', 'total_precio'],
'#51bbd6',
10,
'#f1f075',
30,
'#f28cb1'
],
// 'circle-radius': ['step', ['get', 'point_count'], 15, 10, 20, 30, 25],
'circle-radius': 5,
'circle-stroke-color': '#fff',
'circle-stroke-width': 1,
'circle-stroke-opacity': hoverStateFilter(0, 1)
}}
manageHoverState
on:click={(e) => {
console.log(e);
}}
>
<!-- <Popup openOn="click" closeOnClickInside let:data>
{#if data?.properties}
<div class="p-2">
<p class="font-bold">Grupo de {data.properties.point_count} precios</p>
<p>Precio promedio: ${data.properties.precio_promedio.toFixed(2)}</p>
</div> </div>
{/if}
</Popup> --
</CircleLayer>
<SymbolLayer
id="cluster_labels"
interactive={false}
applyToClusters
layout={{
'text-field': [
'format',
['get', 'point_count_abbreviated'],
{},
'\n$',
{},
['number-format', ['get', 'total_precio'], { 'max-fraction-digits': 2 }],
{ 'font-scale': 0.8 }
],
'text-size': 12,
'text-offset': [0, -0.1]
}}
/>-->
<CircleLayer
id="precio_circle"
applyToClusters={false}
hoverCursor="pointer"
paint={{
'circle-color': [
'interpolate',
['linear'],
['get', 'precio'],
Math.min(...data.precios.map((p) => p.productos_precio_lista)),
'rgba(0,255,0,0)',
Math.max(...data.precios.map((p) => p.productos_precio_lista)),
'rgba(255,0,0,1)'
],
'circle-radius': ['interpolate', ['linear'], ['zoom'], 3, 4, 10, 6],
'circle-stroke-width': 1,
'circle-stroke-color': '#fff'
// 'circle-stroke-opacity': hoverStateFilter(0, 1)
}}
>
<Popup openOn="click" closeOnClickInside let:data>
{#if data?.properties}
<div class="flex flex-col gap-2 px-3 py-2">
<div class="flex flex-col gap-1">
<span class="text-xs uppercase leading-none text-neutral-500">
{dateFormatter.format(new Date(data.properties.fecha))}
</span>
<span class="text-xl font-bold leading-none">
{pesosFormatter.format(data.properties.precio)}
</span>
</div>
<div class="flex gap-2">
<div class="flex flex-col leading-none">
<span class="font-medium">{data.properties.comercio}</span>
<span class="text-sm">{data.properties.direccion}</span>
</div>
<Button
href={generateGoogleMapsLink({
sucursales_calle: data.properties.sucursales_calle,
sucursales_numero: data.properties.sucursales_numero
})}
target="_blank"
variant="outline"
size="icon_sm"
class="inline-flex items-center gap-1"
>
<MapPin class="size-4" />
</Button>
</div>
<div>
<Button
variant="default"
size="xs"
href={`/id_producto/${id_producto}/sucursal/${data.properties.id_comercio}/${data.properties.id_sucursal}`}
class="group"
>
Precios históricos
<ArrowRight class="mx-1 size-4 transition-transform group-hover:translate-x-1" />
</Button>
</div>
</div>
{/if}
</Popup>
</CircleLayer>
</GeoJSON>
</MapLibre>
</div>
<style>
:global(.maplibregl-popup-content) {
border-radius: 0.3rem;
padding: 0;
}
</style>