mirror of
https://github.com/catdevnull/preciazo.git
synced 2024-11-22 14:16:19 +00:00
Compare commits
9 commits
38b6b62aba
...
5ff556b7d3
Author | SHA1 | Date | |
---|---|---|---|
5ff556b7d3 | |||
739ecf03c7 | |||
470afa98af | |||
e9884ba7c1 | |||
114922906c | |||
cd6db5bef7 | |||
aea5112dfa | |||
25810578eb | |||
74640df37e |
4 changed files with 51 additions and 18 deletions
|
@ -12,4 +12,6 @@ Dockerfile
|
||||||
*.warc.zst
|
*.warc.zst
|
||||||
.git
|
.git
|
||||||
scraper/debug/
|
scraper/debug/
|
||||||
*/target/
|
*/target/
|
||||||
|
scraper-rs/target/
|
||||||
|
*.db*
|
||||||
|
|
54
.github/workflows/container.yml
vendored
54
.github/workflows/container.yml
vendored
|
@ -11,7 +11,7 @@ env:
|
||||||
jobs:
|
jobs:
|
||||||
check:
|
check:
|
||||||
name: chequear typescript del sitio
|
name: chequear typescript del sitio
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubicloud-standard-4
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: pnpm/action-setup@v3
|
- uses: pnpm/action-setup@v3
|
||||||
|
@ -31,7 +31,7 @@ jobs:
|
||||||
build-and-push-sitio:
|
build-and-push-sitio:
|
||||||
name: Compilar contenedor del sitio
|
name: Compilar contenedor del sitio
|
||||||
needs: check
|
needs: check
|
||||||
runs-on: ubuntu-latest
|
runs-on: buildjet-4vcpu-ubuntu-2204
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
packages: write
|
packages: write
|
||||||
|
@ -59,13 +59,13 @@ jobs:
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/sitio:buildcache
|
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/sitio:buildcache,mode=max
|
cache-to: type=inline
|
||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64
|
||||||
|
|
||||||
build-and-push-scraper:
|
build-and-push-scraper-amd64:
|
||||||
name: Compilar contenedor del scraper
|
name: "[amd64] oci:scraper"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubicloud-standard-16
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
packages: write
|
packages: write
|
||||||
|
@ -91,6 +91,38 @@ jobs:
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/scraper:buildcache
|
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||||
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/scraper:buildcache,mode=max
|
cache-to: type=inline
|
||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64
|
||||||
|
|
||||||
|
build-and-push-scraper-arm64:
|
||||||
|
name: "[arm64] oci:scraper"
|
||||||
|
runs-on: ubicloud-standard-16-arm
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
steps:
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
- name: Log in to the Container registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
- name: Extract metadata (tags, labels) for Docker
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/scraper
|
||||||
|
- name: Build and push Docker image
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: "{{defaultContext}}:scraper-rs/"
|
||||||
|
file: Dockerfile
|
||||||
|
push: true
|
||||||
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
|
||||||
|
cache-to: type=inline
|
||||||
|
platforms: linux/arm64
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
FROM cgr.dev/chainguard/wolfi-base AS base
|
FROM docker.io/node:20 AS base
|
||||||
WORKDIR /usr/src/app
|
WORKDIR /usr/src/app
|
||||||
|
|
||||||
FROM base as build
|
FROM base as build
|
||||||
RUN apk add --no-cache nodejs npm
|
|
||||||
RUN npm install --global pnpm
|
RUN npm install --global pnpm
|
||||||
COPY db-datos/package.json db-datos/package.json
|
COPY db-datos/package.json db-datos/package.json
|
||||||
COPY sitio/package.json sitio/package.json
|
COPY sitio/package.json sitio/package.json
|
||||||
COPY pnpm-lock.yaml pnpm-workspace.yaml .
|
COPY pnpm-lock.yaml pnpm-workspace.yaml ./
|
||||||
RUN cd sitio && pnpm install
|
RUN cd sitio && pnpm install
|
||||||
COPY . .
|
COPY . .
|
||||||
COPY db-datos/drizzle .
|
COPY db-datos/drizzle .
|
||||||
|
@ -16,7 +15,7 @@ RUN cd sitio && \
|
||||||
|
|
||||||
FROM base
|
FROM base
|
||||||
ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
RUN apk add --no-cache nodejs npm jq sqlite
|
RUN apt-get update && apt-get install -y jq sqlite3 && apt-get clean
|
||||||
|
|
||||||
# Sitio
|
# Sitio
|
||||||
COPY --from=build /usr/src/app/sitio/package.json package.real.json
|
COPY --from=build /usr/src/app/sitio/package.json package.real.json
|
||||||
|
@ -28,4 +27,4 @@ COPY --from=build /usr/src/app/db-datos/drizzle drizzle
|
||||||
ENV DB_PATH=/db/db.db
|
ENV DB_PATH=/db/db.db
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
CMD ["node", "."]
|
CMD ["node", "."]
|
||||||
|
|
|
@ -82,7 +82,7 @@ pub async fn get_urls() -> anyhow::Result<Vec<String>> {
|
||||||
let client = build_client();
|
let client = build_client();
|
||||||
let initial = Url::parse("https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29")?;
|
let initial = Url::parse("https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29")?;
|
||||||
|
|
||||||
let page_size = 100;
|
let page_size = 50;
|
||||||
let handles: Vec<Vec<String>> = stream::iter(0..29000 / page_size)
|
let handles: Vec<Vec<String>> = stream::iter(0..29000 / page_size)
|
||||||
.map(|i| {
|
.map(|i| {
|
||||||
let mut u = initial.clone();
|
let mut u = initial.clone();
|
||||||
|
|
Loading…
Reference in a new issue