Compare commits

...

9 commits

Author SHA1 Message Date
5ff556b7d3 ci: usar ubicloud 2024-06-17 21:55:45 -03:00
739ecf03c7 ci: disable arm build 2024-06-17 21:51:56 -03:00
470afa98af downgrade 8vcpu 2024-06-17 21:50:56 -03:00
e9884ba7c1 ci: cache 2024-06-17 21:50:02 -03:00
114922906c ci: buildjet 2024-06-17 21:49:40 -03:00
cd6db5bef7 ci: arm 2024-06-17 21:46:22 -03:00
aea5112dfa ci: buildjet 2024-06-17 21:44:03 -03:00
25810578eb docker: usar debian 2024-06-17 21:13:24 -03:00
74640df37e coto: hacer páginas más chiquitas 2024-06-17 21:04:28 -03:00
4 changed files with 51 additions and 18 deletions

View file

@@ -12,4 +12,6 @@ Dockerfile
*.warc.zst *.warc.zst
.git .git
scraper/debug/ scraper/debug/
*/target/ */target/
scraper-rs/target/
*.db*

View file

@@ -11,7 +11,7 @@ env:
jobs: jobs:
check: check:
name: chequear typescript del sitio name: chequear typescript del sitio
runs-on: ubuntu-latest runs-on: ubicloud-standard-4
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: pnpm/action-setup@v3 - uses: pnpm/action-setup@v3
@@ -31,7 +31,7 @@ jobs:
build-and-push-sitio: build-and-push-sitio:
name: Compilar contenedor del sitio name: Compilar contenedor del sitio
needs: check needs: check
runs-on: ubuntu-latest runs-on: buildjet-4vcpu-ubuntu-2204
permissions: permissions:
contents: read contents: read
packages: write packages: write
@@ -59,13 +59,13 @@ jobs:
push: true push: true
tags: ${{ steps.meta.outputs.tags }} tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/sitio:buildcache cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/sitio:buildcache,mode=max cache-to: type=inline
platforms: linux/amd64,linux/arm64 platforms: linux/amd64
build-and-push-scraper: build-and-push-scraper-amd64:
name: Compilar contenedor del scraper name: "[amd64] oci:scraper"
runs-on: ubuntu-latest runs-on: ubicloud-standard-16
permissions: permissions:
contents: read contents: read
packages: write packages: write
@@ -91,6 +91,38 @@ jobs:
push: true push: true
tags: ${{ steps.meta.outputs.tags }} tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/scraper:buildcache cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
cache-to: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/scraper:buildcache,mode=max cache-to: type=inline
platforms: linux/amd64,linux/arm64 platforms: linux/amd64
build-and-push-scraper-arm64:
name: "[arm64] oci:scraper"
runs-on: ubicloud-standard-16-arm
permissions:
contents: read
packages: write
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}/scraper
- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
context: "{{defaultContext}}:scraper-rs/"
file: Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
cache-to: type=inline
platforms: linux/arm64

View file

@@ -1,12 +1,11 @@
FROM cgr.dev/chainguard/wolfi-base AS base FROM docker.io/node:20 AS base
WORKDIR /usr/src/app WORKDIR /usr/src/app
FROM base as build FROM base as build
RUN apk add --no-cache nodejs npm
RUN npm install --global pnpm RUN npm install --global pnpm
COPY db-datos/package.json db-datos/package.json COPY db-datos/package.json db-datos/package.json
COPY sitio/package.json sitio/package.json COPY sitio/package.json sitio/package.json
COPY pnpm-lock.yaml pnpm-workspace.yaml . COPY pnpm-lock.yaml pnpm-workspace.yaml ./
RUN cd sitio && pnpm install RUN cd sitio && pnpm install
COPY . . COPY . .
COPY db-datos/drizzle . COPY db-datos/drizzle .
@@ -16,7 +15,7 @@ RUN cd sitio && \
FROM base FROM base
ENV NODE_ENV=production ENV NODE_ENV=production
RUN apk add --no-cache nodejs npm jq sqlite RUN apt-get update && apt-get install -y jq sqlite3 && apt-get clean
# Sitio # Sitio
COPY --from=build /usr/src/app/sitio/package.json package.real.json COPY --from=build /usr/src/app/sitio/package.json package.real.json
@@ -28,4 +27,4 @@ COPY --from=build /usr/src/app/db-datos/drizzle drizzle
ENV DB_PATH=/db/db.db ENV DB_PATH=/db/db.db
EXPOSE 3000 EXPOSE 3000
CMD ["node", "."] CMD ["node", "."]

View file

@@ -82,7 +82,7 @@ pub async fn get_urls() -> anyhow::Result<Vec<String>> {
let client = build_client(); let client = build_client();
let initial = Url::parse("https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29")?; let initial = Url::parse("https://www.cotodigital3.com.ar/sitios/cdigi/browse?Nf=product.endDate%7CGTEQ+1.7032032E12%7C%7Cproduct.startDate%7CLTEQ+1.7032032E12&Nr=AND%28product.sDisp_200%3A1004%2Cproduct.language%3Aespa%C3%B1ol%2COR%28product.siteId%3ACotoDigital%29%29")?;
let page_size = 100; let page_size = 50;
let handles: Vec<Vec<String>> = stream::iter(0..29000 / page_size) let handles: Vec<Vec<String>> = stream::iter(0..29000 / page_size)
.map(|i| { .map(|i| {
let mut u = initial.clone(); let mut u = initial.clone();