mejorar scraper

This commit is contained in:
Cat /dev/Nulo 2023-12-22 14:41:43 -03:00
parent 2f8dab4614
commit 70679798d7
3 changed files with 86 additions and 321 deletions

View file

@ -44,9 +44,9 @@ importers:
drizzle-orm:
specifier: ^0.29.1
version: 0.29.1(@types/better-sqlite3@7.6.8)(better-sqlite3@9.2.2)
jsdom:
specifier: ^23.0.1
version: 23.0.1
linkedom:
specifier: ^0.16.5
version: 0.16.5
nanoid:
specifier: ^5.0.4
version: 5.0.4
@ -63,9 +63,6 @@ importers:
'@types/better-sqlite3':
specifier: ^7.6.8
version: 7.6.8
'@types/jsdom':
specifier: ^21.1.6
version: 21.1.6
'@types/node':
specifier: ^20.10.5
version: 20.10.5
@ -272,32 +269,11 @@ packages:
dependencies:
'@types/node': 20.10.5
/@types/jsdom@21.1.6:
resolution: {integrity: sha512-/7kkMsC+/kMs7gAYmmBR9P0vGTnOoLhQhyhQJSlXGI5bzTHp6xdo0TtKWQAsz6pmSAeVqKSbqeyP6hytqr9FDw==}
dependencies:
'@types/node': 20.10.5
'@types/tough-cookie': 4.0.5
parse5: 7.1.2
dev: true
/@types/node@20.10.5:
resolution: {integrity: sha512-nNPsNE65wjMxEKI93yOP+NPGGBJz/PoN3kZsVLee0XMiJolxSekEVD8wRwBUBqkwc7UWop0edW50yrCQW4CyRw==}
dependencies:
undici-types: 5.26.5
/@types/tough-cookie@4.0.5:
resolution: {integrity: sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==}
dev: true
/agent-base@7.1.0:
resolution: {integrity: sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg==}
engines: {node: '>= 14'}
dependencies:
debug: 4.3.4
transitivePeerDependencies:
- supports-color
dev: false
/ansi-regex@5.0.1:
resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==}
engines: {node: '>=8'}
@ -310,10 +286,6 @@ packages:
color-convert: 2.0.1
dev: false
/asynckit@0.4.0:
resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==}
dev: false
/base32-encode@2.0.0:
resolution: {integrity: sha512-mlmkfc2WqdDtMl/id4qm3A7RjW6jxcbAoMjdRmsPiwQP0ufD4oXItYMnPgVHe80lnAIy+1xwzhHE1s4FoIceSw==}
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
@ -382,13 +354,6 @@ packages:
resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==}
dev: false
/combined-stream@1.0.8:
resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==}
engines: {node: '>= 0.8'}
dependencies:
delayed-stream: 1.0.0
dev: false
/crypto-random-string@4.0.0:
resolution: {integrity: sha512-x8dy3RnvYdlUcPOjkEHqozhiwzKNSq7GcPuXFbnyMOCHxX8V3OgIg/pYuabl2sbUPfIJaeAQB7PMOK8DFIdoRA==}
engines: {node: '>=12'}
@ -415,37 +380,6 @@ packages:
resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==}
dev: false
/cssstyle@3.0.0:
resolution: {integrity: sha512-N4u2ABATi3Qplzf0hWbVCdjenim8F3ojEXpBDF5hBpjzW182MjNGLqfmQ0SkSPeQ+V86ZXgeH8aXj6kayd4jgg==}
engines: {node: '>=14'}
dependencies:
rrweb-cssom: 0.6.0
dev: false
/data-urls@5.0.0:
resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==}
engines: {node: '>=18'}
dependencies:
whatwg-mimetype: 4.0.0
whatwg-url: 14.0.0
dev: false
/debug@4.3.4:
resolution: {integrity: sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==}
engines: {node: '>=6.0'}
peerDependencies:
supports-color: '*'
peerDependenciesMeta:
supports-color:
optional: true
dependencies:
ms: 2.1.2
dev: false
/decimal.js@10.4.3:
resolution: {integrity: sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==}
dev: false
/decompress-response@6.0.0:
resolution: {integrity: sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==}
engines: {node: '>=10'}
@ -458,11 +392,6 @@ packages:
engines: {node: '>=4.0.0'}
dev: false
/delayed-stream@1.0.0:
resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==}
engines: {node: '>=0.4.0'}
dev: false
/detect-libc@2.0.2:
resolution: {integrity: sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw==}
engines: {node: '>=8'}
@ -574,6 +503,7 @@ packages:
/entities@4.5.0:
resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
engines: {node: '>=0.12'}
dev: false
/esbuild@0.19.10:
resolution: {integrity: sha512-S1Y27QGt/snkNYrRcswgRFqZjaTG5a5xM3EQo97uNBnH505pdzSNe/HLBq1v0RO7iK/ngdbhJB6mDAp0OK+iUA==}
@ -623,15 +553,6 @@ packages:
resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==}
dev: false
/form-data@4.0.0:
resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==}
engines: {node: '>= 6'}
dependencies:
asynckit: 0.4.0
combined-stream: 1.0.8
mime-types: 2.1.35
dev: false
/fs-constants@1.0.0:
resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
dev: false
@ -661,13 +582,6 @@ packages:
resolution: {integrity: sha512-HVusNXlVqHe0fzIzdQOGolnFN6mX/fqcrSAOcTBXdvzrXVHwTz11vXeKRmkR5gTuwVpvHZEIyKoePDvuAR+XwQ==}
dev: false
/html-encoding-sniffer@4.0.0:
resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==}
engines: {node: '>=18'}
dependencies:
whatwg-encoding: 3.1.1
dev: false
/html-escaper@3.0.3:
resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==}
dev: false
@ -681,33 +595,6 @@ packages:
entities: 4.5.0
dev: false
/http-proxy-agent@7.0.0:
resolution: {integrity: sha512-+ZT+iBxVUQ1asugqnD6oWoRiS25AkjNfG085dKJGtGxkdwLQrMKU5wJr2bOOFAXzKcTuqq+7fZlTMgG3SRfIYQ==}
engines: {node: '>= 14'}
dependencies:
agent-base: 7.1.0
debug: 4.3.4
transitivePeerDependencies:
- supports-color
dev: false
/https-proxy-agent@7.0.2:
resolution: {integrity: sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==}
engines: {node: '>= 14'}
dependencies:
agent-base: 7.1.0
debug: 4.3.4
transitivePeerDependencies:
- supports-color
dev: false
/iconv-lite@0.6.3:
resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==}
engines: {node: '>=0.10.0'}
dependencies:
safer-buffer: 2.1.2
dev: false
/ieee754@1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
dev: false
@ -725,51 +612,11 @@ packages:
engines: {node: '>=8'}
dev: false
/is-potential-custom-element-name@1.0.1:
resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
dev: false
/is-stream@3.0.0:
resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==}
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
dev: false
/jsdom@23.0.1:
resolution: {integrity: sha512-2i27vgvlUsGEBO9+/kJQRbtqtm+191b5zAZrU/UezVmnC2dlDAFLgDYJvAEi94T4kjsRKkezEtLQTgsNEsW2lQ==}
engines: {node: '>=18'}
peerDependencies:
canvas: ^2.11.2
peerDependenciesMeta:
canvas:
optional: true
dependencies:
cssstyle: 3.0.0
data-urls: 5.0.0
decimal.js: 10.4.3
form-data: 4.0.0
html-encoding-sniffer: 4.0.0
http-proxy-agent: 7.0.0
https-proxy-agent: 7.0.2
is-potential-custom-element-name: 1.0.1
nwsapi: 2.2.7
parse5: 7.1.2
rrweb-cssom: 0.6.0
saxes: 6.0.0
symbol-tree: 3.2.4
tough-cookie: 4.1.3
w3c-xmlserializer: 5.0.0
webidl-conversions: 7.0.0
whatwg-encoding: 3.1.1
whatwg-mimetype: 4.0.0
whatwg-url: 14.0.0
ws: 8.15.1
xml-name-validator: 5.0.0
transitivePeerDependencies:
- bufferutil
- supports-color
- utf-8-validate
dev: false
/linkedom@0.16.5:
resolution: {integrity: sha512-FtcuLuxDtlKWWilm5Z0HgmrfMwO0tOfC6tu47fRXj2/KGEeDSh4ihiDwFKZSbJj6zh520r8XZjZ7v2Jb30HAQA==}
dependencies:
@ -787,18 +634,6 @@ packages:
yallist: 4.0.0
dev: false
/mime-db@1.52.0:
resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==}
engines: {node: '>= 0.6'}
dev: false
/mime-types@2.1.35:
resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==}
engines: {node: '>= 0.6'}
dependencies:
mime-db: 1.52.0
dev: false
/mimic-response@3.1.0:
resolution: {integrity: sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==}
engines: {node: '>=10'}
@ -812,10 +647,6 @@ packages:
resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==}
dev: false
/ms@2.1.2:
resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==}
dev: false
/nanoid@5.0.4:
resolution: {integrity: sha512-vAjmBf13gsmhXSgBrtIclinISzFFy22WwCYoyilZlsrRXNIHSwgFQ1bEdjRwMT3aoadeIF6HMuDRlOxzfXV8ig==}
engines: {node: ^18 || >=20}
@ -839,10 +670,6 @@ packages:
boolbase: 1.0.0
dev: false
/nwsapi@2.2.7:
resolution: {integrity: sha512-ub5E4+FBPKwAZx0UwIQOjYWGHTEq5sPqHQNRN8Z9e4A7u3Tj1weLJsL59yH9vmvqEtBHaOmT6cYQKIZOxp35FQ==}
dev: false
/once@1.4.0:
resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
dependencies:
@ -871,11 +698,6 @@ packages:
resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==}
dev: false
/parse5@7.1.2:
resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
dependencies:
entities: 4.5.0
/prebuild-install@7.1.1:
resolution: {integrity: sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw==}
engines: {node: '>=10'}
@ -895,10 +717,6 @@ packages:
tunnel-agent: 0.6.0
dev: false
/psl@1.9.0:
resolution: {integrity: sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==}
dev: false
/pump@3.0.0:
resolution: {integrity: sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==}
dependencies:
@ -906,15 +724,6 @@ packages:
once: 1.4.0
dev: false
/punycode@2.3.1:
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
engines: {node: '>=6'}
dev: false
/querystringify@2.2.0:
resolution: {integrity: sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==}
dev: false
/rc@1.2.8:
resolution: {integrity: sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==}
hasBin: true
@ -939,32 +748,13 @@ packages:
engines: {node: '>=0.10.0'}
dev: false
/requires-port@1.0.0:
resolution: {integrity: sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==}
dev: false
/resolve-pkg-maps@1.0.0:
resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==}
/rrweb-cssom@0.6.0:
resolution: {integrity: sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw==}
dev: false
/safe-buffer@5.2.1:
resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
dev: false
/safer-buffer@2.1.2:
resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==}
dev: false
/saxes@6.0.0:
resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==}
engines: {node: '>=v12.22.7'}
dependencies:
xmlchars: 2.2.0
dev: false
/semver@7.5.4:
resolution: {integrity: sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==}
engines: {node: '>=10'}
@ -1012,10 +802,6 @@ packages:
engines: {node: '>=0.10.0'}
dev: false
/symbol-tree@3.2.4:
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
dev: false
/tar-fs@2.1.1:
resolution: {integrity: sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==}
dependencies:
@ -1056,23 +842,6 @@ packages:
engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
dev: false
/tough-cookie@4.1.3:
resolution: {integrity: sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==}
engines: {node: '>=6'}
dependencies:
psl: 1.9.0
punycode: 2.3.1
universalify: 0.2.0
url-parse: 1.5.10
dev: false
/tr46@5.0.0:
resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==}
engines: {node: '>=18'}
dependencies:
punycode: 2.3.1
dev: false
/tsx@4.7.0:
resolution: {integrity: sha512-I+t79RYPlEYlHn9a+KzwrvEwhJg35h/1zHsLC2JXvhC2mdynMv6Zxzvhv5EMV6VF5qJlLlkSnMVvdZV3PSIGcg==}
engines: {node: '>=18.0.0'}
@ -1126,18 +895,6 @@ packages:
crypto-random-string: 4.0.0
dev: false
/universalify@0.2.0:
resolution: {integrity: sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==}
engines: {node: '>= 4.0.0'}
dev: false
/url-parse@1.5.10:
resolution: {integrity: sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==}
dependencies:
querystringify: 2.2.0
requires-port: 1.0.0
dev: false
/util-deprecate@1.0.2:
resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
dev: false
@ -1146,13 +903,6 @@ packages:
resolution: {integrity: sha512-UOzej0Le/UgkbWEO8flm+0y+G+ljUon1QWTEZOq1rnMAsxo2+SckbiZdKzAHHlVh6gJqI1TjC/xwgR50MuCrBQ==}
dev: false
/w3c-xmlserializer@5.0.0:
resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==}
engines: {node: '>=18'}
dependencies:
xml-name-validator: 5.0.0
dev: false
/warcio@2.2.1:
resolution: {integrity: sha512-KPLoz3aFtdTjexG+QQaubMyuLiNANzvcadGMyNKdpcmhl0k6lBHQQVpxZw3Hx9+4pbyqDXyiF4cr/h2tS8kvcw==}
engines: {node: '>=18.0.0'}
@ -1166,31 +916,6 @@ packages:
yargs: 17.7.2
dev: false
/webidl-conversions@7.0.0:
resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==}
engines: {node: '>=12'}
dev: false
/whatwg-encoding@3.1.1:
resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==}
engines: {node: '>=18'}
dependencies:
iconv-lite: 0.6.3
dev: false
/whatwg-mimetype@4.0.0:
resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==}
engines: {node: '>=18'}
dev: false
/whatwg-url@14.0.0:
resolution: {integrity: sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==}
engines: {node: '>=18'}
dependencies:
tr46: 5.0.0
webidl-conversions: 7.0.0
dev: false
/wrap-ansi@7.0.0:
resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
engines: {node: '>=10'}
@ -1204,28 +929,6 @@ packages:
resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
dev: false
/ws@8.15.1:
resolution: {integrity: sha512-W5OZiCjXEmk0yZ66ZN82beM5Sz7l7coYxpRkzS+p9PP+ToQry8szKh+61eNktr7EA9DOwvFGhfC605jDHbP6QQ==}
engines: {node: '>=10.0.0'}
peerDependencies:
bufferutil: ^4.0.1
utf-8-validate: '>=5.0.2'
peerDependenciesMeta:
bufferutil:
optional: true
utf-8-validate:
optional: true
dev: false
/xml-name-validator@5.0.0:
resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==}
engines: {node: '>=18'}
dev: false
/xmlchars@2.2.0:
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
dev: false
/y18n@5.0.8:
resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==}
engines: {node: '>=10'}

View file

@ -13,7 +13,7 @@
"dependencies": {
"better-sqlite3": "^9.2.2",
"drizzle-orm": "^0.29.1",
"jsdom": "^23.0.1",
"linkedom": "^0.16.5",
"nanoid": "^5.0.4",
"p-map": "^7.0.0",
"undici": "^6.2.0",
@ -21,7 +21,6 @@
},
"devDependencies": {
"@types/better-sqlite3": "^7.6.8",
"@types/jsdom": "^21.1.6",
"@types/node": "^20.10.5",
"tsx": "^4.7.0",
"typescript": "^5.3.3"

View file

@ -1,12 +1,14 @@
import { JSDOM } from "jsdom";
import { getHtml } from "./fetch.js";
/// <reference lib="dom" />
/// <reference lib="dom.iterable" />
/// <reference types="node" />
import { parseHTML } from "linkedom";
import { drizzle } from "drizzle-orm/better-sqlite3";
import { migrate } from "drizzle-orm/better-sqlite3/migrator";
import Database from "better-sqlite3";
import { precios } from "./db/schema.js";
import { open } from "fs/promises";
import { WARCParser } from "warcio";
import { createReadStream } from "fs";
import { writeFile } from "fs/promises";
import { createHash } from "crypto";
const sqlite = new Database("sqlite.db");
const db = drizzle(sqlite);
@ -17,16 +19,7 @@ async function storePrecioPoint(point: Precio) {
await db.insert(precios).values(point);
}
function getCarrefourProduct(html: string | Buffer): Precio {
const dom = new JSDOM(html);
const listPriceValueEl = dom.window.document.querySelector(
".valtech-carrefourar-product-price-0-x-listPriceValue"
);
const matches = listPriceValueEl?.textContent?.match(/([\d,]+)/);
if (!matches || !matches[1]) throw new Error("No encontré el precio");
const precio = parseFloat(matches[1].replace(",", "."));
function getEanByTable(dom: Window): string {
const eanLabelEl = dom.window.document.querySelector(
'td[data-specification="EAN"]'
);
@ -37,11 +30,71 @@ function getCarrefourProduct(html: string | Buffer): Precio {
!eanValueEl.dataset.specification
)
throw new Error("No encontré el EAN");
const ean = eanValueEl.dataset.specification;
return eanValueEl.dataset.specification;
}
function parseJsonLds(dom: Window): object[] {
const scripts = dom.window.document.querySelectorAll(
'script[type="application/ld+json"]'
);
return [...scripts].map((scripts) => JSON.parse(scripts.innerHTML));
}
function findJsonLd(dom: Window, type: string): object | undefined {
return parseJsonLds(dom).find((x) => "@type" in x && x["@type"] === type);
}
function parseScriptJson<T>(dom: Window, varname: string): T {
const script = dom.window.document.querySelector<HTMLTemplateElement>(
`template[data-type="json"][data-varname="${varname}"]`
)?.content?.children[0];
if (!script) throw new Error("no encuentro el script");
return JSON.parse(script.innerHTML);
}
function eanFromSeedState(dom: Window): string {
const json = parseScriptJson<object>(dom, "__STATE__");
const productJson = Object.entries(json).find(
([key, val]) => key.startsWith("Product:") && val.__typename === "Product"
);
if (!productJson) throw new Error("no encontré el product en el json");
const productSkuJson = Object.entries(json).find(
([key, val]) =>
key.startsWith(`Product:${productJson[1].cacheId}`) &&
val.__typename === "SKU"
);
if (!productSkuJson) throw new Error("no encontré el sku en el json");
return productSkuJson[1].ean;
}
function eanFromDynamicYieldScript(dom: Window): string {
const scriptEl = dom.window.document.querySelector(
`script[src^="//st.dynamicyield.com/st?"]`
);
if (!scriptEl || !(scriptEl instanceof dom.window.HTMLScriptElement))
throw new Error("no encuentro el script de dynamicyield");
const url = new URL(scriptEl.src);
const ctx = url.searchParams.get("ctx");
if (!ctx) throw new Error("no hay ctx");
return JSON.parse(ctx).data[0];
}
function getCarrefourProduct(html: string | Buffer): Precio {
const dom = parseHTML(html);
const precioMeta = dom.window.document
.querySelector(`meta[property="product:price:amount"]`)
?.getAttribute("content");
if (!precioMeta) throw new Error("No encontré el precio");
const precioCentavos = parseFloat(precioMeta) * 100;
// const productLd = findJsonLd(dom, "Product");
const ean = eanFromSeedState(dom);
return {
ean,
precioCentavos: precio * 100,
precioCentavos,
fetchedAt: new Date(),
};
}
@ -59,8 +112,18 @@ function getCarrefourProduct(html: string | Buffer): Precio {
if (record.warcType === "response") {
console.log(record.warcTargetURI);
const html = await record.contentText();
try {
const product = getCarrefourProduct(html);
console.log(product);
} catch (error) {
console.error(error);
const urlHash = createHash("md5")
.update(record.warcTargetURI!)
.digest("hex");
const output = `${urlHash}.html`;
await writeFile(output, html);
console.error(`wrote html to ${output}`);
}
}
}
})();