From f0798e8620ef14f4fbe3d4980ddcca28f17909fd Mon Sep 17 00:00:00 2001 From: Nulo Date: Thu, 4 Jan 2024 18:12:55 -0300 Subject: [PATCH] link-scrapers: reutilizar codigo sitemaps --- bun.lockb | Bin 196048 -> 196064 bytes link-scrapers/carrefour.ts | 14 ++------------ link-scrapers/common.ts | 14 ++++++++++++++ link-scrapers/dia.ts | 26 ++++++++++---------------- link-scrapers/package.json | 1 + scraper/package.json | 1 - 6 files changed, 27 insertions(+), 29 deletions(-) create mode 100644 link-scrapers/common.ts diff --git a/bun.lockb b/bun.lockb index 39be541d5637f92d6de7e9f04fc42f1eab621f1c..b925de1165e4291f4b193e61e8f15280fc9a87ff 100755 GIT binary patch delta 13913 zcmeHt2{cvT`}aA=5Sb%f^Ozy?JWplJSP==CA|YjNAY)VtR~;(Rq*S7WzNA#7IfW>M zBvgoouS&enx$f2N*Z2Le-+I??y=%Si`rUQ+{ha67&wlo2Kl|BxpL@@3`N7urgDqP{ zQnj`)Zmj=SSo5Zzd(E8f_UrnfQ`)zqFK)u+m&>h;uy(%Wbd#NDdm@4eAjq!RXardV zC`*>)5J&UK<{Uu^{b>jy1-d(c!hnmT5JU`6Xg7jL1D*mT0dkX{atN^cWFQDPNW$a^ z4r?BleF(w_QuThKwiED)bszc@xTS{(8Wvwh&!o`A0 zf+$DMj%KZVq2gr1P;&1cZoU+iC1+c(R(gbSp;#CO7E8$4kX(wU#aPIl z+u3Pi%L-O9X~zL_aFQCia|b*5l&2P1a3?oKcHh~83eI%{jocHm$-`o5WWhvgZK5;E zK405OXceIr!Y7}sMs}xSK7qTyH(-FLJefO5kSs3?^-0|1;WRaJeiAn-@yiQn&x)(D zwk05l3xoh8A6L|6m4!)lqe%S{x=gtsIg&+_4Vn7j%98ArWXNQ?3qf2dS9x&dLb)1* ztM%lb6^2ZfNeJRZ7ELf@t$-_ciYH$wT^^QX1PLT0WbQRTR@N}6(VpwyK$rCdNNZ`W zSf@c+Pmv^Lba~dLP?=JF2+fT}f|Ug-)WD2|>qflK|hg1YHrb$eDK%$Um z?S-qkVM~IQ8szD+d82lZKa4B7w4&kxjLfB0tY2elcYCgznxG6$yp4V}Jy) zB0$3584NaJLUERAa1M|lR1QcuybMUxS5oV%sP(nf`g&^pb*g+5kcdbVAiR-gD&7NJ z0CEc;GvITozB7?p(MwhI0}>s-p~`QmI1ES-8>QkTAVKUCRh|MQUIY^QZ&dsa$OVT`fU`1-E=3lA_q zXMpfV+Tnxn(*uZ-%T5T`617awH$k5bkf6y8xDZeX5Z;Iwe8>Y@0TR7A021+YA}XoK zQ0b7z_1`5sH{btVvj1JO|1T`ro=XvIW@l>B7s*KBR(xx=DvKW&Wx77ycHQsr!3{@5 zQ$Me^6R3%vi2igY<&M)Puk|X?^70 z{K4tMpg{Hg%E7x8u4c^78JF&v&Dfq;L%P;h93CYr;-oVA{fYPC%R*vjJ8Q3Kw~Bpv zc+7QKcXV`0_mi(Pr%sg)sE&8=WxTI2Q8jS5U(+sz#-V0ve)c2E>{oq0I%yi~BrAJF z#t#<;oLJku;>Ra^@^o5+u$!HPld{^w64^}T-mdP_CW*8I0xqdBi+XB$@1-&G)?`Vb zQ#^^WA4f`tKWL0rrufwlF;?prUHZYS*EUq3Y8}4(b%C{JnCTMDyy4v24knM3e#|s) za5)nlmdtopBksM1>0<9)@C5>P{~4|sI09mcV}SZ%bQ5Q?l6^&HtGXoTlcxIdRQG_C z`;+{%_byL`PKz}boKTBd>he9HmGrRf`nXEHZ);1M3**ai=O$8cpN;l*_uIX=s3v+h z@vVxW-qV)hZG7&!2|m{E&+v6L$@VQ-%!GgVobm9+qO74B$E>PV$;%io#w_%xY*M6jgG1OT{ z>e%re)^8-XHrSAS67=eVzqf0CNN@bqz4XCw3}2gmOHAW=5qzf>DrnaKopZc?ntYG_ z%Qt(m(RSsMcXfX~NuHFRP}|wsc4gt#OHb1ye|9j4Z}XTJ&%&a)EmYmgO? z`6=*R%2!Nb%w^`xWVU6?J?77|4}8*6>;GGS>7B}r9V{aW@e zH%G`*&-A3EcC#LsQha&X#pLoVQnl#gyF9LSrWKP}oj(@0e&{p0e01m77;dZ!Ls~X* zLDm?17JPDDW9Vpxhf>9XH)~Yb_WvAnlU875=glrSL3+TJ-nd)AEH*%dD^6JLq(2*% ziIbnssNab-)4av_L0vQ=(Z@t8fi8>eonkCCu6Nu+XC{_wcc%3UxXb&#o{g*H ze0OWpU{kyP1Cib%#hA?F+7h1k4p;ti@Emyi#;}WS-geW^n_*j`fk%VBq1^okxtxYk zH$rMT8q3*C*0DI>U~%U6xHY)|TNbx2on$Zb;qgLehx(mU?$`bjA@LUd{aAvVaZQRk zUZ{r(CJq!^dW^fA6)PT{S#l@rd3StB?zU`;z}<@O!KIQOSN+2?wDlEBOXDkruPKja z>95W$ooXs87<<%wvwnfj<52jDGoMzO^z>TFuL=E#kgf_4+L&>4x==Il#a)5BXD>;25QH1dF8<*IE`{^xt zPFirx!2fR2^jgm&k@erpki?fG53u_(ACx!ZKM8ugWk>Fv_q-{4zPj8+?wZ5EijcBJ zDMFXn`fS7RzFaF|*jxWTYNWh{tw>&+BQ41-HQDBKRg6>Cp*TbUOm~ex^hbA7URVe z-v>LKaW6xlCmqt&^uXc#Q!}Fpk*gd*RYwEtPaZH^SGS(`aj@w_6^_V-)8k#vXLb3a z&ZuNg9JJ^yh&$^jx0^dE@k-Os?A};9dO7s;+-~m@-oX7yLE6r5|CE$KRJM)#S_6db z&hS3tH~goV3{Jf-pJLqKLArn9)%BE>=f8Ojcam;|E2)HZOY{~gX(uhjIgEgwNyHh( zLc?Pz0?$`5*jpDJ;X7U1l(|x>_hQiXspk8GYM+};Is7)9N=};`(r23Wu9V++qG4zc zgQ1(#7k~1~97#HQWO{lY)8Ez>%Uha8J8Erh!}nTScT`&lh-_jHXTQJLXghEDqOxQ0 z_TuWK;?3npyj)OO)-MnHgSB(2LxA414{ZQD@=W0+n!smTG>x zV&w;K?M}ZLzkcHHQ@Vo3nijX_U0$3g6L)@VWyXlI(msP>OJiGBf8*H1;#d3O_lfz* zJxEXQjhX1{p3kS7^)wrw*@V=cpN^L}H?cD+{+nW3|5bHem8V8g@|~X#k(jLIKdYvE z(S3Jj(&PH!r>8FJ#B47OJoDxy9%c;mTz@o<1Z%5XR-D$cWR7MSH`;8nsB==^n$55_ zRDMu$4}Vfb%A;&{)$17t4>Bh156Y~6pEVrB&!Sbiw(s&lVmKW=oSvRnDc?CZrx)o) z8md``?@wUicudH z`1?pjEN}ixtr+p7vx@6=^nvyuE}ac<<AeWnVUgC{}n}Quu%?n*o`C zqHy`oy-gY3_NVr&7f)aKxAb?Vk6(N%mtXZSy7zTx&y_5{;Vk>1(REsQYNF7^=P~%~ zxs9eJEi=OsxJbLjCcMWCA|ASf^yyozJ?TwpV-6|GR|ATGG+rBeZ>@y3mmhOonk!lNhuSj0j7V?{rEBoPc?(vLYom<&p zR0E!64)hHA1Ex|(ZZwoN2DfZcWoB>`5AHr%*Uo;VL_YkpW>&wvSD~nq_XW!zm0Y9G zCU~AG=1r;$j>?B62uW?-a%@!u9lewE^mg@R@_&>l%)ZRtyJ^V+v8?ZQ&jy|>xpMYm zyQ{0~g^S}6X)3SGKh%=GvwV$v99R@wdm%FDO4_FoFUuqCk4@eQ$>YWraN5uKkKFL& z$>i?YH>SB^;9A(&vG|g=DcOs6755(7crG_P%vze+=Frdg%>!2bqf@KRDv~bW!WPPS z<~*;V>mEbgsS6Ag{Bv0K>axq9dG%?Ur@$AZBX zoE#q<-fZDZF4=Ok+ynb7@I+b5pvT?kS539m@WN#fsvxN$QVF+Ojp19<0C`J$x#;g< zEz7^2zLbWqTKCvR$}{|aw=LV2wFdkZ3j`adlD}+=KV-8l0N>jw>a-pZyh7Tm?I5r^vr!GR9s9y6|PQb?OugFxY_oiV)e6+@lA(Z z1|Mm;3IxZ845xPe?d>wWp+BdXE>xH3Lv{7)`^y%s-vf_49U)ui31wjFt1S%+Uv9jj zbCC1O$6MSpR))>#8d4+@N2hB3eX`qMt`aTI963Uk<8r6(TR!~GiE~&3y`}}1j2`cm zXwAs^`ABJ9MaddrEWap_m!I#Iw!L<~O(*7ZF`TVqsOs-Cw{Itk8buckh%A@QMVwdd zGsvIy-%I?gM7wLRq^D=*b9hiu-V`Ks|t?(dF^t|ScEWo}~H ztWX|Ds+C}h`C;?b@Oyfc$Kn1^N8|IAN@ax=f{b`QLC?bdP`IAf)nO}#reY6`1L|3e zn3WAv!ecp&ma?R*YCnIDv&Pw!zp7P8^K_0nQ%1v5zl&+R9<9=U-4XtB`^g3H6Y@O0 zDtda+y4wbPw;#>FfaUD)(|=vcnK#sOU|Ov#q&|9Q?3bgP-L<%rDq3Ahw-5OQUJtVA zeks{qJr*E!JmA(EAJ#?28}8s?HbAeVTYLYwxaCM2nr;?IH@SW)qoY?tPp@c!?h<(~%@L6$ zhK^RVLu03`b^VHVU#VW6t8?y#!jpCp|Ge9iSCUD~1GG{eS*OU_up*bH4M)z+4wA@tonUteRb$ptI8;E$A^BhB?C|R$5@zrFXp^WryFD){UBL8 zELM-q=zZ2=ZI;Y=Z<8{Z_HK3`dxdznf4+n0@hbZ*QsK>ra1JT3u;}Q~d^y=y94XlfF3W=^5K9YXxO_bxN zhRFc!iZFRo<}(R5?#Yd{9M|hOD!#7dC~48^gN@8dw-XZ&T+MaYQ9Qro6;FD@W2YJt zuDk;11r9o(>RF5LpK>=Wd*yksQp?f7{>`c**DoA4pCujB*nK8XP1;y+cg^2IJsVku z5IKoLSDUA$T$?!gDlzeX{t!BPH|XgZ-ja~BXx@92w=(DSsr^IeHs;!%m2vHezAR*w z#FYP1R7PDQL%Jc86cAzccTFJQS}Vutcy4`PqmEN!epyHIrDY;EQF`Z2AY zZchcQ=;+;~r}un4pHGLgR*25>}GFnq>gJ;=81`iFCtiIED%WDNk`;p+I!M5%d?|$sN z)Y7txG0pkWkKk~F^Xs15Nw;;>+jzfUj<|WO>m6;0<>XgJ@?SoCcAzi^AGC)IzVLxn z)?a&8F2YXA_PI~TTk+_+z)zc>+-exC4Lp!Kxx}m`;#T#_Ao&-*qBpWcHZ2PYiu~v3F&j*(ZQ(sJmv=Jt zHg@|jh_4kRt()0nS*Cp8&TGMjyUkW#yI734?JVOyY4qZw4loCuMw*5jDsz;ElI8Am zm{fUweDJKTay#2$zPP=zK|YZ;m0ty!nH6p6E|#7g>)_A78{r>9zT~HmB$qapOlGRYhIf_uF;7t*c1Od#(gN z(~#H6;bNKI_~Je%gVUmjh0!W9s(7Iz(8KMpj{)!3itk;_C~oIlC408YIN@Qs>Dsaz zw>By#RtFXE+N_t1zW%*yFl(4fjgM&UGm%Drd^C&=TzkQIeJbmSno86o&)BHHZfKv& z<6fuySz+nBIh6vM0OM){O} zls+$WN60rWXykJ;o#S#_Vw>;*wOp`C+&V0Ny^u_6Oh^BQ&ywv0XGr1~q)f)#bleZ` zh_S57`|9cVCb{B)$(qEKILAt$7vaVWy_B z>ou1z8mW2_lc{ zQ-|?-pl4>ZU$M7zv4Hi6#MiCyESqj)9wRaVdYjr_ReiHa5FWJB_vtHZKha+*J3ZYO zJ03`qyw^YSrkqFkno<8?oMxR|&Ei#Pvm{zp(-Vc3`z$7&UJ|czph~bM9s{k#kGG+` zs3%_4hVDhJaPxMQ*Vvl+%!fEw2tzI?HdNg;s*W3U#LwTvi`d}_J02*>H975Q3MxxH zH*$vJ0!0NnR)wMlMIDMZ6dfpfxPAw^SCn|%YXrp@iU|}`D8wUS11KJNPX{`{K|GLM z4aF6T8~(Ht-HtoIK;6+I{PYX-K_c-$*B*)k6h|mdP|TnZ53`8D5ajKktbk$;WdRi8 zaaul<0w{-}9Dz~@&dfLquNuBLXevX^(IGUH1q(rIm_MS)`ZXsMFi8eRO%*i+zp0K1 zqUv~`5;n}Bt)hlIxMBjRHh#$yBLVzW#z?}TMyj{W{c-Dpx?^H<=THKMYN}}IY2)fD zm>{Db(NNqO;};>W5V5TYB8MQmz6L39Z?oDdPqR$~TMXK`IMdo3u%vYM2rA~9)RDFp*g@GiS* zihE1xZ1gXK32-ZwC>Xf&sunnDJG16 zCu($Xb016_x6#Bj;2Qqj#1D>~bub5jK25BK7&bve1K(wc2{7sr=>#1Jq&QtY3cK4lxckt(Shq?V@w--Q3pY#M$HWuBbev8 zurU*JiR9tOjbQ5L85HZlWWhWM;nZ*vdj~DS7FuF@bCra}Asb8p)1S}82?#$S&NrCr zVBRw|11TNsHvt+Nv~-(K7a+gil$LsP8T3olZHlc#@8GItK(`;aHN#YB<341DIg)7q zI_2>_81QfvY57kUa;wt}qsEruvJo@7q(R$-3mp+0!|D$D`bv6u^{9j1my*kdfH zFkY}4v%w!(KyFahhS|D8|F<*Oqf9falwz<&PVT8NYu4pS+BRE3>zXL z_A4o11qU)ywtWU%z#3Dd%{s*wj|W7E*n#L13V&#gt@s}ktywzVez{cg7`m0j_TuILnef-#4kBudNhdWylarTwLOVar_| z)`mdyUp3YEtOK@~a-7rtM_uaJK5K7Q=S({UY1v@wh;4wp`7O4!wv_271}{weAN7ao zg^SP?VaTse-r_D!VD~GrU|#HLtMP*qW=jNu{!V~0(R9ZCQjqcw#NwZBc9qL{I|NZ{C3e!YqarM=} zkFtSiqGEU=AyKxC2b9|`;_m4`7a8Iv2Dm3^K_?pce?CC}XEzu`SnzIlI2ow-D8KI! zzvmk*XQ){(LCO068@Y``l4{|Ou9V%8xDEdQe0~`9kY6_$)PopiN(8s?!L;Vmm-N3& z-)}xx5T2ocY1M3752prtx2Dt=yRs0~s__oRHnX9cHIHMlzie^eBbY?ZnVZ-F6w*-0lU&jhlbOr16SwYyoch z48*(_7>TG=u6g+aQ$U%tRHA~m;YMAcmF&U>!Tra6IN`**F%KpkN?mz3tlhkBED!e$ z!z6GJS#akbObNd-2z{u&#F+5X9?IS1%^qwooJ(jFm_d*1c>F6&9I}$8Sb#UYg1a|; zeBu?B`jXpRPH39vdTelj)o9Y0jiZgU!i|~j6 ztQk*fhET70gGr#U8O9A_MKq!feK2sdw-_HyK?&-Px0v#r$ER;0p7ev28iG@oUJ*Ei zEk$)}a)z)~g!9cK5a^b6j2ZVC!B}zj5zKE+y9uCc1baa2XA|St5}f-TOkevuY;vxr z@%NY^<%-LH^VUFBWmN){)m1cbmncXL?q@)B?*~kj+FlXQ9)(GI@c~o(L&JufwS#Y~ zQA~`!PI_MG9mNbFxrD|rZ-}qUH<+U9;}FRUV=%Sa_`@-*830&nnBVGR)FwlxVxbGq1MEI!KA?S=of7JT#LDw)>wYU7%`fUs40w_b`AcF ze|Mqujp71EO7)rb4U?QVQJ#XBzy1cvK|f_O-?0I_cp8(hQTu_Jp=b-fWg4@V41BYv vMX~y@{*bf7t)xYmeM1^&g3L^DyWmgwy#ij&_|s|3hxn}s;lP!DVkiF>SyUA^ delta 14063 zcmeHt2UHYIv-Zr8lLSRrGJ=AEoTCIu5(LahP*G4!1Qiev5ET>Vl}i zYXAdC5*NBNRf z<<~j#hAimnn&~F>Sg6ZbXa7u_xpCD8Pi>vfvpsW3hEa-H?vfII;XRQUCWc{A!MiX_ z1W=JGFCdFkspbN{bK4GJ*euXJ0h9z3-HBl`fRtDaQvf^emdYruoD103Vsv;0~r2X@+gA#Vz@R{SY|#oS%y3B#2g`R zTn&F3Y|3`w*)wE5AD^A)yiFV@HW5%eXVQ2(zX1M@tfM!k)`#)4isIqPROLnL z)cSCKs-m7YRXjq7F0YEHq`r?)r>@$YOuZzlP8E-2=p&t}7Yvn2*VKB@UDAWPD8MMN zW#9=wWlQi0cwxCMfcobnG zS3nR5YP*ph&ry(^=u&2s9#1PsR&+^zksgm}6oxIIlBgiLOiAxRT1%JO74(D`MPt|! zQo<%bGnxJKLo06Tx_NpWy&!p0J)(^`9QI?F33oqeaI`yef4N*r>K+@{nt zGb4OEwIObyKqkm0P|F7FYo~IBqXpE)bTe)0$wLcy4jsacvuYSs(KpUvnFw}Lw)kg6cwvM4@_U^{R4_b1zNKP%kjX#2VBSZZ=t$8-U z0wsGSsPbF&1g?WbMwD#(+X$DSu1;7euq6@0y#7`x@Ep`+^#0PH(viuDCDlxFGTZ_o zpkB$aW>LQa^^U)*O_P4}VOTr6^YDZAAB>6Q-~3oyuLJeAzq#gU1GO1BM*?z3Cnx+b zOk-Kp%vkK?;RmfZ!y8iz!o*{;H_!wL2^YmyO1 zDjYe}kMxg0xTqcpMjT0SI9f~|;g9kfs4;wzurp(Z1ct#EGbVrj3*>-$ zbA}y>>>yh)WD?1s*#hE#b__j|YByWdiiAAn>;o&v%bD`VhuKrWE00678QF!T+PjEXjfq63iZxQik8Ft8VpB-Y2k0YH-2 zAVVGoBtImQ`Vj_>0`h>&PC8{EdB_DnBu4VWFWHu#p%4TlM=+g%VvKqUAn8~F5Wbi+ z1LXiodqsw}kJwKHltp>L8tKOjj{7?2lG5)i(a zEc_?~+5nQhIRTRKb0#YpNaQ)D3jTY^PR{qgm+Zfn?Ei!%+nnpqXQr4QzQZx^%{jiJ zB7^VqK6~y}cFA0U8SS7N-wm?B{A!kac}oo)mLD+9+;FI;nO{xaT3+Ve?98}_j@p_Z zZ^!M*OMD>CXC^qT@Q%9u!(m;agoo}$-`^i)b8?mBD)S#WdVg`v{`U`mT4AEgCbsM` zz4=(duk2}6J4JBs)t2%VuO5{*;*HT`~kQ~};89W3x ztJ^!UIxaj^g>T5=p~N%x>eaY&_5J22HOo@8FFJNhoO+T=tKP|_)3xl$_N1tv68YJD zDkmNc;w#yg25joQ-la=Rl*4}tN9O5SpS`7tP8sA3Dq4D73o^V(Nt53wdu5H7SD~L& z!aTNBZ_TWEvQPbZP?8WV*7QQ8J6S7NJH(`bQkGM5dA9;=jW{+TI01WYFuC6{KI|Vm zcR3C_NwK|L#9867=1`QMs@sfX3I@+rdnTHbOV@2laA`fTK;N12@MJ)u%bg(YmAMJR z`sRVh=M^t?f2Qwm7=@&@@UqCEq|-Fxl22v!m`>qSY-F)fcjm#PDR+Mj$(`2ToVaTs zzi)gjXXbciG%7PPo4+U%)xEqsqg$1`Dfnr?UK{j3tZ7Mce4p4)>HMK1!S!6uovX~SZp#hclbzG* z(ouvqE^anw5$*jqk=7w%d(KP0gn^a$k9j&KIE+GU9vfE}v{> z3oO1UHe*7lY{BrRhW3^<+qXzgWYb=2<8ru}K6U2RR(m{US-Hb>>SygN_gt?4k(5d| z@t=O$(W0A+3R;L-)!B)IvSZtXa8&``UKtb%xf4#~tT7)AV{AZZ<0fG|FeE3EIx@pEhE+DaPQ_y|bQk z&xdy|6XE(`xsT)Gfv7vHas8&KhvV2VbA7Vg-vxb)#fE0{Knk(HXWjb^xd>ED2$W2 z|8#VkrIEB*R`VVGv-Tlgb|_6B7mo~Jf0ef4*o?AH`)tY11IHR?f0lMf{Hp8yuf=V9 z|I+W)BI3BrY(Mj*fn_?zHdoR=m)$bepr-DQpa0e;IJc=omxbOTR(c5wOhz3I4)0LO z{jT6JcKvMzV$Q{H?Wyt6Ljnd*?f;ul-XOz5d zwd2UIgUHbU=vi)JKO7vKB*90$ae%mKTs=EgIv$9sQnJ(}0Hb|&nvh;20=2c^h z-gcJlXgIvQvs?6|lje`zN-+xt%BOM5vCxZWr8g>YPe1FzCQ;7pF3#bmRX$Z~x<>Wx z;%%*i^Q{X~eHLCt!Q6#{`jqPaWgZd30ZVI~6}b-?X1%(3d701#TUx^ z{!&zYj#=#UVOn9POOxr-TdG_y6gLYV%a`8nFk(FNKAGazCoUT+U{4vd9XXwq6j4J8Jb>V|SM@?gINKLbwao75O z)Wfz`%AC8Kwb~p4X`X!Ya&c1(v7>Iaw3@@6v$Ff@AJ}I9BNLBA6Zs$pZA=CoJD)fSBdRC;a`MTRRoq(=GBjwKYZZ( zZrYi95lgzxj(%j{v!k^he>{vljDX%&xzJtq)|O}FKeO4_TlAmMGmOd4?9)kYy0%9n zdMNhL$?d-a)OOw4pZPwKvdu{-`t_9;3l(c;=szsYE}1xZ`Zf!_qpbAOV*Br17x^WD zB~~{+k68bt%e?nx><>;tv#wEpo;%M>;qK2_nr~AncUE;8b4K#VZk)g8Z1r37s%ay6 z8rHg>CXONjW1x4%{)2^sx0Qc@w(WA1`yNT7NJyH0cx5mJ*nSXP>aOko; zo8+OU{pY*F+P-aazN??8sA2zQEv49Yfl0!-!bi1+<9iggH$8`cRnFgQrs?4{)p6S2Ieb|&^;bmfOg0M<>Y8&VI z;`NkUjpf98_Gl{sM;3a=Sn18^cA2?kKrd=~C;!v@nWn9;>%zKyxmWItJ{|Zdc-+wL zJ2$oCsh2h7neT&sO^M@rCwE`Gurj7Ok7%exU4*SCO*~|6A?*DQ9AMB8>uzR&2P{JZrTF`NBjqx^tc8Q-G_Id90i5Bgqj6SLO z8s=@YtMy~uSbvs&(A}?d*+t%}soh4>rntY^+b@N*I~lK5b?q%3IqcEVxEF1eh zSI;zb;?Ka6P-2;aR$YDJ* zyR3Uljdz`hsXS6K12RT)((ae?@)cNlXqj($)tX^Hn$Uga)(lFDPRh`8cK5;6XBs*V zEP0Xlg_3`UQ(NJx-o{*yqdo$gJfjOk%JQewReHME`#p<_vw>lbL3sev>a43X571RbcYTLz4NT}W_cdZ7vrw;p9ov~ zvahz7zjW{oE-0QONJ#w@?4BcF<(46Fk*)0%MRrTf9PwL=mAdmYRwq3)mLJRTY`Xu( z;CU=Ev4FFEz#QRtCwsP=4T9VywSj}|zwn;yRP8tM`9 z(MP@wI-W04RQaf7erf$gMU`zzs?e>%dDXa%>VK#WGKq}E1Uhb-j^a@`qmN=!#oC83P-q~tf-%pn0$JalTGO! z<>h)`bpt*2ODlY*Z3u9*ZhPr|$Uv2odeRg*E`mYk$d2a+>L1>@CVPv2^AYaYm~tO0 z@77IWZow%rL88&CiZh*ued-^~n^ADk{u7s2+ew~`N_NTV-9ve*E5x~#ZlmQ^;N|=c zkNVr*QFH3nMjl(%u)?$9n!>gr+bp2h6E9-V-?n}9LuVky zJlOAP$d;$_doTAKt-iM1J3{(Ha-H&kWhuq#XHYVA-kaW>*C)!l(*+Ln)~P?^UX&U_ zLjpEHZz1>VQj>K#jhFj*q<3ZY**$6)Z*k)fQIn42ZO`WNHrVF=Q2i{|m&2P1d@k0M zFF(+L2|NxxerDJF&Qu<+2b;)$bC_4{d93s-B`M!$+xQDbdWfwm;(GaxO-=V~xb(<{ zS-E51e6$0%ZhzQdE3Q{ZNqpHn?XJU|C4tr7<_*8N#2v=z9#*(ec|VRGko5G=WUk)A zCbFQasVI$?P0Y^j6yE~HLuX6v3M!x1wJ2{@PL8wuSXC2;W^MKQ6#d9xpOa$Kb6QrH zyhH${GcyjJHcZjG!%DBPMk*%whWOEIUC)-#8lvwBdCSQ2#+eAFjTYY!`|w>YE#QdH z)_Fygc;}Xiw2q>c4YExE7A@^2{;i8o?|QZ<;RD)c3-ku>RSeRKGGrzOtEY!6Ze7T37bf`q#-0&$)bawfbi(?hPjz?Ua#X^t9 zO3yacEUCU`!fR9D4z@}^1&xB@y60gE`#cxdBzL%oDHI!o1Q@k^@1!6DKRM?x|H889 ztG6@lLi{&w7OUpuE$l={+77miPdTOja?a-_e67=7HYN+6?#sNAySZJcY1GoeU%-;0 zbGYO?CXyAD?yQ<4WHEiX#wKn*k5^!Ag7mC(4#zrPJG9FVZV)bi7b%$~HRy9q%I(>q zrnF#(LcA;9;jP-mZI}>aJ3f+)Tj)Dlk9IhMDg9N-d}0 z`Nq^76tWJ|s>k#UTiGjPji{iyQHc{D-YP4ub9(-ERm_;H^{<8Up}~(cGfz=DDS4OJ z))*vk{`&CZrfpe#b3`*eN1Nx|b=R@=@h*NRD7Hg@X>a^EyKS6lb%+0(>F zsjavA&9=|XnO<0MF2dl8d_vH}1FQBojq}RSmS??&EMleSb*yx*=Oby+vgYvzw%m3m zUn-t#u!_=&Q7?{C8EZd*{)j*46>F(OY5!w!hFUi2+3?+YZ)?u^dHUYMx^8MduToJn zNv|4PIpO>*jrZr;IEU}j9xFmc-LABa?wzGS+txKlK~}1^f^)?_mkW{&HsYJKbKEzm z7@Up%pj_J#JHT7{A>s%5zbte8m$1@{VK*J;UOBRAQ)GmG&b3D0p-_Yr_HPJS@ZgVx zi0z64Qu>@DS{j{MNGv#E zSUcXg&9?B+_6-w8d*(@P3R9r$t4r_{SpP8kyI+I*%Yt8hJtr>PeHDIgbqsxWggIF6 zb~Y)%&xV7o>Bx9vgYKJsRkAB;U(voF7z**pGP=`gasVqZoN&Fk#_9I0#J!xS9`ncx z`nIaS>Aw27IHo**goR!yE4}NV0#6L1BMs`@Kibn4(?qoHT%Rl3S{tIY(Bg`Mk>T^h zZ!a(HN_tIc;dwbPf}#;7zL_l{yu4}2nT;<}_v&YqB9!I?^k#Zate5Coy6KFM!_dG& zx1{~&RzEmpvHbG8LeWd5)#V0uqOSr?oyWs+gIjoB^jX9&pIf|Aa(?%OX2(&B_bZm! zv(S6SN^j75)XQFAf3iSC)LG#J`yTv==_qs9U@y~rKl#6~sn5EHK(lEV#!X^u#;MWJJ2z729s z7D)@P2k*ETpbT+2`jYtk{l^G4FG`u=%LM~ho^W>ber&6GZV@X{UxW;tLd%M8jh@^+ z|3dKk66<*^qpV^brNRE}TEW^pg=e4Bg&H2MbDRh@+IwyPaLTQLbZry;G^M_}ptm+% z%P0>=1^a%Tz9D?MSnVlxp={?`<@N%bR%h4uNWc~7aTM0}mBv*~fAz=d^XaC1Y^yDL z{Li;dPq98OzVTi^TSlS1&TYYOLL8}rP04F=MC31j5?S=_+S;|94-S5dKOQrkgE`D?5$xu1Z+9P7=n1f>G07kAlXf|=#`Z$3Do<`%-`#BP2uhLvmZE7ht#ZcB zZ=D`7wD8E0kUJA<5}m)`UJvM*Wj)rAwL5V@XNQki)0Qw3ojYx3I}^Tn6{uE)MI>AG zsA_B)@xp$dG4b1zb>x9kc=n^&9b?aJbb=$_1V^eZS?WL&U5b~=ECki%-Yh(Vbd69=Ac}ukq<7wv1;!d^k)# z%pwOvlDCKA0L20dHx%-5+DRy9+LH- zL%%vPlMN;eeOf_CKtF1RbR+qdz|H_U8-t{e_G$xYpbT9w(O}MOu?8Uqe&jp}DQNda zmykq4nuG(cg@l*V&CRXAhaO7Sr2Dv{35~T7t^;;$noy&IhBXOafD#R2CfGUa5;Dl& z1XAgy79q-1IO`DFNL-uP2py9&kkV?p*PGhJCup0gOYf?X1XfR4&|XRx+%x>1(}h+V z=+AcGLk98c5gIT=#%O>9*)+oh=4eV;X-w$iy2#v>9xA=R*fN^X>0i-@ZZwgG6Ig1f zYSZIK=wrym4st+4_b;KNH(6^yh)%8;M$8#eCZocp2YJN^T)P3G&K!-pAw7OB#^4pa z|CJayC2Ts_9+YVeDG3HFZ7GUTlW!-Ms@!XT82vPZnIC_R9_z3wjR-O3Fi;H{%BgwR z(1!UW>lxwV1j-mjMUu8KBa;lEMqQiGMSVaQ!pPVd7yZ%h78JDM5vKfI3zT1fZ1~- z#f4UMN-)h*Xtxz4E^|XG{LU+8h!}BT zic9PW9VU;-fwJZ)Get?~zXS7MI%VnOpIVW-2qT~H3bf0fP@S}b07@cdSD2@%)j%!- zmK0O_CwJsf{vo;yCw~hn?tzj3*=W-#Z&0*MTz{%U%O- zj(d2)(j0@0RFe4~ds5!w5}h#kZ9w0lUH~id zl8lVT|EsVuu9hm@3Ch17J>d7RT*{EaL#``e=b!Y$T=2||=dTIMl2eS;;!5AW{=X~{ z1X;aU@;|u}|KB)u?0OjEYKU>E08=Q1l)VUTM&kc#Ip69<_|oA1#}>y+X_wa#w|Q|* zn&(zx2OnTXFi~KK)@>u?XxSx15{~2>2nCe>gy2SVo)99mphv``X>cE*S52hh+O*fz zgd-wgNJn+8z{;$f5S=`V=q6Mq zeJP>pZes4FR-p&Zm2^v=9vDeT6Cr>KdWh+?+#bjgu=nqUP*%PsIMLc(f(Hro66+_m zTL7ARi5KK%2lF%sMg4$zs{25EWA^g(BVmMuI*EDQfg9CS)Vy~DqQsYADEAQ@C47Pr z>Y<~#FvFk_M2(+dF7;5!c1RMTYDg0G&jjo@bm#L(fdk3BhKA{%;k*QQq0r|%)b^P$ zByUCg2v4-U8>Tj@A3}V+55l00%KL~?0J>q(7eZsI6$pGt=_?^I<({rd{0akz{0bW_ zvsb$M)mK9Q&l=NyLgvq!+hn&{{lx4)t0lidM*`mn=|8I<)DR*l=o_I(s!x6+Xn(Gb z9e^7%I`8=?Y#1E>7$6jJZJN{|WB|jIQL|@=m<6V3LqynQ3;Hy0(`>#IYy{KC?w=6e zs!nEHZ~i1q{_I5smKoC+I)qymdU&l_U7|T7L diff --git a/link-scrapers/carrefour.ts b/link-scrapers/carrefour.ts index 6779c81..ce92306 100644 --- a/link-scrapers/carrefour.ts +++ b/link-scrapers/carrefour.ts @@ -1,6 +1,6 @@ import pMap from "p-map"; -import { decodeXML } from "entities"; import { saveUrls } from "db-datos/urlHelpers.js"; +import { getUrlsFromSitemap } from "./common.js"; export async function scrapCarrefourProducts() { await scrapBySitemap(); @@ -26,17 +26,7 @@ async function scrapBySitemap() { async (sitemapUrl) => { const res = await fetch(sitemapUrl); const xml = await res.text(); - let urls = new Set(); - new HTMLRewriter() - .on("loc", { - text(element) { - const txt = element.text.trim(); - if (!txt) return; - urls.add(decodeXML(txt)); - }, - }) - .transform(new Response(xml)); - saveUrls(Array.from(urls)); + saveUrls(getUrlsFromSitemap(xml)); }, { concurrency: 3 } ); diff --git a/link-scrapers/common.ts b/link-scrapers/common.ts new file mode 100644 index 0000000..f4107a0 --- /dev/null +++ b/link-scrapers/common.ts @@ -0,0 +1,14 @@ +import { decodeXML } from "entities"; +export function getUrlsFromSitemap(xml: string) { + let urls = new Set(); + new HTMLRewriter() + .on("loc", { + text(element) { + const txt = element.text.trim(); + if (!txt) return; + urls.add(decodeXML(txt)); + }, + }) + .transform(new Response(xml)); + return Array.from(urls); +} diff --git a/link-scrapers/dia.ts b/link-scrapers/dia.ts index 5d77c52..5b469f6 100644 --- a/link-scrapers/dia.ts +++ b/link-scrapers/dia.ts @@ -1,7 +1,7 @@ import pMap from "p-map"; -import { decodeXML } from "entities"; import { parseHTML } from "linkedom"; import { saveUrls } from "db-datos/urlHelpers.js"; +import { getUrlsFromSitemap } from "./common.js"; const categorias = [ "https://diaonline.supermercadosdia.com.ar/almacen", @@ -81,21 +81,15 @@ async function scrapBySitemap() { "https://diaonline.supermercadosdia.com.ar/sitemap/product-5.xml", ]; - await pMap(sitemaps, async (sitemapUrl) => { - const res = await fetch(sitemapUrl); - const xml = await res.text(); - let urls = new Set(); - new HTMLRewriter() - .on("loc", { - text(element) { - const txt = element.text.trim(); - if (!txt) return; - urls.add(decodeXML(txt)); - }, - }) - .transform(new Response(xml)); - saveUrls(Array.from(urls)); - }); + await pMap( + sitemaps, + async (sitemapUrl) => { + const res = await fetch(sitemapUrl); + const xml = await res.text(); + saveUrls(getUrlsFromSitemap(xml)); + }, + { concurrency: 3 } + ); } async function scrapBySite() { diff --git a/link-scrapers/package.json b/link-scrapers/package.json index ce7f074..9ae66f9 100644 --- a/link-scrapers/package.json +++ b/link-scrapers/package.json @@ -11,6 +11,7 @@ "author": "", "license": "ISC", "dependencies": { + "entities": "^4.5.0", "linkedom": "^0.16.5", "p-queue": "^8.0.1" } diff --git a/scraper/package.json b/scraper/package.json index a3351af..edaf0ca 100644 --- a/scraper/package.json +++ b/scraper/package.json @@ -17,7 +17,6 @@ "date-fns": "^3.0.6", "db-datos": "workspace:^", "drizzle-orm": "=0.29.1", - "entities": "^4.5.0", "linkedom": "^0.16.5", "nanoid": "^5.0.4", "p-map": "^7.0.1",