transicion-desordenada-diablo/downloader/network.js

123 lines
3.5 KiB
JavaScript
Raw Normal View History

2023-12-18 15:13:09 +00:00
import { Dispatcher, request, Agent } from "undici";
2023-12-16 14:15:27 +00:00
import pLimit from "p-limit";
import { userAgent } from "./config.js";
import pThrottle from "p-throttle";
2023-12-16 14:15:27 +00:00
2023-12-18 15:13:09 +00:00
// Shared undici Agent passed as `dispatcher` to every request made by this module.
// undici documents its timeout options in milliseconds.
const dispatcher = new Agent({
  // 60 * 1000: limit for establishing a connection
  connect: { timeout: 60 * 1000 },
  // 15 * 60 * 1000: generous body timeout, since large files are downloaded
  bodyTimeout: 15 * 60 * 1000,
  // redirects followed before the 3xx response itself is handed back
  maxRedirections: 20,
});
2023-12-16 14:15:27 +00:00
/**
 * Error thrown when a response arrives with a status code that is not accepted.
 * The numeric status is exposed as `code` so callers can branch on it.
 */
export class StatusCodeError extends Error {
  /**
   * @param {number} code status code of the rejected response
   */
  constructor(code) {
    super(`Status code: ${code}`);
    // Without this the inherited name is "Error", so `String(err)` would not
    // tell this class apart from a generic error.
    this.name = "StatusCodeError";
    this.code = code;
  }
}
/**
 * Error thrown when a request still ends on a redirect (3xx) response.
 */
export class TooManyRedirectsError extends Error {
  /**
   * @param {string} [message] optional description; a generic text is used when omitted
   */
  constructor(message = "Too many redirects") {
    super(message);
    // Give logs something better than a bare "Error" with an empty message.
    this.name = "TooManyRedirectsError";
  }
}
/** Per-host limiters; the key is the URL host.
 * @type {Map<string, <Argument extends unknown, ReturnType>(
    fn: (arguments_: Argument) => PromiseLike<ReturnType>) => Promise<ReturnType>>} */
const limiters = new Map();

/**
 * Parses a connection limit taken from the environment.
 * Falls back when the value is missing, empty, not a number, or below 1, so a
 * bad setting cannot reach the limiter as NaN or 0.
 * @param {string | undefined} raw raw environment value
 * @param {number} fallback value used when `raw` is unusable
 * @returns {number} an integer >= 1, or `fallback`
 */
function parseConnectionLimit(raw, fallback) {
  if (!raw) return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isInteger(parsed) && parsed >= 1 ? parsed : fallback;
}

// Concurrency used for hosts without a dedicated throttle; N_THREADS overrides the default of 8.
const nConnections = parseConnectionLimit(process.env.N_THREADS, 8);
// Retry attempts are logged only when REPORT_RETRIES is exactly "true".
const REPORT_RETRIES = process.env.REPORT_RETRIES === "true";
2023-12-16 14:15:27 +00:00
// Highest `attempts` value (exclusive) at which a "slow down" status is still retried.
const STATUS_RETRY_LIMIT = 15;
// Highest `attempts` value (exclusive) at which a non-HTTP failure is still retried.
const ERROR_RETRY_LIMIT = 7;

/**
 * Performs a request through the per-host limiter and retries failures that
 * look transient. Both rules below share the same attempt counter.
 *
 * - StatusCodeError 403 from minsegar-my.sharepoint.com, or 503 from
 *   cdn.buenosaires.gob.ar: retried while `attempts < 15`, sleeping 15-25 s.
 * - Any error that is neither StatusCodeError nor TooManyRedirectsError:
 *   retried while `attempts < 7`, sleeping 5-15 s.
 * - Anything else is rethrown unchanged.
 *
 * @argument {URL} url
 * @argument {number} attempts number of attempts already made (callers normally omit it)
 * @returns {Promise<Dispatcher.ResponseData>}
 */
export async function customRequestWithLimitsAndRetries(url, attempts = 0) {
  for (let attempt = attempts; ; attempt += 1) {
    try {
      return await _customRequestWithLimits(url);
    } catch (error) {
      const isStatusError = error instanceof StatusCodeError;

      // some servers answer 403/503 as a way of saying "calm down"
      const isSlowDownStatus =
        isStatusError &&
        ((error.code === 403 && url.host === "minsegar-my.sharepoint.com") ||
          (error.code === 503 && url.host === "cdn.buenosaires.gob.ar"));
      if (isSlowDownStatus && attempt < STATUS_RETRY_LIMIT) {
        if (REPORT_RETRIES)
          console.debug(`reintentando(status)[${attempt}] ${url.toString()}`);
        await wait(15000 + Math.random() * 10000);
        continue;
      }

      // not an HTTP-level failure (e.g. a network error): retry with a shorter pause
      const isNonHttpFailure =
        !isStatusError && !(error instanceof TooManyRedirectsError);
      if (isNonHttpFailure && attempt < ERROR_RETRY_LIMIT) {
        if (REPORT_RETRIES)
          console.debug(`reintentando[${attempt}] ${url.toString()}`);
        await wait(5000 + Math.random() * 10000);
        continue;
      }

      throw error;
    }
  }
}
/**
 * Returns a promise that resolves (with no value) once the timer fires.
 * @argument {number} ms delay handed to `setTimeout`
 * @returns {Promise<void>}
 */
function wait(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Runs a request through the limiter of its host, creating and caching that
 * limiter in `limiters` the first time the host is seen.
 * @param {URL} url
 * @returns {Promise<Dispatcher.ResponseData>}
 */
function _customRequestWithLimits(url) {
  const host = url.host;
  let limiter = limiters.get(host);
  if (!limiter) {
    if (host === "cdn.buenosaires.gob.ar") {
      // this host enforces a rate limit, so it is throttled by time rather
      // than by concurrency; downloads from it are fast anyway
      limiter = pThrottle({ limit: 3, interval: 1000 })((task) => task());
    } else {
      limiter = pLimit(nConnections);
    }
    limiters.set(host, limiter);
  }
  return limiter(() => _customRequest(url));
}
/**
 * Builds the headers for a request depending on the url.
 *
 * SharePoint does not like serving bots, so requests to `sharepoint.com` or
 * any of its subdomains present a browser User-Agent; every other host gets
 * the configured `userAgent`. The match is on `hostname` with a dot boundary,
 * so an explicit port does not defeat it and unrelated domains that merely end
 * in the same letters are not spoofed.
 * @param {URL} url
 * @returns {{ "User-Agent": string }}
 */
function getHeaders(url) {
  const { hostname } = url;
  const isSharepoint =
    hostname === "sharepoint.com" || hostname.endsWith(".sharepoint.com");
  if (isSharepoint) {
    return {
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
    };
  }
  return { "User-Agent": userAgent };
}
/**
 * Best-effort disposal of a response body that nobody is going to read, so the
 * underlying connection is released instead of waiting for garbage collection.
 * @param {{ dump: () => Promise<unknown>, destroy: () => unknown }} body
 */
async function discardBody(body) {
  try {
    await body.dump();
  } catch (_error) {
    // dump() is missing or failed: tear the stream down instead
    body.destroy();
  }
}

/**
 * Performs one request with the shared dispatcher and the per-host headers.
 * Only 2xx responses are returned; their body is left for the caller to consume.
 * @param {URL} url
 * @returns {Promise<Dispatcher.ResponseData>}
 * @throws {TooManyRedirectsError} when the final response is still a 3xx
 * @throws {StatusCodeError} for any other status outside 200-299
 */
async function _customRequest(url) {
  const res = await request(url.toString(), {
    headers: getHeaders(url),
    dispatcher,
  });
  const { statusCode } = res;
  if (statusCode >= 200 && statusCode <= 299) return res;

  // The response is being rejected: discard its body before throwing so the
  // connection is not held by an unread stream.
  await discardBody(res.body);
  if (statusCode >= 300 && statusCode <= 399) throw new TooManyRedirectsError();
  throw new StatusCodeError(statusCode);
}