Cat /dev/Nulo
895201f3ab
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
para el de copiona
72 lines
1.8 KiB
JavaScript
72 lines
1.8 KiB
JavaScript
const { parseFeed: _parseFeed } = require("htmlparser2");
|
|
const { parseDocument } = require("htmlparser2");
|
|
const { getElementsByTagName } = require("domutils");
|
|
const { feeds, readFeed } = require("./feeds.js");
|
|
|
|
module.exports = async () => {
|
|
const articles = [];
|
|
|
|
for (const [name, baseUrl] of Object.entries(feeds)) {
|
|
const rawFeed = await readFeed(name);
|
|
const { title, item, link } = parseFeed(baseUrl, rawFeed);
|
|
|
|
articles.push({ title, item, link, baseUrl });
|
|
}
|
|
|
|
return { articles };
|
|
};
|
|
|
|
/**
|
|
* parsea un feed de rss encontrando cosas que htmlparser2 solo no encuentra
|
|
* @param {string} feedUrl
|
|
* @param {string} rawFeed
|
|
*/
|
|
function parseFeed(feedUrl, rawFeed) {
|
|
const feed = _parseFeed(rawFeed);
|
|
const item = feed?.items[0];
|
|
|
|
const dom = parseDocument(rawFeed);
|
|
const feedDom = getElementsByTagName(
|
|
(n) => n === "rss" || n === "feed" || n === "rdf:RDF",
|
|
dom.childNodes,
|
|
false
|
|
)[0];
|
|
const linksDom = getElementsByTagName(
|
|
(t) => ["link", "atom:link"].includes(t),
|
|
feedDom.childNodes,
|
|
false
|
|
);
|
|
const linkDom = linksDom.find(
|
|
(d) =>
|
|
d.attribs.rel === "alternate" ||
|
|
// https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.7.2
|
|
// >If the "rel" attribute is not present, the link element MUST be interpreted as if the link relation type is "alternate".
|
|
!("rel" in d.attribs)
|
|
);
|
|
|
|
const feedUrll = new URL(feedUrl);
|
|
let link = feedUrll.origin;
|
|
if (linkDom?.attribs?.href) {
|
|
const hrefUrl = new URL(linkDom.attribs.href, feedUrl);
|
|
link = hrefUrl.toString();
|
|
}
|
|
if (
|
|
!feed ||
|
|
!feed.link ||
|
|
!feed.title ||
|
|
!item ||
|
|
!item.link ||
|
|
!item.title ||
|
|
!item.pubDate ||
|
|
!link
|
|
) {
|
|
throw "no pude parsear";
|
|
}
|
|
|
|
return {
|
|
title: feed.title,
|
|
link,
|
|
item: { title: item.title, link: item.link, pubDate: item.pubDate },
|
|
};
|
|
}
|