This repository has been archived on 2024-02-11. You can view files and clone it, but cannot push or open issues or pull requests.
sitio/index.11tydata.js

72 lines
1.8 KiB
JavaScript
Raw Normal View History

2023-11-12 09:37:38 +00:00
const { parseFeed: _parseFeed } = require("htmlparser2");
const { parseDocument } = require("htmlparser2");
const { getElementsByTagName } = require("domutils");
const { feeds, readFeed } = require("./feeds.js");
2023-04-16 18:58:25 +00:00
2023-11-12 09:37:38 +00:00
module.exports = async () => {
2023-04-16 18:58:25 +00:00
const articles = [];
for (const [name, baseUrl] of Object.entries(feeds)) {
2023-04-16 23:40:10 +00:00
const rawFeed = await readFeed(name);
2023-05-04 12:08:13 +00:00
const { title, item, link } = parseFeed(baseUrl, rawFeed);
2023-04-16 18:58:25 +00:00
2023-11-12 09:37:38 +00:00
articles.push({ title, item, link, baseUrl });
2023-04-16 18:58:25 +00:00
}
2023-11-12 09:37:38 +00:00
return { articles };
2023-04-16 18:58:25 +00:00
};
/**
 * Parses an RSS/Atom/RDF feed, recovering things that htmlparser2 alone does
 * not find (the feed's "alternate" link).
 *
 * @param {string} feedUrl - URL of the feed. Used as the base when resolving
 *   the link's `href`; when no usable link element is found, its origin is
 *   returned as the link instead.
 * @param {string} rawFeed - The feed document as text.
 * @returns {{title: *, link: string, item: {title: *, link: *, pubDate: *}}}
 *   The feed title, the resolved link, and the first item of the feed.
 * @throws {Error} With message "no pude parsear" when the feed, its link or
 *   title, or the first item's link, title or pubDate are missing.
 * @throws {TypeError} When `feedUrl` (or the found `href` resolved against
 *   it) is not a valid URL, as raised by the `URL` constructor.
 */
function parseFeed(feedUrl, rawFeed) {
  const feed = _parseFeed(rawFeed);
  const item = feed?.items[0];

  // Re-parse the document ourselves to look at the raw link elements.
  const dom = parseDocument(rawFeed);
  const feedDom = getElementsByTagName(
    (n) => n === "rss" || n === "feed" || n === "rdf:RDF",
    dom.childNodes,
    false
  )[0];

  // Only direct children of the root element are searched (recurse = false).
  // If there is no root element at all, carry on with no links so that the
  // validation below reports the failure instead of a stray TypeError.
  const linksDom = feedDom
    ? getElementsByTagName(
        (t) => ["link", "atom:link"].includes(t),
        feedDom.childNodes,
        false
      )
    : [];

  const linkDom = linksDom.find(
    (d) =>
      d.attribs.rel === "alternate" ||
      // https://datatracker.ietf.org/doc/html/rfc4287#section-4.2.7.2
      // > If the "rel" attribute is not present, the link element MUST be
      // > interpreted as if the link relation type is "alternate".
      !("rel" in d.attribs)
  );

  // Default to the feed's origin; prefer the alternate link when it has an
  // href, resolved against the feed URL so relative hrefs work.
  let link = new URL(feedUrl).origin;
  if (linkDom?.attribs?.href) {
    link = new URL(linkDom.attribs.href, feedUrl).toString();
  }

  if (
    !feed ||
    !feed.link ||
    !feed.title ||
    !item ||
    !item.link ||
    !item.title ||
    !item.pubDate ||
    !link
  ) {
    throw new Error("no pude parsear");
  }

  return {
    title: feed.title,
    link,
    item: { title: item.title, link: item.link, pubDate: item.pubDate },
  };
}