2023-12-09 19:53:49 +00:00
|
|
|
import z from "zod";
|
2023-12-19 15:01:37 +00:00
|
|
|
import pMap from "p-map";
|
2023-12-09 19:53:49 +00:00
|
|
|
import { basename } from "path";
|
2023-12-19 15:01:37 +00:00
|
|
|
import { customRequest } from "./network.js";
|
2023-12-09 19:53:49 +00:00
|
|
|
|
|
|
|
/**
 * Response envelope parsed by `getCkanPackageList`: `success` must be the
 * literal `true` and `result` is the list of dataset names.
 */
const zCkanPackageList = z.object({
  success: z.literal(true),
  result: z.string().array(),
});
|
|
|
|
|
|
|
|
/**
 * Requests `url` through `customRequest` and resolves with the response body
 * decoded as JSON. No validation happens here; callers parse the result with
 * their own schema.
 * @param {string} url must be parseable by `new URL(url)` (no base is given)
 */
async function getJson(url) {
  const response = await customRequest(new URL(url));
  return response.body.json();
}
|
|
|
|
|
|
|
|
/**
 * Downloads the list of dataset names from the portal's `package_list` action.
 * Throws if the response does not match `zCkanPackageList`.
 * @param {string} ckanUrl base URL; the API path is appended to it as-is
 * @returns {Promise<string[]>}
 */
async function getCkanPackageList(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/package_list`;
  const { result } = zCkanPackageList.parse(await getJson(endpoint));
  return result;
}
|
|
|
|
|
|
|
|
/** Organization object embedded in a package; every listed field must be a string. */
const zCkanOrganization = z.object({
  name: z.string(),
  title: z.string(),
  id: z.string(),
  created: z.string(),
});
|
|
|
|
/**
 * One entry of a package's `resources` array. `generateDataJsonFromCkan`
 * turns each of these into a `distribution` entry.
 */
const zCkanResource = z.object({
  id: z.string(),
  name: z.string(),
  description: z.string(),
  format: z.string(),
  url: z.string(),
});
|
|
|
|
/** One entry of a package's `tags` array. */
const zCkanTag = z.object({
  id: z.string(),
  display_name: z.string(),
  name: z.string(),
});
|
|
|
|
/** One entry of a package's `groups` array. */
const zCkanGroup = z.object({
  id: z.string(),
  display_name: z.string(),
  name: z.string(),
  description: z.string(),
});
|
|
|
|
/**
 * A CKAN package (dataset) as returned inside the `package_show` response.
 * Every field is a required string except `license_url`, which may be absent,
 * and the nested `resources`, `tags`, `groups` and `organization` objects.
 */
const zCkanPackage = z.object({
  license_title: z.string(),
  license_id: z.string(),
  license_url: z.string().optional(),
  maintainer: z.string(),
  maintainer_email: z.string(),
  id: z.string(),
  name: z.string(),
  title: z.string(),
  metadata_created: z.string(),
  metadata_modified: z.string(),
  author: z.string(),
  author_email: z.string(),
  resources: zCkanResource.array(),
  tags: zCkanTag.array(),
  groups: zCkanGroup.array(),
  organization: zCkanOrganization,
  url: z.string(),
  notes: z.string(),
});
|
|
|
|
/**
 * Response envelope parsed by `getCkanPackage`: `success` must be the literal
 * `true` and `result` is a single package.
 */
const zCkanPackageShow = z.object({
  success: z.literal(true),
  result: zCkanPackage,
});
|
|
|
|
|
|
|
|
/**
 * Fetches one package through the portal's `package_show` action and returns
 * its validated `result`. Throws if the response does not match
 * `zCkanPackageShow`.
 * @param {string} ckanUrl base URL; the API path is appended to it as-is
 * @param {string} packageName sent URI-encoded as the `id` query parameter
 */
async function getCkanPackage(ckanUrl, packageName) {
  const id = encodeURIComponent(packageName);
  const response = await getJson(
    `${ckanUrl}/api/3/action/package_show?id=${id}`
  );
  return zCkanPackageShow.parse(response).result;
}
|
|
|
|
|
|
|
|
/**
 * Response envelope parsed by `getCkanInfo`: `success` must be the literal
 * `true` and `result` holds general information about the portal.
 *
 * The schema descriptions stay in Spanish because they are runtime metadata.
 * `site_url` used to carry the title's description; each field now describes
 * itself.
 */
const zCkanStatusShow = z.object({
  success: z.literal(true),
  result: z.object({
    site_url: z.string().describe("URL del portal. A veces vacio."),
    site_description: z
      .string()
      .describe("Descripción del portal. A veces vacio."),
    site_title: z.string().describe("Titulo del portal. A veces vacio."),
    // May be null, unlike the other fields.
    error_emails_to: z.string().nullable(),
  }),
});
|
|
|
|
|
|
|
|
/**
 * Gets general information about the portal from its `status_show` action.
 * Throws if the response does not match `zCkanStatusShow`.
 * @param {string} ckanUrl base URL; the API path is appended to it as-is
 */
async function getCkanInfo(ckanUrl) {
  const endpoint = `${ckanUrl}/api/3/action/status_show`;
  const { result } = zCkanStatusShow.parse(await getJson(endpoint));
  return result;
}
|
|
|
|
|
|
|
|
/**
 * Generates a data.json from a CKAN portal that may not have an official one.
 *
 * Fetches the package list and the portal info, then downloads every package
 * (at most 12 requests in flight) and maps each one to a `dataset` entry, with
 * one `distribution` per resource. Rejects if any request or schema
 * validation fails.
 * @param {string} ckanUrl base URL of the portal; also used as fallback for
 *   `title` and `homepage` when the portal reports them empty
 */
export async function generateDataJsonFromCkan(ckanUrl) {
  // These two requests do not depend on each other, so issue them together
  // instead of waiting for the list before asking for the portal info.
  const [list, info] = await Promise.all([
    getCkanPackageList(ckanUrl),
    getCkanInfo(ckanUrl),
  ]);
  const packages = await pMap(list, (name) => getCkanPackage(ckanUrl, name), {
    concurrency: 12,
  });
  /** @type {import("common/schema.js").Data & { generatedBy: string }} */
  const data = {
    generatedBy:
      "archivador de datos abiertos (ckan_to_datajson) <https://github.com/catdevnull/transicion-desordenada-diablo>",
    // `||` on purpose: empty strings must fall back too.
    title: info.site_title || ckanUrl,
    description: info.site_description || "",
    homepage: info.site_url || ckanUrl,
    dataset: packages.map((p) => ({
      title: p.title,
      description: p.notes,
      identifier: p.id,
      publisher: {
        name: p.maintainer,
        mbox: p.maintainer_email,
      },
      landingPage: p.url,
      distribution: p.resources.map((r) => ({
        identifier: r.id,
        title: r.name,
        description: r.description,
        // Last path segment of the resource URL.
        fileName: basename(r.url),
        format: r.format,
        downloadURL: r.url,
      })),
    })),
  };
  return data;
}
|