2021-10-20 16:08:21 +00:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# Procesar una lista de URIs para una lista de dominios. Esto nos
|
2021-10-26 14:33:15 +00:00
|
|
|
# permite procesar estadísticas a demanda.
|
|
|
|
#
|
|
|
|
# Hay varias cosas acá que van a convertirse en métodos propios, como la
|
|
|
|
# detección de URIs de un sitio (aunque la versión actual detecta todas
|
|
|
|
# las páginas y no solo las de posts como tenemos planeado, hay que
|
|
|
|
# resolver eso).
|
|
|
|
#
|
|
|
|
# Los hostnames de un sitio van a poder obtenerse a partir de
|
|
|
|
# Site#hostnames con la garantía de que son únicos.
|
|
|
|
class UriCollectionJob < PeriodicJob
  # Images are ignored because there are usually too many of them and they
  # add nothing to the statistics.
  IMAGES = %w[.png .jpg .jpeg .gif .webp].freeze
  STAT_NAME = 'uri_collection_job'
  # Name shared by every rollup this job reads or writes.
  ROLLUP_NAME = 'host|uri'

  # Rolls up access log entries for every hostname/URI pair of a site and
  # then folds the result into each following interval of Stat::INTERVALS.
  #
  # NOTE(review): `site`, `stop?`, `beginning_of_interval`,
  # `starting_interval` and `run_again!` are not defined in this class;
  # presumably they are provided by PeriodicJob -- confirm.
  #
  # @param site_id [Object] identifier handed to Site.find
  # @param once [Boolean] when false, run_again! is called after the run
  def perform(site_id:, once: true)
    @site = Site.find site_id

    # Remember the last time the job ran.
    stat = site.stats.create! name: STAT_NAME

    hostnames.each do |hostname|
      uris.each do |uri|
        next if stop?

        rollup_access_logs hostname, uri
        rollup_wider_intervals hostname, uri
      end
    end

    stat.touch

    run_again! unless once
  end

  private

  def stat_name
    STAT_NAME
  end

  # Rolls up the access log entries of a single hostname/URI pair created
  # since beginning_of_interval, using starting_interval as the interval.
  #
  # @param hostname [String]
  # @param uri [String]
  def rollup_access_logs(hostname, uri)
    AccessLog.where(host: hostname, uri: uri)
             .where('created_at >= ?', beginning_of_interval)
             .completed_requests
             .non_robots
             .group(:host, :uri)
             .rollup(ROLLUP_NAME, interval: starting_interval, update: true)
  end

  # Reduces the already computed statistics by applying a rollup over the
  # next interval: each consecutive pair of Stat::INTERVALS sums the values
  # stored for the previous interval into the current one.
  #
  # @param hostname [String]
  # @param uri [String]
  def rollup_wider_intervals(hostname, uri)
    Stat::INTERVALS.each_cons(2) do |previous, current|
      Rollup.where(name: ROLLUP_NAME, interval: previous)
            .where_dimensions(host: hostname, uri: uri)
            .group("dimensions->'host'", "dimensions->'uri'")
            .rollup(ROLLUP_NAME, interval: current, update: true) do |rollup|
        rollup.sum(:value)
      end
    end
  end

  # Destination of the site's DeployLocal deploy; `uris` scans this
  # directory.
  #
  # TODO: Change when origin-referer is merged
  #
  # @return [String]
  def destination
    @destination ||= site.deploys.find_by(type: 'DeployLocal').destination
  end

  # Hostnames for the site's deploys, without duplicates. Deploys of any
  # other class produce no hostname and are dropped.
  #
  # TODO: Change when origin-referer is merged
  #
  # @return [Array]
  def hostnames
    @hostnames ||= site.deploys.map do |deploy|
      case deploy
      when DeployLocal
        site.hostname
      when DeployWww
        deploy.fqdn
      when DeployAlternativeDomain
        # A hostname ending in '.' is used without the dot; any other
        # hostname gets Site.domain appended.
        name = deploy.hostname
        name.end_with?('.') ? name.chomp('.') : "#{name}.#{Site.domain}"
      when DeployHiddenService
        deploy.onion
      end
    end.compact.uniq
  end

  # Collects every URI except images: all HTML files below the destination
  # plus every non-image file below public/. Each URI is returned once.
  #
  # @return [Array]
  def uris
    @uris ||= Dir.chdir destination do
      pages = Dir.glob('**/*.html')
      assets = Dir.glob('public/**/*').reject do |path|
        File.directory?(path) || path.downcase.end_with?(*IMAGES)
      end

      # HTML files under public/ match both globs; Array#| keeps one copy.
      (pages | assets).map { |path| "/#{path}" }
    end
  end
end
|