# frozen_string_literal: true

# Processes a list of URIs for a list of domains, which lets us compute
# statistics on demand.
#
# Several things here are meant to become methods of their own, such as
# detecting the URIs of a site (the current version detects every page
# rather than only posts as planned; that still needs to be resolved).
#
# A site's hostnames will eventually be available from Site#hostnames,
# with the guarantee that they are unique.
#
# NOTE(review): `site`, `stop?`, `beginning_of_interval`,
# `starting_interval` and `run_again!` are not defined in this class;
# presumably they are provided by PeriodicJob -- confirm.
class UriCollectionJob < PeriodicJob
  # Images are ignored because there are usually too many of them and
  # they add nothing to the statistics.
  IMAGES = %w[.png .jpg .jpeg .gif .webp .jfif].freeze
  STAT_NAME = 'uri_collection_job'

  # Rolls up the access log for every hostname/URI pair of a site.
  #
  # @param site_id [Object] identifier handed to `Site.find`
  # @param once [Boolean] when false, `run_again!` is called once the
  #   run has finished
  def perform(site_id:, once: true)
    @site = Site.find site_id

    # Remember the last time the job ran
    stat = site.stats.create! name: STAT_NAME

    hostnames.each do |hostname|
      uris.each do |uri|
        # Once asked to stop, skip the remaining pairs without querying
        next if stop?

        rollup_access_logs(hostname, uri)
        rollup_wider_intervals(hostname, uri)
      end
    end

    stat.touch

    run_again! unless once
  end

  private

  # @return [String]
  def stat_name
    STAT_NAME
  end

  # Rolls up the access log entries of a single hostname/URI pair,
  # created since `beginning_of_interval`, into the 'host|uri' rollup at
  # the starting interval.
  #
  # @param hostname [String]
  # @param uri [String]
  def rollup_access_logs(hostname, uri)
    AccessLog.where(host: hostname, uri: uri)
             .where('created_at >= ?', beginning_of_interval)
             .completed_requests
             .non_robots
             .group(:host, :uri)
             .rollup('host|uri', interval: starting_interval, update: true)
  end

  # Reduces the computed statistics by rolling each interval up into
  # the next one in Stat::INTERVALS, summing the stored values.
  #
  # @param hostname [String]
  # @param uri [String]
  def rollup_wider_intervals(hostname, uri)
    Stat::INTERVALS.reduce do |previous, current|
      Rollup.where(name: 'host|uri', interval: previous)
            .where_dimensions(host: hostname, uri: uri)
            .group("dimensions->'host'", "dimensions->'uri'")
            .rollup('host|uri', interval: current, update: true) do |rollup|
        rollup.sum(:value)
      end

      # Hand the current interval over to the next iteration
      current
    end
  end

  # Destination of the site's 'DeployLocal' deploy. Raises NoMethodError
  # when the site has no such deploy, since `find_by` returns nil.
  #
  # TODO: Change when origin-referer is merged
  #
  # @return [String]
  def destination
    @destination ||= site.deploys.find_by(type: 'DeployLocal').destination
  end

  # Hostnames the site is served from, one per supported deploy type,
  # without duplicates. Deploys of any other type are ignored.
  #
  # TODO: Change when origin-referer is merged
  #
  # @return [Array]
  def hostnames
    @hostnames ||= site.deploys.map do |deploy|
      case deploy
      when DeployLocal
        site.hostname
      when DeployWww
        deploy.fqdn
      when DeployAlternativeDomain
        name = deploy.hostname

        # A name ending in a dot is used as-is minus the dot; any other
        # name gets Site.domain appended.
        name.end_with?('.') ? name.chomp('.') : "#{name}.#{Site.domain}"
      when DeployHiddenService
        deploy.onion
      end
    end.compact.uniq
  end

  # Collects every URI except images.
  #
  # The root locale ('') is globbed recursively, so it already contains
  # the pages of every locale subdirectory; duplicates are removed so
  # each URI is only processed once.
  #
  # @return [Array]
  def uris
    @uris ||= locales.flat_map do |locale|
      prefix = "/#{locale}/".squeeze('/')
      dir = File.join(destination, locale)

      files(dir).map { |f| prefix + f }
    end.uniq
  end

  # The empty (root) locale followed by the site's locales as strings.
  #
  # @return [Array]
  def locales
    @locales ||= ['', *site.locales.map(&:to_s)]
  end

  # HTML pages anywhere under +dir+ plus the non-image files under its
  # `public/` directory, as paths relative to +dir+. Globbing is done
  # with `base:` so the process-wide working directory is never changed.
  #
  # @param dir [String]
  # @return [Array] empty when +dir+ is not an existing directory
  def files(dir)
    return [] unless File.directory?(dir)

    pages = Dir.glob('**/*.html', base: dir)
    assets = Dir.glob('public/**/*', base: dir)
    assets = remove_directories(assets, base: dir)
    assets = remove_images(assets)

    pages + assets
  end

  # @param files [Array]
  # @param base [String, nil] directory the paths are relative to; when
  #   nil they are resolved against the current working directory
  # @return [Array] the paths that are not directories
  def remove_directories(files, base: nil)
    files.reject do |f|
      File.directory?(base ? File.join(base, f) : f)
    end
  end

  # @param files [Array]
  # @return [Array] the paths whose extension is not listed in IMAGES,
  #   compared case-insensitively
  def remove_images(files)
    files.reject do |f|
      IMAGES.include? File.extname(f).downcase
    end
  end
end