recolección de estadísticas con mejor performance

This commit is contained in:
f 2021-10-20 13:14:19 -03:00
parent 9cf7c62861
commit 8d38d0d2ae

View file

@ -21,14 +21,20 @@ class StatCollectionJob < ApplicationJob
COLUMNS = %i[uri].freeze
def perform(once: false)
Stat::INTERVALS.each do |interval|
options = { interval: interval }
Site.find_each do |site|
hostnames = [site.hostname, site.alternative_hostnames].flatten
# Usamos el primero porque luego podemos hacer un rollup recursivo
options = { interval: Stat::INTERVALS.first }
# Visitas por hostname
AccessLog.completed_requests.non_robots.pages.group(:host).rollup('host', **options)
hostnames.each do |hostname|
AccessLog.where(host: hostname).completed_requests.non_robots.pages.group(:host).rollup('host', **options)
combined_columns(**options)
stats_by_site(**options)
combined_columns(hostname, **options)
end
stats_by_site(site, **options)
end
# Registrar que se hicieron todas las recolecciones
@ -40,9 +46,15 @@ class StatCollectionJob < ApplicationJob
private
# Column combinations.
#
# For every column in COLUMNS, rolls up AccessLog requests (completed,
# non-robot) grouped by host and that column, passing "host|<column>" as the
# first argument to `rollup` and forwarding `options` to it unchanged.
#
# NOTE(review): this span comes from a diff rendering without +/- markers, so
# two signatures and two rollup statements (with and without `hostname`)
# appear together here; confirm which lines are current before editing.
def combined_columns(**options)
def combined_columns(hostname, **options)
# Base filter for this hostname; one column value is added per iteration below.
# NOTE(review): this hash is built once, outside the COLUMNS loop, and mutated
# per value. COLUMNS is only shown at a hunk edge, so the key from an earlier
# column may still be set while a later one is processed; confirm this is intended.
where = { host: hostname }
COLUMNS.each do |column|
AccessLog.completed_requests.non_robots.group(:host, column).rollup("host|#{column}", **options)
# Fetch the distinct values of this column for the hostname and run one
# rollup per value, narrowing the query to that value.
AccessLog.where(host: hostname).pluck(Arel.sql("distinct #{column}")).each do |value|
where[column] = value
AccessLog.where(**where).completed_requests.non_robots.group(:host, column).rollup("host|#{column}", **options)
end
end
end
@ -50,8 +62,7 @@ class StatCollectionJob < ApplicationJob
#
# XXX: En realidad se agrupan por el deploy_id, que siempre será el
# del DeployLocal.
def stats_by_site(**options)
Site.find_each do |site|
def stats_by_site(site, **options)
site.build_stats.jekyll.group(:deploy_id).rollup('builds', **options)
site.build_stats.jekyll.group(:deploy_id).rollup('space_used', **options) do |rollup|
@ -62,5 +73,4 @@ class StatCollectionJob < ApplicationJob
rollup.average(:seconds)
end
end
end
end