diff --git a/app/jobs/concerns/recursive_rollup.rb b/app/jobs/concerns/recursive_rollup.rb new file mode 100644 index 00000000..3163558e --- /dev/null +++ b/app/jobs/concerns/recursive_rollup.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module RecursiveRollup + extend ActiveSupport::Concern + + included do + private + + # Builds the rollup for an interval out of the rollups recorded for the + # previous interval, aggregating their value with the given operation. + # + # @param name [String] name of the source rollups + # @param interval_previous [String] interval of the source rollups + # @param interval [String] interval of the rollup being written + # @param operation [Symbol] aggregate sent to the relation with :value, :sum by default + # @param dimensions [Hash] passed to where_dimensions; its keys are also the grouping keys + # @param new_name [String, nil] name of the written rollup, falls back to name when nil + # @param beginning [Time] lower bound on time, snapped to the start of interval through try + # @return [Rollup] NOTE(review): return value of the final rollup call is not visible here, confirm + def recursive_rollup(name:, interval_previous:, interval:, dimensions:, beginning:, operation: :sum, new_name: nil) + Rollup.where(name: name, interval: interval_previous) + .where('time >= ?', beginning.try(:"beginning_of_#{interval}")) + .where_dimensions(**dimensions) + .group(*dimensions_to_jsonb_query(dimensions)) + .rollup(new_name || name, interval: interval, update: true) do |rollup| + rollup.try(operation, :value) + end + end + + # @param dimensions [Hash] dimension filter; only its keys are read + # @return [Array<String>] one dimensions->'key' accessor string per key, in key order + def dimensions_to_jsonb_query(dimensions) + dimensions.keys.map do |key| + "dimensions->'#{key}'" + end + end + end +end diff --git a/app/jobs/stat_collection_job.rb b/app/jobs/stat_collection_job.rb index e70544c3..fcb4d6e1 100644 --- a/app/jobs/stat_collection_job.rb +++ b/app/jobs/stat_collection_job.rb @@ -3,11 +3,17 @@ # Genera resúmenes de información para poder mostrar estadísticas y se # corre regularmente a sí misma. class StatCollectionJob < PeriodicJob + include RecursiveRollup + STAT_NAME = 'stat_collection_job' def perform(site_id:, once: true) @site = Site.find site_id + # Record this collection run up front; the stat is touched once every rollup is done + stat = site.stats.create! 
name: STAT_NAME + beginning = beginning_of_interval + scope.rollup('builds', **options) scope.rollup('space_used', **options) do |rollup| @@ -20,34 +26,21 @@ class StatCollectionJob < PeriodicJob # XXX: Es correcto promediar promedios? Stat::INTERVALS.reduce do |previous, current| - rollup(name: 'builds', interval_previous: previous, interval: current) - rollup(name: 'space_used', interval_previous: previous, interval: current, operation: :average) - rollup(name: 'build_time', interval_previous: previous, interval: current, operation: :average) + opts = { interval_previous: previous, interval: current, beginning: beginning, dimensions: { site_id: site.id } } + + recursive_rollup(name: 'builds', **opts) + recursive_rollup(name: 'space_used', operation: :average, **opts) + recursive_rollup(name: 'build_time', operation: :average, **opts) current end - # Registrar que se hicieron todas las recolecciones - site.stats.create! name: STAT_NAME - + stat.touch run_again! unless once end private - # Genera un rollup recursivo en base al período anterior y aplica una - # operación. - # - # @return [NilClass] - def rollup(name:, interval_previous:, interval:, operation: :sum) - Rollup.where(name: name, interval: interval_previous) - .where_dimensions(site_id: site.id) - .group("dimensions->'site_id'") - .rollup(name, interval: interval, update: true) do |rollup| - rollup.try(operation, :value) - end - end - # Los registros a procesar # # @return [ActiveRecord::Relation] diff --git a/app/jobs/uri_collection_job.rb b/app/jobs/uri_collection_job.rb index 4d369c2d..cad05cbb 100644 --- a/app/jobs/uri_collection_job.rb +++ b/app/jobs/uri_collection_job.rb @@ -11,6 +11,8 @@ # Los hostnames de un sitio van a poder obtenerse a partir de # Site#hostnames con la garantía de que son únicos. class UriCollectionJob < PeriodicJob + include RecursiveRollup + # Ignoramos imágenes porque suelen ser demasiadas y no aportan a las # estadísticas. 
IMAGES = %w[.png .jpg .jpeg .gif .webp .jfif].freeze @@ -21,35 +23,19 @@ class UriCollectionJob < PeriodicJob # Recordar la última vez que se corrió la tarea stat = site.stats.create! name: STAT_NAME + name = 'host|uri' + beginning = beginning_of_interval + + hostnames.each do |host| + break if stop? - hostnames.each do |hostname| uris.each do |uri| - next if stop? + break if stop? - AccessLog.where(host: hostname, uri: uri) - .where('created_at >= ?', beginning_of_interval) - .completed_requests - .non_robots - .group(:host, :uri) - .rollup('host|uri', interval: starting_interval, update: true) - - # Reducir las estadísticas calculadas aplicando un rollup sobre el - # intervalo más amplio. - Stat::INTERVALS.reduce do |previous, current| - beginning_of_this_interval = beginning_of_interval.try(:"beginning_of_#{current}") - - Rollup.where(name: 'host|uri', interval: previous) - .where('time >= ?', beginning_of_this_interval) - .where_dimensions(host: hostname, uri: uri) - .group("dimensions->'host'", "dimensions->'uri'") - .rollup('host|uri', interval: current, update: true) do |rollup| - rollup.sum(:value) - end - - # Devolver el intervalo actual - current - end + rollup_uri(uri, host, name, beginning) end + + rollup_host(host, name, beginning) end stat.touch @@ -59,6 +45,52 @@ class UriCollectionJob < PeriodicJob private + def rollup_uri(uri, host, name, beginning) + dimensions = { host: host, uri: uri } + + AccessLog.where(**dimensions) + .where('created_at >= ?', beginning) + .completed_requests + .non_robots + .group(*dimensions.keys) + .rollup(name, interval: starting_interval, update: true) + + # Reduce the computed statistics by applying a rollup over the + # wider interval. 
+ Stat::INTERVALS.reduce do |previous, current| + recursive_rollup(name: name, + interval_previous: previous, + interval: current, + dimensions: dimensions, + beginning: beginning) + + # Return the current interval so reduce hands it to the next step as previous + current + end + end + + def rollup_host(host, name, beginning) + dimensions = { host: host } + new_name = 'host' + + recursive_rollup(name: name, + new_name: new_name, + interval_previous: starting_interval, + interval: starting_interval, + dimensions: dimensions, + beginning: beginning) + + Stat::INTERVALS.reduce do |previous, current| + recursive_rollup(name: new_name, + interval_previous: previous, + interval: current, + dimensions: dimensions, + beginning: beginning) + + current + end + end + def stat_name STAT_NAME end diff --git a/bin/access_logs b/bin/access_logs index 613e493f..0597b968 100755 --- a/bin/access_logs +++ b/bin/access_logs @@ -1,8 +1,8 @@ #!/bin/sh set -e -# Volcar y eliminar todos los access logs del día anterior -date=`date +%F` +# Dump and delete all access logs from two days ago +date=`bundle exec rails runner "puts (Date.today - 2.days)"` psql -h postgresql "${DATABASE:-sutty}" sutty < "/srv/_storage/${date}.psql.gz" begin;