From d8060728be781f074d9a8442408d651ef44d570c Mon Sep 17 00:00:00 2001 From: f Date: Sat, 23 Apr 2022 19:32:55 -0300 Subject: [PATCH 1/5] =?UTF-8?q?mantener=20el=20d=C3=ADa=20anterior=20compl?= =?UTF-8?q?eto=20en=20la=20base=20de=20datos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bin/access_logs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/access_logs b/bin/access_logs index 2b4fd99f..a7c151cc 100755 --- a/bin/access_logs +++ b/bin/access_logs @@ -1,8 +1,8 @@ #!/bin/sh set -e -# Volcar y eliminar todos los access logs del día anterior -date=`date +%F` +# Volcar y eliminar todos los access logs de dos días atrás +date=`bundle exec rails runner "puts (Date.today - 2.days)"` psql -h postgresql "${DATABASE:-sutty}" sutty < "/srv/http/_storage/${date}.psql.gz" begin; From 6b191d2d91541df6ef40e36a354e3a3a071b3ef5 Mon Sep 17 00:00:00 2001 From: f Date: Sat, 23 Apr 2022 19:37:08 -0300 Subject: [PATCH 2/5] =?UTF-8?q?usar=20la=20misma=20optimizaci=C3=B3n=20par?= =?UTF-8?q?a=20stats?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/jobs/stat_collection_job.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/jobs/stat_collection_job.rb b/app/jobs/stat_collection_job.rb index e70544c3..3b68b61e 100644 --- a/app/jobs/stat_collection_job.rb +++ b/app/jobs/stat_collection_job.rb @@ -41,6 +41,7 @@ class StatCollectionJob < PeriodicJob # @return [NilClass] def rollup(name:, interval_previous:, interval:, operation: :sum) Rollup.where(name: name, interval: interval_previous) + .where('time >= ?', beginning_of_interval.try(:"beginning_of_#{interval}")) .where_dimensions(site_id: site.id) .group("dimensions->'site_id'") .rollup(name, interval: interval, update: true) do |rollup| From aad5c82cb7b7de9b673d2f4d57b41c3d39bef961 Mon Sep 17 00:00:00 2001 From: f Date: Sat, 23 Apr 2022 20:32:20 -0300 Subject: [PATCH 3/5] refactorizar rollups recursivos --- app/jobs/concerns/recursive_rollup.rb | 38 +++++++++++++ app/jobs/stat_collection_job.rb | 24 +++----- app/jobs/uri_collection_job.rb | 82 +++++++++++++++++++-------- 3 files changed, 102 insertions(+), 42 deletions(-) create mode 100644 app/jobs/concerns/recursive_rollup.rb diff --git a/app/jobs/concerns/recursive_rollup.rb b/app/jobs/concerns/recursive_rollup.rb new file mode 100644 index 00000000..3163558e --- /dev/null +++ b/app/jobs/concerns/recursive_rollup.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module RecursiveRollup + extend ActiveSupport::Concern + + included do + private + + # Genera un rollup recursivo en base al período anterior y aplica una + # operación. + # + # @param :name [String] + # @param :interval_previous [String] + # @param :interval [String] + # @param :operation [Symbol] + # @param :dimensions [Hash] + # @param :new_name [String] + # @param :beginning [Time] + # @return [Rollup] + def recursive_rollup(name:, interval_previous:, interval:, dimensions:, beginning:, operation: :sum, new_name: nil) + Rollup.where(name: name, interval: interval_previous) + .where('time >= ?', beginning.try(:"beginning_of_#{interval}")) + .where_dimensions(**dimensions) + .group(*dimensions_to_jsonb_query(dimensions)) + .rollup(new_name || name, interval: interval, update: true) do |rollup| + rollup.try(operation, :value) + end + end + + # @param :dimensions [Hash] + # @return [Array] + def dimensions_to_jsonb_query(dimensions) + dimensions.keys.map do |key| + "dimensions->'#{key}'" + end + end + end +end diff --git a/app/jobs/stat_collection_job.rb b/app/jobs/stat_collection_job.rb index 3b68b61e..c94320d5 100644 --- a/app/jobs/stat_collection_job.rb +++ b/app/jobs/stat_collection_job.rb @@ -3,6 +3,8 @@ # Genera resúmenes de información para poder mostrar estadísticas y se # corre regularmente a sí misma. class StatCollectionJob < PeriodicJob + include RecursiveRollup + STAT_NAME = 'stat_collection_job' def perform(site_id:, once: true) @@ -20,9 +22,11 @@ class StatCollectionJob < PeriodicJob # XXX: Es correcto promediar promedios? Stat::INTERVALS.reduce do |previous, current| - rollup(name: 'builds', interval_previous: previous, interval: current) - rollup(name: 'space_used', interval_previous: previous, interval: current, operation: :average) - rollup(name: 'build_time', interval_previous: previous, interval: current, operation: :average) + opts = { interval_previous: previous, interval: current, dimensions: { site_id: site.id } } + + recursive_rollup(name: 'builds', **opts) + recursive_rollup(name: 'space_used', operation: :average, **opts) + recursive_rollup(name: 'build_time', operation: :average, **opts) current end @@ -35,20 +39,6 @@ class StatCollectionJob < PeriodicJob private - # Genera un rollup recursivo en base al período anterior y aplica una - # operación. - # - # @return [NilClass] - def rollup(name:, interval_previous:, interval:, operation: :sum) - Rollup.where(name: name, interval: interval_previous) - .where('time >= ?', beginning_of_interval.try(:"beginning_of_#{interval}")) - .where_dimensions(site_id: site.id) - .group("dimensions->'site_id'") - .rollup(name, interval: interval, update: true) do |rollup| - rollup.try(operation, :value) - end - end - # Los registros a procesar # # @return [ActiveRecord::Relation] diff --git a/app/jobs/uri_collection_job.rb b/app/jobs/uri_collection_job.rb index 4d369c2d..cad05cbb 100644 --- a/app/jobs/uri_collection_job.rb +++ b/app/jobs/uri_collection_job.rb @@ -11,6 +11,8 @@ # Los hostnames de un sitio van a poder obtenerse a partir de # Site#hostnames con la garantía de que son únicos. class UriCollectionJob < PeriodicJob + include RecursiveRollup + # Ignoramos imágenes porque suelen ser demasiadas y no aportan a las # estadísticas. IMAGES = %w[.png .jpg .jpeg .gif .webp .jfif].freeze @@ -21,35 +23,19 @@ class UriCollectionJob < PeriodicJob # Recordar la última vez que se corrió la tarea stat = site.stats.create! name: STAT_NAME + name = 'host|uri' + beginning = beginning_of_interval + + hostnames.each do |host| + break if stop? - hostnames.each do |hostname| uris.each do |uri| - next if stop? + break if stop? - AccessLog.where(host: hostname, uri: uri) - .where('created_at >= ?', beginning_of_interval) - .completed_requests - .non_robots - .group(:host, :uri) - .rollup('host|uri', interval: starting_interval, update: true) - - # Reducir las estadísticas calculadas aplicando un rollup sobre el - # intervalo más amplio. - Stat::INTERVALS.reduce do |previous, current| - beginning_of_this_interval = beginning_of_interval.try(:"beginning_of_#{current}") - - Rollup.where(name: 'host|uri', interval: previous) - .where('time >= ?', beginning_of_this_interval) - .where_dimensions(host: hostname, uri: uri) - .group("dimensions->'host'", "dimensions->'uri'") - .rollup('host|uri', interval: current, update: true) do |rollup| - rollup.sum(:value) - end - - # Devolver el intervalo actual - current - end + rollup_uri(uri, host, name, beginning) end + + rollup_host(host, name, beginning) end stat.touch @@ -59,6 +45,52 @@ class UriCollectionJob < PeriodicJob private + def rollup_uri(uri, host, name, beginning) + dimensions = { host: host, uri: uri } + + AccessLog.where(**dimensions) + .where('created_at >= ?', beginning) + .completed_requests + .non_robots + .group(*dimensions.keys) + .rollup(name, interval: starting_interval, update: true) + + # Reducir las estadísticas calculadas aplicando un rollup sobre el + # intervalo más amplio. + Stat::INTERVALS.reduce do |previous, current| + recursive_rollup(name: name, + interval_previous: previous, + interval: current, + dimensions: dimensions, + beginning: beginning) + + # Devolver el intervalo actual + current + end + end + + def rollup_host(host, name, beginning) + dimensions = { host: host } + new_name = 'host' + + recursive_rollup(name: name, + new_name: new_name, + interval_previous: starting_interval, + interval: starting_interval, + dimensions: dimensions, + beginning: beginning) + + Stat::INTERVALS.reduce do |previous, current| + recursive_rollup(name: new_name, + interval_previous: previous, + interval: current, + dimensions: dimensions, + beginning: beginning) + + current + end + end + def stat_name STAT_NAME end From c09df3b87b8fa563e9225de4bed244ed4e280937 Mon Sep 17 00:00:00 2001 From: f Date: Sat, 23 Apr 2022 20:33:02 -0300 Subject: [PATCH 4/5] =?UTF-8?q?registrar=20el=20tiempo=20que=20pas=C3=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/jobs/stat_collection_job.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/jobs/stat_collection_job.rb b/app/jobs/stat_collection_job.rb index c94320d5..03dcd292 100644 --- a/app/jobs/stat_collection_job.rb +++ b/app/jobs/stat_collection_job.rb @@ -10,6 +10,9 @@ class StatCollectionJob < PeriodicJob def perform(site_id:, once: true) @site = Site.find site_id + # Registrar que se hicieron todas las recolecciones + stat = site.stats.create! name: STAT_NAME + scope.rollup('builds', **options) scope.rollup('space_used', **options) do |rollup| @@ -31,9 +34,7 @@ class StatCollectionJob < PeriodicJob current end - # Registrar que se hicieron todas las recolecciones - site.stats.create! name: STAT_NAME - + stat.touch run_again! unless once end From 47a74b584824001e81d1bc1be86c6333ba2099b0 Mon Sep 17 00:00:00 2001 From: f Date: Sat, 23 Apr 2022 20:33:58 -0300 Subject: [PATCH 5/5] fixup! refactorizar rollups recursivos --- app/jobs/stat_collection_job.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/jobs/stat_collection_job.rb b/app/jobs/stat_collection_job.rb index 03dcd292..fcb4d6e1 100644 --- a/app/jobs/stat_collection_job.rb +++ b/app/jobs/stat_collection_job.rb @@ -12,6 +12,7 @@ class StatCollectionJob < PeriodicJob # Registrar que se hicieron todas las recolecciones stat = site.stats.create! name: STAT_NAME + beginning = beginning_of_interval scope.rollup('builds', **options) @@ -25,7 +26,7 @@ class StatCollectionJob < PeriodicJob # XXX: Es correcto promediar promedios? Stat::INTERVALS.reduce do |previous, current| - opts = { interval_previous: previous, interval: current, dimensions: { site_id: site.id } } + opts = { interval_previous: previous, interval: current, beginning: beginning, dimensions: { site_id: site.id } } recursive_rollup(name: 'builds', **opts) recursive_rollup(name: 'space_used', operation: :average, **opts)