From deb3f0f0709bed4d586e8580cb1987722b310094 Mon Sep 17 00:00:00 2001 From: f Date: Sat, 30 Apr 2022 10:23:58 -0300 Subject: [PATCH 1/2] =?UTF-8?q?usar=20el=20=C3=BAltimo=20intervalo,=20no?= =?UTF-8?q?=20el=20que=20acabamos=20de=20crear?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/jobs/uri_collection_job.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/jobs/uri_collection_job.rb b/app/jobs/uri_collection_job.rb index 1f635997..e5105e5a 100644 --- a/app/jobs/uri_collection_job.rb +++ b/app/jobs/uri_collection_job.rb @@ -21,9 +21,10 @@ class UriCollectionJob < PeriodicJob def perform(site_id:, once: true) @site = Site.find site_id + # Obtener el principio del intervalo anterior + beginning = beginning_of_interval # Recordar la última vez que se corrió la tarea stat = site.stats.create! name: STAT_NAME - beginning = beginning_of_interval columns = %i[http_referer geoip2_data_country_name] # Recorremos todos los hostnames y uris posibles y luego agrupamos From 80f2df362b3091b8c9856766ceb1bf4bd5dc4f64 Mon Sep 17 00:00:00 2001 From: f Date: Sat, 30 Apr 2022 10:24:21 -0300 Subject: [PATCH 2/2] precalcular las columnas --- app/jobs/uri_collection_job.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/app/jobs/uri_collection_job.rb b/app/jobs/uri_collection_job.rb index e5105e5a..196d2e50 100644 --- a/app/jobs/uri_collection_job.rb +++ b/app/jobs/uri_collection_job.rb @@ -25,7 +25,8 @@ class UriCollectionJob < PeriodicJob beginning = beginning_of_interval # Recordar la última vez que se corrió la tarea stat = site.stats.create! name: STAT_NAME - columns = %i[http_referer geoip2_data_country_name] + # Columnas a agrupar + columns = %i[http_referer geoip2_data_country_name].zip([nil]).to_h # Recorremos todos los hostnames y uris posibles y luego agrupamos # recursivamente para no tener que recalcular, asumiendo que es más @@ -36,6 +37,9 @@ class UriCollectionJob < PeriodicJob break if stop? host_dimensions = { host: host } + columns.each_key do |column| + columns[column] = AccessLog.where(**host_dimensions).distinct(column).pluck(column) + end # Las URIs son la fuente de verdad de las visitas, porque son las # que indican las páginas y recursos descargables, el resto son @@ -50,9 +54,9 @@ class UriCollectionJob < PeriodicJob rollup(name, beginning, **dimensions) reduce_rollup(name, beginning, **dimensions) - columns.each do |column| + columns.each_pair do |column, values| # Obtener orígenes de visitas por host - AccessLog.where(**host_dimensions).distinct(column).pluck(column).each do |value| + values.each do |value| column_name = "host|uri|#{column}" dimensions[column] = value @@ -72,7 +76,7 @@ class UriCollectionJob < PeriodicJob # Acumular por mes y año reduce_rollup('host', beginning, **host_dimensions) - columns.each do |column| + columns.each_key do |column| square_rollup(name: "host|uri|#{column}", new_name: "host|#{column}", interval: starting_interval,