diff --git a/app/jobs/concerns/recursive_rollup.rb b/app/jobs/concerns/recursive_rollup.rb index 4d9e11b0..1da14c01 100644 --- a/app/jobs/concerns/recursive_rollup.rb +++ b/app/jobs/concerns/recursive_rollup.rb @@ -30,17 +30,16 @@ module RecursiveRollup # intervalo más amplio. # # @param :name [String] - # @param :beginning [Time] # @param :operation [Symbol] # @param :dimensions [Hash] # @return [nil] - def reduce_rollup(name, beginning, operation, **dimensions) + def reduce_rollup(name:, dimensions:, operation: :sum) Stat::INTERVALS.reduce do |previous, current| recursive_rollup(name: name, interval_previous: previous, interval: current, dimensions: dimensions, - beginning: beginning, + beginning: beginning_of_interval, operation: operation) # Devolver el intervalo actual diff --git a/app/jobs/stat_collection_job.rb b/app/jobs/stat_collection_job.rb index 63d44999..e402e3b5 100644 --- a/app/jobs/stat_collection_job.rb +++ b/app/jobs/stat_collection_job.rb @@ -22,9 +22,11 @@ class StatCollectionJob < PeriodicJob rollup.average(:seconds) end - reduce_rollup('builds', beginning, :sum, site_id: site_id) - reduce_rollup('space_used', beginning, :average, site_id: site_id) - reduce_rollup('build_time', beginning, :average, site_id: site_id) + dimensions = { site_id: site_id } + + reduce_rollup(name: 'builds', operation: :sum, dimensions: dimensions) + reduce_rollup(name: 'space_used', operation: :average, dimensions: dimensions) + reduce_rollup(name: 'build_time', operation: :average, dimensions: dimensions) stat.touch run_again! unless once diff --git a/app/jobs/uri_collection_job.rb b/app/jobs/uri_collection_job.rb index 90789aba..a8c51d84 100644 --- a/app/jobs/uri_collection_job.rb +++ b/app/jobs/uri_collection_job.rb @@ -22,7 +22,7 @@ class UriCollectionJob < PeriodicJob @site = Site.find site_id # Obtener el principio del intervalo anterior - beginning = beginning_of_interval + beginning_of_interval # Recordar la última vez que se corrió la tarea stat = site.stats.create! name: STAT_NAME # Columnas a agrupar @@ -33,51 +33,33 @@ class UriCollectionJob < PeriodicJob # rápido buscar en los rollups indexados que en la tabla en bruto. # # Los referers solo se agrupan por host. - site.hostnames.each do |host| - break if stop? + columns.each_key do |column| + columns[column] = AccessLog.where(**host_dimensions).distinct(column).pluck(column) + end - host_dimensions = { host: host } - columns.each_key do |column| - columns[column] = AccessLog.where(**host_dimensions).distinct(column).pluck(column) - end + # Las URIs son la fuente de verdad de las visitas, porque son las + # que indican las páginas y recursos descargables, el resto son + # imágenes, CSS, JS y tipografías que no nos aportan números + # significativos. + uri_dimensions = { host: site.hostnames, uri: uris } + host_dimensions = { host: site.hostnames } - # Las URIs son la fuente de verdad de las visitas, porque son las - # que indican las páginas y recursos descargables, el resto son - # imágenes, CSS, JS y tipografías que no nos aportan números - # significativos. - name = 'host|uri' - dimensions = { host: host, uri: uris } + # Cantidad de visitas por host + rollup(name: 'host', dimensions: host_dimensions, filter: uri_dimensions) + reduce_rollup(name: 'host', operation: :sum, dimensions: uri_dimensions) - rollup(name, beginning, **dimensions) - reduce_rollup(name, beginning, :sum, **dimensions) + # Cantidad de visitas por página/recurso + rollup(name: 'host|uri', dimensions: uri_dimensions) + reduce_rollup(name: 'host|uri', operation: :sum, dimensions: uri_dimensions) - columns.each_pair do |column, values| - # Obtener orígenes de visitas por host - values.each do |value| - column_name = "host|uri|#{column}" - column_dimensions = { host: host, uri: uris } - column_dimensions[column] = value + # Cantidad de visitas host y parámetro + columns.each_pair do |column, values| + column_name = "host|#{column}" + column_dimensions = { host: site.hostnames } + column_dimensions[column] = values - rollup(column_name, beginning, **column_dimensions) - reduce_rollup(column_name, beginning, :sum, **column_dimensions) - end - end - - # Reducir todas las visitas a cantidad de visitas por host - rollup('host', beginning, **host_dimensions) - - # Acumular por mes y año - reduce_rollup('host', beginning, :sum, **host_dimensions) - - columns.each_pair do |column, values| - values.each do |value| - column_dimensions = { host: host } - column_dimensions[column] = value - - rollup("host|#{column}", beginning, **column_dimensions) - reduce_rollup("host|#{column}", beginning, :sum, **column_dimensions) - end - end + rollup(name: column_name, dimensions: column_dimensions, filter: uri_dimensions) + reduce_rollup(name: column_name, dimensions: column_dimensions) end stat.touch @@ -92,9 +74,10 @@ class UriCollectionJob < PeriodicJob # @param :name [String] # @param :beginning [Time] # @param :dimensions [Hash] + # @param :filter [Hash] # @return [nil] - def rollup(name, beginning, **dimensions) - AccessLog.where(**dimensions) + def rollup(name:, beginning:, dimensions:, filter: nil) + AccessLog.where(**(filter || dimensions)) .where('created_at >= ?', beginning) .completed_requests .non_robots