5
0
Fork 0
mirror of https://0xacab.org/sutty/sutty synced 2024-06-02 05:04:16 +00:00

refactorizar rollups recursivos

This commit is contained in:
f 2022-04-23 20:32:20 -03:00
parent 6b191d2d91
commit aad5c82cb7
3 changed files with 102 additions and 42 deletions

View file

@ -0,0 +1,38 @@
# frozen_string_literal: true
# Shared helpers for jobs that fold an already-computed rollup series into a
# wider interval (for example, day -> month -> year).
#
# The helpers are defined as private instance methods on the including class.
module RecursiveRollup
  extend ActiveSupport::Concern

  included do
    private

    # Builds a recursive rollup: reads the rows stored for the previous
    # (narrower) interval and aggregates them into the requested interval.
    #
    # @param name [String] name of the rollup series to read
    # @param interval_previous [String] interval of the rows that are read
    # @param interval [String] interval of the rows that are written
    # @param dimensions [Hash] dimensions used both to filter and to group
    # @param beginning [Time, nil] lower bound for the rows that are read; it
    #   is snapped to the start of +interval+. When omitted, the including
    #   class's +beginning_of_interval+ is used, which is what the previous
    #   per-job implementation did.
    # @param operation [Symbol] aggregation invoked on the rollup (+:sum+ by
    #   default)
    # @param new_name [String, nil] name of the series that is written;
    #   defaults to +name+
    # @return [Object] whatever the underlying +rollup+ call returns
    def recursive_rollup(name:, interval_previous:, interval:, dimensions:,
                         beginning: nil, operation: :sum, new_name: nil)
      # Callers such as StatCollectionJob do not pass +beginning+; without a
      # fallback every call would raise ArgumentError (missing keyword).
      beginning ||= beginning_of_interval

      Rollup.where(name: name, interval: interval_previous)
            .where('time >= ?', beginning.try(:"beginning_of_#{interval}"))
            .where_dimensions(**dimensions)
            .group(*dimensions_to_jsonb_query(dimensions))
            .rollup(new_name || name, interval: interval, update: true) do |rollup|
        rollup.try(operation, :value)
      end
    end

    # Turns the dimension names into JSONB accessors usable in GROUP BY.
    #
    # NOTE(review): keys are interpolated verbatim into SQL, so they must
    # come from code and never from user input.
    #
    # @param dimensions [Hash]
    # @return [Array<String>]
    def dimensions_to_jsonb_query(dimensions)
      dimensions.keys.map do |key|
        "dimensions->'#{key}'"
      end
    end
  end
end

View file

@ -3,6 +3,8 @@
# Genera resúmenes de información para poder mostrar estadísticas y se
# corre regularmente a sí misma.
class StatCollectionJob < PeriodicJob
include RecursiveRollup
STAT_NAME = 'stat_collection_job'
def perform(site_id:, once: true)
@ -20,9 +22,11 @@ class StatCollectionJob < PeriodicJob
# XXX: Es correcto promediar promedios?
Stat::INTERVALS.reduce do |previous, current|
rollup(name: 'builds', interval_previous: previous, interval: current)
rollup(name: 'space_used', interval_previous: previous, interval: current, operation: :average)
rollup(name: 'build_time', interval_previous: previous, interval: current, operation: :average)
opts = { interval_previous: previous, interval: current, dimensions: { site_id: site.id } }
recursive_rollup(name: 'builds', **opts)
recursive_rollup(name: 'space_used', operation: :average, **opts)
recursive_rollup(name: 'build_time', operation: :average, **opts)
current
end
@ -35,20 +39,6 @@ class StatCollectionJob < PeriodicJob
private
# Builds a recursive rollup from the rows of the previous period and applies
# an aggregation to them.
#
# @return [NilClass] per the original annotation; not verifiable from this
#   method alone
def rollup(name:, interval_previous:, interval:, operation: :sum)
  previous_rows = Rollup.where(name: name, interval: interval_previous)
                        .where('time >= ?', beginning_of_interval.try(:"beginning_of_#{interval}"))
                        .where_dimensions(site_id: site.id)
                        .group("dimensions->'site_id'")

  previous_rows.rollup(name, interval: interval, update: true) { |series| series.try(operation, :value) }
end
# Los registros a procesar
#
# @return [ActiveRecord::Relation]

View file

@ -11,6 +11,8 @@
# Los hostnames de un sitio van a poder obtenerse a partir de
# Site#hostnames con la garantía de que son únicos.
class UriCollectionJob < PeriodicJob
include RecursiveRollup
# Ignoramos imágenes porque suelen ser demasiadas y no aportan a las
# estadísticas.
IMAGES = %w[.png .jpg .jpeg .gif .webp .jfif].freeze
@ -21,35 +23,19 @@ class UriCollectionJob < PeriodicJob
# Recordar la última vez que se corrió la tarea
stat = site.stats.create! name: STAT_NAME
name = 'host|uri'
beginning = beginning_of_interval
hostnames.each do |host|
break if stop?
hostnames.each do |hostname|
uris.each do |uri|
next if stop?
break if stop?
AccessLog.where(host: hostname, uri: uri)
.where('created_at >= ?', beginning_of_interval)
.completed_requests
.non_robots
.group(:host, :uri)
.rollup('host|uri', interval: starting_interval, update: true)
# Reducir las estadísticas calculadas aplicando un rollup sobre el
# intervalo más amplio.
Stat::INTERVALS.reduce do |previous, current|
beginning_of_this_interval = beginning_of_interval.try(:"beginning_of_#{current}")
Rollup.where(name: 'host|uri', interval: previous)
.where('time >= ?', beginning_of_this_interval)
.where_dimensions(host: hostname, uri: uri)
.group("dimensions->'host'", "dimensions->'uri'")
.rollup('host|uri', interval: current, update: true) do |rollup|
rollup.sum(:value)
end
# Devolver el intervalo actual
current
end
rollup_uri(uri, host, name, beginning)
end
rollup_host(host, name, beginning)
end
stat.touch
@ -59,6 +45,52 @@ class UriCollectionJob < PeriodicJob
private
# Rolls up the access log for a single host/URI pair at the starting
# interval and then folds that series into every wider interval.
#
# @param uri [String]
# @param host [String]
# @param name [String] name of the rollup series
# @param beginning [Time] only log entries created at or after this are read
def rollup_uri(uri, host, name, beginning)
  dimensions = { host: host, uri: uri }

  requests = AccessLog.where(**dimensions)
                      .where('created_at >= ?', beginning)
                      .completed_requests
                      .non_robots
  requests.group(*dimensions.keys)
          .rollup(name, interval: starting_interval, update: true)

  # Shrink the computed statistics by rolling each interval up into the
  # next, wider one.
  Stat::INTERVALS.reduce do |narrower, wider|
    recursive_rollup(name: name, dimensions: dimensions, beginning: beginning,
                     interval_previous: narrower, interval: wider)

    # Hand the current interval to the next iteration.
    wider
  end
end
# Aggregates the +name+ series for one host into a series named 'host' at
# the starting interval, then folds 'host' into every wider interval.
#
# @param host [String]
# @param name [String] name of the source rollup series
# @param beginning [Time] lower bound handed to every recursive rollup
def rollup_host(host, name, beginning)
  by_host = { host: host }
  host_series = 'host'

  # Same interval on both sides: this step only renames and regroups.
  recursive_rollup(name: name, new_name: host_series,
                   interval_previous: starting_interval, interval: starting_interval,
                   dimensions: by_host, beginning: beginning)

  Stat::INTERVALS.reduce do |narrower, wider|
    recursive_rollup(name: host_series, dimensions: by_host, beginning: beginning,
                     interval_previous: narrower, interval: wider)

    wider
  end
end
# Returns this job's STAT_NAME constant.
#
# @return [String]
def stat_name
STAT_NAME
end