2016-11-23 14:25:44 +00:00
# Copyright (C) 2012-2016 Zammad Foundation, http://zammad-foundation.org/
class MonitoringController < ApplicationController
2018-11-02 06:35:28 +00:00
# Every action requires an authenticated user holding `admin.monitoring`,
# except the three checks below: those call token_or_permission_check
# themselves so they can also be reached with a `token` parameter.
prepend_before_action -> { authentication_check(permission: 'admin.monitoring') },
                      except: %i[health_check status amount_check]

# CSRF token verification is not applied to any action of this controller.
skip_before_action :verify_csrf_token
2016-11-23 14:25:44 +00:00
=begin

Resource:
GET /api/v1/monitoring/health_check?token=XXX

Response:
{
  "healthy": true,
  "message": "success",
}

{
  "healthy": false,
  "message": "authentication of XXX failed;issue #2",
  "issues": ["authentication of XXX failed", "issue #2"],
}

Test:
curl http://localhost/api/v1/monitoring/health_check?token=XXX

=end
# Runs all health probes and renders the aggregated result as JSON.
#
# Access is granted by token_or_permission_check (raises
# Exceptions::NotAuthorized otherwise).
#
# Renders `{ healthy: true, message: 'success', token: ... }` when no probe
# reported an issue, otherwise
# `{ healthy: false, message: ..., issues: [...], actions: [...], token: ... }`
# where `message` is all issues joined with ';'.
def health_check
  token_or_permission_check

  issues  = []
  actions = Set.new

  # The order of the probes defines the order of the reported issues.
  issues.concat(health_check_channel_issues)
  issues.concat(health_check_unprocessable_mail_issues)
  issues.concat(health_check_scheduler_issues)

  Scheduler.failed_jobs.each do |job|
    issues.push "Failed to run scheduled job '#{job.name}'. Cause: #{job.error_message}"
    # Same name as the restart_failed_jobs action of this controller.
    actions.add(:restart_failed_jobs)
  end

  issues.concat(health_check_background_job_issues)
  issues.concat(health_check_import_job_issues)

  token = Setting.get('monitoring_token')

  if issues.blank?
    render json: { healthy: true, message: 'success', token: token }
    return
  end

  render json: {
    healthy: false,
    message: issues.join(';'),
    issues:  issues,
    actions: actions,
    token:   token,
  }
end

# Issues of all active channels: inbound errors, inbound channels that were
# not fetched for more than one hour, and outbound errors.
private def health_check_channel_issues
  issues             = []
  last_run_tolerance = Time.zone.now - 1.hour

  Channel.where(active: true).each do |channel|
    inbound_error = channel.status_in == 'error'
    last_fetch    = channel.preferences && channel.preferences['last_fetch']
    stale_fetch   = last_fetch && last_fetch < last_run_tolerance

    # inbound channel
    if inbound_error || stale_fetch
      # The label is built for the stale-fetch issue as well. It used to be
      # set only when status_in was 'error', which left the stale-fetch
      # message without any hint about the affected channel.
      inbound = health_check_channel_label(channel, 'in')
      issues.push "#{inbound} #{channel.last_log_in}" if inbound_error
      if stale_fetch
        issues.push "#{inbound} channel is active but not fetched for #{helpers.time_ago_in_words(last_fetch)}"
      end
    end

    # outbound channel
    next if channel.status_out != 'error'

    issues.push "#{health_check_channel_label(channel, 'out')} #{channel.last_log_out}"
  end

  issues
end

# Builds the "Channel: <area> <direction> key:<value>;..." prefix of a
# channel issue from the non-blank host/user/uid options.
# NOTE(review): the literal text "key:" is emitted for every option rather
# than the option name; left unchanged because consumers may match on it.
private def health_check_channel_label(channel, direction)
  options = channel.options || {}
  details = %w[host user uid].map { |key| options[key] }
                             .reject(&:blank?)
                             .map { |value| "key:#{value};" }
                             .join

  "Channel: #{channel.area} #{direction} #{details}"
end

# Reports the number of *.eml files in tmp/unprocessable_mail, if any.
private def health_check_unprocessable_mail_issues
  directory = Rails.root.join('tmp', 'unprocessable_mail').to_s

  # Dir.glob returns an empty list for a missing directory, so no separate
  # existence check is required.
  count = Dir.glob("#{directory}/*.eml").size
  return [] if count.zero?

  ["unprocessable mails: #{count}"]
end

# Reports the first active scheduler (period > 300) whose next run is at
# least 8 minutes overdue, and whether no active scheduler has ever run.
private def health_check_scheduler_issues
  issues = []

  schedulers = Scheduler.where('active = ? AND period > 300', true)
                        .where.not(last_run: nil)
                        .order(last_run: :asc, period: :asc)
  schedulers.each do |scheduler|
    due_at = scheduler.last_run + scheduler.period.seconds
    next if Time.zone.now - due_at < 8.minutes

    issues.push "scheduler may not run (last execution of #{scheduler.method} #{helpers.time_ago_in_words(due_at)} over) - please contact your system administrator"
    break # one overdue scheduler is enough to raise the alarm
  end

  if Scheduler.where(active: true, last_run: nil).count == Scheduler.where(active: true).count
    issues.push 'scheduler not running'
  end

  issues
end

# Reports failing Delayed::Job entries (total if more than 10, plus a
# per-name summary of up to 10 of them) and an oversized job queue.
private def health_check_background_job_issues
  issues = []

  # failed jobs check
  failed_jobs       = Delayed::Job.where('attempts > 0')
  count_failed_jobs = failed_jobs.count
  issues.push "#{count_failed_jobs} failing background jobs" if count_failed_jobs > 10

  listed_failed_jobs = failed_jobs.select(:handler, :attempts).limit(10)
  # most frequent job name first
  sorted_failed_jobs = listed_failed_jobs.group_by(&:name).sort_by { |_name, entries| entries.length }.reverse
  sorted_failed_jobs.each.with_index(1) do |(name, jobs), position|
    issues.push "Failed to run background job ##{position} '#{name}' #{jobs.count} time(s) with #{jobs.sum(&:attempts)} attempt(s)."
  end

  # job count check: only jobs waiting for more than 15 minutes are counted
  total_jobs = Delayed::Job.where('created_at < ?', Time.zone.now - 15.minutes).count
  issues.push "#{total_jobs} background jobs in queue" if total_jobs > 8000

  issues
end

# Reports import backends that finished with an error within the last
# 5 minutes and unfinished imports not updated for at least 5 minutes.
private def health_check_import_job_issues
  issues          = []
  import_backends = ImportJob.backends

  # failed import jobs
  import_backends.each do |backend|
    job = ImportJob.where(name: backend, dry_run: false)
                   .where('finished_at >= ?', 5.minutes.ago)
                   .first
    next if job.blank?
    next if !job.result.is_a?(Hash)

    error_message = job.result[:error]
    next if error_message.blank?

    issues.push "Failed to run import backend '#{backend}'. Cause: #{error_message}"
  end

  # stuck import jobs
  import_backends.each do |backend|
    job = ImportJob.where(name: backend, dry_run: false, finished_at: nil)
                   .where('updated_at <= ?', 5.minutes.ago)
                   .first
    next if job.blank?

    issues.push "Stuck import backend '#{backend}' detected. Last update: #{job.updated_at}"
  end

  issues
end
=begin

Resource:
GET /api/v1/monitoring/status?token=XXX

Response:
{
  "agents": 8123,
  "last_login": "2016-11-21T14:14:14Z",
  "counts": {
    "users": 12313,
    "tickets": 23123,
    "ticket_articles": 131451,
  },
  "last_created_at": {
    "users": "2016-11-21T14:14:14Z",
    "tickets": "2016-11-21T14:14:14Z",
    "ticket_articles": "2016-11-21T14:14:14Z",
  },
}

Test:
curl http://localhost/api/v1/monitoring/status?token=XXX

=end
# Renders usage statistics as JSON: number of agents, most recent login,
# and for a fixed list of models the row count and the created_at of the
# last record. On PostgreSQL the summed store size is added as `storage`.
#
# Access is granted by token_or_permission_check (raises
# Exceptions::NotAuthorized otherwise).
def status
  token_or_permission_check

  most_recent_login = User.where('last_login IS NOT NULL').order(last_login: :desc).limit(1).first

  status = {
    counts:          {},
    last_created_at: {},
    last_login:      most_recent_login&.last_login,
    agents:          User.with_permissions('ticket.agent').count,
  }

  models = {
    users:           User,
    groups:          Group,
    overviews:       Overview,
    tickets:         Ticket,
    ticket_articles: Ticket::Article,
    text_modules:    TextModule,
  }
  models.each_pair do |key, model|
    status[:counts][key]          = model.count
    status[:last_created_at][key] = model.last&.created_at
  end

  if ActiveRecord::Base.connection_config[:adapter] == 'postgresql'
    # Sums `size` over one stores row per store_file_id (the row with the
    # highest id), so a file referenced several times is counted once.
    sql       = 'SELECT SUM(CAST(coalesce(size, \'0\') AS INTEGER)) FROM stores WHERE id IN (SELECT MAX(id) FROM stores GROUP BY store_file_id)'
    first_row = ActiveRecord::Base.connection.exec_query(sql)[0]
    bytes     = first_row && first_row['sum']

    if bytes
      status[:storage] = {
        kB: bytes / 1024,
        MB: bytes / 1024 / 1024,
        GB: bytes / 1024 / 1024 / 1024,
      }
    end
  end

  render json: status
end
2018-11-02 06:35:28 +00:00
=begin

Get the number of tickets created within a certain time slot. The periode
has to end with s, m, h or d (seconds, minutes, hours or days).

Resource:
GET /api/v1/monitoring/amount_check?token=XXX&max_warning=2000&max_critical=3000&periode=1h
GET /api/v1/monitoring/amount_check?token=XXX&min_warning=2000&min_critical=3000&periode=1h
GET /api/v1/monitoring/amount_check?token=XXX&periode=1h

Response:
{
  "state": "ok",
  "message": "",
  "count": 123,
}

{
  "state": "warning",
  "message": "The limit of 2000 was exceeded with 2123 in the last 1h",
  "count": 2123,
}

{
  "state": "critical",
  "message": "The limit of 3000 was exceeded with 3123 in the last 1h",
  "count": 3123,
}

Test:
curl "http://localhost/api/v1/monitoring/amount_check?token=XXX&max_warning=2000&max_critical=3000&periode=1h"
curl "http://localhost/api/v1/monitoring/amount_check?token=XXX&min_warning=2000&min_critical=3000&periode=1h"
curl "http://localhost/api/v1/monitoring/amount_check?token=XXX&periode=1h"

=end
# Counts the tickets created within the requested period and compares the
# count with the optional thresholds given as request parameters.
#
# Parameters (read from `params`):
#   periode      - required, positive integer followed by s, m, h or d
#   max_critical - optional, state 'critical' if count is greater
#   min_critical - optional, state 'critical' if count is not greater
#   max_warning  - optional, state 'warning' if count is greater
#   min_warning  - optional, state 'warning' if count is not greater
# Thresholds are evaluated in the order listed; the first hit wins.
#
# Renders `{ state:, message:, count: }` as JSON; state is 'ok' with an
# empty message when no threshold was hit.
#
# Raises Exceptions::UnprocessableEntity for a missing/invalid periode or a
# threshold that is not a non-zero integer, and Exceptions::NotAuthorized
# (via token_or_permission_check) for unauthorized requests.
def amount_check
  token_or_permission_check

  raise Exceptions::UnprocessableEntity, 'periode is missing!' if params[:periode].blank?

  scale = params[:periode][-1, 1]
  raise Exceptions::UnprocessableEntity, 'periode need to have s, m, h or d as last!' if !%w[s m h d].include?(scale)

  periode = params[:periode][0...-1].to_i
  raise Exceptions::UnprocessableEntity, 'periode need to be an integer!' if periode.zero?

  duration = case scale
             when 's' then periode.seconds
             when 'm' then periode.minutes
             when 'h' then periode.hours
             else periode.days # scale was validated above, only 'd' is left
             end
  created_at = Time.zone.now - duration

  # Queried once: every threshold and the final result must refer to the
  # same number (it used to be re-queried for each supplied threshold).
  count = Ticket.where('created_at >= ?', created_at).count

  checks = [
    { param: :max_critical, notice: 'critical', type: 'gt' },
    { param: :min_critical, notice: 'critical', type: 'lt' },
    { param: :max_warning,  notice: 'warning',  type: 'gt' },
    { param: :min_warning,  notice: 'warning',  type: 'lt' },
  ]

  result = nil
  checks.each do |row|
    raw_limit = params[row[:param]]
    next if raw_limit.blank?

    limit = raw_limit.to_i
    raise Exceptions::UnprocessableEntity, "#{row[:param]} need to be an integer!" if limit.zero?

    if row[:type] == 'gt'
      next if count <= limit

      message = "The limit of #{raw_limit} was exceeded with #{count} in the last #{params[:periode]}"
    else
      next if count > limit

      message = "The minimum of #{raw_limit} was undercut by #{count} in the last #{params[:periode]}"
    end

    result = { state: row[:notice], message: message, count: count }
    break
  end

  result ||= { state: 'ok', message: '', count: count }

  render json: result
end
2016-11-23 14:25:44 +00:00
# Generates a new random monitoring token, stores it in the
# `monitoring_token` setting and renders it as JSON with status 201.
# Raises Exceptions::NotAuthorized via access_check.
def token
  access_check

  new_token = SecureRandom.urlsafe_base64(40)
  Setting.set('monitoring_token', new_token)

  render json: { token: new_token }, status: :created
end
2017-09-07 12:45:13 +00:00
# Delegates to Scheduler.restart_failed_jobs and answers with an empty JSON
# object and status 200. Raises Exceptions::NotAuthorized via access_check.
def restart_failed_jobs
  access_check

  Scheduler.restart_failed_jobs

  render status: :ok, json: {}
end
2016-11-23 14:25:44 +00:00
private
# Grants access when the request is authenticated with the
# `admin.monitoring` permission or carries a `token` parameter equal to the
# `monitoring_token` setting.
#
# Returns nil when access is granted.
# Raises Exceptions::NotAuthorized otherwise.
def token_or_permission_check
  return if authentication_check_only(permission: 'admin.monitoring')

  expected = Setting.get('monitoring_token')
  provided = params[:token]

  # Both sides must be non-empty strings: a plain `==` treated an unset
  # setting and a missing parameter (nil == nil) as a match and let
  # unauthenticated requests through. The comparison itself is done in
  # constant time so the token cannot be probed via response timing.
  token_matches = expected.is_a?(String) && !expected.empty? &&
                  provided.is_a?(String) &&
                  ActiveSupport::SecurityUtils.secure_compare(provided, expected)
  return if token_matches

  raise Exceptions::NotAuthorized
end
# Passes (returning nil) only if an active permission named
# `admin.monitoring` exists; raises Exceptions::NotAuthorized otherwise.
def access_check
  permission = Permission.find_by(name: 'admin.monitoring', active: true)

  raise Exceptions::NotAuthorized unless permission
end
end