Added advanced thread logging for better reasoning and error handling of Scheduler threads.

This commit is contained in:
Thorsten Eckel 2018-01-09 17:46:25 +01:00
parent 405a8ce77b
commit 8300887bae

View file

@ -40,20 +40,67 @@ class Scheduler < ApplicationModel
jobs.each do |job| jobs.each do |job|
# ignore job is still running # ignore job is still running
next if @@jobs_started[ job.id ] next if skip_job?(job)
# check job.last_run # check job.last_run
next if job.last_run && job.period && job.last_run > (Time.zone.now - job.period) next if job.last_run && job.period && job.last_run > (Time.zone.now - job.period)
# run job as own thread # run job as own thread
@@jobs_started[ job.id ] = true @@jobs_started[ job.id ] = start_job(job)
start_job(job)
sleep 10 sleep 10
end end
sleep 60 sleep 60
end end
end end
# Checks if a Scheduler Job should get started or not.
# The decision is based on if there is a running thread or not.
# Invalid threads get canceled and new threads can get started.
#
# @param [Scheduler] job The job that should get checked for running threads.
#
# @example
# Scheduler.skip_job(job)
#
# return [Boolean]
def self.skip_job?(job)
thread = @@jobs_started[ job.id ]
return false if thread.blank?
# check for validity of thread instance
if !thread.respond_to?(:status)
logger.error "Invalid thread stored for job '#{job.name}' (#{job.method}): #{thread.inspect}. Deleting and resting job."
@@jobs_started.delete(job.id)
return false
end
# check thread state:
# http://devdocs.io/ruby~2.4/thread#method-i-status
status = thread.status
# non falsly state means it has some literal running state
if status.present?
logger.info "Running job thread for '#{job.name}' (#{job.method}) status is: #{status}"
return true
end
# the following cases should not happen since the
# @@jobs_started cleanup is performed inside of the
# thread itself
# therefore we have to log an error and remove it
# from our threadpool @@jobs_started
how = 'unknownly'
if status.nil?
how = 'via an exception'
elsif status == false
how = 'normally'
end
logger.error "Job thread terminated #{how} found for '#{job.name}' (#{job.method}). This should not happen. Please report."
@@jobs_started.delete(job.id)
false
end
# Checks all delayed jobs that are locked and cleans them up. # Checks all delayed jobs that are locked and cleans them up.
# Should only get called when the Scheduler gets started. # Should only get called when the Scheduler gets started.
# #
@ -130,13 +177,14 @@ class Scheduler < ApplicationModel
def self.start_job(job) def self.start_job(job)
# start job and return thread handle
Thread.new do Thread.new do
ApplicationHandleInfo.current = 'scheduler' ApplicationHandleInfo.current = 'scheduler'
logger.info "Started job thread for '#{job.name}' (#{job.method})..." logger.info "Started job thread for '#{job.name}' (#{job.method})..."
# start loop for periods under 5 minutes # start loop for periods equal or under 5 minutes
if job.period && job.period <= 300 if job.period && job.period <= 5.minutes
loop do loop do
_start_job(job) _start_job(job)
job = Scheduler.lookup(id: job.id) job = Scheduler.lookup(id: job.id)
@ -161,8 +209,8 @@ class Scheduler < ApplicationModel
logger.info " ...stopped thread for '#{job.method}'" logger.info " ...stopped thread for '#{job.method}'"
ActiveRecord::Base.connection.close ActiveRecord::Base.connection.close
# release thread lock # release thread lock and remove thread handle
@@jobs_started[ job.id ] = false @@jobs_started.delete(job.id)
end end
end end
@ -199,7 +247,8 @@ class Scheduler < ApplicationModel
if try_run_max > try_count if try_run_max > try_count
_start_job(job, try_count, try_run_time) _start_job(job, try_count, try_run_time)
else else
@@jobs_started[ job.id ] = false # release thread lock and remove thread handle
@@jobs_started.delete(job.id)
error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}" error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
logger.error error logger.error error