2016-10-19 03:11:36 +00:00
|
|
|
# Copyright (C) 2012-2016 Zammad Foundation, http://zammad-foundation.org/
|
2016-04-26 09:30:46 +00:00
|
|
|
|
2013-03-05 08:38:58 +00:00
|
|
|
class Scheduler < ApplicationModel
|
|
|
|
|
2015-05-20 23:07:13 +00:00
|
|
|
# rubocop:disable Style/ClassVars
|
|
|
|
@@jobs_started = {}
|
|
|
|
# rubocop:enable Style/ClassVars
|
|
|
|
|
|
|
|
# start threads
|
|
|
|
def self.threads
|
2013-03-05 08:38:58 +00:00
|
|
|
|
|
|
|
Thread.abort_on_exception = true
|
2013-06-12 15:59:58 +00:00
|
|
|
|
2017-05-13 13:50:28 +00:00
|
|
|
# reconnect in case db connection is lost
|
|
|
|
begin
|
|
|
|
ActiveRecord::Base.connection.reconnect!
|
|
|
|
rescue => e
|
|
|
|
logger.error "Can't reconnect to database #{e.inspect}"
|
|
|
|
end
|
|
|
|
|
2017-05-11 15:18:03 +00:00
|
|
|
# cleanup old background jobs
|
|
|
|
cleanup
|
|
|
|
|
2015-05-20 23:07:13 +00:00
|
|
|
# start worker for background jobs
|
|
|
|
worker
|
|
|
|
|
|
|
|
# start loop to execute scheduler jobs
|
2015-05-05 14:10:06 +00:00
|
|
|
loop do
|
2015-05-20 23:07:13 +00:00
|
|
|
logger.info 'Scheduler running...'
|
2013-05-07 20:45:00 +00:00
|
|
|
|
2014-03-27 13:07:57 +00:00
|
|
|
# reconnect in case db connection is lost
|
|
|
|
begin
|
|
|
|
ActiveRecord::Base.connection.reconnect!
|
|
|
|
rescue => e
|
2015-07-03 15:18:01 +00:00
|
|
|
logger.error "Can't reconnect to database #{e.inspect}"
|
2014-03-27 13:07:57 +00:00
|
|
|
end
|
|
|
|
|
2013-05-07 20:45:00 +00:00
|
|
|
# read/load jobs and check if it is alredy started
|
2016-01-05 08:48:26 +00:00
|
|
|
jobs = Scheduler.where('active = ?', true).order('prio ASC')
|
2017-10-01 12:25:52 +00:00
|
|
|
jobs.each do |job|
|
2015-05-20 23:07:13 +00:00
|
|
|
|
|
|
|
# ignore job is still running
|
2018-01-09 16:46:25 +00:00
|
|
|
next if skip_job?(job)
|
2015-05-20 23:07:13 +00:00
|
|
|
|
|
|
|
# check job.last_run
|
2016-01-05 08:48:26 +00:00
|
|
|
next if job.last_run && job.period && job.last_run > (Time.zone.now - job.period)
|
2015-05-20 23:07:13 +00:00
|
|
|
|
|
|
|
# run job as own thread
|
2018-01-09 16:46:25 +00:00
|
|
|
@@jobs_started[ job.id ] = start_job(job)
|
2015-06-30 22:26:02 +00:00
|
|
|
sleep 10
|
2017-10-01 12:25:52 +00:00
|
|
|
end
|
2015-05-20 23:07:13 +00:00
|
|
|
sleep 60
|
2013-03-05 08:38:58 +00:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2018-01-09 16:46:25 +00:00
|
|
|
# Checks if a Scheduler Job should get started or not.
|
|
|
|
# The decision is based on if there is a running thread or not.
|
|
|
|
# Invalid threads get canceled and new threads can get started.
|
|
|
|
#
|
|
|
|
# @param [Scheduler] job The job that should get checked for running threads.
|
|
|
|
#
|
|
|
|
# @example
|
|
|
|
# Scheduler.skip_job(job)
|
|
|
|
#
|
|
|
|
# return [Boolean]
|
|
|
|
def self.skip_job?(job)
|
|
|
|
thread = @@jobs_started[ job.id ]
|
|
|
|
return false if thread.blank?
|
|
|
|
|
|
|
|
# check for validity of thread instance
|
|
|
|
if !thread.respond_to?(:status)
|
|
|
|
logger.error "Invalid thread stored for job '#{job.name}' (#{job.method}): #{thread.inspect}. Deleting and resting job."
|
|
|
|
@@jobs_started.delete(job.id)
|
|
|
|
return false
|
|
|
|
end
|
|
|
|
|
|
|
|
# check thread state:
|
|
|
|
# http://devdocs.io/ruby~2.4/thread#method-i-status
|
|
|
|
status = thread.status
|
|
|
|
|
|
|
|
# non falsly state means it has some literal running state
|
|
|
|
if status.present?
|
|
|
|
logger.info "Running job thread for '#{job.name}' (#{job.method}) status is: #{status}"
|
|
|
|
return true
|
|
|
|
end
|
|
|
|
|
|
|
|
# the following cases should not happen since the
|
|
|
|
# @@jobs_started cleanup is performed inside of the
|
|
|
|
# thread itself
|
|
|
|
# therefore we have to log an error and remove it
|
|
|
|
# from our threadpool @@jobs_started
|
|
|
|
how = 'unknownly'
|
|
|
|
if status.nil?
|
|
|
|
how = 'via an exception'
|
|
|
|
elsif status == false
|
|
|
|
how = 'normally'
|
|
|
|
end
|
|
|
|
|
|
|
|
logger.error "Job thread terminated #{how} found for '#{job.name}' (#{job.method}). This should not happen. Please report."
|
|
|
|
@@jobs_started.delete(job.id)
|
|
|
|
false
|
|
|
|
end
|
|
|
|
|
2017-05-11 15:18:03 +00:00
|
|
|
# Checks all delayed jobs that are locked and cleans them up.
|
|
|
|
# Should only get called when the Scheduler gets started.
|
|
|
|
#
|
|
|
|
# @see Scheduler#cleanup_delayed
|
|
|
|
#
|
|
|
|
# @param [Boolean] force forces the cleanup if not called in Scheduler starting context.
|
|
|
|
#
|
|
|
|
# @example
|
|
|
|
# Scheduler.cleanup
|
|
|
|
#
|
|
|
|
# @raise [RuntimeError] If called without force and not when Scheduler gets started.
|
|
|
|
#
|
|
|
|
# return [nil]
|
|
|
|
def self.cleanup(force: false)
|
|
|
|
|
2017-11-23 08:09:44 +00:00
|
|
|
if !force && caller_locations(1..1).first.label != 'threads'
|
2017-05-11 15:18:03 +00:00
|
|
|
raise 'This method should only get called when Scheduler.threads are initialized. Use `force: true` to start anyway.'
|
|
|
|
end
|
|
|
|
|
|
|
|
Delayed::Job.all.each do |job|
|
|
|
|
cleanup_delayed(job)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Checks if the given job can be rescheduled or destroys it. Logs the action as warn.
|
|
|
|
# Works only for locked jobs. Jobs that are not locked are ignored and
|
|
|
|
# should get destroyed directly.
|
|
|
|
# Checks the delayed job object for a method called .reschedule?. The memthod is called
|
|
|
|
# with the delayed job as a parameter. The result value is expected as a Boolean. If the
|
|
|
|
# result is true the lock gets removed and the delayed job gets rescheduled. If the return
|
|
|
|
# value is false it will get destroyed which is the default behaviour.
|
|
|
|
#
|
|
|
|
# @param [Delayed::Job] job the job that should get checked for destroying/rescheduling.
|
|
|
|
#
|
|
|
|
# @example
|
|
|
|
# Scheduler.cleanup_delayed(job)
|
|
|
|
#
|
|
|
|
# return [nil]
|
|
|
|
def self.cleanup_delayed(job)
|
|
|
|
return if job.locked_at.blank?
|
|
|
|
|
|
|
|
job_name = job.name
|
|
|
|
payload_object = job.payload_object
|
|
|
|
reschedule = false
|
|
|
|
if payload_object.present?
|
|
|
|
if payload_object.respond_to?(:object)
|
|
|
|
object = payload_object.object
|
|
|
|
|
|
|
|
if object.respond_to?(:id)
|
|
|
|
job_name += " (id: #{object.id})"
|
|
|
|
end
|
|
|
|
|
|
|
|
if object.respond_to?(:reschedule?) && object.reschedule?(job)
|
|
|
|
reschedule = true
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
if payload_object.respond_to?(:args)
|
|
|
|
job_name += " - ARGS: #{payload_object.args.inspect}"
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
if reschedule
|
|
|
|
action = 'Rescheduling'
|
|
|
|
job.unlock
|
|
|
|
job.save
|
|
|
|
else
|
|
|
|
action = 'Destroyed'
|
|
|
|
job.destroy
|
|
|
|
end
|
|
|
|
|
|
|
|
Rails.logger.warn "#{action} locked delayed job: #{job_name}"
|
|
|
|
end
|
|
|
|
|
2016-01-05 08:48:26 +00:00
|
|
|
def self.start_job(job)
|
2013-03-05 08:38:58 +00:00
|
|
|
|
2018-01-09 16:46:25 +00:00
|
|
|
# start job and return thread handle
|
2017-10-01 12:25:52 +00:00
|
|
|
Thread.new do
|
2016-08-20 19:29:22 +00:00
|
|
|
ApplicationHandleInfo.current = 'scheduler'
|
2015-05-20 23:07:13 +00:00
|
|
|
|
2015-05-21 06:37:21 +00:00
|
|
|
logger.info "Started job thread for '#{job.name}' (#{job.method})..."
|
2015-05-20 23:07:13 +00:00
|
|
|
|
2018-01-09 16:46:25 +00:00
|
|
|
# start loop for periods equal or under 5 minutes
|
|
|
|
if job.period && job.period <= 5.minutes
|
2015-05-05 14:10:06 +00:00
|
|
|
loop do
|
2015-06-30 22:26:02 +00:00
|
|
|
_start_job(job)
|
|
|
|
job = Scheduler.lookup(id: job.id)
|
2013-05-07 20:45:00 +00:00
|
|
|
|
|
|
|
# exit is job got deleted
|
2013-03-05 08:38:58 +00:00
|
|
|
break if !job
|
2013-05-07 20:45:00 +00:00
|
|
|
|
|
|
|
# exit if job is not active anymore
|
2013-03-05 08:38:58 +00:00
|
|
|
break if !job.active
|
2013-05-07 20:45:00 +00:00
|
|
|
|
|
|
|
# exit if there is no loop period defined
|
2013-03-05 08:38:58 +00:00
|
|
|
break if !job.period
|
2013-05-07 20:45:00 +00:00
|
|
|
|
|
|
|
# wait until next run
|
2013-03-05 08:38:58 +00:00
|
|
|
sleep job.period
|
|
|
|
end
|
|
|
|
else
|
2015-06-30 22:26:02 +00:00
|
|
|
_start_job(job)
|
2013-03-05 08:38:58 +00:00
|
|
|
end
|
|
|
|
job.pid = ''
|
|
|
|
job.save
|
2014-10-26 12:17:00 +00:00
|
|
|
logger.info " ...stopped thread for '#{job.method}'"
|
2015-02-16 12:41:46 +00:00
|
|
|
ActiveRecord::Base.connection.close
|
2015-05-20 23:07:13 +00:00
|
|
|
|
2018-01-09 16:46:25 +00:00
|
|
|
# release thread lock and remove thread handle
|
|
|
|
@@jobs_started.delete(job.id)
|
2017-10-01 12:25:52 +00:00
|
|
|
end
|
2013-03-05 08:38:58 +00:00
|
|
|
end
|
|
|
|
|
2016-01-05 08:48:26 +00:00
|
|
|
def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
|
2017-09-11 11:16:08 +00:00
|
|
|
job.update!(
|
2017-09-07 13:00:12 +00:00
|
|
|
last_run: Time.zone.now,
|
|
|
|
pid: Thread.current.object_id,
|
|
|
|
status: 'ok',
|
|
|
|
error_message: '',
|
|
|
|
)
|
|
|
|
|
2015-07-03 15:30:05 +00:00
|
|
|
logger.info "execute #{job.method} (try_count #{try_count})..."
|
2017-11-23 08:09:44 +00:00
|
|
|
eval job.method() # rubocop:disable Security/Eval
|
2015-07-03 15:30:05 +00:00
|
|
|
rescue => e
|
|
|
|
logger.error "execute #{job.method} (try_count #{try_count}) exited with error #{e.inspect}"
|
|
|
|
|
|
|
|
# reconnect in case db connection is lost
|
2014-03-27 13:07:57 +00:00
|
|
|
begin
|
2015-07-03 15:30:05 +00:00
|
|
|
ActiveRecord::Base.connection.reconnect!
|
2014-03-27 13:07:57 +00:00
|
|
|
rescue => e
|
2015-07-03 15:30:05 +00:00
|
|
|
logger.error "Can't reconnect to database #{e.inspect}"
|
|
|
|
end
|
2014-03-27 13:07:57 +00:00
|
|
|
|
2015-07-03 15:30:05 +00:00
|
|
|
try_run_max = 10
|
|
|
|
try_count += 1
|
2014-03-27 13:07:57 +00:00
|
|
|
|
2015-07-03 15:30:05 +00:00
|
|
|
# reset error counter if to old
|
2016-01-05 08:48:26 +00:00
|
|
|
if try_run_time + (60 * 5) < Time.zone.now
|
2015-07-03 15:30:05 +00:00
|
|
|
try_count = 0
|
|
|
|
end
|
|
|
|
try_run_time = Time.zone.now
|
2014-03-27 13:07:57 +00:00
|
|
|
|
2015-07-03 15:30:05 +00:00
|
|
|
# restart job again
|
|
|
|
if try_run_max > try_count
|
2016-01-05 08:48:26 +00:00
|
|
|
_start_job(job, try_count, try_run_time)
|
2015-07-03 15:30:05 +00:00
|
|
|
else
|
2018-01-09 16:46:25 +00:00
|
|
|
# release thread lock and remove thread handle
|
|
|
|
@@jobs_started.delete(job.id)
|
2017-05-15 11:50:55 +00:00
|
|
|
error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
|
|
|
|
logger.error error
|
|
|
|
|
2017-09-11 11:16:08 +00:00
|
|
|
job.update!(
|
2017-09-07 13:00:12 +00:00
|
|
|
error_message: error,
|
|
|
|
status: 'error',
|
|
|
|
active: false,
|
|
|
|
)
|
2014-03-27 13:07:57 +00:00
|
|
|
end
|
2018-01-09 16:47:43 +00:00
|
|
|
|
|
|
|
# rescue any other Exceptions that are not StandardError or childs of it
|
|
|
|
# https://stackoverflow.com/questions/10048173/why-is-it-bad-style-to-rescue-exception-e-in-ruby
|
|
|
|
# http://rubylearning.com/satishtalim/ruby_exceptions.html
|
|
|
|
rescue Exception => e # rubocop:disable Lint/RescueException
|
|
|
|
logger.error "execute #{job.method} (try_count #{try_count}) exited with a non standard-error #{e.inspect}"
|
|
|
|
raise
|
2013-03-05 08:38:58 +00:00
|
|
|
end
|
2013-08-04 21:56:02 +00:00
|
|
|
|
2016-04-26 09:30:46 +00:00
|
|
|
def self.worker(foreground = false)
|
|
|
|
|
|
|
|
# used for tests
|
|
|
|
if foreground
|
2016-08-20 19:29:22 +00:00
|
|
|
original_interface_handle = ApplicationHandleInfo.current
|
|
|
|
ApplicationHandleInfo.current = 'scheduler'
|
|
|
|
|
2016-04-26 09:30:46 +00:00
|
|
|
original_user_id = UserInfo.current_user_id
|
|
|
|
UserInfo.current_user_id = nil
|
2016-08-20 19:29:22 +00:00
|
|
|
|
2016-04-26 09:30:46 +00:00
|
|
|
loop do
|
|
|
|
success, failure = Delayed::Worker.new.work_off
|
2016-07-26 22:02:28 +00:00
|
|
|
if failure.nonzero?
|
2016-04-26 09:30:46 +00:00
|
|
|
raise "ERROR: #{failure} failed background jobs: #{Delayed::Job.where('last_error IS NOT NULL').inspect}"
|
|
|
|
end
|
2016-07-26 22:02:28 +00:00
|
|
|
break if success.zero?
|
2016-04-26 09:30:46 +00:00
|
|
|
end
|
|
|
|
UserInfo.current_user_id = original_user_id
|
2016-08-20 19:29:22 +00:00
|
|
|
ApplicationHandleInfo.current = original_interface_handle
|
2016-04-26 09:30:46 +00:00
|
|
|
return
|
|
|
|
end
|
2014-02-03 18:26:41 +00:00
|
|
|
|
2016-04-26 09:30:46 +00:00
|
|
|
# used for production
|
|
|
|
wait = 8
|
2017-10-01 12:25:52 +00:00
|
|
|
Thread.new do
|
2015-05-20 23:07:13 +00:00
|
|
|
sleep wait
|
2014-02-03 18:26:41 +00:00
|
|
|
|
2015-05-21 06:37:21 +00:00
|
|
|
logger.info "Starting worker thread #{Delayed::Job}"
|
2014-02-03 18:26:41 +00:00
|
|
|
|
2015-05-20 23:07:13 +00:00
|
|
|
loop do
|
2016-08-20 19:29:22 +00:00
|
|
|
ApplicationHandleInfo.current = 'scheduler'
|
2015-05-20 23:07:13 +00:00
|
|
|
result = nil
|
2014-02-03 18:26:41 +00:00
|
|
|
|
2015-05-20 23:07:13 +00:00
|
|
|
realtime = Benchmark.realtime do
|
2016-04-26 09:30:46 +00:00
|
|
|
logger.debug "*** worker thread, #{Delayed::Job.all.count} in queue"
|
2015-05-20 23:07:13 +00:00
|
|
|
result = Delayed::Worker.new.work_off
|
|
|
|
end
|
|
|
|
|
|
|
|
count = result.sum
|
|
|
|
|
|
|
|
if count.zero?
|
|
|
|
sleep wait
|
|
|
|
logger.debug '*** worker thread loop'
|
|
|
|
else
|
|
|
|
format "*** #{count} jobs processed at %.4f j/s, %d failed ...\n", count / realtime, result.last
|
|
|
|
end
|
2014-02-03 18:26:41 +00:00
|
|
|
end
|
2015-05-20 23:07:13 +00:00
|
|
|
|
|
|
|
logger.info ' ...stopped worker thread'
|
|
|
|
ActiveRecord::Base.connection.close
|
2017-10-01 12:25:52 +00:00
|
|
|
end
|
2015-05-20 23:07:13 +00:00
|
|
|
|
2014-02-03 18:26:41 +00:00
|
|
|
end
|
|
|
|
|
2017-09-07 16:07:48 +00:00
|
|
|
# This function returns a list of failed jobs
|
|
|
|
#
|
|
|
|
# @example
|
|
|
|
# Scheduler.failed_jobs
|
|
|
|
#
|
|
|
|
# return [Array]
|
|
|
|
def self.failed_jobs
|
|
|
|
where(status: 'error', active: false)
|
|
|
|
end
|
|
|
|
|
2017-09-07 15:34:32 +00:00
|
|
|
# This function restarts failed jobs to retry them
|
|
|
|
#
|
|
|
|
# @example
|
|
|
|
# Scheduler.restart_failed_jobs
|
|
|
|
#
|
|
|
|
# return [true]
|
|
|
|
def self.restart_failed_jobs
|
2017-09-07 16:07:48 +00:00
|
|
|
failed_jobs.each do |job|
|
2017-09-11 11:16:08 +00:00
|
|
|
job.update!(active: true)
|
2017-09-07 15:34:32 +00:00
|
|
|
end
|
|
|
|
|
|
|
|
true
|
|
|
|
end
|
|
|
|
|
2013-06-12 15:59:58 +00:00
|
|
|
end
|