2014-02-03 19:23:00 +00:00
|
|
|
# Copyright (C) 2012-2014 Zammad Foundation, http://zammad-foundation.org/
|
2015-05-04 18:58:28 +00:00
|
|
|
# rubocop:disable Rails/Output
|
2013-03-05 08:38:58 +00:00
|
|
|
class Scheduler < ApplicationModel
|
|
|
|
|
2014-02-03 18:26:41 +00:00
|
|
|
# Main scheduler loop: every 90 seconds it looks up the active jobs whose
# +prio+ equals +runner+ and hands each job that has not been seen before
# to start_job. Loops until the process is stopped or an exception
# propagates out of the loop.
#
# @param runner value compared against the +prio+ column to select this runner's jobs
# @param runner_count total number of runners; only logged and passed on to start_job
def self.run(runner, runner_count)

  # an unhandled exception in any job thread should bring the scheduler
  # down instead of letting that thread die unnoticed
  Thread.abort_on_exception = true

  # ids of the jobs this process has already handed to start_job;
  # entries are never removed, so a job is started at most once per process
  jobs_started = {}

  loop do
    logger.info "Scheduler running (runner #{runner} of #{runner_count})..."

    # reconnect in case db connection is lost
    begin
      ActiveRecord::Base.connection.reconnect!
    rescue => e
      logger.error "Can't reconnect to database #{ e.inspect }"
    end

    # read/load jobs and check if it is already started
    jobs = Scheduler.where('active = ? AND prio = ?', true, runner)
    jobs.each do |job|
      next if jobs_started[job.id]

      jobs_started[job.id] = true
      start_job(job, runner, runner_count)
    end

    sleep 90
  end
end
|
|
|
|
|
2014-02-03 18:26:41 +00:00
|
|
|
# Starts a thread that executes +job+ via _start_job. A job with a +period+
# is re-executed every +period+ seconds for as long as its record still
# exists, is active and has a period; a job without a period runs once.
# When the thread finishes it clears the job's +pid+ and closes the
# database connection it used.
#
# @param job scheduler record to execute (name, method, period, active, pid are read/written)
# @param runner passed through to _start_job (used for logging there)
# @param runner_count passed through to _start_job (used for logging there)
# @return [Thread] the thread running the job
def self.start_job(job, runner, runner_count)
  logger.info "started job thread for '#{job.name}' (#{job.method})..."
  sleep 4

  Thread.new do
    job_deleted = false

    if job.period
      loop do
        _start_job(job, runner, runner_count)

        # reload the record to pick up changes made while the job was running
        reloaded = Scheduler.lookup(id: job.id)

        # exit if job got deleted; keep the last known record so the code
        # after the loop never operates on nil
        if !reloaded
          job_deleted = true
          break
        end
        job = reloaded

        # exit if job is not active anymore
        break if !job.active

        # exit if there is no loop period defined
        break if !job.period

        # wait until next run
        sleep job.period
      end
    else
      _start_job(job, runner, runner_count)
    end

    # a deleted job has no record left to update
    if !job_deleted
      job.pid = ''
      job.save
    end
    logger.info " ...stopped thread for '#{job.method}'"
    ActiveRecord::Base.connection.close
  end
end
|
|
|
|
|
2014-03-27 13:07:57 +00:00
|
|
|
# Executes +job+ once, retrying after failures. Before every attempt it
# waits 5 seconds, stamps +last_run+ and +pid+ on the job and saves it.
# After a failed attempt the database connection is re-established and the
# attempt is repeated, until 10 failures have been counted.
#
# @param job scheduler record; +job.method+ holds the Ruby code to evaluate
# @param runner only used in log/error messages
# @param runner_count only used in log/error messages
# @param try_count number of failures already counted
# @param try_run_time time of the previous failure; if it lies more than
#   5 minutes back when the next failure happens, the failure counter restarts at 0
# @return the value of the evaluated job code
# @raise [RuntimeError] when the failure counter reaches 10
def self._start_job(job, runner, runner_count, try_count = 0, try_run_time = Time.now)
  try_run_max = 10

  begin
    sleep 5

    job.last_run = Time.now
    # remember which thread is working on this job
    job.pid = Thread.current.object_id
    job.save
    logger.info "execute #{job.method} (runner #{runner} of #{runner_count}, try_count #{try_count})..."

    # NOTE(review): this evaluates a string stored on the job record as Ruby
    # code; anyone able to write that attribute can run arbitrary code here.
    eval(job.method)
  rescue => e
    logger.error "execute #{job.method} (runner #{runner} of #{runner_count}, try_count #{try_count}) exited with error #{ e.inspect }"

    # reconnect in case db connection is lost
    begin
      ActiveRecord::Base.connection.reconnect!
    rescue => reconnect_error
      logger.error "Can't reconnect to database #{ reconnect_error.inspect }"
    end

    try_count += 1

    # reset error counter if too old
    try_count = 0 if try_run_time + (60 * 5) < Time.now
    try_run_time = Time.now

    # give up once the maximum number of failures is reached
    if try_count >= try_run_max
      raise "STOP thread for #{job.method} (runner #{runner} of #{runner_count} after #{try_count} tries"
    end

    # restart job again (bounded by try_run_max, no recursion needed)
    retry
  end
end
|
2013-08-04 21:56:02 +00:00
|
|
|
|
2014-02-03 18:26:41 +00:00
|
|
|
# Works off queued Delayed::Job entries in a loop until the global +$exit+
# is set. After every pass it either idles for 10 seconds (nothing was
# processed) or logs the throughput of the pass.
def self.worker
  wait = 10
  logger.info "*** Starting worker #{Delayed::Job}"

  loop do
    result   = nil
    realtime = Benchmark.realtime do
      result = Delayed::Worker.new.work_off
    end

    # all entries of the work_off result added up; the last entry is
    # reported as the number of failed jobs below
    count = result.sum

    break if $exit

    if count.zero?
      sleep(wait)
      logger.info '*** worker loop'
    else
      # the formatted summary has to be handed to the logger, otherwise it is lost
      logger.info format('*** %d jobs processed at %.4f j/s, %d failed ...', count, count / realtime, result.last)
    end
  end
end
|
|
|
|
|
2013-08-04 21:56:02 +00:00
|
|
|
# Monitoring check for a single scheduler job: prints a one-line status and
# terminates the process with a monitoring-style exit code
# (0 = ok, 1 = warning, 2 = critical), based on the job's +last_run+.
#
# @param name name of the scheduler job to look up
# @param time_warning minutes without a run after which the state is WARNING
# @param time_critical minutes without a run after which the state is CRITICAL
# @return [true] only when no job with that name exists; all other paths exit
def self.check(name, time_warning = 10, time_critical = 20)
  now                = Time.now
  time_warning_time  = now - time_warning.minutes
  time_critical_time = now - time_critical.minutes

  scheduler = Scheduler.where(name: name).first
  if !scheduler
    puts "CRITICAL - no such scheduler jobs '#{name}'"

    # NOTE(review): unlike the other CRITICAL branches this returns instead
    # of exiting with 2 - kept as is for existing callers, please confirm
    return true
  end

  logger.debug scheduler.inspect

  if !scheduler.last_run
    puts "CRITICAL - scheduler jobs never started '#{name}'"
    exit 2
  end

  if scheduler.last_run < time_critical_time
    puts "CRITICAL - scheduler jobs was not running in last '#{time_critical}' minutes - last run at '#{scheduler.last_run}' '#{name}'"
    exit 2
  end

  # only reached when the last run is newer than the critical threshold,
  # so this is the warning state, reported with its own label and exit code
  if scheduler.last_run < time_warning_time
    puts "WARNING - scheduler jobs was not running in last '#{time_warning}' minutes - last run at '#{scheduler.last_run}' '#{name}'"
    exit 1
  end

  puts "ok - scheduler jobs was running at '#{scheduler.last_run}' '#{name}'"
  exit 0
end
|
2013-06-12 15:59:58 +00:00
|
|
|
end
|