Fixes #2014 - Import job (LDAP/Exchange) hangs on scheduler restart.
This commit is contained in:
parent
163a5f2f26
commit
c4ea22470a
3 changed files with 132 additions and 48 deletions
|
@ -5,6 +5,8 @@ class ImportJob < ApplicationModel
|
||||||
store :payload
|
store :payload
|
||||||
store :result
|
store :result
|
||||||
|
|
||||||
|
scope :running, -> { where(finished_at: nil, dry_run: false).where.not(started_at: nil) }
|
||||||
|
|
||||||
# Starts the import backend class based on the name attribute.
|
# Starts the import backend class based on the name attribute.
|
||||||
# Import backend class is initialized with the current instance.
|
# Import backend class is initialized with the current instance.
|
||||||
# Logs the start and end time (if ended successfully) and logs
|
# Logs the start and end time (if ended successfully) and logs
|
||||||
|
|
|
@ -121,9 +121,24 @@ class Scheduler < ApplicationModel
|
||||||
raise 'This method should only get called when Scheduler.threads are initialized. Use `force: true` to start anyway.'
|
raise 'This method should only get called when Scheduler.threads are initialized. Use `force: true` to start anyway.'
|
||||||
end
|
end
|
||||||
|
|
||||||
log_start_finish(:info, 'Cleanup of left over locked delayed jobs') do
|
start_time = Time.zone.now
|
||||||
|
|
||||||
Delayed::Job.all.each do |job|
|
cleanup_delayed_jobs(start_time)
|
||||||
|
cleanup_import_jobs(start_time)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Checks for locked delayed jobs and tries to reschedule or destroy each of them.
|
||||||
|
#
|
||||||
|
# @param [ActiveSupport::TimeWithZone] after the time the cleanup was started
|
||||||
|
#
|
||||||
|
# @example
|
||||||
|
# Scheduler.cleanup_delayed_jobs(TimeZone.now)
|
||||||
|
#
|
||||||
|
# return [nil]
|
||||||
|
def self.cleanup_delayed_jobs(after)
|
||||||
|
log_start_finish(:info, "Cleanup of left over locked delayed jobs #{after}") do
|
||||||
|
|
||||||
|
Delayed::Job.where('updated_at < ?', after).where.not(locked_at: nil).each do |job|
|
||||||
log_start_finish(:info, "Checking left over delayed job #{job.inspect}") do
|
log_start_finish(:info, "Checking left over delayed job #{job.inspect}") do
|
||||||
cleanup_delayed(job)
|
cleanup_delayed(job)
|
||||||
end
|
end
|
||||||
|
@ -131,13 +146,13 @@ class Scheduler < ApplicationModel
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Checks if the given job can be rescheduled or destroys it. Logs the action as warn.
|
# Checks if the given delayed job can be rescheduled or destroys it. Logs the action as warn.
|
||||||
# Works only for locked jobs. Jobs that are not locked are ignored and
|
# Works only for locked delayed jobs. Delayed jobs that are not locked are ignored and
|
||||||
# should get destroyed directly.
|
# should get destroyed directly.
|
||||||
# Checks the delayed job object for a method called .reschedule?. The memthod is called
|
# Checks the Delayed::Job instance for a method called .reschedule?. The method is called
|
||||||
# with the delayed job as a parameter. The result value is expected as a Boolean. If the
|
# with the Delayed::Job instance as a parameter. The result value is expected to be a Boolean.
|
||||||
# result is true the lock gets removed and the delayed job gets rescheduled. If the return
|
# If the result is true the lock gets removed and the delayed job gets rescheduled.
|
||||||
# value is false it will get destroyed which is the default behaviour.
|
# If the return value is false it will get destroyed which is the default behaviour.
|
||||||
#
|
#
|
||||||
# @param [Delayed::Job] job the job that should get checked for destroying/rescheduling.
|
# @param [Delayed::Job] job the job that should get checked for destroying/rescheduling.
|
||||||
#
|
#
|
||||||
|
@ -181,6 +196,33 @@ class Scheduler < ApplicationModel
|
||||||
logger.warn "#{action} locked delayed job: #{job_name}"
|
logger.warn "#{action} locked delayed job: #{job_name}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Checks for killed import jobs and marks them as finished and adds a note.
|
||||||
|
#
|
||||||
|
# @param [ActiveSupport::TimeWithZone] after the time the cleanup was started
|
||||||
|
#
|
||||||
|
# @example
|
||||||
|
# Scheduler.cleanup_import_jobs(TimeZone.now)
|
||||||
|
#
|
||||||
|
# return [nil]
|
||||||
|
def self.cleanup_import_jobs(after)
|
||||||
|
log_start_finish(:info, "Cleanup of left over import jobs #{after}") do
|
||||||
|
error = 'Interrupted by scheduler restart. Please restart manually or wait till next execution time.'.freeze
|
||||||
|
|
||||||
|
# we need to exclude jobs that were updated at or since we started
|
||||||
|
# cleaning up (via the #reschedule? call) because they might
|
||||||
|
# were started `.delay`-ed and are flagged for restart
|
||||||
|
ImportJob.running.where('updated_at < ?', after).each do |job|
|
||||||
|
|
||||||
|
job.update!(
|
||||||
|
finished_at: after,
|
||||||
|
result: {
|
||||||
|
error: error
|
||||||
|
}
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def self.start_job(job)
|
def self.start_job(job)
|
||||||
|
|
||||||
# start job and return thread handle
|
# start job and return thread handle
|
||||||
|
|
|
@ -99,56 +99,24 @@ RSpec.describe Scheduler do
|
||||||
described_class.cleanup
|
described_class.cleanup
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'keeps unlocked Delayed::Job-s' do
|
context 'Delayed::Job' do
|
||||||
# meta :)
|
|
||||||
described_class.delay.cleanup
|
|
||||||
|
|
||||||
expect do
|
it 'keeps unlocked' do
|
||||||
simulate_threads_call
|
|
||||||
end.not_to change {
|
|
||||||
Delayed::Job.count
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
context 'locked Delayed::Job' do
|
|
||||||
|
|
||||||
it 'gets destroyed' do
|
|
||||||
# meta :)
|
# meta :)
|
||||||
described_class.delay.cleanup
|
described_class.delay.cleanup
|
||||||
|
|
||||||
# lock job (simluates interrupted scheduler task)
|
|
||||||
locked_job = Delayed::Job.last
|
|
||||||
locked_job.update!(locked_at: Time.zone.now)
|
|
||||||
|
|
||||||
expect do
|
expect do
|
||||||
simulate_threads_call
|
simulate_threads_call
|
||||||
end.to change {
|
end.not_to change {
|
||||||
Delayed::Job.count
|
Delayed::Job.count
|
||||||
}.by(-1)
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
context 'respond to reschedule?' do
|
context 'locked' do
|
||||||
|
|
||||||
it 'gets rescheduled for positive responses' do
|
it 'gets destroyed' do
|
||||||
SpecSpace::DelayedJobBackend.reschedule = true
|
# meta :)
|
||||||
SpecSpace::DelayedJobBackend.delay.start
|
described_class.delay.cleanup
|
||||||
|
|
||||||
# lock job (simluates interrupted scheduler task)
|
|
||||||
locked_job = Delayed::Job.last
|
|
||||||
locked_job.update!(locked_at: Time.zone.now)
|
|
||||||
|
|
||||||
expect do
|
|
||||||
simulate_threads_call
|
|
||||||
end.to not_change {
|
|
||||||
Delayed::Job.count
|
|
||||||
}.and change {
|
|
||||||
Delayed::Job.last.locked_at
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'gets destroyed for negative responses' do
|
|
||||||
SpecSpace::DelayedJobBackend.reschedule = false
|
|
||||||
SpecSpace::DelayedJobBackend.delay.start
|
|
||||||
|
|
||||||
# lock job (simluates interrupted scheduler task)
|
# lock job (simluates interrupted scheduler task)
|
||||||
locked_job = Delayed::Job.last
|
locked_job = Delayed::Job.last
|
||||||
|
@ -160,6 +128,78 @@ RSpec.describe Scheduler do
|
||||||
Delayed::Job.count
|
Delayed::Job.count
|
||||||
}.by(-1)
|
}.by(-1)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'respond to reschedule?' do
|
||||||
|
|
||||||
|
it 'gets rescheduled for positive responses' do
|
||||||
|
SpecSpace::DelayedJobBackend.reschedule = true
|
||||||
|
SpecSpace::DelayedJobBackend.delay.start
|
||||||
|
|
||||||
|
# lock job (simluates interrupted scheduler task)
|
||||||
|
locked_job = Delayed::Job.last
|
||||||
|
locked_job.update!(locked_at: Time.zone.now)
|
||||||
|
|
||||||
|
expect do
|
||||||
|
simulate_threads_call
|
||||||
|
end.to not_change {
|
||||||
|
Delayed::Job.count
|
||||||
|
}.and change {
|
||||||
|
Delayed::Job.last.locked_at
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'gets destroyed for negative responses' do
|
||||||
|
SpecSpace::DelayedJobBackend.reschedule = false
|
||||||
|
SpecSpace::DelayedJobBackend.delay.start
|
||||||
|
|
||||||
|
# lock job (simluates interrupted scheduler task)
|
||||||
|
locked_job = Delayed::Job.last
|
||||||
|
locked_job.update!(locked_at: Time.zone.now)
|
||||||
|
|
||||||
|
expect do
|
||||||
|
simulate_threads_call
|
||||||
|
end.to change {
|
||||||
|
Delayed::Job.count
|
||||||
|
}.by(-1)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context 'ImportJob' do
|
||||||
|
|
||||||
|
context 'affected job' do
|
||||||
|
|
||||||
|
let(:job) { create(:import_job, started_at: 5.minutes.ago) }
|
||||||
|
|
||||||
|
it 'finishes stuck jobs' do
|
||||||
|
|
||||||
|
expect do
|
||||||
|
simulate_threads_call
|
||||||
|
end.to change {
|
||||||
|
job.reload.finished_at
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'adds an error message to the result' do
|
||||||
|
|
||||||
|
expect do
|
||||||
|
simulate_threads_call
|
||||||
|
end.to change {
|
||||||
|
job.reload.result[:error]
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't change jobs added after stop" do
|
||||||
|
|
||||||
|
job = create(:import_job)
|
||||||
|
|
||||||
|
expect do
|
||||||
|
simulate_threads_call
|
||||||
|
end.not_to change {
|
||||||
|
job.reload
|
||||||
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue