Merge branch 'private-pull-request-1118' into develop
This commit is contained in:
commit
0ef0066c31
10 changed files with 160 additions and 10 deletions
|
@ -4,6 +4,7 @@ class Index extends App.ControllerSubContent
|
|||
events:
|
||||
'click .js-resetToken': 'resetToken'
|
||||
'click .js-select': 'selectAll'
|
||||
'click .js-restartFailedJobs': 'restartFailedJobs'
|
||||
|
||||
constructor: ->
|
||||
super
|
||||
|
@ -42,4 +43,14 @@ class Index extends App.ControllerSubContent
|
|||
@load()
|
||||
)
|
||||
|
||||
restartFailedJobs: (e) =>
|
||||
e.preventDefault()
|
||||
@ajax(
|
||||
id: 'restart_failed_jobs_request'
|
||||
type: 'POST'
|
||||
url: "#{@apiPath}/monitoring/restart_failed_jobs"
|
||||
success: (data) =>
|
||||
@load()
|
||||
)
|
||||
|
||||
App.Config.set('Monitoring', { prio: 3600, name: 'Monitoring', parent: '#system', target: '#system/monitoring', controller: Index, permission: ['admin.monitoring'] }, 'NavBarAdmin')
|
||||
|
|
|
@ -34,6 +34,9 @@
|
|||
<% end %>
|
||||
<% end %>
|
||||
</ul>
|
||||
<% if _.contains(@data.actions, 'restart_failed_jobs'): %>
|
||||
<button class="btn btn--primary js-restartFailedJobs"><%- @T('Restart failed jobs') %></button>
|
||||
<% end %>
|
||||
</div>
|
||||
|
||||
</div>
|
|
@ -30,6 +30,7 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
|
|||
token_or_permission_check
|
||||
|
||||
issues = []
|
||||
actions = Set.new
|
||||
|
||||
# channel check
|
||||
last_run_tolerance = Time.zone.now - 1.hour
|
||||
|
@ -81,6 +82,11 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
|
|||
issues.push 'scheduler not running'
|
||||
end
|
||||
|
||||
Scheduler.failed_jobs.each do |job|
|
||||
issues.push "Failed to run scheduled job '#{job.name}'. Cause: #{job.error_message}"
|
||||
actions.add(:restart_failed_jobs)
|
||||
end
|
||||
|
||||
token = Setting.get('monitoring_token')
|
||||
|
||||
if issues.empty?
|
||||
|
@ -97,6 +103,7 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
|
|||
healthy: false,
|
||||
message: issues.join(';'),
|
||||
issues: issues,
|
||||
actions: actions,
|
||||
token: token,
|
||||
}
|
||||
render json: result
|
||||
|
@ -173,6 +180,14 @@ curl http://localhost/api/v1/monitoring/status?token=XXX
|
|||
render json: result, status: :created
|
||||
end
|
||||
|
||||
def restart_failed_jobs
|
||||
access_check
|
||||
|
||||
Scheduler.restart_failed_jobs
|
||||
|
||||
render json: {}, status: :ok
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def token_or_permission_check
|
||||
|
|
|
@ -169,9 +169,13 @@ class Scheduler < ApplicationModel
|
|||
end
|
||||
|
||||
def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
|
||||
job.last_run = Time.zone.now
|
||||
job.pid = Thread.current.object_id
|
||||
job.save
|
||||
job.update(
|
||||
last_run: Time.zone.now,
|
||||
pid: Thread.current.object_id,
|
||||
status: 'ok',
|
||||
error_message: '',
|
||||
)
|
||||
|
||||
logger.info "execute #{job.method} (try_count #{try_count})..."
|
||||
eval job.method() # rubocop:disable Lint/Eval
|
||||
rescue => e
|
||||
|
@ -197,7 +201,15 @@ class Scheduler < ApplicationModel
|
|||
if try_run_max > try_count
|
||||
_start_job(job, try_count, try_run_time)
|
||||
else
|
||||
raise "STOP thread for #{job.method} after #{try_count} tries (#{e.inspect})"
|
||||
@@jobs_started[ job.id ] = false
|
||||
error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
|
||||
logger.error error
|
||||
|
||||
job.update(
|
||||
error_message: error,
|
||||
status: 'error',
|
||||
active: false,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -255,4 +267,28 @@ class Scheduler < ApplicationModel
|
|||
|
||||
end
|
||||
|
||||
# This function returns a list of failed jobs
|
||||
#
|
||||
# @example
|
||||
# Scheduler.failed_jobs
|
||||
#
|
||||
# return [Array]
|
||||
def self.failed_jobs
|
||||
where(status: 'error', active: false)
|
||||
end
|
||||
|
||||
# This function restarts failed jobs to retry them
|
||||
#
|
||||
# @example
|
||||
# Scheduler.restart_failed_jobs
|
||||
#
|
||||
# return [true]
|
||||
def self.restart_failed_jobs
|
||||
failed_jobs.each do |job|
|
||||
job.update(active: true)
|
||||
end
|
||||
|
||||
true
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -4,5 +4,6 @@ Zammad::Application.routes.draw do
|
|||
match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get
|
||||
match api_path + '/monitoring/status', to: 'monitoring#status', via: :get
|
||||
match api_path + '/monitoring/token', to: 'monitoring#token', via: :post
|
||||
match api_path + '/monitoring/restart_failed_jobs', to: 'monitoring#restart_failed_jobs', via: :post
|
||||
|
||||
end
|
||||
|
|
|
@ -514,6 +514,8 @@ class CreateBase < ActiveRecord::Migration
|
|||
t.integer :prio, null: false
|
||||
t.string :pid, limit: 250, null: true
|
||||
t.string :note, limit: 250, null: true
|
||||
t.string :error_message, null: true
|
||||
t.string :status, null: true
|
||||
t.boolean :active, null: false, default: false
|
||||
t.integer :updated_by_id, null: false
|
||||
t.integer :created_by_id, null: false
|
||||
|
|
12
db/migrate/20170515000001_scheduler_status.rb
Normal file
12
db/migrate/20170515000001_scheduler_status.rb
Normal file
|
@ -0,0 +1,12 @@
|
|||
class SchedulerStatus < ActiveRecord::Migration
|
||||
def up
|
||||
|
||||
# return if it's a new setup
|
||||
return if !Setting.find_by(name: 'system_init_done')
|
||||
|
||||
change_table :schedulers do |t|
|
||||
t.string :error_message, null: true
|
||||
t.string :status, null: true
|
||||
end
|
||||
end
|
||||
end
|
25
spec/factories/scheduler.rb
Normal file
25
spec/factories/scheduler.rb
Normal file
|
@ -0,0 +1,25 @@
|
|||
FactoryGirl.define do
|
||||
sequence :test_scheduler_name do |n|
|
||||
"Testscheduler#{n}"
|
||||
end
|
||||
end
|
||||
|
||||
FactoryGirl.define do
|
||||
|
||||
factory :scheduler do
|
||||
name { generate(:test_scheduler_name) }
|
||||
last_run { Time.zone.now }
|
||||
pid 1337
|
||||
prio 1
|
||||
status 'ok'
|
||||
active true
|
||||
period { 10.minutes }
|
||||
running false
|
||||
note 'test'
|
||||
updated_by_id 1
|
||||
created_by_id 1
|
||||
created_at 1
|
||||
updated_at 1
|
||||
add_attribute(:method) { 'test' }
|
||||
end
|
||||
end
|
|
@ -26,6 +26,45 @@ RSpec.describe Scheduler do
|
|||
SpecSpace.send(:remove_const, :DelayedJobBackend)
|
||||
end
|
||||
|
||||
describe '.failed_jobs' do
|
||||
|
||||
it 'does list failed jobs' do
|
||||
job = create(:scheduler, status: 'error', active: false)
|
||||
failed_list = described_class.failed_jobs
|
||||
expect(failed_list).to be_present
|
||||
expect(failed_list).to include(job)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
describe '.restart_failed_jobs' do
|
||||
|
||||
it 'does restart failed jobs' do
|
||||
job = create(:scheduler, status: 'error', active: false)
|
||||
described_class.restart_failed_jobs
|
||||
job.reload
|
||||
expect(job.active).to be true
|
||||
end
|
||||
end
|
||||
|
||||
describe '._start_job' do
|
||||
|
||||
it 'sets error status/message for failed jobs' do
|
||||
job = create(:scheduler)
|
||||
described_class._start_job(job)
|
||||
expect(job.status).to eq 'error'
|
||||
expect(job.active).to be false
|
||||
expect(job.error_message).to be_present
|
||||
end
|
||||
|
||||
it 'executes job that is expected to succeed' do
|
||||
expect(Setting).to receive(:reload)
|
||||
job = create(:scheduler, method: 'Setting.reload')
|
||||
described_class._start_job(job)
|
||||
expect(job.status).to eq 'ok'
|
||||
end
|
||||
end
|
||||
|
||||
describe '.cleanup' do
|
||||
|
||||
it 'gets called by .threads' do
|
||||
|
|
|
@ -391,4 +391,10 @@ class MonitoringControllerTest < ActionDispatch::IntegrationTest
|
|||
|
||||
end
|
||||
|
||||
test '09 check restart_failed_jobs' do
|
||||
credentials = ActionController::HttpAuthentication::Basic.encode_credentials('monitoring-admin@example.com', 'adminpw')
|
||||
post '/api/v1/monitoring/restart_failed_jobs', {}, @headers.merge('Authorization' => credentials)
|
||||
assert_response(200)
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue