No longer raise an exception when a scheduler job fails to run after ten tries; record the error on the job instead.

This commit is contained in:
Maarten Raaphorst 2017-05-15 13:50:55 +02:00 committed by Maarten Raaphorst
parent 2e062693ea
commit e40772f7b7
6 changed files with 69 additions and 7 deletions

View file

@ -4,6 +4,7 @@ class Index extends App.ControllerSubContent
events: events:
'click .js-resetToken': 'resetToken' 'click .js-resetToken': 'resetToken'
'click .js-select': 'selectAll' 'click .js-select': 'selectAll'
'click .js-restartDeadJobs': 'restartDeadJobs'
constructor: -> constructor: ->
super super
@ -29,7 +30,10 @@ class Index extends App.ControllerSubContent
) )
render: => render: =>
@html App.view('monitoring')(data: @data) @html App.view('monitoring')(
data: @data
job_restart_count: @job_restart_count
)
resetToken: (e) => resetToken: (e) =>
e.preventDefault() e.preventDefault()
@ -42,4 +46,15 @@ class Index extends App.ControllerSubContent
@load() @load()
) )
# Click handler for `.js-restartDeadJobs`.
# POSTs to the monitoring `restart_dead_jobs` endpoint, stores the
# `job_restart_count` from the response on the controller and re-renders.
restartDeadJobs: (e) =>
  e.preventDefault()

  # Invoked with the parsed response once the request succeeds.
  onRestarted = (data) =>
    @job_restart_count = data.job_restart_count
    @render()

  @ajax(
    id: 'restart_dead_jobs_request'
    type: 'POST'
    url: "#{@apiPath}/monitoring/restart_dead_jobs"
    success: onRestarted
  )
App.Config.set('Monitoring', { prio: 3600, name: 'Monitoring', parent: '#system', target: '#system/monitoring', controller: Index, permission: ['admin.monitoring'] }, 'NavBarAdmin') App.Config.set('Monitoring', { prio: 3600, name: 'Monitoring', parent: '#system', target: '#system/monitoring', controller: Index, permission: ['admin.monitoring'] }, 'NavBarAdmin')

View file

@ -34,6 +34,15 @@
<% end %> <% end %>
<% end %> <% end %>
</ul> </ul>
<% if !_.isEmpty(@data.issues): %>
<button class="btn btn--primary js-restartDeadJobs"><%- @T('Restart Dead Jobs') %></button>
<% if !_.isUndefined(@job_restart_count): %>
<p>
<%- @T('Detected %s dead job(s) available for restart', @job_restart_count) %>
<%- ', restarting...' if @job_restart_count > 0 %>
</p>
<% end %>
<% end %>
</div> </div>
</div> </div>

View file

@ -81,6 +81,10 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
issues.push 'scheduler not running' issues.push 'scheduler not running'
end end
Scheduler.where(status: 'error').each { |scheduler|
issues.push "Failed to run scheduled job \'#{scheduler.name}\'. Cause: #{scheduler.error_message}"
}
token = Setting.get('monitoring_token') token = Setting.get('monitoring_token')
if issues.empty? if issues.empty?
@ -173,6 +177,22 @@ curl http://localhost/api/v1/monitoring/status?token=XXX
render json: result, status: :created render json: result, status: :created
end end
# Re-activates scheduler jobs that are flagged as failed.
#
# Selects every Scheduler record with status 'error' that is currently
# inactive, sets it active again and saves it. Calls `access_check` before
# touching any record (presumably an authorization gate - confirm against
# its definition).
#
# Renders JSON of the form { job_restart_count: Integer }, where the count
# covers only the jobs whose save succeeded.
def restart_dead_jobs
  access_check

  count = 0
  Scheduler.where(status: 'error', active: false).each do |scheduler|
    scheduler.active = true
    # `save` returns false when the record could not be persisted; such a
    # job has not been re-activated and must not be reported as restarted.
    count += 1 if scheduler.save
  end

  render json: { job_restart_count: count }
end
private private
def token_or_permission_check def token_or_permission_check

View file

@ -171,6 +171,8 @@ class Scheduler < ApplicationModel
def self._start_job(job, try_count = 0, try_run_time = Time.zone.now) def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
job.last_run = Time.zone.now job.last_run = Time.zone.now
job.pid = Thread.current.object_id job.pid = Thread.current.object_id
job.status = 'ok'
job.error_message = ''
job.save job.save
logger.info "execute #{job.method} (try_count #{try_count})..." logger.info "execute #{job.method} (try_count #{try_count})..."
eval job.method() # rubocop:disable Lint/Eval eval job.method() # rubocop:disable Lint/Eval
@ -197,7 +199,14 @@ class Scheduler < ApplicationModel
if try_run_max > try_count if try_run_max > try_count
_start_job(job, try_count, try_run_time) _start_job(job, try_count, try_run_time)
else else
raise "STOP thread for #{job.method} after #{try_count} tries (#{e.inspect})" @@jobs_started[ job.id ] = false
error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
logger.error error
job.error_message = error
job.status = 'error'
job.active = false
job.save
end end
end end

View file

@ -4,5 +4,6 @@ Zammad::Application.routes.draw do
match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get
match api_path + '/monitoring/status', to: 'monitoring#status', via: :get match api_path + '/monitoring/status', to: 'monitoring#status', via: :get
match api_path + '/monitoring/token', to: 'monitoring#token', via: :post match api_path + '/monitoring/token', to: 'monitoring#token', via: :post
match api_path + '/monitoring/restart_dead_jobs', to: 'monitoring#restart_dead_jobs', via: :post
end end

View file

@ -0,0 +1,8 @@
# Adds failure-tracking columns to the `schedulers` table.
class SchedulerStatus < ActiveRecord::Migration
  # Adds the string columns `error_message` and `status`.
  def up
    change_table :schedulers do |t|
      t.string :error_message
      t.string :status
    end
  end

  # Drops the columns added by #up so the migration can be rolled back.
  def down
    change_table :schedulers do |t|
      t.remove :error_message, :status
    end
  end
end