Merge branch 'private-pull-request-1118' into develop
This commit is contained in:
commit
0ef0066c31
10 changed files with 160 additions and 10 deletions
|
@ -4,6 +4,7 @@ class Index extends App.ControllerSubContent
|
||||||
events:
|
events:
|
||||||
'click .js-resetToken': 'resetToken'
|
'click .js-resetToken': 'resetToken'
|
||||||
'click .js-select': 'selectAll'
|
'click .js-select': 'selectAll'
|
||||||
|
'click .js-restartFailedJobs': 'restartFailedJobs'
|
||||||
|
|
||||||
constructor: ->
|
constructor: ->
|
||||||
super
|
super
|
||||||
|
@ -42,4 +43,14 @@ class Index extends App.ControllerSubContent
|
||||||
@load()
|
@load()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
restartFailedJobs: (e) =>
|
||||||
|
e.preventDefault()
|
||||||
|
@ajax(
|
||||||
|
id: 'restart_failed_jobs_request'
|
||||||
|
type: 'POST'
|
||||||
|
url: "#{@apiPath}/monitoring/restart_failed_jobs"
|
||||||
|
success: (data) =>
|
||||||
|
@load()
|
||||||
|
)
|
||||||
|
|
||||||
App.Config.set('Monitoring', { prio: 3600, name: 'Monitoring', parent: '#system', target: '#system/monitoring', controller: Index, permission: ['admin.monitoring'] }, 'NavBarAdmin')
|
App.Config.set('Monitoring', { prio: 3600, name: 'Monitoring', parent: '#system', target: '#system/monitoring', controller: Index, permission: ['admin.monitoring'] }, 'NavBarAdmin')
|
||||||
|
|
|
@ -34,6 +34,9 @@
|
||||||
<% end %>
|
<% end %>
|
||||||
<% end %>
|
<% end %>
|
||||||
</ul>
|
</ul>
|
||||||
|
<% if _.contains(@data.actions, 'restart_failed_jobs'): %>
|
||||||
|
<button class="btn btn--primary js-restartFailedJobs"><%- @T('Restart failed jobs') %></button>
|
||||||
|
<% end %>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
</div>
|
</div>
|
|
@ -30,6 +30,7 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
|
||||||
token_or_permission_check
|
token_or_permission_check
|
||||||
|
|
||||||
issues = []
|
issues = []
|
||||||
|
actions = Set.new
|
||||||
|
|
||||||
# channel check
|
# channel check
|
||||||
last_run_tolerance = Time.zone.now - 1.hour
|
last_run_tolerance = Time.zone.now - 1.hour
|
||||||
|
@ -81,6 +82,11 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
|
||||||
issues.push 'scheduler not running'
|
issues.push 'scheduler not running'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Scheduler.failed_jobs.each do |job|
|
||||||
|
issues.push "Failed to run scheduled job '#{job.name}'. Cause: #{job.error_message}"
|
||||||
|
actions.add(:restart_failed_jobs)
|
||||||
|
end
|
||||||
|
|
||||||
token = Setting.get('monitoring_token')
|
token = Setting.get('monitoring_token')
|
||||||
|
|
||||||
if issues.empty?
|
if issues.empty?
|
||||||
|
@ -97,6 +103,7 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
|
||||||
healthy: false,
|
healthy: false,
|
||||||
message: issues.join(';'),
|
message: issues.join(';'),
|
||||||
issues: issues,
|
issues: issues,
|
||||||
|
actions: actions,
|
||||||
token: token,
|
token: token,
|
||||||
}
|
}
|
||||||
render json: result
|
render json: result
|
||||||
|
@ -173,6 +180,14 @@ curl http://localhost/api/v1/monitoring/status?token=XXX
|
||||||
render json: result, status: :created
|
render json: result, status: :created
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def restart_failed_jobs
|
||||||
|
access_check
|
||||||
|
|
||||||
|
Scheduler.restart_failed_jobs
|
||||||
|
|
||||||
|
render json: {}, status: :ok
|
||||||
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def token_or_permission_check
|
def token_or_permission_check
|
||||||
|
|
|
@ -169,9 +169,13 @@ class Scheduler < ApplicationModel
|
||||||
end
|
end
|
||||||
|
|
||||||
def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
|
def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
|
||||||
job.last_run = Time.zone.now
|
job.update(
|
||||||
job.pid = Thread.current.object_id
|
last_run: Time.zone.now,
|
||||||
job.save
|
pid: Thread.current.object_id,
|
||||||
|
status: 'ok',
|
||||||
|
error_message: '',
|
||||||
|
)
|
||||||
|
|
||||||
logger.info "execute #{job.method} (try_count #{try_count})..."
|
logger.info "execute #{job.method} (try_count #{try_count})..."
|
||||||
eval job.method() # rubocop:disable Lint/Eval
|
eval job.method() # rubocop:disable Lint/Eval
|
||||||
rescue => e
|
rescue => e
|
||||||
|
@ -197,7 +201,15 @@ class Scheduler < ApplicationModel
|
||||||
if try_run_max > try_count
|
if try_run_max > try_count
|
||||||
_start_job(job, try_count, try_run_time)
|
_start_job(job, try_count, try_run_time)
|
||||||
else
|
else
|
||||||
raise "STOP thread for #{job.method} after #{try_count} tries (#{e.inspect})"
|
@@jobs_started[ job.id ] = false
|
||||||
|
error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
|
||||||
|
logger.error error
|
||||||
|
|
||||||
|
job.update(
|
||||||
|
error_message: error,
|
||||||
|
status: 'error',
|
||||||
|
active: false,
|
||||||
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -255,4 +267,28 @@ class Scheduler < ApplicationModel
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# This function returns a list of failed jobs
|
||||||
|
#
|
||||||
|
# @example
|
||||||
|
# Scheduler.failed_jobs
|
||||||
|
#
|
||||||
|
# return [Array]
|
||||||
|
def self.failed_jobs
|
||||||
|
where(status: 'error', active: false)
|
||||||
|
end
|
||||||
|
|
||||||
|
# This function restarts failed jobs to retry them
|
||||||
|
#
|
||||||
|
# @example
|
||||||
|
# Scheduler.restart_failed_jobs
|
||||||
|
#
|
||||||
|
# return [true]
|
||||||
|
def self.restart_failed_jobs
|
||||||
|
failed_jobs.each do |job|
|
||||||
|
job.update(active: true)
|
||||||
|
end
|
||||||
|
|
||||||
|
true
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -4,5 +4,6 @@ Zammad::Application.routes.draw do
|
||||||
match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get
|
match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get
|
||||||
match api_path + '/monitoring/status', to: 'monitoring#status', via: :get
|
match api_path + '/monitoring/status', to: 'monitoring#status', via: :get
|
||||||
match api_path + '/monitoring/token', to: 'monitoring#token', via: :post
|
match api_path + '/monitoring/token', to: 'monitoring#token', via: :post
|
||||||
|
match api_path + '/monitoring/restart_failed_jobs', to: 'monitoring#restart_failed_jobs', via: :post
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -514,6 +514,8 @@ class CreateBase < ActiveRecord::Migration
|
||||||
t.integer :prio, null: false
|
t.integer :prio, null: false
|
||||||
t.string :pid, limit: 250, null: true
|
t.string :pid, limit: 250, null: true
|
||||||
t.string :note, limit: 250, null: true
|
t.string :note, limit: 250, null: true
|
||||||
|
t.string :error_message, null: true
|
||||||
|
t.string :status, null: true
|
||||||
t.boolean :active, null: false, default: false
|
t.boolean :active, null: false, default: false
|
||||||
t.integer :updated_by_id, null: false
|
t.integer :updated_by_id, null: false
|
||||||
t.integer :created_by_id, null: false
|
t.integer :created_by_id, null: false
|
||||||
|
|
12
db/migrate/20170515000001_scheduler_status.rb
Normal file
12
db/migrate/20170515000001_scheduler_status.rb
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
class SchedulerStatus < ActiveRecord::Migration
|
||||||
|
def up
|
||||||
|
|
||||||
|
# return if it's a new setup
|
||||||
|
return if !Setting.find_by(name: 'system_init_done')
|
||||||
|
|
||||||
|
change_table :schedulers do |t|
|
||||||
|
t.string :error_message, null: true
|
||||||
|
t.string :status, null: true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
25
spec/factories/scheduler.rb
Normal file
25
spec/factories/scheduler.rb
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
FactoryGirl.define do
|
||||||
|
sequence :test_scheduler_name do |n|
|
||||||
|
"Testscheduler#{n}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
FactoryGirl.define do
|
||||||
|
|
||||||
|
factory :scheduler do
|
||||||
|
name { generate(:test_scheduler_name) }
|
||||||
|
last_run { Time.zone.now }
|
||||||
|
pid 1337
|
||||||
|
prio 1
|
||||||
|
status 'ok'
|
||||||
|
active true
|
||||||
|
period { 10.minutes }
|
||||||
|
running false
|
||||||
|
note 'test'
|
||||||
|
updated_by_id 1
|
||||||
|
created_by_id 1
|
||||||
|
created_at 1
|
||||||
|
updated_at 1
|
||||||
|
add_attribute(:method) { 'test' }
|
||||||
|
end
|
||||||
|
end
|
|
@ -26,6 +26,45 @@ RSpec.describe Scheduler do
|
||||||
SpecSpace.send(:remove_const, :DelayedJobBackend)
|
SpecSpace.send(:remove_const, :DelayedJobBackend)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe '.failed_jobs' do
|
||||||
|
|
||||||
|
it 'does list failed jobs' do
|
||||||
|
job = create(:scheduler, status: 'error', active: false)
|
||||||
|
failed_list = described_class.failed_jobs
|
||||||
|
expect(failed_list).to be_present
|
||||||
|
expect(failed_list).to include(job)
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
describe '.restart_failed_jobs' do
|
||||||
|
|
||||||
|
it 'does restart failed jobs' do
|
||||||
|
job = create(:scheduler, status: 'error', active: false)
|
||||||
|
described_class.restart_failed_jobs
|
||||||
|
job.reload
|
||||||
|
expect(job.active).to be true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
describe '._start_job' do
|
||||||
|
|
||||||
|
it 'sets error status/message for failed jobs' do
|
||||||
|
job = create(:scheduler)
|
||||||
|
described_class._start_job(job)
|
||||||
|
expect(job.status).to eq 'error'
|
||||||
|
expect(job.active).to be false
|
||||||
|
expect(job.error_message).to be_present
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'executes job that is expected to succeed' do
|
||||||
|
expect(Setting).to receive(:reload)
|
||||||
|
job = create(:scheduler, method: 'Setting.reload')
|
||||||
|
described_class._start_job(job)
|
||||||
|
expect(job.status).to eq 'ok'
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe '.cleanup' do
|
describe '.cleanup' do
|
||||||
|
|
||||||
it 'gets called by .threads' do
|
it 'gets called by .threads' do
|
||||||
|
|
|
@ -391,4 +391,10 @@ class MonitoringControllerTest < ActionDispatch::IntegrationTest
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
test '09 check restart_failed_jobs' do
|
||||||
|
credentials = ActionController::HttpAuthentication::Basic.encode_credentials('monitoring-admin@example.com', 'adminpw')
|
||||||
|
post '/api/v1/monitoring/restart_failed_jobs', {}, @headers.merge('Authorization' => credentials)
|
||||||
|
assert_response(200)
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue