Merge branch 'private-pull-request-1118' into develop

This commit is contained in:
Rolf Schmidt 2017-09-08 09:33:06 +02:00
commit 0ef0066c31
10 changed files with 160 additions and 10 deletions

View file

@ -4,6 +4,7 @@ class Index extends App.ControllerSubContent
events:
'click .js-resetToken': 'resetToken'
'click .js-select': 'selectAll'
'click .js-restartFailedJobs': 'restartFailedJobs'
constructor: ->
super
@ -42,4 +43,14 @@ class Index extends App.ControllerSubContent
@load()
)
restartFailedJobs: (e) =>
e.preventDefault()
@ajax(
id: 'restart_failed_jobs_request'
type: 'POST'
url: "#{@apiPath}/monitoring/restart_failed_jobs"
success: (data) =>
@load()
)
App.Config.set('Monitoring', { prio: 3600, name: 'Monitoring', parent: '#system', target: '#system/monitoring', controller: Index, permission: ['admin.monitoring'] }, 'NavBarAdmin')

View file

@ -34,6 +34,9 @@
<% end %>
<% end %>
</ul>
<% if _.contains(@data.actions, 'restart_failed_jobs'): %>
<button class="btn btn--primary js-restartFailedJobs"><%- @T('Restart failed jobs') %></button>
<% end %>
</div>
</div>
</div>

View file

@ -30,6 +30,7 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
token_or_permission_check
issues = []
actions = Set.new
# channel check
last_run_tolerance = Time.zone.now - 1.hour
@ -81,6 +82,11 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
issues.push 'scheduler not running'
end
Scheduler.failed_jobs.each do |job|
issues.push "Failed to run scheduled job '#{job.name}'. Cause: #{job.error_message}"
actions.add(:restart_failed_jobs)
end
token = Setting.get('monitoring_token')
if issues.empty?
@ -96,8 +102,9 @@ curl http://localhost/api/v1/monitoring/health_check?token=XXX
result = {
healthy: false,
message: issues.join(';'),
issues: issues,
token: token,
issues: issues,
actions: actions,
token: token,
}
render json: result
end
@ -173,6 +180,14 @@ curl http://localhost/api/v1/monitoring/status?token=XXX
render json: result, status: :created
end
def restart_failed_jobs
access_check
Scheduler.restart_failed_jobs
render json: {}, status: :ok
end
private
def token_or_permission_check

View file

@ -169,9 +169,13 @@ class Scheduler < ApplicationModel
end
def self._start_job(job, try_count = 0, try_run_time = Time.zone.now)
job.last_run = Time.zone.now
job.pid = Thread.current.object_id
job.save
job.update(
last_run: Time.zone.now,
pid: Thread.current.object_id,
status: 'ok',
error_message: '',
)
logger.info "execute #{job.method} (try_count #{try_count})..."
eval job.method() # rubocop:disable Lint/Eval
rescue => e
@ -197,7 +201,15 @@ class Scheduler < ApplicationModel
if try_run_max > try_count
_start_job(job, try_count, try_run_time)
else
raise "STOP thread for #{job.method} after #{try_count} tries (#{e.inspect})"
@@jobs_started[ job.id ] = false
error = "Failed to run #{job.method} after #{try_count} tries #{e.inspect}"
logger.error error
job.update(
error_message: error,
status: 'error',
active: false,
)
end
end
@ -255,4 +267,28 @@ class Scheduler < ApplicationModel
end
# This function returns a list of failed jobs
#
# @example
# Scheduler.failed_jobs
#
# return [Array]
def self.failed_jobs
where(status: 'error', active: false)
end
# This function restarts failed jobs to retry them
#
# @example
# Scheduler.restart_failed_jobs
#
# return [true]
def self.restart_failed_jobs
failed_jobs.each do |job|
job.update(active: true)
end
true
end
end

View file

@ -1,8 +1,9 @@
Zammad::Application.routes.draw do
api_path = Rails.configuration.api_path
match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get
match api_path + '/monitoring/status', to: 'monitoring#status', via: :get
match api_path + '/monitoring/token', to: 'monitoring#token', via: :post
match api_path + '/monitoring/health_check', to: 'monitoring#health_check', via: :get
match api_path + '/monitoring/status', to: 'monitoring#status', via: :get
match api_path + '/monitoring/token', to: 'monitoring#token', via: :post
match api_path + '/monitoring/restart_failed_jobs', to: 'monitoring#restart_failed_jobs', via: :post
end

View file

@ -514,6 +514,8 @@ class CreateBase < ActiveRecord::Migration
t.integer :prio, null: false
t.string :pid, limit: 250, null: true
t.string :note, limit: 250, null: true
t.string :error_message, null: true
t.string :status, null: true
t.boolean :active, null: false, default: false
t.integer :updated_by_id, null: false
t.integer :created_by_id, null: false

View file

@ -0,0 +1,12 @@
class SchedulerStatus < ActiveRecord::Migration
def up
# return if it's a new setup
return if !Setting.find_by(name: 'system_init_done')
change_table :schedulers do |t|
t.string :error_message, null: true
t.string :status, null: true
end
end
end

View file

@ -0,0 +1,25 @@
FactoryGirl.define do
sequence :test_scheduler_name do |n|
"Testscheduler#{n}"
end
end
FactoryGirl.define do
factory :scheduler do
name { generate(:test_scheduler_name) }
last_run { Time.zone.now }
pid 1337
prio 1
status 'ok'
active true
period { 10.minutes }
running false
note 'test'
updated_by_id 1
created_by_id 1
created_at 1
updated_at 1
add_attribute(:method) { 'test' }
end
end

View file

@ -26,6 +26,45 @@ RSpec.describe Scheduler do
SpecSpace.send(:remove_const, :DelayedJobBackend)
end
describe '.failed_jobs' do
it 'does list failed jobs' do
job = create(:scheduler, status: 'error', active: false)
failed_list = described_class.failed_jobs
expect(failed_list).to be_present
expect(failed_list).to include(job)
end
end
describe '.restart_failed_jobs' do
it 'does restart failed jobs' do
job = create(:scheduler, status: 'error', active: false)
described_class.restart_failed_jobs
job.reload
expect(job.active).to be true
end
end
describe '._start_job' do
it 'sets error status/message for failed jobs' do
job = create(:scheduler)
described_class._start_job(job)
expect(job.status).to eq 'error'
expect(job.active).to be false
expect(job.error_message).to be_present
end
it 'executes job that is expected to succeed' do
expect(Setting).to receive(:reload)
job = create(:scheduler, method: 'Setting.reload')
described_class._start_job(job)
expect(job.status).to eq 'ok'
end
end
describe '.cleanup' do
it 'gets called by .threads' do

View file

@ -391,4 +391,10 @@ class MonitoringControllerTest < ActionDispatch::IntegrationTest
end
test '09 check restart_failed_jobs' do
credentials = ActionController::HttpAuthentication::Basic.encode_credentials('monitoring-admin@example.com', 'adminpw')
post '/api/v1/monitoring/restart_failed_jobs', {}, @headers.merge('Authorization' => credentials)
assert_response(200)
end
end