From 7a7d9471c9d1bc14da10d837dd323de9009bb308 Mon Sep 17 00:00:00 2001 From: Rolf Schmidt Date: Wed, 7 Oct 2015 20:42:29 +0200 Subject: [PATCH 1/2] Added proving backend to detect signatures by diff module 'diffy' to reduce redundancy in article views. --- Gemfile | 2 + Gemfile.lock | 2 + lib/signature_detection.rb | 122 ++++++++++++++++++ .../email_signature_detection/client_a_1.txt | 2 + .../email_signature_detection/client_a_2.txt | 14 ++ test/unit/email_signatur_detection_test.rb | 101 ++++++++------- 6 files changed, 198 insertions(+), 45 deletions(-) create mode 100644 lib/signature_detection.rb diff --git a/Gemfile b/Gemfile index 999424f09..41621e07f 100644 --- a/Gemfile +++ b/Gemfile @@ -62,6 +62,8 @@ gem 'browser' gem 'eventmachine' gem 'em-websocket' +gem 'diffy' + # Gems used only for develop/test and not required # in production environments by default. group :development, :test do diff --git a/Gemfile.lock b/Gemfile.lock index 51cdaa3eb..a579b5567 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -76,6 +76,7 @@ GEM delayed_job_active_record (4.1.0) activerecord (>= 3.0, < 5) delayed_job (>= 3.0, < 5) + diffy (3.0.7) docile (1.1.5) eco (1.0.0) coffee-script @@ -302,6 +303,7 @@ DEPENDENCIES coffee-script-source daemons delayed_job_active_record + diffy eco em-websocket eventmachine diff --git a/lib/signature_detection.rb b/lib/signature_detection.rb new file mode 100644 index 000000000..81f55e665 --- /dev/null +++ b/lib/signature_detection.rb @@ -0,0 +1,122 @@ +module SignatureDetection + +=begin + +try to detect the signature in list of articles for example + + signature = SignatureDetection.find_signature(string_list) + +returns + + signature = '...signature possible match...' + +=end + + def self.find_signature(string_list) + + # hash with possible signature and count of matches in string list + possible_signatures = {} + + # loop all strings in array + #for main_string_index in 0 .. string_list.length - 1 + ( 0..string_list.length - 1 ).each {|main_string_index| + break if main_string_index + 1 > string_list.length - 1 + + # loop all all strings in array except of the previous index + ( main_string_index + 1..string_list.length - 1 ).each {|second_string_index| + + # get content of string 1 + string1_content = string_list[main_string_index] + + # get content of string 2 + string2_content = string_list[second_string_index] + + # diff strings + diff_result = Diffy::Diff.new(string1_content, string2_content) + + # split diff result by new line + diff_result_array = diff_result.to_s.split("\n") + + # define start index for blocks with no difference + match_block = nil + + # loop of lines of the diff result + ( 0..diff_result_array.length - 1 ).each {|diff_string_index| + + # if no block with difference is defined then we try to find a string block without a difference + if !match_block + match_block = diff_string_index + end + + # get line of diff result with current loop inde + line = diff_result_array[diff_string_index] + + # check if the line starts with + # + = new content incoming + # - = removed content + # \ = end of file + # or if the current line is the last line of the diff result + next if line !~ /^(\\|\+|\-)/i && diff_string_index != diff_result_array.length - 1 + + # if the count of the lines without any difference is higher than 5 lines + if diff_string_index - match_block > 5 + + # define the block size without any difference + # except "-" because in this case 1 line is removed to much + match_block_total = diff_string_index + (line =~ /^(\\|\+)/i ? -1 : 0) + + # get string of possible signature + match_content = '' + ( match_block..match_block_total ).each {|match_block_index| + match_content += "#{diff_result_array[match_block_index][1..-1]}\n" + } + + # count the match of the signature in string list to rank + # the signature + possible_signatures[match_content] ||= 0 + possible_signatures[match_content] += 1 + + end + + match_block = nil + } + } + } + + # loop all possible signature by rating and return highest rating + possible_signatures.sort { |a1, a2| a2[1].to_i <=> a1[1].to_i }.map do |content, _score| + return content.chomp + end + + nil + end + +=begin + +this function will search for a signature string in a string (e.g. article) and return the line number of the signature start + + signature_line = SignatureDetection.find_signature_line(signature, string) + +returns + + signature_line = 123 + + or + + signature_line = nil + +=end + + def self.find_signature_line(signature, string) + + # try to find the char position of the signature + search_position = string.index(signature) + + return if search_position.nil? + + # count new lines up to signature + search_newlines = string[0..search_position].split("\n").length + 1 + + search_newlines + end +end diff --git a/test/fixtures/email_signature_detection/client_a_1.txt b/test/fixtures/email_signature_detection/client_a_1.txt index 41beee0b8..f587cdcb2 100644 --- a/test/fixtures/email_signature_detection/client_a_1.txt +++ b/test/fixtures/email_signature_detection/client_a_1.txt @@ -1,5 +1,7 @@ Hi, +123 + uns liegt die fachliche Anforderung vor, dass eine Agent-AddNote-Benachrichtigung für die beiden o. g. TicketHistory-Typen versendet werden soll. Das Modul Custom/Kernel/System/Ticket/Article.pm sieht diese Benachrichtigungen nach meinem Verständnis bisher nicht vor. Dafür wäre doch eine Codeerweiterung erforderlich, oder? diff --git a/test/fixtures/email_signature_detection/client_a_2.txt b/test/fixtures/email_signature_detection/client_a_2.txt index 7812c7d82..95795147c 100644 --- a/test/fixtures/email_signature_detection/client_a_2.txt +++ b/test/fixtures/email_signature_detection/client_a_2.txt @@ -1,7 +1,21 @@ Hi Martin, +123 + ich benötige von Dir eine Aufwandschätzung für ein Upgrade von x.1 auf x.5 (wir hatten schon mal diesbezüglich informiert, jetzt wollen die Entscheider Zahlen sehen). +asd +fa +sdf +a +sdf +asd +f +as +df +asd +f + Vielen Dank! Mit freundlichen Grüßen diff --git a/test/unit/email_signatur_detection_test.rb b/test/unit/email_signatur_detection_test.rb index 46bd45953..31eb20f45 100644 --- a/test/unit/email_signatur_detection_test.rb +++ b/test/unit/email_signatur_detection_test.rb @@ -6,61 +6,72 @@ class EmailSignaturDetectionTest < ActiveSupport::TestCase test 'test case I - sender a' do # fixtures of sender a - fixture_files = [ - 'email_signature_detection/client_a_1.txt', - 'email_signature_detection/client_a_2.txt', - 'email_signature_detection/client_a_3.txt', - ] - - # detect signature - match_structure = '' - - # tests - # 'email_signature_detection/client_a_1.txt' - result_should = { - line: 9 + fixture_files = { + 'email_signature_detection/client_a_1.txt' => { line: 10 }, + 'email_signature_detection/client_a_2.txt' => { line: 20 }, + 'email_signature_detection/client_a_3.txt' => { line: 6 }, } - # 'email_signature_detection/client_a_2.txt' - result_should = { - line: 7 - } + fixture_files_string_list = [] - # 'email_signature_detection/client_a_3.txt' - result_should = { - line: 7 - } - assert(true) + fixture_files.keys.each do |filepath| + + file_content = '' + + file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r') + while (line = file.gets) + file_content += line + end + file.close + + fixture_files[filepath][:content] = file_content + fixture_files_string_list.push(file_content) + end + + signature = SignatureDetection.find_signature(fixture_files_string_list) + expected_signature = "\nMit freundlichen Grüßen\n\nBob Smith\nBerechtigungen und dez. Department\n________________________________\n\nMusik AG\nBerechtigungen und dez. Department (ITPBM)\nKastanien 2\n12345 Hornhausen\nTel.: +49 911 6760\nFax: +49 911 85 6760\nMobil: +49 173 911\nE-Mail: Bob.Smith@music.com\nhttp://www.music.com\n\nMusik AG | Kastanien 2 | 12345 Hornhausen\nSitz der AG: Hornhausen, HRB xxxxx | USt.-ID: DE 111222333444\nVorstand: Marc Smith, Weber Huber\nAufsichtsrat: Max Mix (Vors.)" + assert_equal(expected_signature, signature) + + fixture_files.keys.each do |filepath| + expected_signature_position = fixture_files[filepath][:line] + + assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content])) + end end test 'test case II - sender b' do - # fixtures of sender a - fixture_files = [ - 'email_signature_detection/client_b_1.txt', - 'email_signature_detection/client_b_2.txt', - 'email_signature_detection/client_b_3.txt', - ] - - # detect signature - match_structure = '' - - # tests - # 'email_signature_detection/client_b_1.txt' - result_should = { - line: 27 + fixture_files = { + 'email_signature_detection/client_b_1.txt' => { line: 26 }, + 'email_signature_detection/client_b_2.txt' => { line: 4 }, + 'email_signature_detection/client_b_3.txt' => { line: 6 }, } - # 'email_signature_detection/client_b_2.txt' - result_should = { - line: 5 - } + fixture_files_string_list = [] - # 'email_signature_detection/client_b_3.txt' - result_should = { - line: 7 - } - assert(true) + fixture_files.keys.each do |filepath| + + file_content = '' + + file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r') + while (line = file.gets) + file_content += line + end + file.close + + fixture_files[filepath][:content] = file_content + fixture_files_string_list.push(file_content) + end + + signature = SignatureDetection.find_signature(fixture_files_string_list) + expected_signature = "\nFreundliche Grüße\n\nGünter Lässig\nLokale Daten\n\nMusic GmbH\nBaustraße 123, 12345 Max City\nTelefon 0123 5432114\nTelefax 0123 5432139\nE-Mail Günter.Lässig@example.com\n\nExample. Zusammen für eine bessere Welt.\n[cid:image001.png@01CE92A6.EC495B60]\n\n[cid:image002.png@01CE92A6.EC495B60]\n\n[cid:image003.png@01CE92A6.EC495B60]\n\n[cid:image004.png@01CE92A6.EC495B60]\n\n[cid:image005.jpg@01CE92A6.EC495B60]\n\n[cid:image006.png@01CE92A6.EC495B60]\n\nSitz der Gesellschaft: Max City, Amtsgericht Max City HRB Nr. 1234\nGeschäftsführer: Bob Smith\nVorsitzender des Aufsichtsrats: Alex Marx" + assert_equal(expected_signature, signature) + + fixture_files.keys.each do |filepath| + expected_signature_position = fixture_files[filepath][:line] + + assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content])) + end end end From 4029987caa7e5153df3109558c6a7390c6a0567b Mon Sep 17 00:00:00 2001 From: Martin Edenhofer Date: Thu, 8 Oct 2015 08:33:01 +0200 Subject: [PATCH 2/2] Detect signature for users. Added signature lookup for new customer articles. --- .../ticket_zoom/article_view.coffee | 18 +++++- .../views/ticket_zoom/article_view.jst.eco | 2 +- .../article/email_signature_detection.rb | 29 ++++++++++ config/application.rb | 1 + lib/signature_detection.rb | 56 +++++++++++++++++-- 5 files changed, 99 insertions(+), 7 deletions(-) create mode 100644 app/models/observer/ticket/article/email_signature_detection.rb diff --git a/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee b/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee index 3565dcd45..0a161c81c 100644 --- a/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee +++ b/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee @@ -108,11 +108,25 @@ class ArticleViewItem extends App.Controller return # prepare html body + signatureDetected = false if @article.content_type is 'text/html' @article['html'] = @article.body else - @article['html'] = App.Utils.textCleanup( @article.body ) - @article['html'] = App.Utils.text2html( @article.body ) + + # check if signature got detected in backend + body = @article.body + if @article.preferences && @article.preferences.signature_detection + signatureDetected = '########SIGNATURE########' + body = body.split("\n") + body.splice(@article.preferences.signature_detection, 0, signatureDetected) + body = body.join("\n") + body = App.Utils.textCleanup(body) + @article['html'] = App.Utils.text2html(body) + + if signatureDetected + @article['html'] = @article['html'].replace(signatureDetected, '') + else + @article['html'] = App.Utils.signatureIdentify( @article['html'] ) @html App.view('ticket_zoom/article_view')( ticket: @ticket diff --git a/app/assets/javascripts/app/views/ticket_zoom/article_view.jst.eco b/app/assets/javascripts/app/views/ticket_zoom/article_view.jst.eco index 80bf73eb6..714ac349f 100644 --- a/app/assets/javascripts/app/views/ticket_zoom/article_view.jst.eco +++ b/app/assets/javascripts/app/views/ticket_zoom/article_view.jst.eco @@ -41,7 +41,7 @@
- <%- App.Utils.signatureIdentify( @article.html ) %> + <%- @article.html %>
<%- @T('See more') %>
diff --git a/app/models/observer/ticket/article/email_signature_detection.rb b/app/models/observer/ticket/article/email_signature_detection.rb new file mode 100644 index 000000000..595da2296 --- /dev/null +++ b/app/models/observer/ticket/article/email_signature_detection.rb @@ -0,0 +1,29 @@ +# Copyright (C) 2012-2014 Zammad Foundation, http://zammad-foundation.org/ + +class Observer::Ticket::Article::EmailSignatureDetection < ActiveRecord::Observer + observe 'ticket::_article' + + def before_create(record) + + # return if we run import mode + return if Setting.get('import_mode') + + # if sender is not customer, do not change anything + sender = Ticket::Article::Sender.lookup( id: record.sender_id ) + return if !sender + return if sender['name'] != 'Customer' + + # set email attributes + type = Ticket::Article::Type.lookup( id: record.type_id ) + return if type['name'] != 'email' + + # user + user = User.lookup(id: record.created_by_id) + return if !user + return if !user.preferences + return if !user.preferences[:signature_detection] + + record.preferences[:signature_detection] = SignatureDetection.find_signature_line(user.preferences[:signature_detection], record.body) + + end +end diff --git a/config/application.rb b/config/application.rb index 7f77e647f..4417a999c 100644 --- a/config/application.rb +++ b/config/application.rb @@ -30,6 +30,7 @@ module Zammad 'observer::_ticket::_article::_communicate_email', 'observer::_ticket::_article::_communicate_facebook', 'observer::_ticket::_article::_communicate_twitter', + 'observer::_ticket::_article::_email_signature_detection', 'observer::_ticket::_notification', 'observer::_ticket::_reset_new_state', 'observer::_ticket::_escalation_calculation', diff --git a/lib/signature_detection.rb b/lib/signature_detection.rb index 81f55e665..435d21066 100644 --- a/lib/signature_detection.rb +++ b/lib/signature_detection.rb @@ -18,7 +18,6 @@ returns possible_signatures = {} # loop all strings in array - #for main_string_index in 0 .. string_list.length - 1 ( 0..string_list.length - 1 ).each {|main_string_index| break if main_string_index + 1 > string_list.length - 1 @@ -76,6 +75,7 @@ returns possible_signatures[match_content] ||= 0 possible_signatures[match_content] += 1 + break end match_block = nil @@ -115,8 +115,56 @@ returns return if search_position.nil? # count new lines up to signature - search_newlines = string[0..search_position].split("\n").length + 1 - - search_newlines + string[0..search_position].split("\n").length + 1 end + +=begin + +this function will search for a signature string in all articles of a given user_id + + signature = SignatureDetection.by_user_id(user_id) + +returns + + signature = '...signature possible match...' + +=end + + def self.by_user_id(user_id) + + article_type = Ticket::Article::Type.lookup(name: 'email') + article_bodies = [] + tickets = Ticket.where(created_by_id: user_id, create_article_type_id: article_type.id).limit(10).order(id: :desc) + tickets.each {|ticket| + article = ticket.articles.first + article_bodies.push article.body + } + find_signature( article_bodies ) + end + +=begin + +rebuild signature for each user + + SignatureDetection.rebuild_all + +returns + + true/false + +=end + + def self.rebuild_all + + User.select('id').where(active: true).each {|local_user| + signature_detection = by_user_id(local_user.id) + next if !signature_detection + user = User.find(local_user.id) + next if user.preferences[:signature_detection] == signature_detection + user.preferences[:signature_detection] = signature_detection + user.save + } + true + end + end