Migrate the SignatureDetection tests from minitest to RSpec and condense
SignatureDetection.find_signature into a single map.

Note: find_signature has to select HTML messages by their :content_type (as
the removed loop did), not by matching the message body against text/html.
The blob id on the lib `index` line was not regenerated for that correction.

diff --git a/lib/signature_detection.rb b/lib/signature_detection.rb
index a6ee19278..a48067548 100644
--- a/lib/signature_detection.rb
+++ b/lib/signature_detection.rb
@@ -21,14 +21,10 @@ returns
 
   def self.find_signature(messages)
 
-    string_list = []
-    messages.each do |message|
-      if message[:content_type].match?(%r{text/html}i)
-        string_list.push message[:content].html2text(true)
-        next
-      end
-      string_list.push message[:content]
-    end
+    # HTML bodies are converted to text; decided by :content_type, never by sniffing the body
+    string_list = messages.map do |m|
+      m[:content_type].match?(%r{text/html}i) ? m[:content].html2text(true) : m[:content]
+    end
 
     # hash with possible signature and count of matches in string list
     possible_signatures = {}
diff --git a/spec/lib/signature_detection_spec.rb b/spec/lib/signature_detection_spec.rb
new file mode 100644
index 000000000..34a8b32cf
--- /dev/null
+++ b/spec/lib/signature_detection_spec.rb
@@ -0,0 +1,162 @@
+require 'rails_helper'
+
+RSpec.describe SignatureDetection do
+  describe '.find_signature' do
+    context 'when given an array of hashes' do
+      let(:messages) do
+        raw_message_files.map do |f|
+          { content: File.read(f), content_type: content_type }
+        end
+      end
+
+      context 'with plain text messages in their :content keys (sample input 1)' do
+        let(:content_type) { 'text/plain' }
+
+        let(:raw_message_files) do
+          [
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_1.txt'),
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_2.txt'),
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_3.txt')
+          ]
+        end
+
+        it 'returns the first 5–10-line substring they share in common' do
+          expect(SignatureDetection.find_signature(messages)).to eq(<<~SIG.chomp)
+
+            Mit freundlichen Grüßen
+
+            Bob Smith
+            Berechtigungen und dez. Department
+            ________________________________
+
+            Musik AG
+            Berechtigungen und dez. Department (ITPBM)
+            Kastanien 2
+          SIG
+        end
+      end
+
+      context 'with plain text messages in their :content keys (sample input 2)' do
+        let(:content_type) { 'text/plain' }
+
+        let(:raw_message_files) do
+          [
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_b_1.txt'),
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_b_2.txt'),
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_b_3.txt')
+          ]
+        end
+
+        it 'returns the first 5–10-line substring they share in common' do
+          expect(SignatureDetection.find_signature(messages)).to eq(<<~SIG.chomp)
+
+            Freundliche Grüße
+
+            Günter Lässig
+            Lokale Daten
+
+            Music GmbH
+            Baustraße 123, 12345 Max City
+            Telefon 0123 5432114
+            Telefax 0123 5432139
+          SIG
+        end
+      end
+
+      context 'with HTML messages in their :content keys' do
+        let(:content_type) { 'text/html' }
+
+        let(:raw_message_files) do
+          [
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_c_1.html'),
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_c_2.html'),
+            Rails.root.join('test', 'data', 'email_signature_detection', 'client_c_3.html')
+          ]
+        end
+
+        it 'converts messages (via #html2text) then returns the first 5–10-line substring they share in common' do
+          expect(SignatureDetection.find_signature(messages)).to eq(<<~SIG.chomp)
+
+            ChristianSmith
+            Technik
+
+            Tel: +49 12 34 56 78 441
+            Fax: +49 12 34 56 78 499
+            Email: Christian.Smith@example.com
+            Web: www.example.com
+            ABC KFZ- und Flugzeug B.V. & Co. KG
+            Hauptverwaltung
+          SIG
+        end
+      end
+    end
+  end
+
+  describe '.find_signature_line' do
+    context 'when given a plain text message' do
+      let(:content_type) { 'text/plain' }
+      let(:content) { File.read(Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_1.txt')) }
+
+      context 'and a substring it contains' do
+        let(:signature) { <<~SIG.chomp }
+
+          Mit freundlichen Grüßen
+
+          Bob Smith
+          Berechtigungen und dez. Department
+          ________________________________
+
+          Musik AG
+          Berechtigungen und dez. Department (ITPBM)
+          Kastanien 2
+        SIG
+
+        it 'returns the line of the message where the signature begins' do
+          expect(SignatureDetection.find_signature_line(signature, content, content_type)).to eq(10)
+        end
+      end
+    end
+
+    context 'when given an HTML message' do
+      let(:content_type) { 'text/html' }
+      let(:content) { File.read(Rails.root.join('test', 'data', 'email_signature_detection', 'example1.html')) }
+
+      context 'and a substring it contains' do
+        let(:signature) { <<~SIG.chomp }
+          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+          Bob Smith
+          ABC Organisation
+
+          EXAMPLE IT-Service GmbH
+          Dorten 5 F&E
+          12345 Da / Germany
+          Phone: +49 (0) 1234 567 890 / +49 (0) 1234 567 891
+          Fax:     +49 (0) 1234 567 892
+        SIG
+
+        it 'converts messages (via #html2text) then returns the line of the message where the signature begins' do
+          expect(SignatureDetection.find_signature_line(signature, content, content_type)).to eq(11)
+        end
+      end
+    end
+  end
+
+  describe '.rebuild_all_articles' do
+    context 'when a user exists with a recorded signature' do
+      let!(:customer) { create(:customer_user, preferences: { signature_detection: "\nbar" }) }
+
+      context 'and multiple articles exist for that customer' do
+        let!(:articles) do
+          [create(:ticket_article, created_by_id: customer.id, body: "foo\nfoo\nbar"),
+           create(:ticket_article, created_by_id: customer.id, body: "foo\nbar")]
+        end
+
+        it 'updates the signature-line data of all articles' do
+          expect { SignatureDetection.rebuild_all_articles }
+            .to change { articles.first.reload.preferences[:signature_detection] }.to(3)
+            .and change { articles.second.reload.preferences[:signature_detection] }.to(2)
+        end
+      end
+    end
+  end
+end
diff --git a/spec/models/channel/email_parser_spec.rb b/spec/models/channel/email_parser_spec.rb
index af8b5a163..9498865b0 100644
--- a/spec/models/channel/email_parser_spec.rb
+++ b/spec/models/channel/email_parser_spec.rb
@@ -316,6 +316,53 @@ RSpec.describe Channel::EmailParser, type: :model do
     end
   end
 
+  describe 'signature detection' do
+    let(:raw_mail) { header + File.read(message_file) }
+
+    let(:header) { <<~HEADER }
+      From: Bob.Smith@music.com
+      To: test@zammad.org
+      Subject: test
+
+    HEADER
+
+    context 'for emails from an unrecognized email address' do
+      let(:message_file) { Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_1.txt') }
+
+      it 'does not detect signatures' do
+        described_class.new.process({}, raw_mail)
+
+        expect { Scheduler.worker(true) }
+          .to not_change { Ticket.last.customer.preferences[:signature_detection] }.from(nil)
+          .and not_change { Ticket.last.articles.first.preferences[:signature_detection] }.from(nil)
+      end
+    end
+
+    context 'for emails from a previously processed sender' do
+      before do
+        described_class.new.process({}, header + File.read(previous_message_file))
+      end
+
+      let(:previous_message_file) { Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_1.txt') }
+
+      let(:message_file) { Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_2.txt') }
+
+      it 'sets detected signature on user (in a background job)' do
+        described_class.new.process({}, raw_mail)
+
+        expect { Scheduler.worker(true) }
+          .to change { Ticket.last.customer.preferences[:signature_detection] }
+      end
+
+      it 'sets line of detected signature on article (in a background job)' do
+        described_class.new.process({}, raw_mail)
+
+        expect { Scheduler.worker(true) }
+          .to change { Ticket.last.articles.first.preferences[:signature_detection] }.to(20)
+      end
+    end
+  end
+
   describe 'charset handling' do
     # see https://github.com/zammad/zammad/issues/2224
     context 'when header specifies Windows-1258 charset (#2224)' do
diff --git a/test/unit/email_signature_detection_test.rb b/test/unit/email_signature_detection_test.rb
deleted file mode 100644
index c31aff317..000000000
--- a/test/unit/email_signature_detection_test.rb
+++ /dev/null
@@ -1,124 +0,0 @@
-require 'test_helper'
-
-class EmailSignatureDetectionTest < ActiveSupport::TestCase
-
-  test 'test case 1 - sender a' do
-    message_files = [Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_1.txt'),
-                     Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_2.txt'),
-                     Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_3.txt')]
-    signature_lines = [10, 20, 6]
-
-    messages = message_files.zip(signature_lines).map do |f, l|
-      { content: File.read(Rails.root.join('test', 'data', f)),
-        content_type: 'text/plain',
-        line: l }
-    end
-
-    signature = SignatureDetection.find_signature(messages)
-    expected_signature = "\nMit freundlichen Grüßen\n\nBob Smith\nBerechtigungen und dez. Department\n________________________________\n\nMusik AG\nBerechtigungen und dez. Department (ITPBM)\nKastanien 2"
-    assert_equal(expected_signature, signature)
-
-    messages.each do |m|
-      assert_equal(m[:line], SignatureDetection.find_signature_line(signature, m[:content], m[:content_type]))
-    end
-  end
-
-  test 'test case 2 - sender b' do
-    message_files = [Rails.root.join('test', 'data', 'email_signature_detection', 'client_b_1.txt'),
-                     Rails.root.join('test', 'data', 'email_signature_detection', 'client_b_2.txt'),
-                     Rails.root.join('test', 'data', 'email_signature_detection', 'client_b_3.txt')]
-    signature_lines = [26, 4, 6]
-
-    messages = message_files.zip(signature_lines).map do |f, l|
-      { content: File.read(Rails.root.join('test', 'data', f)),
-        content_type: 'text/plain',
-        line: l }
-    end
-
-    signature = SignatureDetection.find_signature(messages)
-    expected_signature = "\nFreundliche Grüße\n\nGünter Lässig\nLokale Daten\n\nMusic GmbH\nBaustraße 123, 12345 Max City\nTelefon 0123 5432114\nTelefax 0123 5432139"
-    assert_equal(expected_signature, signature)
-
-    messages.each do |m|
-      assert_equal(m[:line], SignatureDetection.find_signature_line(signature, m[:content], m[:content_type]))
-    end
-  end
-
-  test 'test case 3 - just tests' do
-    signature = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nBob Smith\nABC Organisation\n\nEXAMPLE IT-Service GmbH\nDorten 5 F&E\n12345 Da / Germany\nPhone: +49 (0) 1234 567 890 / +49 (0) 1234 567 891\nFax:     +49 (0) 1234 567 892"
-    message = File.read(Rails.root.join('test', 'data', 'email_signature_detection', 'example1.html'))
-    signature_line = SignatureDetection.find_signature_line(signature, message, 'text/html')
-    assert_equal(11, signature_line)
-  end
-
-  test 'test case 4 - sender c' do
-    message_files = [Rails.root.join('test', 'data', 'email_signature_detection', 'client_c_1.html'),
-                     Rails.root.join('test', 'data', 'email_signature_detection', 'client_c_2.html'),
-                     Rails.root.join('test', 'data', 'email_signature_detection', 'client_c_3.html')]
-    signature_lines = [8, 29, 6]
-
-    messages = message_files.zip(signature_lines).map do |f, l|
-      { content: File.read(Rails.root.join('test', 'data', f)),
-        content_type: 'text/html',
-        line: l }
-    end
-
-    signature = SignatureDetection.find_signature(messages)
-    expected_signature = "\nChristianSmith\nTechnik\n\nTel: +49 12 34 56 78 441\nFax: +49 12 34 56 78 499\nEmail: Christian.Smith@example.com\nWeb: www.example.com\nABC KFZ- und Flugzeug B.V. & Co. KG\nHauptverwaltung"
-    assert_equal(expected_signature, signature)
-
-    messages.each do |m|
-      assert_equal(m[:line], SignatureDetection.find_signature_line(signature, m[:content], m[:content_type]))
-    end
-  end
-
-  test 'test case III - sender a - full cycle' do
-    header = "From: Bob.Smith@music.com\nTo: test@zammad.org\nSubject: test\n\n"
-
-    # process email I
-    body = File.binread(Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_1.txt'))
-    raw_email = header + body
-    ticket1, article1, user1, mail = Channel::EmailParser.new.process({}, raw_email)
-    assert(ticket1)
-    assert(article1)
-    Scheduler.worker(true)
-
-    # process email II
-    body = File.binread(Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_2.txt'))
-    raw_email = header + body
-    ticket2, article2, user2, mail = Channel::EmailParser.new.process({}, raw_email)
-    assert(ticket2)
-    assert(article2)
-    Scheduler.worker(true)
-
-    # check if user2 has a signature_detection value
-    user2 = User.find(user2.id)
-    assert(user2.preferences[:signature_detection])
-
-    # process email III
-    body = File.binread(Rails.root.join('test', 'data', 'email_signature_detection', 'client_a_3.txt'))
-    raw_email = header + body
-    ticket3, article3, user3, mail = Channel::EmailParser.new.process({}, raw_email)
-    assert(ticket3)
-    assert(article3)
-    Scheduler.worker(true)
-
-    # check if article3 has a signature_detection value
-    article3 = Ticket::Article.find(article3.id)
-    assert_equal(article3.preferences[:signature_detection], 6)
-
-    # relbuild all
-    SignatureDetection.rebuild_all_articles
-
-    article1 = Ticket::Article.find(article1.id)
-    assert_equal(article1.preferences[:signature_detection], 10)
-
-    article2 = Ticket::Article.find(article2.id)
-    assert_equal(article2.preferences[:signature_detection], 20)
-
-    article3 = Ticket::Article.find(article3.id)
-    assert_equal(article3.preferences[:signature_detection], 6)
-
-  end
-
-end