From ca141bbb9be33f9cbf67c680689a0f5bc528eb40 Mon Sep 17 00:00:00 2001 From: Martin Edenhofer Date: Tue, 28 Jun 2016 22:49:38 +0200 Subject: [PATCH] Enabled signature detection for html emails. --- .../ticket_zoom/article_view.coffee | 19 +- app/models/transaction/signature_detection.rb | 8 +- lib/core_ext/string.rb | 82 +++++++- lib/signature_detection.rb | 49 ++++- .../email_signature_detection/client_c_1.html | 155 +++++++++++++++ .../email_signature_detection/client_c_2.html | 182 ++++++++++++++++++ .../email_signature_detection/client_c_3.html | 167 ++++++++++++++++ test/unit/aaa_string_test.rb | 82 ++++++++ test/unit/email_signatur_detection_test.rb | 75 ++++---- 9 files changed, 768 insertions(+), 51 deletions(-) create mode 100644 test/fixtures/email_signature_detection/client_c_1.html create mode 100644 test/fixtures/email_signature_detection/client_c_2.html create mode 100644 test/fixtures/email_signature_detection/client_c_3.html diff --git a/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee b/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee index 45b3b160e..174ede8cc 100644 --- a/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee +++ b/app/assets/javascripts/app/controllers/ticket_zoom/article_view.coffee @@ -91,10 +91,14 @@ class ArticleViewItem extends App.ObserverController # prepare html body if article.content_type is 'text/html' - if article.sender.name is 'Agent' - article['html'] = App.Utils.signatureIdentify(article.body, false, true) - else - article['html'] = App.Utils.signatureIdentify(article.body) + body = article.body + if article.preferences && article.preferences.signature_detection + signatureDetected = '' + body = body.replace(signatureDetected, '') + body = body.split('
') + body.splice(article.preferences.signature_detection, 0, signatureDetected) + body = body.join('
') + article['html'] = body else # client signature detection @@ -182,13 +186,16 @@ class ArticleViewItem extends App.ObserverController bubbleOvervlowContainer.css('opacity', '') # remember offset of "see more" - offsetTop = bubbleContent.find('.js-signatureMarker').position() + signatureMarker = bubbleContent.find('.js-signatureMarker') + if !signatureMarker.get(0) + signatureMarker = bubbleContent.find('div [data-signature=true]') + offsetTop = signatureMarker.position() # safari - workaround # in safari somethimes the marker is directly on top via .top and inspector but it isn't # in this case use the next element if offsetTop && offsetTop.top is 0 - offsetTop = bubbleContent.find('.js-signatureMarker').next('div, p').position() + offsetTop = signatureMarker.next('div, p, br').position() # remember bubble heigth heigth = bubbleContent.height() diff --git a/app/models/transaction/signature_detection.rb b/app/models/transaction/signature_detection.rb index 349ee34e6..58f6dec78 100644 --- a/app/models/transaction/signature_detection.rb +++ b/app/models/transaction/signature_detection.rb @@ -44,7 +44,7 @@ class Transaction::SignatureDetection type = Ticket::Article::Type.lookup(id: article.type_id) return if type['name'] != 'email' - # add queue job to update current signature of user id + # update current signature of user id SignatureDetection.rebuild_user(article.created_by_id) # user @@ -52,7 +52,11 @@ class Transaction::SignatureDetection return if !user return if !user.preferences return if !user.preferences[:signature_detection] - article.preferences[:signature_detection] = SignatureDetection.find_signature_line(user.preferences[:signature_detection], article.body) + article.preferences[:signature_detection] = SignatureDetection.find_signature_line( + user.preferences[:signature_detection], + article.body, + article.content_type, + ) article.save end diff --git a/lib/core_ext/string.rb b/lib/core_ext/string.rb index bdbcdd3cc..f2e5f4505 100644 --- a/lib/core_ext/string.rb +++ b/lib/core_ext/string.rb @@ -287,13 +287,93 @@ class String =end - def html2html_strict + def html2html_strict(force = false) string = html2text(true, true) + string.signature_identify(force) string = string.text2html string.gsub!(%r{######LINKEXT:(.+?)/TEXT:(.+?)######}, '\2') string.gsub!(/######LINKRAW:(.+?)######/, '\1') + marker_template = '' + string.sub!(/######SIGNATURE_MARKER######/, marker_template) + string.gsub!(/######SIGNATURE_MARKER######/, '') string.gsub!(/######(.+?)######/, '<\1>') string.chomp end + def signature_identify(force = false) + string = self + + # if we do have less then 10 lines and less then 300 chars ignore this + if !force + lines = string.split("\n") + return if lines.count < 10 && string.length < 300 + end + + marker = '######SIGNATURE_MARKER######' + + # search for signature seperator "--\n" + string.sub!(/^\s{0,2}--\s{0,2}$/) { |placeholder| + placeholder = "#{marker}#{placeholder}" + } + + map = {} + # Apple Mail + # On 01/04/15 10:55, Bob Smith wrote: + map['apple-en'] = '^(On)[[:space:]].{6,20}[[:space:]].{3,10}[[:space:]].{1,250}[[:space:]](wrote):' + + # Am 03.04.2015 um 20:58 schrieb Martin Edenhofer : + map['apple-de'] = '^(Am)[[:space:]].{6,20}[[:space:]](um)[[:space:]].{3,10}[[:space:]](schrieb)[[:space:]].{1,250}:' + + # Thunderbird + # Am 04.03.2015 um 12:47 schrieb Alf Aardvark: + map['thunderbird-de'] = '^(Am)[[:space:]].{6,20}[[:space:]](um)[[:space:]].{3,10}[[:space:]](schrieb)[[:space:]].{1,250}:' + + # Thunderbird default - http://kb.mozillazine.org/Reply_header_settings + # On 01-01-2007 11:00 AM, Alf Aardvark wrote: + map['thunderbird-en-default'] = '^(On)[[:space:]].{6,20}[[:space:]].{3,10},[[:space:]].{1,250}(wrote):' + + # http://kb.mozillazine.org/Reply_header_settings + # Alf Aardvark wrote, on 01-01-2007 11:00 AM: + map['thunderbird-en'] = '^.{1,250}[[:space:]](wrote),[[:space:]]on[[:space:]].{3,20}:' + + # otrs + # 25.02.2015 10:26 - edv hotline wrote: + # 25.02.2015 10:26 - edv hotline schrieb: + map['otrs-en-de'] = '^.{6,10}[[:space:]].{3,10}[[:space:]]-[[:space:]].{1,250}[[:space:]](wrote|schrieb):' + + # Ms + # rubocop:disable Style/AsciiComments + # From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc] + # Send: Donnerstag, 2. April 2015 10:00 + # To/Cc/Bcc: xxx + # Subject: xxx + # - or - + # From: xxx + # To/Cc/Bcc: xxx + # Date: 01.04.2015 12:41 + # Subject: xxx + # - or - + # De : xxx + # À/?/?: xxx + # Envoyé : mercredi 29 avril 2015 17:31 + # Objet : xxx + # rubocop:enable Style/AsciiComments + + # en/de/fr | sometimes ms adds a space to "xx : value" + map['ms-en-de-fr_from'] = '^(From|Von|De)( ?):[[:space:]].+?' + map['ms-en-de-fr_from_html'] = "\n######b######(From|Von|De)([[:space:]]?):([[:space:]]?)(######\/b######)[[:space:]].+?" + + # word 14 + # edv hotline wrote: + # edv hotline schrieb: + #map['word-en-de'] = "[^#{marker}].{1,250}\s(wrote|schrieb):" + + map.each {|_key, regexp| + string.sub!(/#{regexp}/) { |placeholder| + placeholder = "#{marker}#{placeholder}" + } + } + + string + end end diff --git a/lib/signature_detection.rb b/lib/signature_detection.rb index 9b4187a9c..570a9ab72 100644 --- a/lib/signature_detection.rb +++ b/lib/signature_detection.rb @@ -4,7 +4,14 @@ module SignatureDetection try to detect the signature in list of articles for example - signature = SignatureDetection.find_signature(string_list) + messages = [ + { + content: 'some content', + content_type: 'text/plain', + }, + ] + + signature = SignatureDetection.find_signature(messages) returns @@ -12,13 +19,22 @@ returns =end - def self.find_signature(string_list) + def self.find_signature(messages) + + string_list = [] + messages.each {|message| + if message[:content_type] =~ %r{text/html}i + string_list.push message[:content].html2text(true) + next + end + string_list.push message[:content] + } # hash with possible signature and count of matches in string list possible_signatures = {} # loop all strings in array - ( 0..string_list.length - 1 ).each {|main_string_index| + string_list.each_with_index { |_main_string, main_string_index| break if main_string_index + 1 > string_list.length - 1 # loop all all strings in array except of the previous index @@ -95,7 +111,7 @@ returns this function will search for a signature string in a string (e.g. article) and return the line number of the signature start - signature_line = SignatureDetection.find_signature_line(signature, string) + signature_line = SignatureDetection.find_signature_line(signature, message, content_type) returns @@ -107,7 +123,11 @@ returns =end - def self.find_signature_line(signature, string) + def self.find_signature_line(signature, string, content_type) + + if content_type =~ %r{text/html}i + string = string.html2text(true) + end # try to find the char position of the signature search_position = string.index(signature) @@ -133,12 +153,20 @@ returns def self.by_user_id(user_id) type = Ticket::Article::Type.lookup(name: 'email') sender = Ticket::Article::Sender.lookup(name: 'Customer') + tickets = Ticket.where( + created_by_id: user_id, + create_article_type_id: type.id, + create_article_sender_id: sender.id + ).limit(5).order(id: :desc) article_bodies = [] - tickets = Ticket.where(created_by_id: user_id, create_article_type_id: type.id, create_article_sender_id: sender.id).limit(5).order(id: :desc) tickets.each {|ticket| article = ticket.articles.first next if !article - article_bodies.push article.body + data = { + content: article.body, + content_type: article.content_type, + } + article_bodies.push data } find_signature(article_bodies) @@ -157,7 +185,6 @@ returns =end def self.rebuild_all_user - User.select('id').where(active: true).order(id: :desc).each {|local_user| rebuild_user(local_user.id) } @@ -209,7 +236,11 @@ returns user = User.find(article.created_by_id) next if !user.preferences[:signature_detection] - signature_line = find_signature_line(user.preferences[:signature_detection], article.body) + signature_line = find_signature_line( + user.preferences[:signature_detection], + article.body, + article.content_type, + ) next if !signature_line next if article.preferences[:signature_detection] == signature_line diff --git a/test/fixtures/email_signature_detection/client_c_1.html b/test/fixtures/email_signature_detection/client_c_1.html new file mode 100644 index 000000000..3c9df5d76 --- /dev/null +++ b/test/fixtures/email_signature_detection/client_c_1.html @@ -0,0 +1,155 @@ + + + + + + + + + +

+

+
+

Guten Abend Herr Smith,

+

 

+

die Test-Instanz steht bereit. Sie ist ein Klon der Produktiv-Instanz.

+

FQDN:                  999sv3902-Test.ad.org-unit.de

+

IPv4:                     10.45.0.140

+

Die Anmeldedaten sind mit denen der Produktiv-Instanz identisch.

+

Alle POP3-Abrufe habe ich entfernt und das Senden von e-mails deaktiviert.

+

 

+
+
+
+

+

+ + + + + + + + + + + +
+

+ + + + + + + + + +
Christian +Smith
Technik
+

+

 

+
+ + + + + + + + + + + + + + + + + + + +
Tel:+49 12 34 56 78 441
Fax:+49 12 34 56 78 499
Email: +Christian.Smith@example.com
Web:www.example.com
+
ABC KFZ- und Flugzeug B.V. & Co. KG
+Hauptverwaltung
+Ost Straße 2
+12345 Somewhere
+
+

+ +

+ + + +
+

+ +


+ABC KFZ- und Flugzeug B.V. &  Co. KG
+
Sitz: Zuhause, HARA 123 Stern/Tief
+phG: ABC Beteiligungs B.V.
+Wo: Dorten, Kammer van What 1234567
+Auch noch: Zuhause, HRB ABC Stern/Tief
+Geschäftsführer: André Bob / Gery Hauer
+
+

+ + \ No newline at end of file diff --git a/test/fixtures/email_signature_detection/client_c_2.html b/test/fixtures/email_signature_detection/client_c_2.html new file mode 100644 index 000000000..f3856a651 --- /dev/null +++ b/test/fixtures/email_signature_detection/client_c_2.html @@ -0,0 +1,182 @@ + + + + + + + + + +

+

+
+

Guten Tag Herr Smith,

+

 

+

die folgenden Gruppen wurden erstellt und die Benutzer entsprechend der Anfrage als Mitlgieder hinzugeügt.

+

org.local:

+

DistinguishedName

+

-----------------

+

CN=sec-xyz-R-Leiter-ABCD---Debitor,OU=ABC,OU=X,DC=org,DC=local

+

CN=sec-xyz-R-Leiter-ABCD---Hauptbuch,OU=ABC,OU=X,DC=org,DC=local

+

CN=sec-xyz-R-Leiter-ABCD---Kreditor,OU=ABC,OU=X,DC=org,DC=local

+

CN=sec-xyz-R-MA-ABCD---Stammdaten,OU=ABC,OU=X,DC=org,DC=local

+

 

+

ad.org-unit.de:

+

DistinguishedName

+

-----------------

+

CN=sec-xyz-R-Leiter-ABCD---Debitor,OU=Z,OU=Gruppen,OU=Service,DC=ad,DC=org-unit,DC=de

+

CN=sec-xyz-R-Leiter-ABCD---Hauptbuch,OU=Z,OU=Gruppen,OU=Service,DC=ad,DC=org-unit,DC=de

+

CN=sec-xyz-R-Leiter-ABCD---Kreditor,OU=Z,OU=Gruppen,OU=Service,DC=ad,DC=org-unit,DC=de

+

CN=sec-xyz-R-MA-ABCD---Stammdaten,OU=Z,OU=Gruppen,OU=Service,DC=ad,DC=org-unit,DC=de

+

 

+

example.local:

+

DistinguishedName

+

-----------------

+

CN=sec-xyz-R-Leiter-ABCD---Debitor,OU=ABC,OU=Admin-Gruppen,OU=Service-Bereich,DC=example,DC=local

+

CN=sec-xyz-R-Leiter-ABCD---Hauptbuch,OU=ABC,OU=Admin-Gruppen,OU=Service-Bereich,DC=example,DC=local

+

CN=sec-xyz-R-Leiter-ABCD---Kreditor,OU=ABC,OU=Admin-Gruppen,OU=Service-Bereich,DC=example,DC=local

+

CN=sec-xyz-R-MA-ABCD---Stammdaten,OU=ABC,OU=Admin-Gruppen,OU=Service-Bereich,DC=example,DC=local

+

 

+
+

Mit freundlichen Grüßen

+
+

 

+
+
+
+

+

+ + + + + + + + + + + +
+

+ + + + + + + + + +
Christian +Smith
Technik
+

+

 

+
+ + + + + + + + + + + + + + + + + + + +
Tel:+49 12 34 56 78 441
Fax:+49 12 34 56 78 499
Email: +Christian.Smith@example.com
Web:www.example.com
+
ABC KFZ- und Flugzeug B.V. & Co. KG
+Hauptverwaltung
+Ost Straße 2
+12345 Somewhere
+
+

+ +

+ + + +
+

+ +


+ABC KFZ- und Flugzeug B.V. &  Co. KG
+
Sitz: Zuhause, HARA 123 Stern/Tief
+phG: ABC Beteiligungs B.V.
+Wo: Dorten, Kammer van What 1234567
+Auch noch: Zuhause, HRB ABC Stern/Tief
+Geschäftsführer: André Bob / Gery Hauer
+
+

+ + \ No newline at end of file diff --git a/test/fixtures/email_signature_detection/client_c_3.html b/test/fixtures/email_signature_detection/client_c_3.html new file mode 100644 index 000000000..a2b4b1201 --- /dev/null +++ b/test/fixtures/email_signature_detection/client_c_3.html @@ -0,0 +1,167 @@ + + + + + + + + + +

+

+
+


+
+
+Sehr geehrte Damen und Herren,
+
+ich bin z.Zt. nicht per E-Mail erreichbar.
+In dringenden Fällen wenden Sie sich bitte an it-support@example.de
+Ihre E-Mail wird nicht weitergeleitet.

+
+
+

+

+ + + + + + + + + + + +
+

+ + + + + + + + + +
Christian +Smith
Technik
+

+

 

+
+ + + + + + + + + + + + + + + + + + + +
Tel:+49 12 34 56 78 441
Fax:+49 12 34 56 78 499
Email: +Christian.Smith@example.com
Web:www.example.com
+
ABC KFZ- und Flugzeug B.V. & Co. KG
+Hauptverwaltung
+Ost Straße 2
+12345 Somewhere
+Tel.: +001 1234 0000 0
+Fax: +001 1234 0000 99
+
+

+ +

+ + + +
+

+ +


+ABC KFZ- und Flugzeug B.V. &  Co. KG
+
Sitz: Zuhause, HARA 123 Stern/Tief
+phG: ABC Beteiligungs B.V.
+Wo: Dorten, Kammer van What 1234567
+Auch noch: Zuhause, HRB ABC Stern/Tief
+Geschäftsführer: André Bob / Gery Hauer
+
+

+


+
+

+
  +

+

+

+

+

+

+

+

+

+

+

+

+

+

+ + diff --git a/test/unit/aaa_string_test.rb b/test/unit/aaa_string_test.rb index 540ef82e7..9b1c3ec84 100644 --- a/test/unit/aaa_string_test.rb +++ b/test/unit/aaa_string_test.rb @@ -574,4 +574,86 @@ Men-----------------------' end + test 'signature_identify function' do + marker_template = '######SIGNATURE_MARKER######' + + source = 'test' + result = 'test' + assert_equal(result, source.signature_identify(true)) + + source = "test\n--\nend" + result = "test\n#{marker_template}--\nend" + assert_equal(result, source.signature_identify(true)) + + source = "On 01/04/15 10:55, Bob Smith wrote:" + result = "#{marker_template}On 01/04/15 10:55, Bob Smith wrote:" + assert_equal(result, source.signature_identify(true)) + + source = "Am 03.04.2015 um 20:58 schrieb Martin Edenhofer :" + result = "#{marker_template}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer :" + assert_equal(result, source.signature_identify(true)) + + source = "\ntest 123 \n1\n2\n3\n4\n5\n6\n7\n8\n9\n--\nBob Smith\n" + result = "\ntest 123 \n1\n2\n3\n4\n5\n6\n7\n8\n9\n#{marker_template}--\nBob Smith\n" + assert_equal(result, source.signature_identify(true)) + + source = "test 123 \n--no not match--\n--\nBob Smith\n" + result = "test 123 \n--no not match--\n#{marker_template}--\nBob Smith\n" + assert_equal(result, source.signature_identify(true)) + + source = "test 123 \n--no not match--\n -- \nBob Smith\n" + result = "test 123 \n--no not match--\n#{marker_template} -- \nBob Smith\n" + assert_equal(result, source.signature_identify(true)) + + source = "test 123 \n\n--\nBob Smith\n\n\n\n\n--\nBob Smith\n" + result = "test 123 \n#{marker_template}\n--\nBob Smith\n\n\n\n\n--\nBob Smith\n" + assert_equal(result, source.signature_identify(true)) + + source = "test 123\ntest 123\n--\nBob Smith\n" + result = "test 123\ntest 123\n#{marker_template}--\nBob Smith\n" + assert_equal(result, source.signature_identify(true)) + + source = "test 123\ntest 123\n--\nBob Smith\n\n" + result = "test 123\ntest 123\n#{marker_template}--\nBob Smith\n\n" + assert_equal(result, source.signature_identify(true)) + + # apple + # en + source = "test 123 \n--no not match--\nBob Smith\nOn 01/04/15 10:55, Bob Smith wrote:\nlalala\n--\nsome test" + result = "test 123 \n--no not match--\nBob Smith\n#{marker_template}On 01/04/15 10:55, Bob Smith wrote:\nlalala\n#{marker_template}--\nsome test" + assert_equal(result, source.signature_identify(true)) + + # de + source = "test 123 \n\n--no not match--\n\nBob Smith\nAm 03.04.2015 um 20:58 schrieb Bob Smith :\nlalala" + result = "test 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}Am 03.04.2015 um 20:58 schrieb Bob Smith :\nlalala" + assert_equal(result, source.signature_identify(true)) + + # ms + # en + source = "test 123 \n\n--no not match--\n\nBob Smith\nFrom: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nSent: Donnerstag, 2. April 2015 10:00\nlalala" + result = "test 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nSent: Donnerstag, 2. April 2015 10:00\nlalala" + assert_equal(result, source.signature_identify(true)) + + # de + source = "test 123 \n\n--no not match--\n\nBob Smith\nVon: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nGesendet: Donnerstag, 2. April 2015 10:00\nBetreff: lalala\n" + result = "test 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}Von: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nGesendet: Donnerstag, 2. April 2015 10:00\nBetreff: lalala\n" + assert_equal(result, source.signature_identify(true)) + + # fr + source = "\ntest 123 \n\n--no not match--\n\nBob Smith\nDe : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nEnvoyé : mercredi 29 avril 2015 17:31\nObjet : lalala\n" + result = "\ntest 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}De : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nEnvoyé : mercredi 29 avril 2015 17:31\nObjet : lalala\n" + assert_equal(result, source.signature_identify(true)) + + + marker_template = '' + html = "
lalala
--
Max Mix" + result = "lalala
#{marker_template}--
Max Mix" + assert_equal(result, html.html2html_strict(true)) + + html = "den.

Von: Fritz Bauer [mailto:me@example.com]
Gesendet: Donnerstag, 3. Mai 2012 11:51
An: John Smith
Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

Hallo,

ich versuche an den Punkten" + result = "den.
#{marker_template}
Von: Fritz Bauer [mailto:me@example.com]
Gesendet: Donnerstag, 3. Mai 2012 11:51
An: John Smith
Cc: Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com
Betreff: Re: OTRS::XXX Erweiterung - Anhänge an CI's

Hallo,

ich versuche an den Punkten" + assert_equal(result, html.html2html_strict(true)) + + end + end diff --git a/test/unit/email_signatur_detection_test.rb b/test/unit/email_signatur_detection_test.rb index 533c6c822..7a68cbb16 100644 --- a/test/unit/email_signatur_detection_test.rb +++ b/test/unit/email_signatur_detection_test.rb @@ -7,60 +7,69 @@ class EmailSignaturDetectionTest < ActiveSupport::TestCase # fixtures of sender a fixture_files = { - 'email_signature_detection/client_a_1.txt' => { line: 10 }, - 'email_signature_detection/client_a_2.txt' => { line: 20 }, - 'email_signature_detection/client_a_3.txt' => { line: 6 }, + 'email_signature_detection/client_a_1.txt' => { line: 10, content_type: 'text/plain' }, + 'email_signature_detection/client_a_2.txt' => { line: 20, content_type: 'text/plain' }, + 'email_signature_detection/client_a_3.txt' => { line: 6, content_type: 'text/plain' }, } - fixture_files_string_list = [] - - fixture_files.keys.each do |filepath| - - file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r') - - file_content = file.read - fixture_files[filepath][:content] = file_content - fixture_files_string_list.push(file_content) + fixture_messages = [] + fixture_files.each do |filepath, value| + value[:content] = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r').read + fixture_messages.push value end - signature = SignatureDetection.find_signature(fixture_files_string_list) + signature = SignatureDetection.find_signature(fixture_messages) expected_signature = "\nMit freundlichen Grüßen\n\nBob Smith\nBerechtigungen und dez. Department\n________________________________\n\nMusik AG\nBerechtigungen und dez. Department (ITPBM)\nKastanien 2\n12345 Hornhausen\nTel.: +49 911 6760\nFax: +49 911 85 6760\nMobil: +49 173 911\nE-Mail: Bob.Smith@music.com\nhttp://www.music.com\n\nMusik AG | Kastanien 2 | 12345 Hornhausen\nSitz der AG: Hornhausen, HRB xxxxx | USt.-ID: DE 111222333444\nVorstand: Marc Smith, Weber Huber\nAufsichtsrat: Max Mix (Vors.)" assert_equal(expected_signature, signature) - fixture_files.keys.each do |filepath| - expected_signature_position = fixture_files[filepath][:line] - - assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content])) + fixture_files.each do |_filepath, value| + assert_equal(value[:line], SignatureDetection.find_signature_line(signature, value[:content], value[:content_type])) end end test 'test case II - sender b' do fixture_files = { - 'email_signature_detection/client_b_1.txt' => { line: 26 }, - 'email_signature_detection/client_b_2.txt' => { line: 4 }, - 'email_signature_detection/client_b_3.txt' => { line: 6 }, + 'email_signature_detection/client_b_1.txt' => { line: 26, content_type: 'text/plain' }, + 'email_signature_detection/client_b_2.txt' => { line: 4, content_type: 'text/plain' }, + 'email_signature_detection/client_b_3.txt' => { line: 6, content_type: 'text/plain' }, } - fixture_files_string_list = [] - - fixture_files.keys.each do |filepath| - - file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r') - - file_content = file.read - fixture_files[filepath][:content] = file_content - fixture_files_string_list.push(file_content) + fixture_messages = [] + fixture_files.each do |filepath, value| + value[:content] = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r').read + fixture_messages.push value end - signature = SignatureDetection.find_signature(fixture_files_string_list) + signature = SignatureDetection.find_signature(fixture_messages) expected_signature = "\nFreundliche Grüße\n\nGünter Lässig\nLokale Daten\n\nMusic GmbH\nBaustraße 123, 12345 Max City\nTelefon 0123 5432114\nTelefax 0123 5432139\nE-Mail Günter.Lässig@example.com\n\nExample. Zusammen für eine bessere Welt.\n[cid:image001.png@01CE92A6.EC495B60]\n\n[cid:image002.png@01CE92A6.EC495B60]\n\n[cid:image003.png@01CE92A6.EC495B60]\n\n[cid:image004.png@01CE92A6.EC495B60]\n\n[cid:image005.jpg@01CE92A6.EC495B60]\n\n[cid:image006.png@01CE92A6.EC495B60]\n\nSitz der Gesellschaft: Max City, Amtsgericht Max City HRB Nr. 1234\nGeschäftsführer: Bob Smith\nVorsitzender des Aufsichtsrats: Alex Marx" assert_equal(expected_signature, signature) - fixture_files.keys.each do |filepath| - expected_signature_position = fixture_files[filepath][:line] + fixture_files.each do |_filepath, value| + assert_equal(value[:line], SignatureDetection.find_signature_line(signature, value[:content], value[:content_type])) + end + end - assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content])) + test 'test case III - sender c' do + + fixture_files = { + 'email_signature_detection/client_c_1.html' => { line: 8, content_type: 'text/html' }, + 'email_signature_detection/client_c_2.html' => { line: 29, content_type: 'text/html' }, + 'email_signature_detection/client_c_3.html' => { line: 9, content_type: 'text/html' }, + } + + fixture_messages = [] + fixture_files.each do |filepath, value| + value[:content] = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r').read + fixture_messages.push value + end + + signature = SignatureDetection.find_signature(fixture_messages) + expected_signature = "\nChristianSmith\nTechnik\n\nTel: +49 12 34 56 78 441\nFax: +49 12 34 56 78 499\nEmail: Christian.Smith@example.com\nWeb: www.example.com\nABC KFZ- und Flugzeug B.V. & Co. KG\nHauptverwaltung\nOst Straße 2\n12345 Somewhere" + assert_equal(expected_signature, signature) + + fixture_files.each do |filepath, value| + assert_equal(value[:line], SignatureDetection.find_signature_line(signature, value[:content], value[:content_type]), filepath) end end