Fixed issue #2227 - Unprocessable email ERROR: #<ArgumentError: invalid byte sequence in UTF-8> and improved email parser tests to also verify attachments.

This commit is contained in:
Martin Edenhofer 2018-09-07 14:20:11 +02:00
parent 33346ddea9
commit 9d860fbc03
11 changed files with 2125 additions and 49 deletions

View file

@ -599,15 +599,29 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# get filename from content-disposition # get filename from content-disposition
# workaround for: NoMethodError: undefined method `filename' for #<Mail::UnstructuredField:0x007ff109e80678> # workaround for: NoMethodError: undefined method `filename' for #<Mail::UnstructuredField:0x007ff109e80678>
filename = file.header[:content_disposition].try(:filename) begin
filename = file.header[:content_disposition].try(:filename)
rescue
begin
if file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
filename = $3
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
filename = $3
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
filename = $3
end
rescue
Rails.logger.debug { 'Unable to get filename' }
end
end
begin begin
if file.header[:content_disposition].to_s =~ /filename="(.+?)"/i if file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
filename = $1 filename = $3
elsif file.header[:content_disposition].to_s =~ /filename='(.+?)'/i elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
filename = $1 filename = $3
elsif file.header[:content_disposition].to_s =~ /filename=(.+?);/i elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
filename = $1 filename = $3
end end
rescue rescue
Rails.logger.debug { 'Unable to get filename' } Rails.logger.debug { 'Unable to get filename' }
@ -615,12 +629,12 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# as fallback, use raw values # as fallback, use raw values
if filename.blank? if filename.blank?
if headers_store['Content-Disposition'].to_s =~ /filename="(.+?)"/i if headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
filename = $1 filename = $3
elsif headers_store['Content-Disposition'].to_s =~ /filename='(.+?)'/i elsif headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
filename = $1 filename = $3
elsif headers_store['Content-Disposition'].to_s =~ /filename=(.+?);/i elsif headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
filename = $1 filename = $3
end end
end end
@ -654,9 +668,9 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# e. g. Content-Type: video/quicktime; name="Video.MOV"; # e. g. Content-Type: video/quicktime; name="Video.MOV";
if filename.blank? if filename.blank?
['name="(.+?)"(;|$)', "name='(.+?)'(;|$)", 'name=(.+?)(;|$)'].each do |regexp| ['(filename|name)(\*{0,1})="(.+?)"(;|$)', '(filename|name)(\*{0,1})=\'(.+?)\'(;|$)', '(filename|name)(\*{0,1})=(.+?)(;|$)'].each do |regexp|
if headers_store['Content-Type'] =~ /#{regexp}/i if headers_store['Content-Type'] =~ /#{regexp}/i
filename = $1 filename = $3
break break
end end
end end

File diff suppressed because one or more lines are too long

View file

@ -2,9 +2,8 @@
from: '"Müller, Bernd" <Bernd.Mueller@example.com>' from: '"Müller, Bernd" <Bernd.Mueller@example.com>'
from_email: Bernd.Mueller@example.com from_email: Bernd.Mueller@example.com
from_display_name: Müller, Bernd from_display_name: Müller, Bernd
subject: 'AW: OTRS [Ticket#118192]'
to: "'Martin Edenhofer via Znuny Sales' <sales@znuny.com>" to: "'Martin Edenhofer via Znuny Sales' <sales@znuny.com>"
content_type: text/plain subject: 'AW: OTRS [Ticket#118192]'
body: | body: |
äöüß ad asd äöüß ad asd
@ -12,3 +11,62 @@ body: |
-- --
Old programmers never die. They just branch to a new address. Old programmers never die. They just branch to a new address.
content_type: text/plain
attachments:
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
data: "Received: from NTEXCHANGE3.example.dom ([172.17.253.64]) by ntexchange.example.dom\n
with Microsoft SMTPSVC(6.0.3790.4675);\t Wed, 5 Dec 2012 21:15:07 +0100\nReceived:
from NTEXCHANGE2.example.dom (172.17.253.213) by NTEXCHANGE3.example.dom\n (172.17.253.64)
with Microsoft SMTP Server (TLS) id 14.2.318.4; Wed, 5 Dec\n 2012 21:15:07 +0100\nReceived:
from ntserver-1.example.com (172.17.253.224) by NTEXCHANGE2.example.dom\n (172.17.253.213)
with Microsoft SMTP Server id 14.2.318.4; Wed, 5 Dec 2012\n 21:15:06 +0100\nReceived:
from ntserver-1.example.com ([192.168.100.117]) by ntserver-1.example.com with\n
Microsoft SMTPSVC(7.5.7601.17514);\t Wed, 5 Dec 2012 21:15:05 +0100\nReceived:
from mailout02.ispamone.de ( [212.211.146.205]) by ntserver-1.example.com\n\t(Reddoxx
engine) with SMTP id 56767845A33; Wed, 5 Dec 2012 21:15:05 +0100\nFrom: \"helpdesk@example.com\"
<helpdesk@example.com>\nTo: HelpDesk <HelpDesk@example.com>\nSubject: Neue Anfrage
erstellt - 33284\nThread-Topic: Neue Anfrage erstellt - 33284\nThread-Index: Ac3TJBhe4TPAltHrS6m3GwtF5lDkwA==\nContent-Class:
urn:content-classes:message\nDate: Wed, 5 Dec 2012 20:07:04 +0000\nMessage-ID:
<174905662.1354738495468.JavaMail.babeldoc@atlas>\nContent-Language: de-DE\nX-MS-Has-Attach:\nX-MS-Exchange-Organization-SCL:
0\nX-MS-TNEF-Correlator:\nContent-Type: multipart/alternative;\n\tboundary=\"_000_1749056621354738495468JavaMailbabeldocatlas_\"\nMIME-Version:
1.0\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_\nContent-Type: text/plain;
charset=\"utf-8\"\nContent-Transfer-Encoding: base64\n\ndGhpcyBpcyBhIHRlc3Q=\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_\nContent-Type:
text/html; charset=\"utf-8\"\nContent-Transfer-Encoding: base64\n\nPGgxPnRoaXMgaXMgYSB0ZXN0PC9oMT4=\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_--\n"
filename: Neue Anfrage erstellt - 33284.eml
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
Content-Type: message/rfc822
Mime-Type: message/rfc822
Charset: UTF-8
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
data: "Received: from NTEXCHANGE3.example.dom ([172.17.253.64]) by ntexchange.example.dom\n
with Microsoft SMTPSVC(6.0.3790.4675);\t Wed, 5 Dec 2012 21:06:21 +0100\nReceived:
from NTEXCHANGE2.example.dom (172.17.253.213) by NTEXCHANGE3.example.dom\n (172.17.253.64)
with Microsoft SMTP Server (TLS) id 14.2.318.4; Wed, 5 Dec\n 2012 21:06:21 +0100\nReceived:
from ntserver-1.example.com (172.17.253.224) by NTEXCHANGE2.example.dom\n (172.17.253.213)
with Microsoft SMTP Server id 14.2.318.4; Wed, 5 Dec 2012\n 21:06:21 +0100\nReceived:
from ntserver-1.example.com ([192.168.100.117]) by ntserver-1.example.com with\n
Microsoft SMTPSVC(7.5.7601.17514);\t Wed, 5 Dec 2012 21:06:21 +0100\nReceived:
from mailout01.ispamone.de ( [212.211.146.204]) by ntserver-1.example.com\n\t(Reddoxx
engine) with SMTP id 40D026EFCF5; Wed, 5 Dec 2012 21:06:19 +0100\nFrom: \"bestellung.example@example.com\"
<bestellung.example@example.com>\nTo: HelpDesk <HelpDesk@example.com>\nSubject:
Call: HW-Anforderung; Best-nr.47524152\nThread-Topic: Call: HW-Anforderung; Best-nr.47524152\nThread-Index:
Ac3TI/8Xh/P5FTJFSYuBz+vjlXSWbg==\nDate: Wed, 5 Dec 2012 20:06:21 +0000\nMessage-ID:
<13553079.1354737948277.JavaMail.jboss@fpep>\nContent-Language: de-DE\nX-MS-Has-Attach:\nX-MS-Exchange-Organization-SCL:
0\nX-MS-TNEF-Correlator:\nContent-Type: text/plain; charset=\"iso-8859-1\"\nContent-Transfer-Encoding:
quoted-printable\nMIME-Version: 1.0\n\n\n\n\nWarenempf=E4nger:\nFirma: Example
GmbH\nName: Rehm\nVorname: Joachim\nStra=DFe K=E4ssstr.
19\nOrt: 00000 Ulm\nE-Mail: helpdesk@example.com\nTelefon:
\ 0000 166-2399\nFax: 0000 166-2309\nAbteilung: TN
23\nGeb=E4ude: EG, Zimmer E60\n\n\nBestellpositionen:\nPos. Menge Art.-Nr.
\ Bezeichnung Ansprechpartner =\n Aufstellort Bemerkungen
des Bestellers\n10 1.0 588164 Dell UltraSharp U2410 24 Andreas
Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n20 1.0 71705 Crossoverkabel
S/FTP, PiMF, Cat... Andreas Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n30
\ 1.0 777577_638Dell Latitude E6430 mit UMTS / ... Andreas Ring, GE-P=\nEW
\ K=E4ssstr. 19, 3. OG, Zi 306\n40 1.0 920-003052Logitech K270 cordless
Keyboard... Andreas Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n"
filename: 'Call: HW-Anforderung; Best-nr.47524152.eml'
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
Content-Type: message/rfc822
Mime-Type: message/rfc822
Charset: UTF-8

View file

@ -2,6 +2,17 @@
from: oracle@IG0-1-DB01.example.com from: oracle@IG0-1-DB01.example.com
from_email: oracle@IG0-1-DB01.example.com from_email: oracle@IG0-1-DB01.example.com
from_display_name: '' from_display_name: ''
subject: 'Regelsets im Test-Status gefunden: 1'
to: support@example.com to: support@example.com
subject: 'Regelsets im Test-Status gefunden: 1'
body: no visible content body: no visible content
content_type: text/plain
attachments:
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
data: |
RULESET_ID;NAME;ACTIV;RUN_MODE;AUDIT_MODIFY_DATE
387;DP DHL JOIN - EN : Einladung eAC;T;SM;1.09.14
filename: rulesets-report.csv
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
Content-Type: text/csv
Mime-Type: text/csv
Charset: UTF-8

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1837
test/data/mail/mail073.box Normal file

File diff suppressed because it is too large. Load diff

File diff suppressed because one or more lines are too long

View file

@ -8,23 +8,25 @@ class EmailParserTest < ActiveSupport::TestCase
to write new .yml files for emails you can use the following code: to write new .yml files for emails you can use the following code:
File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.read('test/data/mail/mailXXX.box')).slice(:from, :from_email, :from_display_name, :to, :cc, :subject, :body, :content_type, :'reply-to').to_yaml) File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.read('test/data/mail/mailXXX.box')).slice(:from, :from_email, :from_display_name, :to, :cc, :subject, :body, :content_type, :'reply-to', :attachments).to_yaml)
=end =end
test 'parse' do test 'parse' do
msg_files = Dir.glob(Rails.root.join('test', 'data', 'mail', 'mail*.box')).sort msg_files = Dir.glob(Rails.root.join('test', 'data', 'mail', 'mail*.box')).sort
messages = []
messages = msg_files.select { |f| File.exists?(f.ext('yml')) } msg_files.each do |f|
.map do |f| next if !File.exists?(f.ext('yml'))
{ item = {
source: File.basename(f), source: File.basename(f),
content: YAML.load(File.read(f.ext('yml'))), content: YAML.load(File.read(f.ext('yml'))),
parsed: Channel::EmailParser.new.parse(File.read(f)), parsed: Channel::EmailParser.new.parse(File.read(f)),
} }
end messages.push item
end
messages.each do |m| messages.each do |m|
# assert: raw content hash is a subset of parsed message hash # assert: raw content hash is a subset of parsed message hash
expected_msg = m[:content].except(:attachments) expected_msg = m[:content].except(:attachments)
parsed_msg = m[:parsed].slice(*expected_msg.keys) parsed_msg = m[:parsed].slice(*expected_msg.keys)
@ -35,23 +37,24 @@ File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.rea
# assert: attachments in parsed message hash match metadata in raw hash # assert: attachments in parsed message hash match metadata in raw hash
next if m[:content][:attachments].blank? next if m[:content][:attachments].blank?
attachments_found = []
# the formats of m[:content][:attachments] and m[:parsed][:attachments] don't match, m[:content][:attachments].each do |expected_attachment|
# so we have to convert one to the other expected_attachment_md5 = Digest::MD5.hexdigest(expected_attachment[:data])
parsed_attachment_metadata = m[:parsed][:attachments].map do |a| m[:parsed][:attachments].each do |parsed_attachment|
{ parsed_attachment_md5 = Digest::MD5.hexdigest(parsed_attachment[:data])
md5: Digest::MD5.hexdigest(a[:data]), next if attachments_found.include?(parsed_attachment_md5)
cid: a[:preferences]['Content-ID'], next if expected_attachment_md5 != parsed_attachment_md5
filename: a[:filename], attachments_found.push parsed_attachment_md5
}.with_indifferent_access expected_attachment.each do |key, value|
end assert_equal(value, parsed_attachment[key], "#{key} is different")
end
m[:content][:attachments].sort_by { |a| a[:md5] } next
.zip(parsed_attachment_metadata.sort_by { |a| a[:md5] }) end
.each do |content, parsed| end
assert_operator(content, :<=, parsed, next if attachments_found.count == m[:content][:attachments].count
"parsed attachment data from #{m[:source]} does not match " \ m[:content][:attachments].each do |expected_attachment|
"attachment metadata from #{m[:source].ext('yml')}") next if attachments_found.include?(Digest::MD5.hexdigest(expected_attachment[:data]))
assert(false, "Attachment not found test/data/mail/#{m[:source]}: #{expected_attachment.inspect}")
end end
end end
end end