Fixed issue #2227 - Unprocessable email ERROR: #<ArgumentError: invalid byte sequence in UTF-8> and improved email parser tests to also verify attachments.

This commit is contained in:
Martin Edenhofer 2018-09-07 14:20:11 +02:00
parent 33346ddea9
commit 9d860fbc03
11 changed files with 2125 additions and 49 deletions

View file

@ -599,15 +599,29 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# get filename from content-disposition
# workaround for: NoMethodError: undefined method `filename' for #<Mail::UnstructuredField:0x007ff109e80678>
begin
filename = file.header[:content_disposition].try(:filename)
rescue
begin
if file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
filename = $3
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
filename = $3
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
filename = $3
end
rescue
Rails.logger.debug { 'Unable to get filename' }
end
end
begin
if file.header[:content_disposition].to_s =~ /filename="(.+?)"/i
filename = $1
elsif file.header[:content_disposition].to_s =~ /filename='(.+?)'/i
filename = $1
elsif file.header[:content_disposition].to_s =~ /filename=(.+?);/i
filename = $1
if file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
filename = $3
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
filename = $3
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
filename = $3
end
rescue
Rails.logger.debug { 'Unable to get filename' }
@ -615,12 +629,12 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# as fallback, use raw values
if filename.blank?
if headers_store['Content-Disposition'].to_s =~ /filename="(.+?)"/i
filename = $1
elsif headers_store['Content-Disposition'].to_s =~ /filename='(.+?)'/i
filename = $1
elsif headers_store['Content-Disposition'].to_s =~ /filename=(.+?);/i
filename = $1
if headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
filename = $3
elsif headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
filename = $3
elsif headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
filename = $3
end
end
@ -654,9 +668,9 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# e.g. Content-Type: video/quicktime; name="Video.MOV";
if filename.blank?
['name="(.+?)"(;|$)', "name='(.+?)'(;|$)", 'name=(.+?)(;|$)'].each do |regexp|
['(filename|name)(\*{0,1})="(.+?)"(;|$)', '(filename|name)(\*{0,1})=\'(.+?)\'(;|$)', '(filename|name)(\*{0,1})=(.+?)(;|$)'].each do |regexp|
if headers_store['Content-Type'] =~ /#{regexp}/i
filename = $1
filename = $3
break
end
end

File diff suppressed because one or more lines are too long

View file

@ -2,9 +2,8 @@
from: '"Müller, Bernd" <Bernd.Mueller@example.com>'
from_email: Bernd.Mueller@example.com
from_display_name: Müller, Bernd
subject: 'AW: OTRS [Ticket#118192]'
to: "'Martin Edenhofer via Znuny Sales' <sales@znuny.com>"
content_type: text/plain
subject: 'AW: OTRS [Ticket#118192]'
body: |
äöüß ad asd
@ -12,3 +11,62 @@ body: |
--
Old programmers never die. They just branch to a new address.
content_type: text/plain
attachments:
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
data: "Received: from NTEXCHANGE3.example.dom ([172.17.253.64]) by ntexchange.example.dom\n
with Microsoft SMTPSVC(6.0.3790.4675);\t Wed, 5 Dec 2012 21:15:07 +0100\nReceived:
from NTEXCHANGE2.example.dom (172.17.253.213) by NTEXCHANGE3.example.dom\n (172.17.253.64)
with Microsoft SMTP Server (TLS) id 14.2.318.4; Wed, 5 Dec\n 2012 21:15:07 +0100\nReceived:
from ntserver-1.example.com (172.17.253.224) by NTEXCHANGE2.example.dom\n (172.17.253.213)
with Microsoft SMTP Server id 14.2.318.4; Wed, 5 Dec 2012\n 21:15:06 +0100\nReceived:
from ntserver-1.example.com ([192.168.100.117]) by ntserver-1.example.com with\n
Microsoft SMTPSVC(7.5.7601.17514);\t Wed, 5 Dec 2012 21:15:05 +0100\nReceived:
from mailout02.ispamone.de ( [212.211.146.205]) by ntserver-1.example.com\n\t(Reddoxx
engine) with SMTP id 56767845A33; Wed, 5 Dec 2012 21:15:05 +0100\nFrom: \"helpdesk@example.com\"
<helpdesk@example.com>\nTo: HelpDesk <HelpDesk@example.com>\nSubject: Neue Anfrage
erstellt - 33284\nThread-Topic: Neue Anfrage erstellt - 33284\nThread-Index: Ac3TJBhe4TPAltHrS6m3GwtF5lDkwA==\nContent-Class:
urn:content-classes:message\nDate: Wed, 5 Dec 2012 20:07:04 +0000\nMessage-ID:
<174905662.1354738495468.JavaMail.babeldoc@atlas>\nContent-Language: de-DE\nX-MS-Has-Attach:\nX-MS-Exchange-Organization-SCL:
0\nX-MS-TNEF-Correlator:\nContent-Type: multipart/alternative;\n\tboundary=\"_000_1749056621354738495468JavaMailbabeldocatlas_\"\nMIME-Version:
1.0\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_\nContent-Type: text/plain;
charset=\"utf-8\"\nContent-Transfer-Encoding: base64\n\ndGhpcyBpcyBhIHRlc3Q=\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_\nContent-Type:
text/html; charset=\"utf-8\"\nContent-Transfer-Encoding: base64\n\nPGgxPnRoaXMgaXMgYSB0ZXN0PC9oMT4=\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_--\n"
filename: Neue Anfrage erstellt - 33284.eml
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
Content-Type: message/rfc822
Mime-Type: message/rfc822
Charset: UTF-8
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
data: "Received: from NTEXCHANGE3.example.dom ([172.17.253.64]) by ntexchange.example.dom\n
with Microsoft SMTPSVC(6.0.3790.4675);\t Wed, 5 Dec 2012 21:06:21 +0100\nReceived:
from NTEXCHANGE2.example.dom (172.17.253.213) by NTEXCHANGE3.example.dom\n (172.17.253.64)
with Microsoft SMTP Server (TLS) id 14.2.318.4; Wed, 5 Dec\n 2012 21:06:21 +0100\nReceived:
from ntserver-1.example.com (172.17.253.224) by NTEXCHANGE2.example.dom\n (172.17.253.213)
with Microsoft SMTP Server id 14.2.318.4; Wed, 5 Dec 2012\n 21:06:21 +0100\nReceived:
from ntserver-1.example.com ([192.168.100.117]) by ntserver-1.example.com with\n
Microsoft SMTPSVC(7.5.7601.17514);\t Wed, 5 Dec 2012 21:06:21 +0100\nReceived:
from mailout01.ispamone.de ( [212.211.146.204]) by ntserver-1.example.com\n\t(Reddoxx
engine) with SMTP id 40D026EFCF5; Wed, 5 Dec 2012 21:06:19 +0100\nFrom: \"bestellung.example@example.com\"
<bestellung.example@example.com>\nTo: HelpDesk <HelpDesk@example.com>\nSubject:
Call: HW-Anforderung; Best-nr.47524152\nThread-Topic: Call: HW-Anforderung; Best-nr.47524152\nThread-Index:
Ac3TI/8Xh/P5FTJFSYuBz+vjlXSWbg==\nDate: Wed, 5 Dec 2012 20:06:21 +0000\nMessage-ID:
<13553079.1354737948277.JavaMail.jboss@fpep>\nContent-Language: de-DE\nX-MS-Has-Attach:\nX-MS-Exchange-Organization-SCL:
0\nX-MS-TNEF-Correlator:\nContent-Type: text/plain; charset=\"iso-8859-1\"\nContent-Transfer-Encoding:
quoted-printable\nMIME-Version: 1.0\n\n\n\n\nWarenempf=E4nger:\nFirma: Example
GmbH\nName: Rehm\nVorname: Joachim\nStra=DFe K=E4ssstr.
19\nOrt: 00000 Ulm\nE-Mail: helpdesk@example.com\nTelefon:
\ 0000 166-2399\nFax: 0000 166-2309\nAbteilung: TN
23\nGeb=E4ude: EG, Zimmer E60\n\n\nBestellpositionen:\nPos. Menge Art.-Nr.
\ Bezeichnung Ansprechpartner =\n Aufstellort Bemerkungen
des Bestellers\n10 1.0 588164 Dell UltraSharp U2410 24 Andreas
Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n20 1.0 71705 Crossoverkabel
S/FTP, PiMF, Cat... Andreas Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n30
\ 1.0 777577_638Dell Latitude E6430 mit UMTS / ... Andreas Ring, GE-P=\nEW
\ K=E4ssstr. 19, 3. OG, Zi 306\n40 1.0 920-003052Logitech K270 cordless
Keyboard... Andreas Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n"
filename: 'Call: HW-Anforderung; Best-nr.47524152.eml'
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
Content-Type: message/rfc822
Mime-Type: message/rfc822
Charset: UTF-8

View file

@ -2,6 +2,17 @@
from: oracle@IG0-1-DB01.example.com
from_email: oracle@IG0-1-DB01.example.com
from_display_name: ''
subject: 'Regelsets im Test-Status gefunden: 1'
to: support@example.com
subject: 'Regelsets im Test-Status gefunden: 1'
body: no visible content
content_type: text/plain
attachments:
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
data: |
RULESET_ID;NAME;ACTIV;RUN_MODE;AUDIT_MODIFY_DATE
387;DP DHL JOIN - EN : Einladung eAC;T;SM;1.09.14
filename: rulesets-report.csv
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
Content-Type: text/csv
Mime-Type: text/csv
Charset: UTF-8

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1837
test/data/mail/mail073.box Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -8,23 +8,25 @@ class EmailParserTest < ActiveSupport::TestCase
To write new .yml files for emails, you can use the following code:
File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.read('test/data/mail/mailXXX.box')).slice(:from, :from_email, :from_display_name, :to, :cc, :subject, :body, :content_type, :'reply-to').to_yaml)
File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.read('test/data/mail/mailXXX.box')).slice(:from, :from_email, :from_display_name, :to, :cc, :subject, :body, :content_type, :'reply-to', :attachments).to_yaml)
=end
test 'parse' do
msg_files = Dir.glob(Rails.root.join('test', 'data', 'mail', 'mail*.box')).sort
messages = msg_files.select { |f| File.exists?(f.ext('yml')) }
.map do |f|
{
messages = []
msg_files.each do |f|
next if !File.exists?(f.ext('yml'))
item = {
source: File.basename(f),
content: YAML.load(File.read(f.ext('yml'))),
parsed: Channel::EmailParser.new.parse(File.read(f)),
}
messages.push item
end
messages.each do |m|
# assert: raw content hash is a subset of parsed message hash
expected_msg = m[:content].except(:attachments)
parsed_msg = m[:parsed].slice(*expected_msg.keys)
@ -35,23 +37,24 @@ File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.rea
# assert: attachments in parsed message hash match metadata in raw hash
next if m[:content][:attachments].blank?
# the formats of m[:content][:attachments] and m[:parsed][:attachments] don't match,
# so we have to convert one to the other
parsed_attachment_metadata = m[:parsed][:attachments].map do |a|
{
md5: Digest::MD5.hexdigest(a[:data]),
cid: a[:preferences]['Content-ID'],
filename: a[:filename],
}.with_indifferent_access
attachments_found = []
m[:content][:attachments].each do |expected_attachment|
expected_attachment_md5 = Digest::MD5.hexdigest(expected_attachment[:data])
m[:parsed][:attachments].each do |parsed_attachment|
parsed_attachment_md5 = Digest::MD5.hexdigest(parsed_attachment[:data])
next if attachments_found.include?(parsed_attachment_md5)
next if expected_attachment_md5 != parsed_attachment_md5
attachments_found.push parsed_attachment_md5
expected_attachment.each do |key, value|
assert_equal(value, parsed_attachment[key], "#{key} is different")
end
m[:content][:attachments].sort_by { |a| a[:md5] }
.zip(parsed_attachment_metadata.sort_by { |a| a[:md5] })
.each do |content, parsed|
assert_operator(content, :<=, parsed,
"parsed attachment data from #{m[:source]} does not match " \
"attachment metadata from #{m[:source].ext('yml')}")
next
end
end
next if attachments_found.count == m[:content][:attachments].count
m[:content][:attachments].each do |expected_attachment|
next if attachments_found.include?(Digest::MD5.hexdigest(expected_attachment[:data]))
assert(false, "Attachment not found test/data/mail/#{m[:source]}: #{expected_attachment.inspect}")
end
end
end