Fixed issue #2227 (unprocessable email, ERROR: #<ArgumentError: invalid byte sequence in UTF-8>) and improved the email parser tests to also verify attachments.
This commit is contained in:
parent
33346ddea9
commit
9d860fbc03
11 changed files with 2125 additions and 49 deletions
|
@ -599,15 +599,29 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
|
||||||
# get filename from content-disposition
|
# get filename from content-disposition
|
||||||
|
|
||||||
# workaround for: NoMethodError: undefined method `filename' for #<Mail::UnstructuredField:0x007ff109e80678>
|
# workaround for: NoMethodError: undefined method `filename' for #<Mail::UnstructuredField:0x007ff109e80678>
|
||||||
|
begin
|
||||||
filename = file.header[:content_disposition].try(:filename)
|
filename = file.header[:content_disposition].try(:filename)
|
||||||
|
rescue
|
||||||
|
begin
|
||||||
|
if file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
|
||||||
|
filename = $3
|
||||||
|
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
|
||||||
|
filename = $3
|
||||||
|
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
|
||||||
|
filename = $3
|
||||||
|
end
|
||||||
|
rescue
|
||||||
|
Rails.logger.debug { 'Unable to get filename' }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
begin
|
begin
|
||||||
if file.header[:content_disposition].to_s =~ /filename="(.+?)"/i
|
if file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
|
||||||
filename = $1
|
filename = $3
|
||||||
elsif file.header[:content_disposition].to_s =~ /filename='(.+?)'/i
|
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
|
||||||
filename = $1
|
filename = $3
|
||||||
elsif file.header[:content_disposition].to_s =~ /filename=(.+?);/i
|
elsif file.header[:content_disposition].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
|
||||||
filename = $1
|
filename = $3
|
||||||
end
|
end
|
||||||
rescue
|
rescue
|
||||||
Rails.logger.debug { 'Unable to get filename' }
|
Rails.logger.debug { 'Unable to get filename' }
|
||||||
|
@ -615,12 +629,12 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
|
||||||
|
|
||||||
# as fallback, use raw values
|
# as fallback, use raw values
|
||||||
if filename.blank?
|
if filename.blank?
|
||||||
if headers_store['Content-Disposition'].to_s =~ /filename="(.+?)"/i
|
if headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})="(.+?)"/i
|
||||||
filename = $1
|
filename = $3
|
||||||
elsif headers_store['Content-Disposition'].to_s =~ /filename='(.+?)'/i
|
elsif headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})='(.+?)'/i
|
||||||
filename = $1
|
filename = $3
|
||||||
elsif headers_store['Content-Disposition'].to_s =~ /filename=(.+?);/i
|
elsif headers_store['Content-Disposition'].to_s =~ /(filename|name)(\*{0,1})=(.+?);/i
|
||||||
filename = $1
|
filename = $3
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -654,9 +668,9 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
|
||||||
|
|
||||||
# e. g. Content-Type: video/quicktime; name="Video.MOV";
|
# e. g. Content-Type: video/quicktime; name="Video.MOV";
|
||||||
if filename.blank?
|
if filename.blank?
|
||||||
['name="(.+?)"(;|$)', "name='(.+?)'(;|$)", 'name=(.+?)(;|$)'].each do |regexp|
|
['(filename|name)(\*{0,1})="(.+?)"(;|$)', '(filename|name)(\*{0,1})=\'(.+?)\'(;|$)', '(filename|name)(\*{0,1})=(.+?)(;|$)'].each do |regexp|
|
||||||
if headers_store['Content-Type'] =~ /#{regexp}/i
|
if headers_store['Content-Type'] =~ /#{regexp}/i
|
||||||
filename = $1
|
filename = $3
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -2,9 +2,8 @@
|
||||||
from: '"Müller, Bernd" <Bernd.Mueller@example.com>'
|
from: '"Müller, Bernd" <Bernd.Mueller@example.com>'
|
||||||
from_email: Bernd.Mueller@example.com
|
from_email: Bernd.Mueller@example.com
|
||||||
from_display_name: Müller, Bernd
|
from_display_name: Müller, Bernd
|
||||||
subject: 'AW: OTRS [Ticket#118192]'
|
|
||||||
to: "'Martin Edenhofer via Znuny Sales' <sales@znuny.com>"
|
to: "'Martin Edenhofer via Znuny Sales' <sales@znuny.com>"
|
||||||
content_type: text/plain
|
subject: 'AW: OTRS [Ticket#118192]'
|
||||||
body: |
|
body: |
|
||||||
äöüß ad asd
|
äöüß ad asd
|
||||||
|
|
||||||
|
@ -12,3 +11,62 @@ body: |
|
||||||
|
|
||||||
--
|
--
|
||||||
Old programmers never die. They just branch to a new address.
|
Old programmers never die. They just branch to a new address.
|
||||||
|
content_type: text/plain
|
||||||
|
attachments:
|
||||||
|
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
data: "Received: from NTEXCHANGE3.example.dom ([172.17.253.64]) by ntexchange.example.dom\n
|
||||||
|
with Microsoft SMTPSVC(6.0.3790.4675);\t Wed, 5 Dec 2012 21:15:07 +0100\nReceived:
|
||||||
|
from NTEXCHANGE2.example.dom (172.17.253.213) by NTEXCHANGE3.example.dom\n (172.17.253.64)
|
||||||
|
with Microsoft SMTP Server (TLS) id 14.2.318.4; Wed, 5 Dec\n 2012 21:15:07 +0100\nReceived:
|
||||||
|
from ntserver-1.example.com (172.17.253.224) by NTEXCHANGE2.example.dom\n (172.17.253.213)
|
||||||
|
with Microsoft SMTP Server id 14.2.318.4; Wed, 5 Dec 2012\n 21:15:06 +0100\nReceived:
|
||||||
|
from ntserver-1.example.com ([192.168.100.117]) by ntserver-1.example.com with\n
|
||||||
|
Microsoft SMTPSVC(7.5.7601.17514);\t Wed, 5 Dec 2012 21:15:05 +0100\nReceived:
|
||||||
|
from mailout02.ispamone.de ( [212.211.146.205]) by ntserver-1.example.com\n\t(Reddoxx
|
||||||
|
engine) with SMTP id 56767845A33; Wed, 5 Dec 2012 21:15:05 +0100\nFrom: \"helpdesk@example.com\"
|
||||||
|
<helpdesk@example.com>\nTo: HelpDesk <HelpDesk@example.com>\nSubject: Neue Anfrage
|
||||||
|
erstellt - 33284\nThread-Topic: Neue Anfrage erstellt - 33284\nThread-Index: Ac3TJBhe4TPAltHrS6m3GwtF5lDkwA==\nContent-Class:
|
||||||
|
urn:content-classes:message\nDate: Wed, 5 Dec 2012 20:07:04 +0000\nMessage-ID:
|
||||||
|
<174905662.1354738495468.JavaMail.babeldoc@atlas>\nContent-Language: de-DE\nX-MS-Has-Attach:\nX-MS-Exchange-Organization-SCL:
|
||||||
|
0\nX-MS-TNEF-Correlator:\nContent-Type: multipart/alternative;\n\tboundary=\"_000_1749056621354738495468JavaMailbabeldocatlas_\"\nMIME-Version:
|
||||||
|
1.0\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_\nContent-Type: text/plain;
|
||||||
|
charset=\"utf-8\"\nContent-Transfer-Encoding: base64\n\ndGhpcyBpcyBhIHRlc3Q=\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_\nContent-Type:
|
||||||
|
text/html; charset=\"utf-8\"\nContent-Transfer-Encoding: base64\n\nPGgxPnRoaXMgaXMgYSB0ZXN0PC9oMT4=\n\n--_000_1749056621354738495468JavaMailbabeldocatlas_--\n"
|
||||||
|
filename: Neue Anfrage erstellt - 33284.eml
|
||||||
|
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
Content-Type: message/rfc822
|
||||||
|
Mime-Type: message/rfc822
|
||||||
|
Charset: UTF-8
|
||||||
|
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
data: "Received: from NTEXCHANGE3.example.dom ([172.17.253.64]) by ntexchange.example.dom\n
|
||||||
|
with Microsoft SMTPSVC(6.0.3790.4675);\t Wed, 5 Dec 2012 21:06:21 +0100\nReceived:
|
||||||
|
from NTEXCHANGE2.example.dom (172.17.253.213) by NTEXCHANGE3.example.dom\n (172.17.253.64)
|
||||||
|
with Microsoft SMTP Server (TLS) id 14.2.318.4; Wed, 5 Dec\n 2012 21:06:21 +0100\nReceived:
|
||||||
|
from ntserver-1.example.com (172.17.253.224) by NTEXCHANGE2.example.dom\n (172.17.253.213)
|
||||||
|
with Microsoft SMTP Server id 14.2.318.4; Wed, 5 Dec 2012\n 21:06:21 +0100\nReceived:
|
||||||
|
from ntserver-1.example.com ([192.168.100.117]) by ntserver-1.example.com with\n
|
||||||
|
Microsoft SMTPSVC(7.5.7601.17514);\t Wed, 5 Dec 2012 21:06:21 +0100\nReceived:
|
||||||
|
from mailout01.ispamone.de ( [212.211.146.204]) by ntserver-1.example.com\n\t(Reddoxx
|
||||||
|
engine) with SMTP id 40D026EFCF5; Wed, 5 Dec 2012 21:06:19 +0100\nFrom: \"bestellung.example@example.com\"
|
||||||
|
<bestellung.example@example.com>\nTo: HelpDesk <HelpDesk@example.com>\nSubject:
|
||||||
|
Call: HW-Anforderung; Best-nr.47524152\nThread-Topic: Call: HW-Anforderung; Best-nr.47524152\nThread-Index:
|
||||||
|
Ac3TI/8Xh/P5FTJFSYuBz+vjlXSWbg==\nDate: Wed, 5 Dec 2012 20:06:21 +0000\nMessage-ID:
|
||||||
|
<13553079.1354737948277.JavaMail.jboss@fpep>\nContent-Language: de-DE\nX-MS-Has-Attach:\nX-MS-Exchange-Organization-SCL:
|
||||||
|
0\nX-MS-TNEF-Correlator:\nContent-Type: text/plain; charset=\"iso-8859-1\"\nContent-Transfer-Encoding:
|
||||||
|
quoted-printable\nMIME-Version: 1.0\n\n\n\n\nWarenempf=E4nger:\nFirma: Example
|
||||||
|
GmbH\nName: Rehm\nVorname: Joachim\nStra=DFe K=E4ssstr.
|
||||||
|
19\nOrt: 00000 Ulm\nE-Mail: helpdesk@example.com\nTelefon:
|
||||||
|
\ 0000 166-2399\nFax: 0000 166-2309\nAbteilung: TN
|
||||||
|
23\nGeb=E4ude: EG, Zimmer E60\n\n\nBestellpositionen:\nPos. Menge Art.-Nr.
|
||||||
|
\ Bezeichnung Ansprechpartner =\n Aufstellort Bemerkungen
|
||||||
|
des Bestellers\n10 1.0 588164 Dell UltraSharp U2410 24 Andreas
|
||||||
|
Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n20 1.0 71705 Crossoverkabel
|
||||||
|
S/FTP, PiMF, Cat... Andreas Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n30
|
||||||
|
\ 1.0 777577_638Dell Latitude E6430 mit UMTS / ... Andreas Ring, GE-P=\nEW
|
||||||
|
\ K=E4ssstr. 19, 3. OG, Zi 306\n40 1.0 920-003052Logitech K270 cordless
|
||||||
|
Keyboard... Andreas Ring, GE-P=\nEW K=E4ssstr. 19, 3. OG, Zi 306\n"
|
||||||
|
filename: 'Call: HW-Anforderung; Best-nr.47524152.eml'
|
||||||
|
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
Content-Type: message/rfc822
|
||||||
|
Mime-Type: message/rfc822
|
||||||
|
Charset: UTF-8
|
||||||
|
|
|
@ -2,6 +2,17 @@
|
||||||
from: oracle@IG0-1-DB01.example.com
|
from: oracle@IG0-1-DB01.example.com
|
||||||
from_email: oracle@IG0-1-DB01.example.com
|
from_email: oracle@IG0-1-DB01.example.com
|
||||||
from_display_name: ''
|
from_display_name: ''
|
||||||
subject: 'Regelsets im Test-Status gefunden: 1'
|
|
||||||
to: support@example.com
|
to: support@example.com
|
||||||
|
subject: 'Regelsets im Test-Status gefunden: 1'
|
||||||
body: no visible content
|
body: no visible content
|
||||||
|
content_type: text/plain
|
||||||
|
attachments:
|
||||||
|
- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
data: |
|
||||||
|
RULESET_ID;NAME;ACTIV;RUN_MODE;AUDIT_MODIFY_DATE
|
||||||
|
387;DP DHL JOIN - EN : Einladung eAC;T;SM;1.09.14
|
||||||
|
filename: rulesets-report.csv
|
||||||
|
preferences: !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
Content-Type: text/csv
|
||||||
|
Mime-Type: text/csv
|
||||||
|
Charset: UTF-8
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1837
test/data/mail/mail073.box
Normal file
1837
test/data/mail/mail073.box
Normal file
File diff suppressed because it is too large
Load diff
39
test/data/mail/mail073.yml
Normal file
39
test/data/mail/mail073.yml
Normal file
File diff suppressed because one or more lines are too long
|
@ -8,23 +8,25 @@ class EmailParserTest < ActiveSupport::TestCase
|
||||||
|
|
||||||
to write new .yml files for emails you can use the following code:
|
to write new .yml files for emails you can use the following code:
|
||||||
|
|
||||||
File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.read('test/data/mail/mailXXX.box')).slice(:from, :from_email, :from_display_name, :to, :cc, :subject, :body, :content_type, :'reply-to').to_yaml)
|
File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.read('test/data/mail/mailXXX.box')).slice(:from, :from_email, :from_display_name, :to, :cc, :subject, :body, :content_type, :'reply-to', :attachments).to_yaml)
|
||||||
|
|
||||||
=end
|
=end
|
||||||
|
|
||||||
test 'parse' do
|
test 'parse' do
|
||||||
msg_files = Dir.glob(Rails.root.join('test', 'data', 'mail', 'mail*.box')).sort
|
msg_files = Dir.glob(Rails.root.join('test', 'data', 'mail', 'mail*.box')).sort
|
||||||
|
messages = []
|
||||||
messages = msg_files.select { |f| File.exists?(f.ext('yml')) }
|
msg_files.each do |f|
|
||||||
.map do |f|
|
next if !File.exists?(f.ext('yml'))
|
||||||
{
|
item = {
|
||||||
source: File.basename(f),
|
source: File.basename(f),
|
||||||
content: YAML.load(File.read(f.ext('yml'))),
|
content: YAML.load(File.read(f.ext('yml'))),
|
||||||
parsed: Channel::EmailParser.new.parse(File.read(f)),
|
parsed: Channel::EmailParser.new.parse(File.read(f)),
|
||||||
}
|
}
|
||||||
|
messages.push item
|
||||||
end
|
end
|
||||||
|
|
||||||
messages.each do |m|
|
messages.each do |m|
|
||||||
|
|
||||||
# assert: raw content hash is a subset of parsed message hash
|
# assert: raw content hash is a subset of parsed message hash
|
||||||
expected_msg = m[:content].except(:attachments)
|
expected_msg = m[:content].except(:attachments)
|
||||||
parsed_msg = m[:parsed].slice(*expected_msg.keys)
|
parsed_msg = m[:parsed].slice(*expected_msg.keys)
|
||||||
|
@ -35,23 +37,24 @@ File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.rea
|
||||||
|
|
||||||
# assert: attachments in parsed message hash match metadata in raw hash
|
# assert: attachments in parsed message hash match metadata in raw hash
|
||||||
next if m[:content][:attachments].blank?
|
next if m[:content][:attachments].blank?
|
||||||
|
attachments_found = []
|
||||||
# the formats of m[:content][:attachments] and m[:parsed][:attachments] don't match,
|
m[:content][:attachments].each do |expected_attachment|
|
||||||
# so we have to convert one to the other
|
expected_attachment_md5 = Digest::MD5.hexdigest(expected_attachment[:data])
|
||||||
parsed_attachment_metadata = m[:parsed][:attachments].map do |a|
|
m[:parsed][:attachments].each do |parsed_attachment|
|
||||||
{
|
parsed_attachment_md5 = Digest::MD5.hexdigest(parsed_attachment[:data])
|
||||||
md5: Digest::MD5.hexdigest(a[:data]),
|
next if attachments_found.include?(parsed_attachment_md5)
|
||||||
cid: a[:preferences]['Content-ID'],
|
next if expected_attachment_md5 != parsed_attachment_md5
|
||||||
filename: a[:filename],
|
attachments_found.push parsed_attachment_md5
|
||||||
}.with_indifferent_access
|
expected_attachment.each do |key, value|
|
||||||
|
assert_equal(value, parsed_attachment[key], "#{key} is different")
|
||||||
end
|
end
|
||||||
|
next
|
||||||
m[:content][:attachments].sort_by { |a| a[:md5] }
|
end
|
||||||
.zip(parsed_attachment_metadata.sort_by { |a| a[:md5] })
|
end
|
||||||
.each do |content, parsed|
|
next if attachments_found.count == m[:content][:attachments].count
|
||||||
assert_operator(content, :<=, parsed,
|
m[:content][:attachments].each do |expected_attachment|
|
||||||
"parsed attachment data from #{m[:source]} does not match " \
|
next if attachments_found.include?(Digest::MD5.hexdigest(expected_attachment[:data]))
|
||||||
"attachment metadata from #{m[:source].ext('yml')}")
|
assert(false, "Attachment not found test/data/mail/#{m[:source]}: #{expected_attachment.inspect}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue