Improved file name detection.

This commit is contained in:
Martin Edenhofer 2021-04-14 15:20:28 +02:00
parent 950b828a22
commit 3baec9c606
4 changed files with 2792 additions and 6 deletions

View file

@ -701,6 +701,9 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
end end
# cleanup content id, <> will be added automatically later # cleanup content id, <> will be added automatically later
if headers_store['Content-ID'].blank? && headers_store['Content-Id'].present?
headers_store['Content-ID'] = headers_store['Content-Id']
end
if headers_store['Content-ID'] if headers_store['Content-ID']
headers_store['Content-ID'].delete_prefix!('<') headers_store['Content-ID'].delete_prefix!('<')
headers_store['Content-ID'].delete_suffix!('>') headers_store['Content-ID'].delete_suffix!('>')
@ -742,11 +745,6 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
# for some broken sm mail clients (X-MimeOLE: Produced By Microsoft Exchange V6.5) # for some broken sm mail clients (X-MimeOLE: Produced By Microsoft Exchange V6.5)
filename ||= file.header[:content_location].to_s.force_encoding('utf-8') filename ||= file.header[:content_location].to_s.force_encoding('utf-8')
# generate file name based on content-id
if filename.blank? && headers_store['Content-ID'].present? && headers_store['Content-ID'] =~ /(.+?)@.+?/i
filename = $1
end
file_body = String.new(file.body.to_s) file_body = String.new(file.body.to_s)
# generate file name based on content type # generate file name based on content type
@ -786,6 +784,11 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
filename = filename.utf8_encode(fallback: :read_as_sanitized_binary) filename = filename.utf8_encode(fallback: :read_as_sanitized_binary)
end end
# generate file name based on content-id with file extension
if filename.blank? && headers_store['Content-ID'].present? && headers_store['Content-ID'] =~ /(.+?\..{2,6})@.+?/i
filename = $1
end
# e. g. Content-Type: video/quicktime # e. g. Content-Type: video/quicktime
if filename.blank? && (content_type = headers_store['Content-Type']) if filename.blank? && (content_type = headers_store['Content-Type'])
map = { map = {
@ -810,6 +813,11 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
end end
end end
# generate file name based on content-id without file extension
if filename.blank? && headers_store['Content-ID'].present? && headers_store['Content-ID'] =~ /(.+?)@.+?/i
filename = $1
end
# set fallback filename # set fallback filename
if filename.blank? if filename.blank?
filename = 'file' filename = 'file'

2727
test/data/mail/mail100.box Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -50,7 +50,7 @@ File.write('test/data/mail/mailXXX.yml', Channel::EmailParser.new.parse(File.rea
next if expected_attachment_md5 != parsed_attachment_md5 next if expected_attachment_md5 != parsed_attachment_md5
attachments_found.push parsed_attachment_md5 attachments_found.push parsed_attachment_md5
expected_attachment.each do |key, value| expected_attachment.each do |key, value|
assert_equal(value, parsed_attachment[key], "#{key} is different") assert_equal(value, parsed_attachment[key], "#{key} is different in test/data/mail/#{m[:source]}")
end end
next next
end end