Because of current Exchange/ISO/text-plain issues, moved to our own html2text converter, which strips out all tags except (b|i|ul|ol|li|u|h1|h2|h3|hr).

This commit is contained in:
Martin Edenhofer 2016-06-21 17:14:15 +02:00
parent 2f13a28c98
commit 77214ffcc4
8 changed files with 255 additions and 323 deletions

View file

@ -8192,6 +8192,13 @@ output {
border: none;
overflow: auto;
}
/* horizontal rule: 6px spacing above and below, no default border, single 1px light-grey top line */
hr {
margin-top: 6px;
margin-bottom: 6px;
border: 0;
border-top: 1px solid #dfdfdf;
}
}
/*

View file

@ -242,7 +242,8 @@ Add/change markup to display html in any mail client nice.
# https://github.com/martini/zammad/issues/165
new_html = html.gsub('<blockquote type="cite">', '<blockquote type="cite" style="border-left: 2px solid blue; margin: 0 0 16px; padding: 8px 12px 8px 12px;">')
new_html.gsub!('<p>', '<p style="margin: 0;">')
new_html.gsub!(/<p>/mxi, '<p style="margin: 0;">')
new_html.gsub!(%r{</?hr>}mxi, '<hr style="margin-top: 6px; margin-bottom: 6px; border: 0; border-top: 1px solid #dfdfdf;">')
new_html
end

View file

@ -23,6 +23,7 @@ class Channel::EmailParser
cc: 'Somebody <somebody@example.com>',
subject: 'some message subject',
body: 'some message body',
content_type: 'text/html', # text/plain
date: Time.zone.now,
attachments: [
{
@ -128,33 +129,33 @@ class Channel::EmailParser
# multi part email
if mail.multipart?
# html attachment/body may exist and will be converted to strict html
if mail.html_part && mail.html_part.body
data[:body] = mail.html_part.body.to_s
data[:body] = Encode.conv(mail.html_part.charset.to_s, data[:body])
data[:body] = data[:body].html2html_strict.to_s.force_encoding('utf-8')
if !data[:body].force_encoding('UTF-8').valid_encoding?
data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
end
data[:content_type] = 'text/html'
end
# text attachment/body exists
if mail.text_part
if data[:body].empty? && mail.text_part
data[:body] = mail.text_part.body.decoded
data[:body] = Encode.conv(mail.text_part.charset, data[:body])
if !data[:body].valid_encoding?
data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
end
data[:content_type] = 'text/plain'
end
# html attachment/body may exists and will be converted to text
if !mail.text_part || !data[:body] || data[:body] == ''
filename = '-no name-'
if mail.html_part && mail.html_part.body
filename = 'message.html'
data[:body] = mail.html_part.body.to_s
data[:body] = Encode.conv(mail.html_part.charset.to_s, data[:body])
data[:body] = data[:body].html2text.to_s.force_encoding('utf-8')
if !data[:body].force_encoding('UTF-8').valid_encoding?
data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
end
# any other attachments
else
data[:body] = 'no visible content'
end
# any other attachments
if data[:body].empty?
data[:body] = 'no visible content'
data[:content_type] = 'text/plain'
end
# add html attachment/body as real attachment
@ -194,6 +195,35 @@ class Channel::EmailParser
# not multipart email
# html part only, convert to strict html and add the original body as attachment
elsif mail.mime_type && mail.mime_type.to_s.casecmp('text/html').zero?
filename = 'message.html'
data[:body] = mail.body.decoded
data[:body] = Encode.conv(mail.charset, data[:body])
data[:body] = data[:body].html2html_strict.to_s.force_encoding('utf-8')
if !data[:body].valid_encoding?
data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
end
data[:content_type] = 'text/html'
# add body as attachment
headers_store = {
'content-alternative' => true,
}
if mail.mime_type
headers_store['Mime-Type'] = mail.mime_type
end
if mail.charset
headers_store['Charset'] = mail.charset
end
attachment = {
data: mail.body.decoded,
filename: mail.filename || filename,
preferences: headers_store
}
data[:attachments].push attachment
# text part only
elsif !mail.mime_type || mail.mime_type.to_s == '' || mail.mime_type.to_s.casecmp('text/plain').zero?
data[:body] = mail.body.decoded
@ -202,24 +232,10 @@ class Channel::EmailParser
if !data[:body].force_encoding('UTF-8').valid_encoding?
data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
end
# html part only, convert to text and add it as attachment
data[:content_type] = 'text/plain'
else
filename = '-no name-'
if mail.mime_type.to_s.casecmp('text/html').zero?
filename = 'message.html'
data[:body] = mail.body.decoded
data[:body] = Encode.conv(mail.charset, data[:body])
data[:body] = data[:body].html2text.to_s.force_encoding('utf-8')
if !data[:body].valid_encoding?
data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
end
# any other attachments
else
data[:body] = 'no visible content'
end
data[:body] = 'no visible content'
# add body as attachment
headers_store = {
@ -490,6 +506,7 @@ retrns
ticket_id: ticket.id,
type_id: Ticket::Article::Type.find_by(name: 'email').id,
sender_id: Ticket::Article::Sender.find_by(name: 'Customer').id,
content_type: mail[:content_type],
body: mail[:body],
from: mail[:from],
to: mail[:to],

View file

@ -78,7 +78,7 @@ class String
=end
def html2text(string_only = false)
def html2text(string_only = false, strict = false)
string = "#{self}" # rubocop:disable Style/UnneededInterpolation
# in case of invalid encoding, strip invalid chars
@ -98,6 +98,26 @@ class String
link_list += "[#{counter}] #{link}\n"
"[#{counter}] "
}
else
string.gsub!(%r{<a\s+href=("|')(.+?)("|')(\s*|\s+[^>]*)>(.+?)<\s*/a\s*>}mxi) {|_placeholder|
link = $2
if !link.empty?
link.strip!
end
text = $5
if !text.empty?
text.strip!
end
placeholder = if !link.empty? && text.empty?
link
elsif link.empty? && !text.empty?
text
elsif !link.empty? && !text.empty? && (link.downcase == text.downcase || link.downcase == "mailto:#{text}".downcase || link.downcase == "http://#{text}".downcase)
text
else
"#{text} (#{link})"
end
}
end
# remove style tags with content
@ -105,6 +125,9 @@ class String
# remove empty lines
string.gsub!(/^\s*/m, '')
if strict
string.gsub!(%r{< \s* (/*) \s* (b|i|ul|ol|li|u|h1|h2|h3|hr) (\s*|\s+[^>]*) >}mxi, '######\1\2######')
end
# pre/code handling 1/2
string.gsub!(%r{<pre>(.+?)</pre>}m) { |placeholder|
@ -140,8 +163,8 @@ class String
# add new lines
string.gsub!(%r{</div><div(|\s.+?)>}im, "\n")
string.gsub!(%r{</p><p(|\s.+?)>}im, "\n")
string.gsub!(%r{<(div|p|pre|br|table|h)(|/| [^>]*)>}i, "\n")
string.gsub!(%r{</(tr|p|br|div)(|\s.+?)>}i, "\n")
string.gsub!(%r{<(div|p|pre|br|table|tr|h)(|/| [^>]*)>}i, "\n")
string.gsub!(%r{</(p|br|div)(|\s.+?)>}i, "\n")
string.gsub!(%r{</td>}i, ' ')
# strip all other tags
@ -223,4 +246,17 @@ class String
text.chomp
end
=begin

  strict_html = html_string.html2html_strict

Reduce HTML to a strict subset of tags.

html2text is called in strict mode, where the allowed tags
(b|i|ul|ol|li|u|h1|h2|h3|hr) are masked as ######tag###### placeholders
before all other tags are stripped. The remaining text is passed through
text2html and the placeholders are then turned back into real tags.

returns

  strict_html # e.g. '<b>test</b>' for '<b id=123>test</b>'

=end
def html2html_strict
  html2text(true, true).text2html.tap { |markup|
    # restore the tags that strict mode masked as ######tag######
    markup.gsub!(/######(.+?)######/, '<\1>')
  }.chomp
end
end

View file

@ -333,4 +333,92 @@ Men-----------------------'
end
# Exercises String#html2html_strict: each case assigns an input (html) and the
# expected output (result) and compares them with assert_equal.
test 'html2html_strict function' do
# plain text passes through; surrounding whitespace and newlines are trimmed
html = 'test'
result = 'test'
assert_equal(result, html.html2html_strict)
html = ' test '
result = 'test'
assert_equal(result, html.html2html_strict)
html = "\n\n test \n\n\n"
result = 'test'
assert_equal(result, html.html2html_strict)
# allowed tags are kept, including their original letter case
html = '<b>test</b>'
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = '<B>test</B>'
result = '<B>test</B>'
assert_equal(result, html.html2html_strict)
html = '<i>test</i>'
result = '<i>test</i>'
assert_equal(result, html.html2html_strict)
html = '<h1>test</h1>'
result = '<h1>test</h1>'
assert_equal(result, html.html2html_strict)
html = '<h2>test</h2>'
result = '<h2>test</h2>'
assert_equal(result, html.html2html_strict)
html = '<h3>test</h3>'
result = '<h3>test</h3>'
assert_equal(result, html.html2html_strict)
# whitespace and newlines inside the tag delimiters are removed
html = "<b\n>test</b>"
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = '<b >test</b>'
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = '<b >test</b >'
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = '<b >test< /b >'
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = "<b\n>test<\n/b>"
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
# attributes on allowed tags are dropped
html = "<b id=123 classs=\"\nsome_class\">test</b>"
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = "<b id=123 classs=\"\nsome_class\"\n>test<\n/b>"
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
# lists: attributes are dropped and the newline between list items is removed
html = "<ul id=123 classs=\"\nsome_class\"\n><li>test</li>\n<li class=\"asasd\">test</li><\n/ul>"
result = '<ul><li>test</li><li>test</li></ul>'
assert_equal(result, html.html2html_strict)
# tags outside the allowed set (html, head, body, span, div, o:p) are stripped, only the text remains
html = '<html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hallo Martin,<o:p></o:p></span></div>'
result = 'Hallo Martin,'
assert_equal(result, html.html2html_strict)
# links: the anchor collapses to its text when the href is "mailto:" + text
# (compared case-insensitively); otherwise the result is "text (href)"
html = '<a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
result = 'john.smith@example.com'
assert_equal(result, html.html2html_strict)
html = '<a href="MAILTO:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
result = 'john.smith@example.com'
assert_equal(result, html.html2html_strict)
html = '<a href="mailto:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
result = 'john.smith@example.com (mailto:john.smith2@example.com)'
assert_equal(result, html.html2html_strict)
end
end

View file

@ -59,7 +59,9 @@ class EmailBuildTest < ActiveSupport::TestCase
data = parser.parse(mail.to_s)
# check body
should = '&gt; Welcome!<br>&gt;<br>&gt; Thank you for installing Zammad. äöüß<br>&gt;'
assert_equal(should, data[:body])
assert_equal('text/html', data[:content_type])
# check count of attachments: only 2, because the 3rd part is the text message, which is already in the body
assert_equal(2, data[:attachments].length)
@ -200,6 +202,13 @@ text
</p>
<p style="margin: 0;">123</p>'
assert_equal(html_should, html_with_fixes)
html_raw = '<p>sometext</p><hr><p>123</p>'
html_with_fixes = Channel::EmailBuild.html_mail_client_fixes(html_raw)
assert_not_equal(html_with_fixes, html_raw)
html_should = '<p style="margin: 0;">sometext</p><hr style="margin-top: 6px; margin-bottom: 6px; border: 0; border-top: 1px solid #dfdfdf;"><p style="margin: 0;">123</p>'
assert_equal(html_should, html_with_fixes)
end
end

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long