Because of current Exchange/ISO/text-plain issues, moved to our own html2text converter, which strips out all tags except (b|i|ul|ol|li|u|h1|h2|h3|hr).

2016-06-21 17:14:15 +02:00 · 2016-06-21 17:14:15 +02:00 · 77214ffcc4
commit 77214ffcc4
parent 2f13a28c98
8 changed files with 255 additions and 323 deletions
--- a/app/assets/stylesheets/zammad.scss
+++ b/app/assets/stylesheets/zammad.scss
@ -8192,6 +8192,13 @@ output {
    border: none;
    overflow: auto;
  }
+
+  hr {
+    margin-top: 6px;
+    margin-bottom: 6px;
+    border: 0;
+    border-top: 1px solid #dfdfdf;
+  }
 }

 /*
--- a/app/models/channel/email_build.rb
+++ b/app/models/channel/email_build.rb
@ -242,7 +242,8 @@ Add/change markup to display html in any mail client nice.

    # https://github.com/martini/zammad/issues/165
    new_html = html.gsub('<blockquote type="cite">', '<blockquote type="cite" style="border-left: 2px solid blue; margin: 0 0 16px; padding: 8px 12px 8px 12px;">')
-    new_html.gsub!('<p>', '<p style="margin: 0;">')
+    new_html.gsub!(/<p>/mxi, '<p style="margin: 0;">')
+    new_html.gsub!(%r{</?hr>}mxi, '<hr style="margin-top: 6px; margin-bottom: 6px; border: 0; border-top: 1px solid #dfdfdf;">')
    new_html
  end

--- a/app/models/channel/email_parser.rb
+++ b/app/models/channel/email_parser.rb
@ -23,6 +23,7 @@ class Channel::EmailParser
    cc:                'Somebody <somebody@example.com>',
    subject:           'some message subject',
    body:              'some message body',
+    content_type:      'text/html', # text/plain
    date:              Time.zone.now,
    attachments:       [
      {
@ -128,33 +129,33 @@ class Channel::EmailParser
    # multi part email
    if mail.multipart?

+      # html attachment/body may exist and will be converted to strict html
+      if mail.html_part && mail.html_part.body
+        data[:body] = mail.html_part.body.to_s
+        data[:body] = Encode.conv(mail.html_part.charset.to_s, data[:body])
+        data[:body] = data[:body].html2html_strict.to_s.force_encoding('utf-8')
+
+        if !data[:body].force_encoding('UTF-8').valid_encoding?
+          data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
+        end
+        data[:content_type] = 'text/html'
+      end
+
      # text attachment/body exists
-      if mail.text_part
+      if data[:body].empty? && mail.text_part
        data[:body] = mail.text_part.body.decoded
        data[:body] = Encode.conv(mail.text_part.charset, data[:body])

        if !data[:body].valid_encoding?
          data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
        end
+        data[:content_type] = 'text/plain'
      end

-      # html attachment/body may exists and will be converted to text
-      if !mail.text_part || !data[:body] || data[:body] == ''
-        filename = '-no name-'
-        if mail.html_part && mail.html_part.body
-          filename = 'message.html'
-          data[:body] = mail.html_part.body.to_s
-          data[:body] = Encode.conv(mail.html_part.charset.to_s, data[:body])
-          data[:body] = data[:body].html2text.to_s.force_encoding('utf-8')
-
-          if !data[:body].force_encoding('UTF-8').valid_encoding?
-            data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
-          end
-
-        # any other attachments
-        else
-          data[:body] = 'no visible content'
-        end
+      # any other attachments
+      if data[:body].empty?
+        data[:body] = 'no visible content'
+        data[:content_type] = 'text/plain'
      end

      # add html attachment/body as real attachment
@ -194,6 +195,35 @@ class Channel::EmailParser

    # not multipart email

+    # html part only, convert to strict html and add the original body as attachment
+    elsif mail.mime_type && mail.mime_type.to_s.casecmp('text/html').zero?
+      filename = 'message.html'
+      data[:body] = mail.body.decoded
+      data[:body] = Encode.conv(mail.charset, data[:body])
+      data[:body] = data[:body].html2html_strict.to_s.force_encoding('utf-8')
+
+      if !data[:body].valid_encoding?
+        data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
+      end
+      data[:content_type] = 'text/html'
+
+      # add body as attachment
+      headers_store = {
+        'content-alternative' => true,
+      }
+      if mail.mime_type
+        headers_store['Mime-Type'] = mail.mime_type
+      end
+      if mail.charset
+        headers_store['Charset'] = mail.charset
+      end
+      attachment = {
+        data: mail.body.decoded,
+        filename: mail.filename || filename,
+        preferences: headers_store
+      }
+      data[:attachments].push attachment
+
    # text part only
    elsif !mail.mime_type || mail.mime_type.to_s == '' || mail.mime_type.to_s.casecmp('text/plain').zero?
      data[:body] = mail.body.decoded
@ -202,24 +232,10 @@ class Channel::EmailParser
      if !data[:body].force_encoding('UTF-8').valid_encoding?
        data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
      end
-
-    # html part only, convert to text and add it as attachment
+      data[:content_type] = 'text/plain'
    else
      filename = '-no name-'
-      if mail.mime_type.to_s.casecmp('text/html').zero?
-        filename = 'message.html'
-        data[:body] = mail.body.decoded
-        data[:body] = Encode.conv(mail.charset, data[:body])
-        data[:body] = data[:body].html2text.to_s.force_encoding('utf-8')
-
-        if !data[:body].valid_encoding?
-          data[:body] = data[:body].encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
-        end
-
-        # any other attachments
-      else
-        data[:body] = 'no visible content'
-      end
+      data[:body] = 'no visible content'

      # add body as attachment
      headers_store = {
@ -490,6 +506,7 @@ retrns
        ticket_id: ticket.id,
        type_id: Ticket::Article::Type.find_by(name: 'email').id,
        sender_id: Ticket::Article::Sender.find_by(name: 'Customer').id,
+        content_type: mail[:content_type],
        body: mail[:body],
        from: mail[:from],
        to: mail[:to],
--- a/lib/core_ext/string.rb
+++ b/lib/core_ext/string.rb
@ -78,7 +78,7 @@ class String

 =end

-  def html2text(string_only = false)
+  def html2text(string_only = false, strict = false)
    string = "#{self}" # rubocop:disable Style/UnneededInterpolation

    # in case of invalid encodeing, strip invalid chars
@ -98,6 +98,26 @@ class String
        link_list += "[#{counter}] #{link}\n"
        "[#{counter}] "
      }
+    else
+      string.gsub!(%r{<a\s+href=("|')(.+?)("|')(\s*|\s+[^>]*)>(.+?)<\s*/a\s*>}mxi) {|_placeholder|
+        link = $2
+        if !link.empty?
+          link.strip!
+        end
+        text = $5
+        if !text.empty?
+          text.strip!
+        end
+        placeholder = if !link.empty? && text.empty?
+                        link
+                      elsif link.empty? && !text.empty?
+                        text
+                      elsif !link.empty? && !text.empty? && (link.downcase == text.downcase || link.downcase == "mailto:#{text}".downcase || link.downcase == "http://#{text}".downcase)
+                        text
+                      else
+                        "#{text} (#{link})"
+                      end
+      }
    end

    # remove style tags with content
@ -105,6 +125,9 @@ class String

    # remove empty lines
    string.gsub!(/^\s*/m, '')
+    if strict
+      string.gsub!(%r{< \s* (/*) \s* (b|i|ul|ol|li|u|h1|h2|h3|hr) (\s*|\s+[^>]*) >}mxi, '######\1\2######')
+    end

    # pre/code handling 1/2
    string.gsub!(%r{<pre>(.+?)</pre>}m) { |placeholder|
@ -140,8 +163,8 @@ class String
    # add new lines
    string.gsub!(%r{</div><div(|\s.+?)>}im, "\n")
    string.gsub!(%r{</p><p(|\s.+?)>}im, "\n")
-    string.gsub!(%r{<(div|p|pre|br|table|h)(|/| [^>]*)>}i, "\n")
-    string.gsub!(%r{</(tr|p|br|div)(|\s.+?)>}i, "\n")
+    string.gsub!(%r{<(div|p|pre|br|table|tr|h)(|/| [^>]*)>}i, "\n")
+    string.gsub!(%r{</(p|br|div)(|\s.+?)>}i, "\n")
    string.gsub!(%r{</td>}i, ' ')

    # strip all other tags
@ -223,4 +246,17 @@ class String
    text.chomp
  end

+=begin
+
+  html = html_string.html2html_strict
+
+=end
+
+  def html2html_strict
+    string = html2text(true, true)
+    string = string.text2html
+    string.gsub!(/######(.+?)######/, '<\1>')
+    string.chomp
+  end
+
 end
--- a/test/unit/aaa_string_test.rb
+++ b/test/unit/aaa_string_test.rb
@ -333,4 +333,92 @@ Men-----------------------'

  end

+  test 'html2html_strict function' do
+
+    html   = 'test'
+    result = 'test'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '  test '
+    result = 'test'
+    assert_equal(result, html.html2html_strict)
+
+    html   = "\n\n  test \n\n\n"
+    result = 'test'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<b>test</b>'
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<B>test</B>'
+    result = '<B>test</B>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<i>test</i>'
+    result = '<i>test</i>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<h1>test</h1>'
+    result = '<h1>test</h1>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<h2>test</h2>'
+    result = '<h2>test</h2>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<h3>test</h3>'
+    result = '<h3>test</h3>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = "<b\n>test</b>"
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<b >test</b>'
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<b >test</b >'
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<b >test< /b >'
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = "<b\n>test<\n/b>"
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = "<b id=123 classs=\"\nsome_class\">test</b>"
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = "<b id=123 classs=\"\nsome_class\"\n>test<\n/b>"
+    result = '<b>test</b>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = "<ul id=123 classs=\"\nsome_class\"\n><li>test</li>\n<li class=\"asasd\">test</li><\n/ul>"
+    result = '<ul><li>test</li><li>test</li></ul>'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hallo Martin,<o:p></o:p></span></div>'
+    result = 'Hallo Martin,'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
+    result = 'john.smith@example.com'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<a href="MAILTO:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
+    result = 'john.smith@example.com'
+    assert_equal(result, html.html2html_strict)
+
+    html   = '<a href="mailto:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
+    result = 'john.smith@example.com (mailto:john.smith2@example.com)'
+    assert_equal(result, html.html2html_strict)
+
+  end
+
 end
--- a/test/unit/email_build_test.rb
+++ b/test/unit/email_build_test.rb
@ -59,7 +59,9 @@ class EmailBuildTest < ActiveSupport::TestCase
    data = parser.parse(mail.to_s)

    # check body
+    should = '&gt; Welcome!<br>&gt;<br>&gt; Thank you for installing Zammad. äöüß<br>&gt;'
    assert_equal(should, data[:body])
+    assert_equal('text/html', data[:content_type])

    # check count of attachments, only 2, because 3 part is text message and is already in body
    assert_equal(2, data[:attachments].length)
@ -200,6 +202,13 @@ text
 </p>
 <p style="margin: 0;">123</p>'
    assert_equal(html_should, html_with_fixes)
+
+    html_raw = '<p>sometext</p><hr><p>123</p>'
+    html_with_fixes = Channel::EmailBuild.html_mail_client_fixes(html_raw)
+    assert_not_equal(html_with_fixes, html_raw)
+
+    html_should = '<p style="margin: 0;">sometext</p><hr style="margin-top: 6px; margin-bottom: 6px; border: 0; border-top: 1px solid #dfdfdf;"><p style="margin: 0;">123</p>'
+    assert_equal(html_should, html_with_fixes)
  end

 end
--- a/test/unit/email_parser_test.rb
+++ b/test/unit/email_parser_test.rb
--- a/test/unit/email_process_test.rb
+++ b/test/unit/email_process_test.rb