Show incoming images if local content. External images or links with images are still removed.

This commit is contained in:
Martin Edenhofer 2017-03-10 07:49:01 +01:00
parent 1b3d787c27
commit 63b0e4fe31
12 changed files with 1034 additions and 134 deletions

4
.gitignore vendored
View file

@ -25,6 +25,10 @@
# Ignore database config
/config/database.yml
# Ignore translation cache files
/config/locales*.yml
/config/translations/*.yml
# Ignore coverage stuff
/coverage

View file

@ -4,6 +4,11 @@ Rails.application.config.html_sanitizer_tags_remove_content = %w(
style
)
# content of these tags will be inserted HTML-quoted
Rails.application.config.html_sanitizer_tags_quote_content = %w(
script
)
# only these tags are allowed
Rails.application.config.html_sanitizer_tags_whitelist = %w(
a abbr acronym address area article aside audio
@ -22,7 +27,7 @@ Rails.application.config.html_sanitizer_attributes_whitelist = {
:all => %w(class dir lang style title translate data-signature data-signature-id),
'a' => %w(href hreflang name rel),
'abbr' => %w(title),
'blockquote' => %w(cite),
'blockquote' => %w(type cite),
'col' => %w(span width),
'colgroup' => %w(span width),
'data' => %w(value),

View file

@ -287,30 +287,68 @@ class String
=end
def html2html_strict(force = false)
string = html2text(true, true)
string.signature_identify(force)
string = string.text2html
string.gsub!(%r{######LINKEXT:(.+?)/TEXT:(.+?)######}, '<a href="\1" target="_blank">\2</a>')
string.gsub!(/######LINKRAW:(.+?)######/, '<a href="\1" target="_blank">\1</a>')
def html2html_strict
string = "#{self}" # rubocop:disable Style/UnneededInterpolation
string = HtmlSanitizer.cleanup(string).strip
string = HtmlSanitizer.strict(string, true).strip
# as fallback, use html2text and text2html
if string.blank?
string = html2text.text2html
string.signature_identify('text')
marker_template = '<span class="js-signatureMarker"></span>'
string.sub!(/######SIGNATURE_MARKER######/, marker_template)
string.gsub!(/######SIGNATURE_MARKER######/, '')
return string.chomp
end
#string.gsub!(/<p>[[:space:]]+<\/p><p>[[:space:]]+<\/p>/m, '<p> </p>')
string.gsub!(%r{<p>[[:space:]]+</p>\n<p>[[:space:]]+</p>}im, '<p> </p>')
string.gsub!(%r{<div>[[:space:]]+</div>\n<div>[[:space:]]+</div>}im, '<div> </div>')
string.gsub!(/<br>[[:space:]]?<br>[[:space:]]?<br>/im, '<br><br>')
string.gsub!(/<br>[[:space:]]?<br>[[:space:]]?<br>/im, '<br><br>')
string.gsub!(/<br>[[:space:]]?<br>[[:space:]]?<br>/im, '<br><br>')
string.gsub!(%r{<br/>[[:space:]]?<br/>[[:space:]]?<br/>}im, '<br/><br/>')
string.gsub!(%r{<br/>[[:space:]]?<br/>[[:space:]]?<br/>}im, '<br/><br/>')
string.gsub!(%r{<br/>[[:space:]]?<br/>[[:space:]]?<br/>}im, '<br/><br/>')
string.gsub!(%r{<p>[[:space:]]+</p>}im, '<p>&nbsp;</p>')
string.signature_identify('html')
marker_template = '<span class="js-signatureMarker"></span>'
string.sub!(/######SIGNATURE_MARKER######/, marker_template)
string.gsub!(/######SIGNATURE_MARKER######/, '')
string.gsub!(/######(.+?)######/, '<\1>')
string.chomp
end
def signature_identify(force = false)
def signature_identify(type = 'text', force = false)
string = self
marker = '######SIGNATURE_MARKER######'
if type == 'html'
map = [
'<br(|\/)>[[:space:]]*(--|__)',
'<\/div>[[:space:]]*(--|__)',
'<p>[[:space:]]*(--|__)',
'(<br(|\/)>|<p>|<div>)[[:space:]]*<b>(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ):[[:space:]]*</b>',
'<br>[[:space:]]*<br>[[:space:]]*(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ):[[:space:]]+',
'<blockquote(|.+?)>[[:space:]]*<div>[[:space:]]*(On|Am)',
]
map.each { |regexp|
string.sub!(/#{regexp}/m) { |placeholder|
placeholder = "#{marker}#{placeholder}"
}
}
return string
end
# if we have fewer than 10 lines and fewer than 300 chars, ignore this
if !force
lines = string.split("\n")
return if lines.count < 10 && string.length < 300
end
marker = '######SIGNATURE_MARKER######'
# search for signature separator "--\n"
string.sub!(/^\s{0,2}--\s{0,2}$/) { |placeholder|
placeholder = "#{marker}#{placeholder}"
@ -360,7 +398,7 @@ class String
# rubocop:enable Style/AsciiComments
# en/de/fr | sometimes ms adds a space to "xx : value"
map['ms-en-de-fr_from'] = '^(From|Von|De)( ?):[[:space:]].+?'
map['ms-en-de-fr_from'] = '^(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ)( ?):[[:space:]].+?'
map['ms-en-de-fr_from_html'] = "\n######b######(From|Von|De)([[:space:]]?):([[:space:]]?)(######\/b######)[[:space:]].+?"
# word 14
@ -369,9 +407,14 @@ class String
#map['word-en-de'] = "[^#{marker}].{1,250}\s(wrote|schrieb):"
map.each { |_key, regexp|
string.sub!(/#{regexp}/) { |placeholder|
placeholder = "#{marker}#{placeholder}"
}
begin
string.sub!(/#{regexp}/) { |placeholder|
placeholder = "#{marker}#{placeholder}"
}
rescue
# regexp was not possible because of some string encoding issue, use next
Rails.logger.debug "Invalid string/charset combination with regexp #{regexp} in string"
end
}
string

View file

@ -1,43 +1,106 @@
class HtmlSanitizer
def self.strict(string)
=begin
sanitize html string based on whitelist
string = HtmlSanitizer.strict(string, external)
=end
def self.strict(string, external = false)
# config
tags_remove_content = Rails.configuration.html_sanitizer_tags_remove_content
tags_quote_content = Rails.configuration.html_sanitizer_tags_quote_content
tags_whitelist = Rails.configuration.html_sanitizer_tags_whitelist
attributes_whitelist = Rails.configuration.html_sanitizer_attributes_whitelist
css_properties_whitelist = Rails.configuration.html_sanitizer_css_properties_whitelist
classes_whitelist = ['js-signatureMarker']
attributes_2_css = %w(width height)
scrubber = Loofah::Scrubber.new do |node|
# remove tags with subtree
if tags_remove_content.include?(node.name)
node.remove
Loofah::Scrubber::STOP
end
# remove tag, insert quoted content
if tags_quote_content.include?(node.name)
string = node.content
string.gsub!('&amp;', '&')
string.gsub!('&lt;', '<')
string.gsub!('&gt;', '>')
string.gsub!('&quot;', '"')
string.gsub!('&nbsp;', ' ')
text = Nokogiri::XML::Text.new(string, node.document)
node.add_next_sibling(text)
node.remove
Loofah::Scrubber::STOP
end
# replace tags, keep subtree
if !tags_whitelist.include?(node.name)
traversal(node, scrubber)
node.replace strict(node.children.to_s)
Loofah::Scrubber::STOP
end
# prepare src attribute
if node['src']
src = cleanup(node['src'])
src = cleanup_target(node['src'])
if src =~ /(javascript|livescript|vbscript):/i || src.start_with?('http', 'ftp', '//')
traversal(node, scrubber)
node.remove
Loofah::Scrubber::STOP
end
end
# clean class / only use allowed classes
if node['class']
classes = node['class'].gsub(/\t|\n|\r/, '').split(' ')
class_new = ''
classes.each { |local_class|
next if !classes_whitelist.include?(local_class.to_s.strip)
if class_new != ''
class_new += ' '
end
class_new += local_class
}
if class_new != ''
node['class'] = class_new
else
node.delete('class')
end
end
# move style attributes to css attributes
attributes_2_css.each { |key|
next if !node[key]
if node['style'].empty?
node['style'] = ''
else
node['style'] += ';'
end
value = node[key]
node.delete(key)
next if value.blank?
if value !~ /%|px|em/i
value += 'px'
end
node['style'] += "#{key}:#{value}"
}
# clean style / only use allowed style properties
if node['style']
pears = node['style'].downcase.gsub(/\t|\n|\r/, '').split(';')
style = ''
pears.each { |pear|
prop = pear.split(':')
pears.each { |local_pear|
prop = local_pear.split(':')
next if !prop[0]
key = prop[0].strip
next if !css_properties_whitelist.include?(key)
style += "#{pear};"
style += "#{local_pear};"
}
node['style'] = style
if style == ''
@ -48,7 +111,7 @@ class HtmlSanitizer
# scan for invalid link content
%w(href style).each { |attribute_name|
next if !node[attribute_name]
href = cleanup(node[attribute_name])
href = cleanup_target(node[attribute_name])
next if href !~ /(javascript|livescript|vbscript):/i
node.delete(attribute_name)
}
@ -60,33 +123,231 @@ class HtmlSanitizer
node.delete(attribute)
}
# remove mailto links
if node['href']
href = cleanup_target(node['href'])
if href =~ /mailto:(.*)$/i
text = Nokogiri::XML::Text.new($1, node.document)
node.add_next_sibling(text)
node.remove
Loofah::Scrubber::STOP
end
end
# prepare links
if node['href']
href = cleanup(node['href'])
href = cleanup_target(node['href'])
next if !href.start_with?('http', 'ftp', '//')
node.set_attribute('href', href)
node.set_attribute('rel', 'nofollow')
node.set_attribute('target', '_blank')
end
# check if href is different to text
if external && node.name == 'a' && !url_same?(node['href'], node.text)
if node.children.empty? || node.children.first.class == Nokogiri::XML::Text
text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
node.add_previous_sibling(text)
node['href'] = cleanup_target(node.text)
text = Nokogiri::XML::Text.new(')', node.document)
node.add_next_sibling(text)
else
text = Nokogiri::XML::Text.new(cleanup_target(node['href']), node.document)
node.content = text
end
end
# check if text has urls which need to be clickable
if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
if node.class == Nokogiri::XML::Text
urls = []
node.content.scan(%r{((http|https|ftp|tel)://.+?|(www..+?))([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
urls.push match[0]
}
next if urls.empty?
add_link(node.content, urls, node)
end
end
end
Loofah.fragment(string).scrub!(scrubber).to_s
end
=begin
cleanup html string:
* remove empty nodes (p, div, span)
* remove nodes in general (keep content - span)
string = HtmlSanitizer.cleanup(string)
=end
# Pre-cleans an HTML string before it is sanitized:
# * strips single-letter namespaced tags such as <o:p> / </o:p>
# * removes tabs, normalises line endings and collapses runs of empty lines
# * hands the result to cleanup_replace_tags and then cleanup_structure
#
# The argument itself is left untouched (a copy is processed), so frozen
# strings are accepted and callers do not see a half-cleaned input.
#
# Returns the cleaned HTML as returned by cleanup_structure.
def self.cleanup(string)
  html = string.dup

  # [A-Za-z] instead of [A-z]: the latter also matches the punctuation
  # characters that sit between "Z" and "a" in ASCII ([ \ ] ^ _ `)
  html.gsub!(/<[A-Za-z]:[A-Za-z]>/, '')
  html.gsub!(%r{</[A-Za-z]:[A-Za-z]>}, '')
  html.delete!("\t")

  # normalise the different line ending variants to "\n"
  html.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, "\n")

  # collapse three or more consecutive new lines into two
  html.gsub!(/\n\n\n+/, "\n\n")

  html = cleanup_replace_tags(html)
  cleanup_structure(html)
end
# Flattens table markup into <br>-separated text and unwraps purely
# structural tags (span, table parts, center) while keeping their content.
#
# Every <table>...</table> run is rewritten in place inside the given string
# by applying the substitutions below in order; afterwards the string is
# parsed with Loofah and each remaining listed tag is replaced by its
# (recursively processed) children.
#
# Returns the serialized fragment.
def self.cleanup_replace_tags(string)
  # order matters; the <br> collapsing rules are intentionally applied twice
  table_rules = [
    [/<table(.+?|)>/im, '<br>'],
    [%r{</table>}im, ' '],
    [/<thead(.+?|)>/im, ''],
    [%r{</thead>}im, ' '],
    [/<tbody(.+?|)>/im, ''],
    [%r{</tbody>}im, ' '],
    [/<tr(.+?|)>/im, "<br>\n"],
    [%r{</td>}im, ' '],
    [/<td(.+?|)>/im, ''],
    [%r{</tr>}im, "\n<br>"],
    [/<br>[[:space:]]?<br>/im, '<br>'],
    [/<br>[[:space:]]?<br>/im, '<br>'],
    [%r{<br/>[[:space:]]?<br/>}im, '<br/>'],
    [%r{<br/>[[:space:]]?<br/>}im, '<br/>'],
  ]

  string.gsub!(%r{(<table(.+?|)>.+?</table>)}mxi) do |fragment|
    table_rules.each do |pattern, replacement|
      fragment.gsub!(pattern, replacement)
    end
    fragment
  end

  unwrap_tags = %w(span table thead tbody td tr center)
  unwrapper = Loofah::Scrubber.new do |element|
    next unless unwrap_tags.include?(element.name)

    # swap the tag for its cleaned-up children and do not descend further
    element.replace cleanup_replace_tags(element.children.to_s)
    Loofah::Scrubber::STOP
  end

  Loofah.fragment(string).scrub!(unwrapper).to_s
end
# Simplifies the structure of an HTML fragment:
# * removes empty <b>, <i>, <u> and <small> nodes
# * unwraps a p/div/span/small node whose only child is another such node
# * replaces mailto links by their plain address
# * makes link text and link target consistent ("text (url)")
# * collapses whitespace in text nodes outside of <pre>/<code>
#
# Returns the serialized fragment.
def self.cleanup_structure(string)
  remove_empty_nodes = %w(p div span small)
  remove_empty_last_nodes = %w(b i u small)

  scrubber = Loofah::Scrubber.new do |node|
    # NOTE: the scrubber only stops when STOP is the *return value* of the
    # block, so removed nodes have to leave the block via `next`; otherwise
    # the checks below would keep operating on a node that is already detached.
    if remove_empty_last_nodes.include?(node.name) && node.children.size.zero?
      node.remove
      next Loofah::Scrubber::STOP
    end

    # unwrap nested container nodes (e.g. <div><div>..</div></div>)
    if remove_empty_nodes.include?(node.name) && node.children.size == 1 && remove_empty_nodes.include?(node.children.first.name) # && node.children.first.text.blank?
      node.replace cleanup_structure(node.children.to_s)
    end

    # remove mailto links, keep the address as plain text
    if node['href']
      href = cleanup_target(node['href'])
      if href =~ /mailto:(.*)$/i
        text = Nokogiri::XML::Text.new($1, node.document)
        node.add_next_sibling(text)
        node.remove
        next Loofah::Scrubber::STOP
      end
    end

    # check if href is different to text
    if node.name == 'a' && !url_same?(node['href'], node.text)
      if node.children.empty? || node.children.first.class == Nokogiri::XML::Text
        # plain text link: render as "text (<a>url</a>)"
        text = Nokogiri::XML::Text.new("#{node.text} (", node.document)
        node.add_previous_sibling(text)
        node.content = cleanup_target(node['href'])
        node['href'] = cleanup_target(node['href'])
        text = Nokogiri::XML::Text.new(')', node.document)
        node.add_next_sibling(text)
      else
        # link wraps other markup: replace the content by the target
        text = Nokogiri::XML::Text.new(cleanup_target(node['href']), node.document)
        node.content = text
      end
    end

    # remove not needed new lines
    if node.class == Nokogiri::XML::Text
      if !node.parent || (node.parent.name != 'pre' && node.parent.name != 'code')
        content = node.content
        if content
          if content != ' ' && content != "\n"
            content.gsub!(/[[:space:]]+/, ' ')
          end
          if node.previous
            if node.previous.name == 'div' || node.previous.name == 'p'
              content.strip!
            end
          elsif node.parent && !node.previous
            if (node.parent.name == 'div' || node.parent.name == 'p') && content != ' ' && content != "\n"
              content.strip!
            end
          end
          node.content = content
        end
      end
    end
  end

  Loofah.fragment(string).scrub!(scrubber).to_s
end
def self.traversal(node, scrubber)
node.children.each { |child|
if child.class == Nokogiri::XML::CDATA
node.before Nokogiri::XML::Text.new(node.content, node.document)
else
node.before Loofah.fragment(child.to_s).scrub!(scrubber)
def self.add_link(content, urls, node)
if urls.empty?
text = Nokogiri::XML::Text.new(content, node.document)
node.add_next_sibling(text)
return
end
url = urls.shift
if content =~ /^(.*)#{Regexp.quote(url)}(.*)$/mx
pre = $1
post = $2
if url =~ /^www/i
url = "http://#{url}"
end
}
node.remove
a = Nokogiri::XML::Node.new 'a', node.document
a['href'] = url
a['rel'] = 'nofollow'
a['target'] = '_blank'
a.content = url
if node.class != Nokogiri::XML::Text
text = Nokogiri::XML::Text.new(pre, node.document)
node.add_next_sibling(text).add_next_sibling(a)
return if post.blank?
add_link(post, urls, a)
return
end
node.content = pre
node.add_next_sibling(a)
return if post.blank?
add_link(post, urls, a)
end
end
def self.cleanup(string)
string.downcase.gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '')
def self.cleanup_target(string)
URI.unescape(string).downcase.gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '')
end
private_class_method :traversal
private_class_method :cleanup
# Checks whether two URLs point to the same target once cosmetic
# differences are ignored.
#
# Both values are converted with to_s (so nil is accepted), percent-decoded,
# downcased, stripped of all whitespace and of one trailing slash. They are
# considered the same if the normalised values are equal or differ only by
# a leading "http://" or "https://".
#
# url_new - first URL (e.g. the href of a link)
# url_old - second URL (e.g. the text of a link)
#
# Returns true or false.
def self.url_same?(url_new, url_old)
  # URI.unescape is obsolete and was removed in Ruby 3.0; the RFC 2396
  # parser provides the same percent-decoding
  parser = URI::RFC2396_Parser.new

  normalized_new, normalized_old = [url_new, url_old].map do |url|
    # drop whitespace first, so a slash followed by blanks still counts
    # as a trailing slash
    parser.unescape(url.to_s).downcase.gsub(/[[:space:]]/, '').sub(%r{/\z}, '').strip
  end

  return true if normalized_new == normalized_old

  %w(http:// https://).any? do |scheme|
    "#{scheme}#{normalized_new}" == normalized_old || "#{scheme}#{normalized_old}" == normalized_new
  end
end
private_class_method :cleanup_target
private_class_method :add_link
private_class_method :url_same?
end

View file

@ -315,7 +315,7 @@ lass=3DMsoNormal><span style=3D'font-size:10.5pt;font-family:Consolas;color=
:#1F497D'>T: +49 (0) XXX XX XX 22<o:p></o:p></span></p><p class=3DMsoNor=
mal><span style=3D'font-size:10.5pt;font-family:Consolas;color:#1F497D'>F: =
+49 (0) XXX XX XX 11<o:p></o:p></span></p><p class=3DMsoNormal><span sty=
le=3D'font-size:10.5pt;font-family:Consolas;color:#1F497D'>W: www.brain-scc=
le=3D'font-size:10.5pt;font-family:Consolas;color:#1F497D'>W: www.example=
.de <o:p></o:p></span></p><p class=3DMsoNormal><span style=3D'font-size:10.=
5pt;font-family:Consolas;color:#1F497D'><o:p>&nbsp;</o:p></span></p><p clas=
s=3DMsoNormal><span style=3D'font-size:10.5pt;font-family:Consolas;color:#1=

View file

@ -46,7 +46,7 @@ don&#39;t&nbsp;cry&nbsp;-&nbsp;work!&nbsp;(Rainald&nbsp;Goetz)</div>
<div name="quote" style="margin:10px 5px 5px 10px; padding: 10px 0 10px 10px; border-left:2px solid #C3D9E5; word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
<div style="margin:0 0 10px 0;"><b>Gesendet:</b>&nbsp;Mittwoch, 03. Februar 2016 um 12:43 Uhr<br/>
<b>Von:</b>&nbsp;&quot;Martin Smith&quot; &lt;m.Smith@example.com&gt;<br/>
<b>An:</b>&nbsp;linuxhotel@zammad.com<br/>
<b>An:</b>&nbsp;linuxhotel@example.com<br/>
<b>Betreff:</b>&nbsp;Fw: Zugangsdaten</div>
<div name="quoted-content">

View file

@ -431,7 +431,7 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = '<B>test</B>'
result = '<B>test</B>'
result = '<b>test</b>'
assert_equal(result, html.html2html_strict)
html = '<i>test</i>'
@ -450,81 +450,154 @@ Men-----------------------'
result = '<h3>test</h3>'
assert_equal(result, html.html2html_strict)
html = '<h3>test</h3><!-- some comment -->'
result = '<h3>test</h3>'
assert_equal(result, html.html2html_strict)
html = "<div>\n\n\ntest\n\n\n</div>"
result = "<div>test</div>"
assert_equal(result, html.html2html_strict)
html = "<div>\n\t\ntest\n\t\n</div>"
result = "<div>test</div>"
assert_equal(result, html.html2html_strict)
html = "<div>\n\t\ntest 123\n\t\n</div>"
result = "<div>test 123</div>"
assert_equal(result, html.html2html_strict)
html = "<div><p> </p><p> </p></div>"
result = "<div>\n<p>&nbsp;</p>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<div><div> </div><div> </div></div>"
result = "<div>\n<div> </div>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<pre>a\nb\nc</pre>"
result = "<pre>a\nb\nc</pre>"
assert_equal(result, html.html2html_strict)
html = "<div><pre>a\nb\nc</pre></div>"
result = "<div><pre>a\nb\nc</pre></div>"
assert_equal(result, html.html2html_strict)
html = '<div class="WordSection1">
<p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau Koppenhagen,<o:p></o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
<o:p></o:p></span></p>
<div>'
result = '<div>
<p>Guten Morgen, Frau Koppenhagen,</p>
<p>&nbsp;</p>
<p>vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</p>
<p>&nbsp;</p>
<p>Nochmals vielen Dank und herzliche Grüße</p>
<div></div>
</div>'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com">http://example.com</a>'
result = '<a href="http://example.com" target="_blank">http://example.com</a>'
result = '<a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>'
assert_equal(result, html.html2html_strict)
html = '<A href="http://example.com?a=1;">http://example.com?a=1;</A>'
result = '<a href="http://example.com?a=1;" target="_blank">http://example.com?a=1;</a>'
result = '<a href="http://example.com?a=1;" rel="nofollow" target="_blank">http://example.com?a=1;</a>'
assert_equal(result, html.html2html_strict)
html = '<a href="http://web.de">web.de</a>'
result = '<a href="http://web.de" target="_blank">web.de</a>'
result = '<a href="http://web.de" rel="nofollow" target="_blank">web.de</a>'
assert_equal(result, html.html2html_strict)
html = '<a id="123" href="http://web.de">web.de</a>'
result = '<a href="http://web.de" target="_blank">web.de</a>'
result = '<a href="http://web.de" rel="nofollow" target="_blank">web.de</a>'
assert_equal(result, html.html2html_strict)
html = '<br>https://www.facebook.com/test<br>'
result = '<a href="https://www.facebook.com/test" target="_blank">https://www.facebook.com/test</a>'
result = '<br><a href="https://www.facebook.com/test" rel="nofollow" target="_blank">https://www.facebook.com/test</a><br>'
assert_equal(result, html.html2html_strict)
html = 'some text http://example.com some other text'
result = 'some text <a href="http://example.com" target="_blank">http://example.com</a> some other text'
result = 'some text <a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a> some other text'
assert_equal(result, html.html2html_strict)
html = 'some text www.example.com some other text'
result = 'some text <a href="http://www.example.com" target="_blank">http://www.example.com</a> some other text'
result = 'some text <a href="http://www.example.com" rel="nofollow" target="_blank">http://www.example.com</a> some other text'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com">http://what-different.example.com</a>'
result = 'http://example.com (<a href="http://what-different.example.com" target="_blank">http://what-different.example.com</a>)'
result = 'http://example.com (<a href="http://what-different.example.com" target="_blank">http://what-different.example.com</a>)'
#result = 'http://example.com (<a href="http://what-different.example.com" rel="nofollow" target="_blank">http://what-different.example.com</a>)'
result = '<a href="http://what-different.example.com" rel="nofollow" target="_blank">http://what-different.example.com</a> (<a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>)'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com">http://EXAMPLE.com</a>'
result = '<a href="http://example.com" target="_blank">http://EXAMPLE.com</a>'
result = '<a href="http://example.com" rel="nofollow" target="_blank">http://EXAMPLE.com</a>'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com" class="abc">http://example.com</a>'
result = '<a href="http://example.com" target="_blank">http://example.com</a>'
result = '<a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com/" class="abc">http://example.com</a>'
result = '<a href="http://example.com/" target="_blank">http://example.com</a>'
result = '<a href="http://example.com/" rel="nofollow" target="_blank">http://example.com</a>'
assert_equal(result, html.html2html_strict)
html = "<a href=\"http://example.com/\n\" class=\"abc\">http://example.com</a>"
result = '<a href="http://example.com/" target="_blank">http://example.com</a>'
result = '<a href="http://example.com/" rel="nofollow" target="_blank">http://example.com</a>'
assert_equal(result, html.html2html_strict)
html = "<a href=\"http://example.com/\n \" class=\"abc\n\"\n>http://example.com</a>"
result = '<a href="http://example.com/" target="_blank">http://example.com</a>'
result = '<a href="http://example.com/" rel="nofollow" target="_blank">http://example.com</a>'
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com</div>"
result = '<a href="http://example.com" target="_blank">http://example.com</a>'
result = "<div><a href=\"http://example.com\" rel=\"nofollow\" target=\"_blank\">http://example.com</a>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com.</div>"
result = '<a href="http://example.com" target="_blank">http://example.com</a>.'
result = "<div><a href=\"http://example.com\" rel=\"nofollow\" target=\"_blank\">http://example.com</a>.</div>"
assert_equal(result, html.html2html_strict)
html = "<div>lala http://example.com.</div>"
result = '<div>lala <a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>.</div>'
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com, and so on</div>"
result = '<a href="http://example.com" target="_blank">http://example.com</a>, and so on'
result = "<div><a href=\"http://example.com\" rel=\"nofollow\" target=\"_blank\">http://example.com</a>, and so on</div>"
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com?lala=me, and so on</div>"
result = '<a href="http://example.com?lala=me" target="_blank">http://example.com?lala=me</a>, and so on'
result = "<div><a href=\"http://example.com?lala=me\" rel=\"nofollow\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
assert_equal(result, html.html2html_strict)
html = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
result = "<a href=\"http://facebook.de/examplesrbog\" target=\"_blank\">http://facebook.de/examplesrbog</a>"
#result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\"><span lang=\"EN-US\">http://facebook.de/examplesrbog</span></a>"
result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\">http://facebook.de/examplesrbog</a>"
assert_equal(result, html.html2html_strict)
html = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href=\"http://newsletters.cylex.de/\" class=\"\">Link des Adventkalenders</a> in<br class=\"\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class=\"\">&nbsp;"
result = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders (<a href=\"http://newsletters.cylex.de/\" target=\"_blank\">http://newsletters.cylex.de/</a>) in<br>      Ihrer Lesezeichen-Symbolleiste zu ergänzen."
result = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders (<a href=\"http://newsletters.cylex.de/\" rel=\"nofollow\" target=\"_blank\">http://newsletters.cylex.de/</a>) in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>"
assert_equal(result, html.html2html_strict)
html = "<div>
abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>"
result = "<div>abc<span class=\"js-signatureMarker\"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>"
result = "<div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<div> abc<p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p> </div>"
result = "<div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Mit freundlichem Gruß<span class=\"Apple-converted-space\">&nbsp;</span><br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.<o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"mailto:john.smith@example.com\" style=color: blue; text-decoration: underline; \">john.smith@example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=http://www.example.com\" style=\"color: blue; text-decoration: underline; \">www.example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div>"
result = "<div>Mit freundlichem Gruß<br><br>John Smith<br>Service und Support<br><br>Example Service AG &amp; Co.</div><div>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br>\n</div><div>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</div><div>john.smith@example.com</div><div><a href=\"http://www.example.com\" rel=\"nofollow\" target=\"_blank\">http://www.example.com</a> (<a href=\"http://www.example.com%22\" rel=\"nofollow\" target=\"_blank\">http://www.example.com\"</a>)</div>"
assert_equal(result, html.html2html_strict)
html = '<b >test</b>'
@ -536,11 +609,37 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = '<b >test< /b >'
result = '<b>test</b>'
result = '<b>test&lt; /b &gt;</b>'
assert_equal(result, html.html2html_strict)
html = "<b\n>test<\n/b>"
result = '<b>test</b>'
result = "<b>test&lt; /b&gt;</b>"
assert_equal(result, html.html2html_strict)
html = '<table>
<tr>
<td bgcolor=white><font size=2 face="sans-serif"><b>Franz Schäfer</b></font>
<tr>
<td bgcolor=white><font size=2 face="sans-serif">Manager Information Systems</font></table>
<br>
<table>
<tr>
<td bgcolor=white><font size=2 face="sans-serif">Telefon &nbsp;</font>
<td bgcolor=white><font size=2 face="sans-serif">+49 000 000 8565</font>
<tr>
<td colspan=2 bgcolor=white><font size=2 face="sans-serif">christian.schaefer@example.com</font></table>
<br>
<table>'
result = "<br><b>Franz Schäfer</b><br><br><br><br>Telefon\n+49 000 000 8565<br>\nchristian.schaefer@example.com<br><br><br>"
result = '<br>
<b>Franz Schäfer</b>
<br>
Manager Information Systems <br>
<br>
Telefon
+49 000 000 8565
<br>
christian.schaefer@example.com <br>'
assert_equal(result, html.html2html_strict)
html = "<b id=123 classs=\"\nsome_class\">test</b>"
@ -548,15 +647,113 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = "<b id=123 classs=\"\nsome_class\"\n>test<\n/b>"
result = '<b>test</b>'
result = "<b>test&lt; /b&gt;</b>"
assert_equal(result, html.html2html_strict)
html = "<ul id=123 classs=\"\nsome_class\"\n><li>test</li>\n<li class=\"asasd\">test</li><\n/ul>"
result = '<ul><li>test</li><li>test</li></ul>'
result = "<ul>\n<li>test</li>\n<li>test</li>&lt; /ul&gt;</ul>"
assert_equal(result, html.html2html_strict)
html = '<html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hallo Martin,<o:p></o:p></span></div>'
result = 'Hallo Martin,'
html = '<div><div>Hello Martin,</div></div>'
result = '<div>Hello Martin,</div>'
assert_equal(result, html.html2html_strict)
html = '<div lang="DE"><div><div>Hello Martin,</div></div></div>'
result = '<div>Hello Martin,</div>'
assert_equal(result, html.html2html_strict)
html = '<div lang="DE"><div><div>Hello Martin,</div> </div></div>'
result = "<div>\n<div>Hello Martin,</div>\n</div>"
assert_equal(result, html.html2html_strict)
html = '<span></span>'
result = ''
assert_equal(result, html.html2html_strict)
html = '<p lang="DE"><b><span></span></b></p>'
result = '<p lang="DE"></p>'
assert_equal(result, html.html2html_strict)
html = '<div>lala<div lang="DE"><p><span>Hello Martin,</span></p></div></div>'
result = "<div>lala<p>Hello Martin,</p>\n</div>"
assert_equal(result, html.html2html_strict)
html = '<p lang="DE"><b><span>Hello Martin,</span></b></p>'
result = '<p lang="DE"><b>Hello Martin,</b></p>'
assert_equal(result, html.html2html_strict)
html = '<body lang="DE" link="blue" vlink="purple"><div class="WordSection1">
<p class="MsoNormal"><span style="color:#1F497D">Guten Morgen, Frau ABC,<o:p></o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D">vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?<o:p></o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<p class="MsoNormal"><span style="color:#1F497D">Nochmals vielen Dank und herzliche Grüße
<o:p></o:p></span></p>
<div>
<p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D"><o:p>&nbsp;</o:p></span></b></p>
<p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">Anna Smith<o:p></o:p></span></b></p>
<p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc SEV GmbH<o:p></o:p></span></b></p>
<p class="MsoNormal"><b><span style="font-size:10.0pt;color:#1F497D">art abc TRAV<o:p></o:p></span></b></p>
<p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">Marktstätte 123<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">123456 Dorten<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-1<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">T: &#43;49 (0) 12345/1234560-0<o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:9.0pt;color:#1F497D">F: &#43;49 (0) 12345/1234560-2<o:p></o:p></span></p>
<p class="MsoNormal"><a href="mailto:annad@example.com"><span style="font-size:9.0pt">annad@example.com</span></a><span style="font-size:9.0pt;color:#C00000"><o:p></o:p></span></p>
<p class="MsoNormal"><a href="http://www.example.com/"><span style="font-size:9.0pt">www.example.com</span></a><span style="font-size:9.0pt;color:#1F497D">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
</span><a href="http://www.ABC.com/"><span style="font-size:9.0pt">www.ABC.com</span></a><span style="font-size:9.0pt;color:#1F497D"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-size:8.0pt;color:#1F497D">Geschäftsführer Vor Nach, VorUndZu Nach&nbsp;&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp;&nbsp; Amtsgericht Dort HRB 12345&nbsp;&nbsp;&nbsp; -&nbsp;&nbsp;&nbsp; Ein Unternehmer der ABC Gruppe<o:p></o:p></span></p>'
result = '<div>
<p>Guten Morgen, Frau ABC,</p>
<p>&nbsp;</p>
<p>vielen Dank für die Reservierung. Dabei allerdings die Sprache (Niederländisch) nicht erwähnt. Können Sie bitte dieses in Ihrer Reservierung vormerken?</p>
<p>&nbsp;</p>
<p>Nochmals vielen Dank und herzliche Grüße</p>
<div>
<p><b> </b></p>
<p><b>Anna Smith</b></p>
<p><b>art abc SEV GmbH</b></p>
<p><b>art abc TRAV</b></p>
<p>Marktstätte 123</p>
<p>123456 Dorten</p>
<p>T: +49 (0) 12345/1234560-1</p>
<p>T: +49 (0) 12345/1234560-0</p>
<p>F: +49 (0) 12345/1234560-2</p>
<p>annad@example.com</p>
<p><a href="http://www.example.com/" rel="nofollow" target="_blank">www.example.com</a> <a href="http://www.abc.com/" rel="nofollow" target="_blank">www.ABC.com</a></p>
<p>Geschäftsführer Vor Nach, VorUndZu Nach - Amtsgericht Dort HRB 12345 - Ein Unternehmer der ABC Gruppe</p>
</div>
</div>'
assert_equal(result, html.html2html_strict)
html = '<p class="MsoNormal"><span style="color:#1F497D"><o:p>&nbsp;</o:p></span></p>
<div>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;">Von:</span></b><span style="font-size:10.0pt;font-family:&quot;Tahoma&quot;,&quot;sans-serif&quot;"> Besucherbüro, MKuk [<a href="mailto:besucherbuero@example.com">mailto:besucherbuero@example.com</a>] <br>
<b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
<b>An:</b> \'Amaia Epalza\'<br>
<b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017<o:p></o:p></span></p>
</div>
</div>
<p class="MsoNormal"><o:p>&nbsp;</o:p></p>
<p class="MsoNormal"><b><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">Reservierungsbestätigung Führung Skulptur-Projekte 2017 am
</span></b><o:p></o:p></p>
<p class="MsoNormal"><span style="font-size:10.0pt;font-family:&quot;Segoe UI&quot;,&quot;sans-serif&quot;;color:#1F497D">&nbsp;</span><o:p></o:p></p>
<p class="MsoNormal">Guten Morgen Frau Epalza,<o:p></o:p></p>'
result = '<p>&nbsp;</p><div>
<div>
<span class="js-signatureMarker"></span><p><b>Von:</b> Besucherbüro, MKuk [besucherbuero@example.com] <br>
<b>Gesendet:</b> Freitag, 16. Dezember 2016 08:05<br>
<b>An:</b> \'Amaia Epalza\'<br>
<b>Betreff:</b> AW: Gruppe vtb Kultuur // 28.06.2017</p>
</div>
</div><p>&nbsp;</p><p><b>Reservierungsbestätigung Führung Skulptur-Projekte 2017 am </b></p><p>&nbsp;</p><p>Guten Morgen Frau Epalza,</p>'
assert_equal(result, html.html2html_strict)
html = '<html><head><base href="x-msg://2849/"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space; "><span class="Apple-style-span" style="border-collapse: separate; font-family: Helvetica; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: normal; orphans: 2; text-align: -webkit-auto; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; -webkit-border-vertical-spacing: 0px; -webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div lang="DE" link="blue" vlink="purple"><div class="Section1" style="page: Section1; "><div style="margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; "><span style="font-size: 10pt; font-family: Arial, sans-serif; ">Hello Martin,<o:p></o:p></span></div>'
result = '<div>Hello Martin,</div>'
assert_equal(result, html.html2html_strict)
html = '<a href="mailto:john.smith@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
@ -569,9 +766,23 @@ Men-----------------------'
html = '<a href="mailto:john.smith2@example.com" style="color: blue; text-decoration: underline; ">john.smith@example.com</a>'
#result = 'john.smith@example.com (mailto:john.smith2@example.com)'
result = 'john.smith@example.com'
result = 'john.smith2@example.com'
assert_equal(result, html.html2html_strict)
html = '<p class="MsoNormal"><a href="http://www.example.de/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
#result = '<p>http://www.example.de/ <a href="http://www.example.de/" rel="nofollow" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
result = '<p><a href="http://www.example.de/" rel="nofollow" target="_blank">http://www.example.de/</a></p>'
assert_equal(result, html.html2html_strict)
html = '<p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>'
#result = '<p>http://www.example.com/?wm=mail <a href="http://www.example.com/?wm=mail" rel="nofollow" target="_blank"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>'
result = '<p><a href="http://www.example.com/?wm=mail" rel="nofollow" target="_blank">http://www.example.com/?wm=mail</a></p>'
assert_equal(result, html.html2html_strict)
html = '<p><font size="2"><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></font></p>'
#result = '<p>http://www.example.com/?wm=mail <a href="http://www.example.com/?wm=mail" rel="nofollow" target="_blank"><img border="0" src="cid:example_new.png@8B201D8C.000B" style="width:101px;height:30px;"></a></p>'
result = '<p><a href="http://www.example.com/?wm=mail" rel="nofollow" target="_blank">http://www.example.com/?wm=mail</a></p>'
assert_equal(result, html.html2html_strict)
end
test 'signature_identify function' do
@ -579,80 +790,140 @@ Men-----------------------'
source = 'test'
result = 'test'
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "test\n--\nend"
result = "test\n#{marker_template}--\nend"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "On 01/04/15 10:55, Bob Smith wrote:"
result = "#{marker_template}On 01/04/15 10:55, Bob Smith wrote:"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:"
result = "#{marker_template}Am 03.04.2015 um 20:58 schrieb Martin Edenhofer <me@znuny.ink>:"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "\ntest 123 \n1\n2\n3\n4\n5\n6\n7\n8\n9\n--\nBob Smith\n"
result = "\ntest 123 \n1\n2\n3\n4\n5\n6\n7\n8\n9\n#{marker_template}--\nBob Smith\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "test 123 \n--no not match--\n--\nBob Smith\n"
result = "test 123 \n--no not match--\n#{marker_template}--\nBob Smith\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "test 123 \n--no not match--\n -- \nBob Smith\n"
result = "test 123 \n--no not match--\n#{marker_template} -- \nBob Smith\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "test 123 \n\n--\nBob Smith\n\n\n\n\n--\nBob Smith\n"
result = "test 123 \n#{marker_template}\n--\nBob Smith\n\n\n\n\n--\nBob Smith\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "test 123\ntest 123\n--\nBob Smith\n"
result = "test 123\ntest 123\n#{marker_template}--\nBob Smith\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
source = "test 123\ntest 123\n--\nBob Smith\n\n"
result = "test 123\ntest 123\n#{marker_template}--\nBob Smith\n\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
# apple
# en
source = "test 123 \n--no not match--\nBob Smith\nOn 01/04/15 10:55, Bob Smith wrote:\nlalala\n--\nsome test"
result = "test 123 \n--no not match--\nBob Smith\n#{marker_template}On 01/04/15 10:55, Bob Smith wrote:\nlalala\n#{marker_template}--\nsome test"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
# de
source = "test 123 \n\n--no not match--\n\nBob Smith\nAm 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:\nlalala"
result = "test 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}Am 03.04.2015 um 20:58 schrieb Bob Smith <bob@example.com>:\nlalala"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
# ms
# en
source = "test 123 \n\n--no not match--\n\nBob Smith\nFrom: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nSent: Donnerstag, 2. April 2015 10:00\nlalala</div>"
result = "test 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}From: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nSent: Donnerstag, 2. April 2015 10:00\nlalala</div>"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
# de
source = "test 123 \n\n--no not match--\n\nBob Smith\nVon: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nGesendet: Donnerstag, 2. April 2015 10:00\nBetreff: lalala\n"
result = "test 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}Von: Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nGesendet: Donnerstag, 2. April 2015 10:00\nBetreff: lalala\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
# fr
source = "\ntest 123 \n\n--no not match--\n\nBob Smith\nDe : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nEnvoyé : mercredi 29 avril 2015 17:31\nObjet : lalala\n"
result = "\ntest 123 \n\n--no not match--\n\nBob Smith\n#{marker_template}De : Martin Edenhofer via Znuny Support [mailto:support@znuny.inc]\nEnvoyé : mercredi 29 avril 2015 17:31\nObjet : lalala\n"
assert_equal(result, source.signature_identify(true))
assert_equal(result, source.signature_identify('text', true))
marker_template = '<span class="js-signatureMarker"></span>'
html = "<br>lalala<br>--<br>Max Mix"
result = "lalala<br>#{marker_template}--<br>Max Mix"
assert_equal(result, html.html2html_strict(true))
html = "lalala<br>--<br>Max Mix"
result = "lalala#{marker_template}<br>--<br>Max Mix"
assert_equal(result, html.html2html_strict)
html = "den.<br><br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hallo,<br><br>ich versuche an den Punkten"
result = "den.<br>#{marker_template}<br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI&#39;s<br><br>Hallo,<br><br>ich versuche an den Punkten"
assert_equal(result, html.html2html_strict(true))
marker_template = '<span class="js-signatureMarker"></span>'
html = "lalala<br/>--<br/>Max Mix"
result = "lalala#{marker_template}<br>--<br>Max Mix"
assert_equal(result, html.html2html_strict)
marker_template = '<span class="js-signatureMarker"></span>'
html = "lalala<br/>
--<br/>Max Mix"
result = "lalala#{marker_template}<br> --<br>Max Mix"
assert_equal(result, html.html2html_strict)
marker_template = '<span class="js-signatureMarker"></span>'
html = "lalala<p>--</p>Max Mix"
result = "lalala#{marker_template}<p>--</p>Max Mix"
assert_equal(result, html.html2html_strict)
marker_template = '<span class="js-signatureMarker"></span>'
html = "lalala<br>__<br>Max Mix"
result = "lalala#{marker_template}<br>__<br>Max Mix"
assert_equal(result, html.html2html_strict)
html = "den.<br><br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten"
result = "den.<br>#{marker_template}<br><b>Von:</b> Fritz Bauer [mailto:me@example.com]<br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's<br><br>Hello,<br><br>ich versuche an den Punkten"
assert_equal(result, html.html2html_strict)
html = '<div><div style="border:none;border-top:solid #e1e1e1 1.0pt;padding:3.0pt 0cm 0cm 0cm"><p class="MsoNormal"><b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif">Von:</span></b><span lang="DE" style="font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif"> Martin Edenhofer via Zammad Helpdesk [mailto:<a href="mailto:support@example.com">support@zammad.com</a>] <br><b>Gesendet:</b> '
result = '<span class="js-signatureMarker"></span><p><b>Von:</b> Martin Edenhofer via Zammad Helpdesk [mailto:support@example.com] <br><b>Gesendet:</b> </p>'
assert_equal(result, html.html2html_strict)
html = '<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal" style="margin-left:35.4pt"><b><span style="font-family:Calibri;color:black">Von:
</span></b><span style="font-family:Calibri;color:black">Johanna Kiefer via Znuny Projects &lt;projects@example.com&gt;<br>
<b>Organisation: </b>Znuny Group<br>
<b>Datum: </b>Montag, 6. März 2017 um 13:32<br>'
result = '<div>
<span class="js-signatureMarker"></span><p><b>Von: </b>Johanna Kiefer via Znuny Projects &lt;projects@example.com&gt;<br>
<b>Organisation: </b>Znuny Group<br>
<b>Datum: </b>Montag, 6. März 2017 um 13:32<br></p>
</div>'
assert_equal(result, html.html2html_strict)
html = '<br>
<br>
<br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
&nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.de&gt;</font>
<br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
&nbsp;</font>'
result = '<span class="js-signatureMarker"></span><br><br>Von: Hotel &lt;info@example.de&gt;
<br>An:'
assert_equal(result, html.html2html_strict)
html = '<br class=""><div><blockquote type="cite" class=""><div class="">On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;<a href="mailto:oliver@example.com" class="">oliver@example.com</a>&gt; wrote:</div><br class="Apple-interchange-newline">'
result = '<br><div><span class="js-signatureMarker"></span><blockquote type="cite">
<div>On 04 Mar 2017, at 14:47, Oliver Ruhm &lt;oliver@example.com&gt; wrote:</div>
<br>
</blockquote></div>'
assert_equal(result, html.html2html_strict)
html = '<br class=""><div><blockquote type="cite" class=""><div class="">some note</div><br class="Apple-interchange-newline">'
result = '<br><div><blockquote type="cite">
<div>some note</div>
<br>
</blockquote></div>'
assert_equal(result, html.html2html_strict)
end

View file

@ -59,7 +59,7 @@ class EmailBuildTest < ActiveSupport::TestCase
data = parser.parse(mail.to_s)
# check body
should = '&gt; Welcome!<br>&gt;<br>&gt; Thank you for installing Zammad. äöüß<br>&gt;'
should = '<div>&gt; Welcome!</div><div>&gt;</div><div>&gt; Thank you for installing Zammad. äöüß</div><div>&gt;</div>'
assert_equal(should, data[:body])
assert_equal('text/html', data[:content_type])

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -48,16 +48,21 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
assert_equal(HtmlSanitizer.strict('<DIV STYLE="background-image: url(javascript:alert(\'XSS\'), \'\')">'), '<div></div>')
assert_equal(HtmlSanitizer.strict('<a href="/some/path">test</a>'), '<a href="/some/path">test</a>')
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>'), '<a href="https://some/path" rel="nofollow" target="_blank">test</a>')
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), 'https://some/path (<a href="test" rel="nofollow" target="_blank">test</a>)')
assert_equal(HtmlSanitizer.strict('<XML ID="xss"><I><B><IMG SRC="javas<!-- -->cript:alert(\'XSS\')"></B></I></XML>'), '<i><b></b></i>')
assert_equal(HtmlSanitizer.strict('<IMG SRC="javas<!-- -->cript:alert(\'XSS\')">'), '')
assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), ' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-')
assert_equal(HtmlSanitizer.strict('<SCRIPT a=">" SRC="httx://xss.rocks/xss.js"></SCRIPT>'), '')
assert_equal(HtmlSanitizer.strict('<A HREF="h
tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="h%0Att%20%20p://6%206.000146.0x7.147/" rel="nofollow" target="_blank">XSS</a>')
tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="http://66.000146.0x7.147/" rel="nofollow" target="_blank">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="h
tt p://6 6.000146.0x7.147/">XSS</A>', true), 'http://66.000146.0x7.147/ (<a href="xss" rel="nofollow" target="_blank">XSS</a>)')
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow" target="_blank">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '//www.google.com/ (<a href="xss" rel="nofollow" target="_blank">XSS</a>)')
assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X')
assert_equal(HtmlSanitizer.strict('<maction actiontype="statusline#http://google.com" xlink:href="javascript:alert(2)">CLICKME</maction>'), 'CLICKME')
assert_equal(HtmlSanitizer.strict('<a xlink:href="javascript:alert(2)">CLICKME</a>'), '<a>CLICKME</a>')
assert_equal(HtmlSanitizer.strict('<a xlink:href="javascript:alert(2)">CLICKME</a>', true), ' (<a href="clickme">CLICKME</a>)')
assert_equal(HtmlSanitizer.strict('<!--<img src="--><img src=x onerror=alert(1)//">'), '<img src="x">')
assert_equal(HtmlSanitizer.strict('<![><img src="]><img src=x onerror=alert(1)//">'), '<img src="%5D&gt;&lt;img%20src=x%20onerror=alert(1)//">')
assert_equal(HtmlSanitizer.strict('<svg><![CDATA[><image xlink:href="]]><img src=xx:x onerror=alert(2)//"></svg>'), '')
@ -66,8 +71,10 @@ tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="h%0Att%20%20p://6%206.000146.0
assert_equal(HtmlSanitizer.strict('<embed src="data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg=="></embed>'), '')
assert_equal(HtmlSanitizer.strict('<img[a][b]src=x[d]onerror[c]=[e]"alert(1)">'), '<img>')
assert_equal(HtmlSanitizer.strict('<a href="[a]java[b]script[c]:alert(1)">XXX</a>'), '<a>XXX</a>')
assert_equal(HtmlSanitizer.strict('<a href="[a]java[b]script[c]:alert(1)">XXX</a>', true), ' (<a href="xxx">XXX</a>)')
assert_equal(HtmlSanitizer.strict('<svg xmlns="http://www.w3.org/2000/svg"><script>alert(1)</script></svg>'), 'alert(1)')
assert_equal(HtmlSanitizer.strict('<a style="position:fixed;top:0;left:0;width: 260px;height:100vh;background-color:red;display: block;" href="http://example.com"></a>'), '<a style="width: 260px;height:100vh;" href="http://example.com" rel="nofollow" target="_blank"></a>')
assert_equal(HtmlSanitizer.strict('<a style="position:fixed;top:0;left:0;width: 260px;height:100vh;background-color:red;display: block;" href="http://example.com"></a>', true), 'http://example.com (<a style="width: 260px;height:100vh;" href="" rel="nofollow" target="_blank"></a>)')
end

View file

@ -65,7 +65,7 @@ class TicketXssTest < ActiveSupport::TestCase
created_by_id: 1,
)
assert_equal("please tell me this doesn't work: <table>ada<tr></tr>
</table><div class=\"adasd\"></div><div>
</table><div></div><div>
<a>LINK</a><a href=\"http://lalal.de\" rel=\"nofollow\" target=\"_blank\">aa</a>ABC</div>", article3.body, 'article3.body verify - inbound')
article4 = Ticket::Article.create(