Fixed HTML sanitizer loop that re-processed its own generated (injected) links. Extended tests.

This commit is contained in:
Martin Edenhofer 2017-04-26 11:05:58 +02:00
parent 0308d8fe2d
commit 98a1ba8a62
5 changed files with 113 additions and 170 deletions

View file

@ -19,7 +19,58 @@ satinize html string based on whiltelist
classes_whitelist = ['js-signatureMarker']
attributes_2_css = %w(width height)
scrubber = Loofah::Scrubber.new do |node|
# Scrubber that handles links. For every visited node it performs, in order:
#   1. (external content only) expose anchors whose href differs from their
#      visible text, so the real target cannot hide behind the link text,
#   2. turn plain URLs found in text nodes into links via add_link,
#   3. normalise the href and add rel/target attributes to http/ftp/"//" links.
# Relies on `external`, `url_same?`, `cleanup_target` and `add_link` from the
# enclosing scope.
scrubber_link = Loofah::Scrubber.new do |node|

  # check if href is different to text
  if external && node.name == 'a' && !url_same?(node['href'], node.text)
    if node['href'].blank?
      # anchor without a target: keep only its content
      node.replace node.children.to_s
      # `next` makes STOP the result of the block; as a bare expression in the
      # middle of the block it was discarded and the traversal went on into
      # the already detached node
      next Loofah::Scrubber::STOP
    elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present?
      # plain-text anchor: emit "<original href> (<a href=text>text</a>)"
      text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
      node.add_previous_sibling(text)
      node['href'] = cleanup_target(node.text)
      text = Nokogiri::XML::Text.new(')', node.document)
      node.add_next_sibling(text)
    else
      # anchor wrapping other markup: show the cleaned href as its content
      node.content = cleanup_target(node['href'])
    end
  end

  # check if text has urls which need to be clickable
  # (skipped for anchors and for nodes up to two levels below an anchor)
  if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
    if node.class == Nokogiri::XML::Text
      urls = []
      # urls with an explicit scheme
      node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
        if match[0]
          urls.push match[0].to_s.strip
        end
      }
      # scheme-less urls starting with "www."
      node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
        if match[1]
          urls.push match[1].to_s.strip
        end
      }
      next if urls.empty?
      add_link(node.content, urls, node)
    end
  end

  # prepare links
  if node['href']
    href = cleanup_target(node['href'])
    if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
      # prefix the cleaned value; using the raw attribute here would throw
      # away the cleanup_target result computed above
      href = "http://#{href}"
    end
    # leave other targets (e.g. relative paths) untouched
    next if !href.downcase.start_with?('http', 'ftp', '//')
    node.set_attribute('href', href)
    node.set_attribute('rel', 'nofollow noreferrer noopener')
    node.set_attribute('target', '_blank')
  end
end
scrubber_wipe = Loofah::Scrubber.new do |node|
# remove tags with subtree
if tags_remove_content.include?(node.name)
@ -128,67 +179,19 @@ satinize html string based on whiltelist
Loofah::Scrubber::STOP
end
end
# prepare links
if node['href']
href = cleanup_target(node['href'])
if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
node['href'] = "http://#{node['href']}"
href = node['href']
end
next if !href.downcase.start_with?('http', 'ftp', '//')
node.set_attribute('href', href)
node.set_attribute('rel', 'nofollow noreferrer noopener')
node.set_attribute('target', '_blank')
end
# check if href is different to text
if external && node.name == 'a' && !url_same?(node['href'], node.text)
if node['href'].blank?
node.replace node.children.to_s
Loofah::Scrubber::STOP
elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present?
text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
node.add_previous_sibling(text)
node['href'] = cleanup_target(node.text)
text = Nokogiri::XML::Text.new(')', node.document)
node.add_next_sibling(text)
else
node.content = cleanup_target(node['href'])
end
end
# check if text has urls which need to be clickable
if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
if node.class == Nokogiri::XML::Text
urls = []
node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
if match[0]
urls.push match[0].to_s.strip
end
}
node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
if match[1]
urls.push match[1].to_s.strip
end
}
next if urls.empty?
add_link(node.content, urls, node)
end
end
end
new_string = ''
done = true
while done
new_string = Loofah.fragment(string).scrub!(scrubber).to_s
new_string = Loofah.fragment(string).scrub!(scrubber_wipe).to_s
if string == new_string
done = false
end
string = new_string
end
string
Loofah.fragment(string).scrub!(scrubber_link).to_s
end
=begin

View file

@ -552,8 +552,7 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = '<div>https://www.facebook.com/test</div>'
result = '<div>
<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>
result = '<div><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>
</div>'
assert_equal(result, html.html2html_strict)
@ -641,11 +640,11 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com</div>"
result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>\n</div>"
result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>\n</div>"
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com.</div>"
result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>.</div>"
result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>.</div>"
assert_equal(result, html.html2html_strict)
html = "<div>lala http://example.com.</div>"
@ -653,11 +652,11 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com, and so on</div>"
result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>, and so on</div>"
result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>, and so on</div>"
assert_equal(result, html.html2html_strict)
html = "<div>http://example.com?lala=me, and so on</div>"
result = "<div>\n<a href=\"http://example.com?lala=me\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
result = "<div><a href=\"http://example.com?lala=me\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
assert_equal(result, html.html2html_strict)
html = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
@ -665,12 +664,12 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = "<span style=\"font-size:10.0pt;font-family:&quot;Cambria&quot;,serif;color:#1F497D;mso-fareast-language:DE\">web&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href=\"http://www.example.de\"><span style=\"color:blue\">www.example.de</span></a><o:p></o:p></span>"
result = "web <a href=\"http://www.example.de\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.de</a>"
<a href=\"http://www.example.com\"><span style=\"color:blue\">www.example.com</span></a><o:p></o:p></span>"
result = "web <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.com</a>"
assert_equal(result, html.html2html_strict)
html = "web <a href=\"www.example.de\"><span style=\"color:blue\">www.example.de</span></a>"
result = "web <a href=\"http://www.example.de\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.de</a>"
html = "web <a href=\"www.example.com\"><span style=\"color:blue\">www.example.com</span></a>"
result = "web <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.com</a>"
assert_equal(result, html.html2html_strict)
html = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href=\"http://newsletters.cylex.de/\" class=\"\">Link des Adventkalenders</a> in<br class=\"\">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class=\"\">&nbsp;"
@ -913,9 +912,9 @@ christian.schaefer@example.com'
result = '<img style="width: 181px; height: 125px;" src="...">'
assert_equal(result, html.html2html_strict)
html = '<p class="MsoNormal"><a href="http://www.example.de/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
#result = '<p>http://www.example.de/ <a href="http://www.example.de/" rel="nofollow noreferrer noopener" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
result = '<p><a href="http://www.example.de/" rel="nofollow noreferrer noopener" target="_blank">http://www.example.de/</a></p>'
html = '<p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
#result = '<p>http://www.example.com/ <a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
result = '<p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/</a></p>'
assert_equal(result, html.html2html_strict)
html = '<p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>'
@ -940,6 +939,10 @@ christian.schaefer@example.com'
result = '<p>oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</p><p>&nbsp;</p>'
assert_equal(result, html.html2html_strict)
html = '<div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>'
result = '<div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a> (<a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&amp;pwchangekey=66901c449dda98a098de4b57ccdf0805</a>)</div>'
assert_equal(result, html.html2html_strict)
end
test 'inline attachment replace' do
@ -1106,10 +1109,10 @@ christian.schaefer@example.com'
html = '<div><br>
<br>
<br><font size=1 color=#5f5f5f face="sans-serif">Von: &nbsp; &nbsp; &nbsp;
&nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.de&gt;</font>
&nbsp;</font><font size=1 face="sans-serif">Hotel &lt;info@example.com&gt;</font>
<br><font size=1 color=#5f5f5f face="sans-serif">An: &nbsp; &nbsp; &nbsp;
&nbsp;</font></div>'
result = '<span class="js-signatureMarker"></span><div><br>Von: Hotel &lt;info@example.de&gt;
result = '<span class="js-signatureMarker"></span><div><br>Von: Hotel &lt;info@example.com&gt;
<br>An:
</div>'
assert_equal(result, html.html2html_strict)

View file

@ -3,112 +3,49 @@ require 'test_helper'
class CacheTest < ActiveSupport::TestCase
test 'cache' do
tests = [
# test 1
{
set: {
key: '123',
data: {
key: 'some value',
}
},
verify: {
key: '123',
data: {
key: 'some value',
}
},
},
# test 1
Cache.write('123', 'some value')
cache = Cache.get('123')
assert_equal(cache, 'some value')
# test 2
{
set: {
key: '123',
data: {
key: 'some valueöäüß',
}
},
verify: {
key: '123',
data: {
key: 'some valueöäüß',
}
},
},
Cache.write('123', { key: 'some value' })
cache = Cache.get('123')
assert_equal(cache, { key: 'some value' })
# test 3
{
delete: {
key: '123',
},
verify: {
key: '123',
data: nil
},
},
# test 2
Cache.write('123', { key: 'some valueöäüß' })
cache = Cache.get('123')
assert_equal(cache, { key: 'some valueöäüß' })
# test 4
{
set: {
key: '123',
data: {
key: 'some valueöäüß2',
}
},
verify: {
key: '123',
data: {
key: 'some valueöäüß2',
}
},
},
# test 3
Cache.delete('123')
cache = Cache.get('123')
assert_nil(cache)
# test 5
{
cleanup: true,
verify: {
key: '123',
data: nil
},
},
# test 4
Cache.write('123', { key: 'some valueöäüß2' })
cache = Cache.get('123')
assert_equal(cache, { key: 'some valueöäüß2' })
# test 6
{
set: {
key: '123',
data: {
key: 'some valueöäüß2',
},
param: {
expires_in: 3.seconds,
}
},
sleep: 5,
verify: {
key: '123',
data: nil
},
},
]
tests.each { |test|
if test[:set]
Cache.write(test[:set], test[:set][:data])
end
if test[:delete]
Cache.delete(test[:delete][:key])
end
if test[:cleanup]
Cache.clear
end
if test[:sleep]
sleep test[:sleep]
end
if test[:verify]
cache = Cache.get(test[:verify])
assert_equal(cache, test[:verify][:data], 'verify')
end
}
Cache.delete('123')
cache = Cache.get('123')
assert_nil(cache)
# test 5
Cache.clear
cache = Cache.get('123')
assert_nil(cache)
Cache.delete('123')
cache = Cache.get('123')
assert_nil(cache)
# test 6
Cache.write('123', { key: 'some valueöäüß2' }, expires_in: 3.seconds)
sleep 5
cache = Cache.get('123')
assert_nil(cache)
end
# verify if second cache write overwrite first one

View file

@ -552,7 +552,7 @@ Newsletter abbestellen (<a href="http://newsletters.cylex.de/ref/www.cylex.de/si
},
{
data: IO.binread('test/fixtures/mail19.box'),
body_md5: '29a8a50c2931346296f8b8fe782e115c',
body_md5: '0a9da3fd3da7a5779fb711fe04818ccd',
params: {
from: '"我" <>',
from_email: '"我" <>',

View file

@ -48,7 +48,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
assert_equal(HtmlSanitizer.strict('<DIV STYLE="background-image: url(javascript:alert(\'XSS\'), \'\')">'), '<div></div>')
assert_equal(HtmlSanitizer.strict('<a href="/some/path">test</a>'), '<a href="/some/path">test</a>')
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>'), '<a href="https://some/path" rel="nofollow noreferrer noopener" target="_blank">test</a>')
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), '<a href="https://some/path" rel="nofollow noreferrer noopener" target="_blank">https://some/path</a> (<a href="http://test" rel="nofollow noreferrer noopener" target="_blank">test</a>)')
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), 'https://some/path (<a href="http://test" rel="nofollow noreferrer noopener" target="_blank">test</a>)')
assert_equal(HtmlSanitizer.strict('<XML ID="xss"><I><B><IMG SRC="javas<!-- -->cript:alert(\'XSS\')"></B></I></XML>'), '<i><b></b></i>')
assert_equal(HtmlSanitizer.strict('<IMG SRC="javas<!-- -->cript:alert(\'XSS\')">'), '')
assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), ' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-')
@ -56,7 +56,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
assert_equal(HtmlSanitizer.strict('<A HREF="h
tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="h
tt p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank">http://66.000146.0x7.147/</a> (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
tt p://6 6.000146.0x7.147/">XSS</A>', true), 'h%0Att%20%20p://6%206.000146.0x7.147/ (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '//www.google.com/ (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X')