Fixed HTML sanitizer loop caused by injection of its own generated links. Extended tests.
This commit is contained in:
parent
0308d8fe2d
commit
98a1ba8a62
5 changed files with 113 additions and 170 deletions
|
@ -19,7 +19,58 @@ sanitize html string based on whitelist
|
|||
classes_whitelist = ['js-signatureMarker']
|
||||
attributes_2_css = %w(width height)
|
||||
|
||||
scrubber = Loofah::Scrubber.new do |node|
|
||||
scrubber_link = Loofah::Scrubber.new do |node|
|
||||
|
||||
# check if href is different to text
|
||||
if external && node.name == 'a' && !url_same?(node['href'], node.text)
|
||||
if node['href'].blank?
|
||||
node.replace node.children.to_s
|
||||
Loofah::Scrubber::STOP
|
||||
elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present?
|
||||
text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
|
||||
node.add_previous_sibling(text)
|
||||
node['href'] = cleanup_target(node.text)
|
||||
text = Nokogiri::XML::Text.new(')', node.document)
|
||||
node.add_next_sibling(text)
|
||||
else
|
||||
node.content = cleanup_target(node['href'])
|
||||
end
|
||||
end
|
||||
|
||||
# check if text has urls which need to be clickable
|
||||
if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
|
||||
if node.class == Nokogiri::XML::Text
|
||||
urls = []
|
||||
node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
|
||||
if match[0]
|
||||
urls.push match[0].to_s.strip
|
||||
end
|
||||
}
|
||||
node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
|
||||
if match[1]
|
||||
urls.push match[1].to_s.strip
|
||||
end
|
||||
}
|
||||
next if urls.empty?
|
||||
add_link(node.content, urls, node)
|
||||
end
|
||||
end
|
||||
|
||||
# prepare links
|
||||
if node['href']
|
||||
href = cleanup_target(node['href'])
|
||||
if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
|
||||
node['href'] = "http://#{node['href']}"
|
||||
href = node['href']
|
||||
end
|
||||
next if !href.downcase.start_with?('http', 'ftp', '//')
|
||||
node.set_attribute('href', href)
|
||||
node.set_attribute('rel', 'nofollow noreferrer noopener')
|
||||
node.set_attribute('target', '_blank')
|
||||
end
|
||||
end
|
||||
|
||||
scrubber_wipe = Loofah::Scrubber.new do |node|
|
||||
|
||||
# remove tags with subtree
|
||||
if tags_remove_content.include?(node.name)
|
||||
|
@ -128,67 +179,19 @@ sanitize html string based on whitelist
|
|||
Loofah::Scrubber::STOP
|
||||
end
|
||||
end
|
||||
|
||||
# prepare links
|
||||
if node['href']
|
||||
href = cleanup_target(node['href'])
|
||||
if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?}
|
||||
node['href'] = "http://#{node['href']}"
|
||||
href = node['href']
|
||||
end
|
||||
next if !href.downcase.start_with?('http', 'ftp', '//')
|
||||
node.set_attribute('href', href)
|
||||
node.set_attribute('rel', 'nofollow noreferrer noopener')
|
||||
node.set_attribute('target', '_blank')
|
||||
end
|
||||
|
||||
# check if href is different to text
|
||||
if external && node.name == 'a' && !url_same?(node['href'], node.text)
|
||||
if node['href'].blank?
|
||||
node.replace node.children.to_s
|
||||
Loofah::Scrubber::STOP
|
||||
elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present?
|
||||
text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
|
||||
node.add_previous_sibling(text)
|
||||
node['href'] = cleanup_target(node.text)
|
||||
text = Nokogiri::XML::Text.new(')', node.document)
|
||||
node.add_next_sibling(text)
|
||||
else
|
||||
node.content = cleanup_target(node['href'])
|
||||
end
|
||||
end
|
||||
|
||||
# check if text has urls which need to be clickable
|
||||
if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
|
||||
if node.class == Nokogiri::XML::Text
|
||||
urls = []
|
||||
node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
|
||||
if match[0]
|
||||
urls.push match[0].to_s.strip
|
||||
end
|
||||
}
|
||||
node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
|
||||
if match[1]
|
||||
urls.push match[1].to_s.strip
|
||||
end
|
||||
}
|
||||
next if urls.empty?
|
||||
add_link(node.content, urls, node)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
new_string = ''
|
||||
done = true
|
||||
while done
|
||||
new_string = Loofah.fragment(string).scrub!(scrubber).to_s
|
||||
new_string = Loofah.fragment(string).scrub!(scrubber_wipe).to_s
|
||||
if string == new_string
|
||||
done = false
|
||||
end
|
||||
string = new_string
|
||||
end
|
||||
string
|
||||
|
||||
Loofah.fragment(string).scrub!(scrubber_link).to_s
|
||||
end
|
||||
|
||||
=begin
|
||||
|
|
|
@ -552,8 +552,7 @@ Men-----------------------'
|
|||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<div>https://www.facebook.com/test</div>'
|
||||
result = '<div>
|
||||
<a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>
|
||||
result = '<div><a href="https://www.facebook.com/test" rel="nofollow noreferrer noopener" target="_blank">https://www.facebook.com/test</a>
|
||||
</div>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
|
@ -641,11 +640,11 @@ Men-----------------------'
|
|||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div>http://example.com</div>"
|
||||
result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>\n</div>"
|
||||
result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>\n</div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div>http://example.com.</div>"
|
||||
result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>.</div>"
|
||||
result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>.</div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div>lala http://example.com.</div>"
|
||||
|
@ -653,11 +652,11 @@ Men-----------------------'
|
|||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div>http://example.com, and so on</div>"
|
||||
result = "<div>\n<a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>, and so on</div>"
|
||||
result = "<div><a href=\"http://example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com</a>, and so on</div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div>http://example.com?lala=me, and so on</div>"
|
||||
result = "<div>\n<a href=\"http://example.com?lala=me\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
|
||||
result = "<div><a href=\"http://example.com?lala=me\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://example.com?lala=me</a>, and so on</div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
|
||||
|
@ -665,12 +664,12 @@ Men-----------------------'
|
|||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<span style=\"font-size:10.0pt;font-family:"Cambria",serif;color:#1F497D;mso-fareast-language:DE\">web
|
||||
<a href=\"http://www.example.de\"><span style=\"color:blue\">www.example.de</span></a><o:p></o:p></span>"
|
||||
result = "web <a href=\"http://www.example.de\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.de</a>"
|
||||
<a href=\"http://www.example.com\"><span style=\"color:blue\">www.example.com</span></a><o:p></o:p></span>"
|
||||
result = "web <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.com</a>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "web <a href=\"www.example.de\"><span style=\"color:blue\">www.example.de</span></a>"
|
||||
result = "web <a href=\"http://www.example.de\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.de</a>"
|
||||
html = "web <a href=\"www.example.com\"><span style=\"color:blue\">www.example.com</span></a>"
|
||||
result = "web <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">www.example.com</a>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den <a href=\"http://newsletters.cylex.de/\" class=\"\">Link des Adventkalenders</a> in<br class=\"\"> Ihrer Lesezeichen-Symbolleiste zu ergänzen.</p><div class=\"\"> "
|
||||
|
@ -913,9 +912,9 @@ christian.schaefer@example.com'
|
|||
result = '<img style="width: 181px; height: 125px;" src="...">'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<p class="MsoNormal"><a href="http://www.example.de/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
|
||||
#result = '<p>http://www.example.de/ <a href="http://www.example.de/" rel="nofollow noreferrer noopener" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
|
||||
result = '<p><a href="http://www.example.de/" rel="nofollow noreferrer noopener" target="_blank">http://www.example.de/</a></p>'
|
||||
html = '<p class="MsoNormal"><a href="http://www.example.com/"><span style="color:blue;text-decoration:none"><img border="0" width="30" height="30" id="_x0000_i1030" src="cid:image001.png@01D172FC.F323CDB0"></span></a><o:p></o:p></p>'
|
||||
#result = '<p>http://www.example.com/ <a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank"><img border="0" src="cid:image001.png@01D172FC.F323CDB0" style="width:30px;height:30px;"></a></p>'
|
||||
result = '<p><a href="http://www.example.com/" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/</a></p>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<p><a style="color: " href="http://www.example.com/?wm=mail"><img border="0" src="cid:example_new.png@8B201D8C.000B" width="101" height="30"></a></p>'
|
||||
|
@ -940,6 +939,10 @@ christian.schaefer@example.com'
|
|||
result = '<p>oh jeee … Zauberwort vergessen ;-) Können Sie mir <b>bitte</b> noch meine Testphase verlängern?</p><p> </p>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805</a></div>'
|
||||
result = '<div><a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805</a> (<a href="http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805" title="http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805</a>)</div>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
end
|
||||
|
||||
test 'inline attachment replace' do
|
||||
|
@ -1106,10 +1109,10 @@ christian.schaefer@example.com'
|
|||
html = '<div><br>
|
||||
<br>
|
||||
<br><font size=1 color=#5f5f5f face="sans-serif">Von:
|
||||
</font><font size=1 face="sans-serif">Hotel <info@example.de></font>
|
||||
</font><font size=1 face="sans-serif">Hotel <info@example.com></font>
|
||||
<br><font size=1 color=#5f5f5f face="sans-serif">An:
|
||||
</font></div>'
|
||||
result = '<span class="js-signatureMarker"></span><div><br>Von: Hotel <info@example.de>
|
||||
result = '<span class="js-signatureMarker"></span><div><br>Von: Hotel <info@example.com>
|
||||
<br>An:
|
||||
</div>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
|
|
@ -3,112 +3,49 @@ require 'test_helper'
|
|||
|
||||
class CacheTest < ActiveSupport::TestCase
|
||||
test 'cache' do
|
||||
tests = [
|
||||
|
||||
# test 1
|
||||
{
|
||||
set: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some value',
|
||||
}
|
||||
},
|
||||
verify: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some value',
|
||||
}
|
||||
},
|
||||
},
|
||||
# test 1
|
||||
Cache.write('123', 'some value')
|
||||
cache = Cache.get('123')
|
||||
assert_equal(cache, 'some value')
|
||||
|
||||
# test 2
|
||||
{
|
||||
set: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some valueöäüß',
|
||||
}
|
||||
},
|
||||
verify: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some valueöäüß',
|
||||
}
|
||||
},
|
||||
},
|
||||
Cache.write('123', { key: 'some value' })
|
||||
cache = Cache.get('123')
|
||||
assert_equal(cache, { key: 'some value' })
|
||||
|
||||
# test 3
|
||||
{
|
||||
delete: {
|
||||
key: '123',
|
||||
},
|
||||
verify: {
|
||||
key: '123',
|
||||
data: nil
|
||||
},
|
||||
},
|
||||
# test 2
|
||||
Cache.write('123', { key: 'some valueöäüß' })
|
||||
cache = Cache.get('123')
|
||||
assert_equal(cache, { key: 'some valueöäüß' })
|
||||
|
||||
# test 4
|
||||
{
|
||||
set: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some valueöäüß2',
|
||||
}
|
||||
},
|
||||
verify: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some valueöäüß2',
|
||||
}
|
||||
},
|
||||
},
|
||||
# test 3
|
||||
Cache.delete('123')
|
||||
cache = Cache.get('123')
|
||||
assert_nil(cache)
|
||||
|
||||
# test 5
|
||||
{
|
||||
cleanup: true,
|
||||
verify: {
|
||||
key: '123',
|
||||
data: nil
|
||||
},
|
||||
},
|
||||
# test 4
|
||||
Cache.write('123', { key: 'some valueöäüß2' })
|
||||
cache = Cache.get('123')
|
||||
assert_equal(cache, { key: 'some valueöäüß2' })
|
||||
|
||||
# test 6
|
||||
{
|
||||
set: {
|
||||
key: '123',
|
||||
data: {
|
||||
key: 'some valueöäüß2',
|
||||
},
|
||||
param: {
|
||||
expires_in: 3.seconds,
|
||||
}
|
||||
},
|
||||
sleep: 5,
|
||||
verify: {
|
||||
key: '123',
|
||||
data: nil
|
||||
},
|
||||
},
|
||||
]
|
||||
tests.each { |test|
|
||||
if test[:set]
|
||||
Cache.write(test[:set], test[:set][:data])
|
||||
end
|
||||
if test[:delete]
|
||||
Cache.delete(test[:delete][:key])
|
||||
end
|
||||
if test[:cleanup]
|
||||
Cache.clear
|
||||
end
|
||||
if test[:sleep]
|
||||
sleep test[:sleep]
|
||||
end
|
||||
if test[:verify]
|
||||
cache = Cache.get(test[:verify])
|
||||
assert_equal(cache, test[:verify][:data], 'verify')
|
||||
end
|
||||
}
|
||||
Cache.delete('123')
|
||||
cache = Cache.get('123')
|
||||
assert_nil(cache)
|
||||
|
||||
# test 5
|
||||
Cache.clear
|
||||
cache = Cache.get('123')
|
||||
assert_nil(cache)
|
||||
|
||||
Cache.delete('123')
|
||||
cache = Cache.get('123')
|
||||
assert_nil(cache)
|
||||
|
||||
# test 6
|
||||
Cache.write('123', { key: 'some valueöäüß2' }, expires_in: 3.seconds)
|
||||
sleep 5
|
||||
cache = Cache.get('123')
|
||||
assert_nil(cache)
|
||||
end
|
||||
|
||||
# verify that a second cache write overwrites the first one
|
||||
|
|
|
@ -552,7 +552,7 @@ Newsletter abbestellen (<a href="http://newsletters.cylex.de/ref/www.cylex.de/si
|
|||
},
|
||||
{
|
||||
data: IO.binread('test/fixtures/mail19.box'),
|
||||
body_md5: '29a8a50c2931346296f8b8fe782e115c',
|
||||
body_md5: '0a9da3fd3da7a5779fb711fe04818ccd',
|
||||
params: {
|
||||
from: '"我" <>',
|
||||
from_email: '"我" <>',
|
||||
|
|
|
@ -48,7 +48,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
|
|||
assert_equal(HtmlSanitizer.strict('<DIV STYLE="background-image: url(javascript:alert(\'XSS\'), \'\')">'), '<div></div>')
|
||||
assert_equal(HtmlSanitizer.strict('<a href="/some/path">test</a>'), '<a href="/some/path">test</a>')
|
||||
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>'), '<a href="https://some/path" rel="nofollow noreferrer noopener" target="_blank">test</a>')
|
||||
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), '<a href="https://some/path" rel="nofollow noreferrer noopener" target="_blank">https://some/path</a> (<a href="http://test" rel="nofollow noreferrer noopener" target="_blank">test</a>)')
|
||||
assert_equal(HtmlSanitizer.strict('<a href="https://some/path">test</a>', true), 'https://some/path (<a href="http://test" rel="nofollow noreferrer noopener" target="_blank">test</a>)')
|
||||
assert_equal(HtmlSanitizer.strict('<XML ID="xss"><I><B><IMG SRC="javas<!-- -->cript:alert(\'XSS\')"></B></I></XML>'), '<i><b></b></i>')
|
||||
assert_equal(HtmlSanitizer.strict('<IMG SRC="javas<!-- -->cript:alert(\'XSS\')">'), '')
|
||||
assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), ' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-')
|
||||
|
@ -56,7 +56,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
|
|||
assert_equal(HtmlSanitizer.strict('<A HREF="h
|
||||
tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank">XSS</a>')
|
||||
assert_equal(HtmlSanitizer.strict('<A HREF="h
|
||||
tt p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank">http://66.000146.0x7.147/</a> (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
|
||||
tt p://6 6.000146.0x7.147/">XSS</A>', true), 'h%0Att%20%20p://6%206.000146.0x7.147/ (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
|
||||
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank">XSS</a>')
|
||||
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '//www.google.com/ (<a href="http://XSS" rel="nofollow noreferrer noopener" target="_blank">XSS</a>)')
|
||||
assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X')
|
||||
|
|
Loading…
Reference in a new issue