From 98a1ba8a62d8f16c0078922baf83d9b9bb35752f Mon Sep 17 00:00:00 2001 From: Martin Edenhofer Date: Wed, 26 Apr 2017 11:05:58 +0200 Subject: [PATCH] Fixed html sanitizer loop with own generated link injection. Extended tests. --- lib/html_sanitizer.rb | 107 ++++++++++++------------ test/unit/aaa_string_test.rb | 33 ++++---- test/unit/cache_test.rb | 137 +++++++++---------------------- test/unit/email_parser_test.rb | 2 +- test/unit/html_sanitizer_test.rb | 4 +- 5 files changed, 113 insertions(+), 170 deletions(-) diff --git a/lib/html_sanitizer.rb b/lib/html_sanitizer.rb index e3e3cf7df..d004bbcd1 100644 --- a/lib/html_sanitizer.rb +++ b/lib/html_sanitizer.rb @@ -19,7 +19,58 @@ satinize html string based on whiltelist classes_whitelist = ['js-signatureMarker'] attributes_2_css = %w(width height) - scrubber = Loofah::Scrubber.new do |node| + scrubber_link = Loofah::Scrubber.new do |node| + + # check if href is different to text + if external && node.name == 'a' && !url_same?(node['href'], node.text) + if node['href'].blank? + node.replace node.children.to_s + Loofah::Scrubber::STOP + elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present? + text = Nokogiri::XML::Text.new("#{node['href']} (", node.document) + node.add_previous_sibling(text) + node['href'] = cleanup_target(node.text) + text = Nokogiri::XML::Text.new(')', node.document) + node.add_next_sibling(text) + else + node.content = cleanup_target(node['href']) + end + end + + # check if text has urls which need to be clickable + if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a') + if node.class == Nokogiri::XML::Text + urls = [] + node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match| + if match[0] + urls.push match[0].to_s.strip + end + } + node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match| + if match[1] + urls.push match[1].to_s.strip + end + } + next if urls.empty? + add_link(node.content, urls, node) + end + end + + # prepare links + if node['href'] + href = cleanup_target(node['href']) + if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?} + node['href'] = "http://#{node['href']}" + href = node['href'] + end + next if !href.downcase.start_with?('http', 'ftp', '//') + node.set_attribute('href', href) + node.set_attribute('rel', 'nofollow noreferrer noopener') + node.set_attribute('target', '_blank') + end + end + + scrubber_wipe = Loofah::Scrubber.new do |node| # remove tags with subtree if tags_remove_content.include?(node.name) @@ -128,67 +179,19 @@ satinize html string based on whiltelist Loofah::Scrubber::STOP end end - - # prepare links - if node['href'] - href = cleanup_target(node['href']) - if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?} - node['href'] = "http://#{node['href']}" - href = node['href'] - end - next if !href.downcase.start_with?('http', 'ftp', '//') - node.set_attribute('href', href) - node.set_attribute('rel', 'nofollow noreferrer noopener') - node.set_attribute('target', '_blank') - end - - # check if href is different to text - if external && node.name == 'a' && !url_same?(node['href'], node.text) - if node['href'].blank? - node.replace node.children.to_s - Loofah::Scrubber::STOP - elsif (node.children.empty? || node.children.first.class == Nokogiri::XML::Text) && node.text.present? - text = Nokogiri::XML::Text.new("#{node['href']} (", node.document) - node.add_previous_sibling(text) - node['href'] = cleanup_target(node.text) - text = Nokogiri::XML::Text.new(')', node.document) - node.add_next_sibling(text) - else - node.content = cleanup_target(node['href']) - end - end - - # check if text has urls which need to be clickable - if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a') - if node.class == Nokogiri::XML::Text - urls = [] - node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match| - if match[0] - urls.push match[0].to_s.strip - end - } - node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match| - if match[1] - urls.push match[1].to_s.strip - end - } - next if urls.empty? - add_link(node.content, urls, node) - end - end - end new_string = '' done = true while done - new_string = Loofah.fragment(string).scrub!(scrubber).to_s + new_string = Loofah.fragment(string).scrub!(scrubber_wipe).to_s if string == new_string done = false end string = new_string end - string + + Loofah.fragment(string).scrub!(scrubber_link).to_s end =begin diff --git a/test/unit/aaa_string_test.rb b/test/unit/aaa_string_test.rb index 7aba4ebf3..4a247497f 100644 --- a/test/unit/aaa_string_test.rb +++ b/test/unit/aaa_string_test.rb @@ -552,8 +552,7 @@ Men-----------------------' assert_equal(result, html.html2html_strict) html = '
https://www.facebook.com/test
' - result = '
-https://www.facebook.com/test + result = '
https://www.facebook.com/test
' assert_equal(result, html.html2html_strict) @@ -641,11 +640,11 @@ Men-----------------------' assert_equal(result, html.html2html_strict) html = "
http://example.com
" - result = "
\nhttp://example.com\n
" + result = "
http://example.com\n
" assert_equal(result, html.html2html_strict) html = "
http://example.com.
" - result = "
\nhttp://example.com.
" + result = "
http://example.com.
" assert_equal(result, html.html2html_strict) html = "
lala http://example.com.
" @@ -653,11 +652,11 @@ Men-----------------------' assert_equal(result, html.html2html_strict) html = "
http://example.com, and so on
" - result = "
\nhttp://example.com, and so on
" + result = "
http://example.com, and so on
" assert_equal(result, html.html2html_strict) html = "
http://example.com?lala=me, and so on
" - result = "
\nhttp://example.com?lala=me, and so on
" + result = "
http://example.com?lala=me, and so on
" assert_equal(result, html.html2html_strict) html = "http://facebook.de/examplesrbog" @@ -665,12 +664,12 @@ Men-----------------------' assert_equal(result, html.html2html_strict) html = "web                        -www.example.de" - result = "web www.example.de" +www.example.com" + result = "web www.example.com" assert_equal(result, html.html2html_strict) - html = "web www.example.de" - result = "web www.example.de" + html = "web www.example.com" + result = "web www.example.com" assert_equal(result, html.html2html_strict) html = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders in
      Ihrer Lesezeichen-Symbolleiste zu ergänzen.

 " @@ -913,9 +912,9 @@ christian.schaefer@example.com' result = '' assert_equal(result, html.html2html_strict) - html = '

' - #result = '

http://www.example.de/

' - result = '

http://www.example.de/

' + html = '

' + #result = '

http://www.example.com/

' + result = '

http://www.example.com/

' assert_equal(result, html.html2html_strict) html = '

' @@ -940,6 +939,10 @@ christian.schaefer@example.com' result = '

oh jeee … Zauberwort vergessen ;-) Können Sie mir bitte noch meine Testphase verlängern?

 

' assert_equal(result, html.html2html_strict) + html = '
http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805
' + result = '
http://www.example.com/Community/Passwort-Vergessen/?module_fnc%5BextranetHandler%5D=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805 (http://www.example.com/Community/Passwort-Vergessen/?module_fnc=ChangeForgotPassword&pwchangekey=66901c449dda98a098de4b57ccdf0805)
' + assert_equal(result, html.html2html_strict) + end test 'inline attachment replace' do @@ -1106,10 +1109,10 @@ christian.schaefer@example.com' html = '



Von:       - Hotel <info@example.de>Hotel <info@example.com>
An:        
' - result = '

Von: Hotel <info@example.de> + result = '

Von: Hotel <info@example.com>
An:
' assert_equal(result, html.html2html_strict) diff --git a/test/unit/cache_test.rb b/test/unit/cache_test.rb index 926d5f57d..07152f081 100644 --- a/test/unit/cache_test.rb +++ b/test/unit/cache_test.rb @@ -3,112 +3,49 @@ require 'test_helper' class CacheTest < ActiveSupport::TestCase test 'cache' do - tests = [ - # test 1 - { - set: { - key: '123', - data: { - key: 'some value', - } - }, - verify: { - key: '123', - data: { - key: 'some value', - } - }, - }, + # test 1 + Cache.write('123', 'some value') + cache = Cache.get('123') + assert_equal(cache, 'some value') - # test 2 - { - set: { - key: '123', - data: { - key: 'some valueöäüß', - } - }, - verify: { - key: '123', - data: { - key: 'some valueöäüß', - } - }, - }, + Cache.write('123', { key: 'some value' }) + cache = Cache.get('123') + assert_equal(cache, { key: 'some value' }) - # test 3 - { - delete: { - key: '123', - }, - verify: { - key: '123', - data: nil - }, - }, + # test 2 + Cache.write('123', { key: 'some valueöäüß' }) + cache = Cache.get('123') + assert_equal(cache, { key: 'some valueöäüß' }) - # test 4 - { - set: { - key: '123', - data: { - key: 'some valueöäüß2', - } - }, - verify: { - key: '123', - data: { - key: 'some valueöäüß2', - } - }, - }, + # test 3 + Cache.delete('123') + cache = Cache.get('123') + assert_nil(cache) - # test 5 - { - cleanup: true, - verify: { - key: '123', - data: nil - }, - }, + # test 4 + Cache.write('123', { key: 'some valueöäüß2' }) + cache = Cache.get('123') + assert_equal(cache, { key: 'some valueöäüß2' }) - # test 6 - { - set: { - key: '123', - data: { - key: 'some valueöäüß2', - }, - param: { - expires_in: 3.seconds, - } - }, - sleep: 5, - verify: { - key: '123', - data: nil - }, - }, - ] - tests.each { |test| - if test[:set] - Cache.write(test[:set], test[:set][:data]) - end - if test[:delete] - Cache.delete(test[:delete][:key]) - end - if test[:cleanup] - Cache.clear - end - if test[:sleep] - sleep test[:sleep] - end - if test[:verify] - cache = Cache.get(test[:verify]) - assert_equal(cache, test[:verify][:data], 'verify') - end - } + Cache.delete('123') + cache = Cache.get('123') + assert_nil(cache) + + # test 5 + Cache.clear + cache = Cache.get('123') + assert_nil(cache) + + Cache.delete('123') + cache = Cache.get('123') + assert_nil(cache) + + # test 6 + Cache.write('123', { key: 'some valueöäüß2' }, expires_in: 3.seconds) + sleep 5 + cache = Cache.get('123') + assert_nil(cache) end # verify if second cache write overwrite first one diff --git a/test/unit/email_parser_test.rb b/test/unit/email_parser_test.rb index 3d7f71cd0..f62592642 100644 --- a/test/unit/email_parser_test.rb +++ b/test/unit/email_parser_test.rb @@ -552,7 +552,7 @@ Newsletter abbestellen (', from_email: '"我" <>', diff --git a/test/unit/html_sanitizer_test.rb b/test/unit/html_sanitizer_test.rb index 38f58d53e..baf691db6 100644 --- a/test/unit/html_sanitizer_test.rb +++ b/test/unit/html_sanitizer_test.rb @@ -48,7 +48,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase assert_equal(HtmlSanitizer.strict('
'), '
') assert_equal(HtmlSanitizer.strict('
test'), 'test') assert_equal(HtmlSanitizer.strict('test'), 'test') - assert_equal(HtmlSanitizer.strict('test', true), 'https://some/path (test)') + assert_equal(HtmlSanitizer.strict('test', true), 'https://some/path (test)') assert_equal(HtmlSanitizer.strict(''), '') assert_equal(HtmlSanitizer.strict(''), '') assert_equal(HtmlSanitizer.strict(' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), ' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-') @@ -56,7 +56,7 @@ class HtmlSanitizerTest < ActiveSupport::TestCase assert_equal(HtmlSanitizer.strict('XSS'), 'XSS') assert_equal(HtmlSanitizer.strict('XSS', true), 'http://66.000146.0x7.147/ (XSS)') +tt p://6 6.000146.0x7.147/">XSS', true), 'h%0Att%20%20p://6%206.000146.0x7.147/ (XSS)') assert_equal(HtmlSanitizer.strict('XSS'), 'XSS') assert_equal(HtmlSanitizer.strict('XSS', true), '//www.google.com/ (XSS)') assert_equal(HtmlSanitizer.strict('
'), 'X')