diff --git a/lib/html_sanitizer.rb b/lib/html_sanitizer.rb index 920a657e8..ae37b9a2b 100644 --- a/lib/html_sanitizer.rb +++ b/lib/html_sanitizer.rb @@ -158,8 +158,15 @@ satinize html string based on whiltelist if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a') if node.class == Nokogiri::XML::Text urls = [] - node.content.scan(%r{((http|https|ftp|tel)://.+?|(www..+?))([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match| - urls.push match[0] + node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match| + if match[0] + urls.push match[0].to_s.strip + end + } + node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match| + if match[1] + urls.push match[1].to_s.strip + end } next if urls.empty? add_link(node.content, urls, node) diff --git a/test/unit/aaa_string_test.rb b/test/unit/aaa_string_test.rb index c5f2f5572..8641ffd59 100644 --- a/test/unit/aaa_string_test.rb +++ b/test/unit/aaa_string_test.rb @@ -570,6 +570,30 @@ Men-----------------------' result = 'some text http://www.example.com some other text' assert_equal(result, html.html2html_strict) + html = 'some textwwwsome other text' + result = 'some textwwwsome other text' + assert_equal(result, html.html2html_strict) + + html = 'some text wwwsome other text' + result = 'some text wwwsome other text' + assert_equal(result, html.html2html_strict) + + html = 'some text www.some.dom other text' + result = 'some text http://www.some.dom other text' + assert_equal(result, html.html2html_strict) + + html = 'www.some.dom other text' + result = 'http://www.some.dom other text' + assert_equal(result, html.html2html_strict) + + html = 'www.some.dom' + result = 'http://www.some.dom' + assert_equal(result, html.html2html_strict) + + html = 'web:www.some.dom other text' + result = 'web:http://www.some.dom other text' + assert_equal(result, html.html2html_strict) + html = 'http://what-different.example.com' #result = 'http://example.com (http://what-different.example.com)' result = 'http://what-different.example.com (http://example.com)' diff --git a/test/unit/email_parser_test.rb b/test/unit/email_parser_test.rb index 7e9b7db54..1864caa97 100644 --- a/test/unit/email_parser_test.rb +++ b/test/unit/email_parser_test.rb @@ -552,7 +552,7 @@ Newsletter abbestellen (', from_email: '"我" <>',