Fixed issue #1902 - Spaces in link URLs are removed.

This commit is contained in:
Thorsten Eckel 2018-04-03 17:46:45 +02:00
parent e48ca057fd
commit 0c917e13aa
4 changed files with 23 additions and 13 deletions

View file

@ -46,12 +46,15 @@ satinize html string based on whiltelist
# prepare links # prepare links
if node['href'] if node['href']
href = cleanup_target(node['href']) href = cleanup_target(node['href'], keep_spaces: true)
if external && href.present? && !href.downcase.start_with?('//') && href.downcase !~ %r{^.{1,6}://.+?} href_without_spaces = href.gsub(/[[:space:]]/, '')
if external && href_without_spaces.present? && !href_without_spaces.downcase.start_with?('//') && href_without_spaces.downcase !~ %r{^.{1,6}://.+?}
node['href'] = "http://#{node['href']}" node['href'] = "http://#{node['href']}"
href = node['href'] href = node['href']
href_without_spaces = href.gsub(/[[:space:]]/, '')
end end
next if !href.downcase.start_with?('http', 'ftp', '//')
next if !href_without_spaces.downcase.start_with?('http', 'ftp', '//')
node.set_attribute('href', href) node.set_attribute('href', href)
node.set_attribute('rel', 'nofollow noreferrer noopener') node.set_attribute('rel', 'nofollow noreferrer noopener')
node.set_attribute('target', '_blank') node.set_attribute('target', '_blank')
@ -372,9 +375,14 @@ cleanup html string:
string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ') string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
end end
def self.cleanup_target(string) def self.cleanup_target(string, keep_spaces: false)
string = CGI.unescape(string).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?') string = CGI.unescape(string).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
string.gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '').delete("\u0000") blank_regex = if keep_spaces
/\t|\n|\r/
else
/[[:space:]]|\t|\n|\r/
end
string.strip.gsub(blank_regex, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '').delete("\u0000")
end end
def self.url_same?(url_new, url_old) def self.url_same?(url_new, url_old)

View file

@ -648,7 +648,7 @@ Men-----------------------'
assert_equal(result, html.html2html_strict) assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com %22test%22">http://what-different.example.com</a>' html = '<a href="http://example.com %22test%22">http://what-different.example.com</a>'
result = "<a href=\"http://example.com%22test%22\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title='http://example.com\"test\"'>http://what-different.example.com</a>" result = "<a href=\"http://example.com%20%22test%22\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title='http://example.com \"test\"'>http://what-different.example.com</a>"
assert_equal(result, html.html2html_strict) assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com">http://EXAMPLE.com</a>' html = '<a href="http://example.com">http://EXAMPLE.com</a>'

View file

@ -940,7 +940,7 @@ end
}, },
{ {
data: IO.binread('test/fixtures/mail43.box'), data: IO.binread('test/fixtures/mail43.box'),
body_md5: '23cb094f443f41069b9f347a551e2e4d', body_md5: 'a3b91a8969b54a67dd2154e70f74cc30',
params: { params: {
from: 'Paula <databases.en@example.com>', from: 'Paula <databases.en@example.com>',
from_email: 'databases.en@example.com', from_email: 'databases.en@example.com',
@ -973,7 +973,7 @@ end
<li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NMUGVpNGV8fDBm\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NMUGVpNGV8fDBm\"><span style=\"color: rgb(0, 0, 0);\">Polen</span></a></li> <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NMUGVpNGV8fDBm\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NMUGVpNGV8fDBm\"><span style=\"color: rgb(0, 0, 0);\">Polen</span></a></li>
<li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVUmVpZTV8fGVk\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVUmVpZTV8fGVk\"><span style=\"color: rgb(0, 0, 0);\">Russland</span></a></li> <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVUmVpZTV8fGVk\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVUmVpZTV8fGVk\"><span style=\"color: rgb(0, 0, 0);\">Russland</span></a></li>
<li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NJU2VpN2R8fGYz\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NJU2VpN2R8fGYz\"><span style=\"color: rgb(0, 0, 0);\">Slowenien</span></a></li> <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NJU2VpN2R8fGYz\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NJU2VpN2R8fGYz\"><span style=\"color: rgb(0, 0, 0);\">Slowenien</span></a></li>
<li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\"><span style=\"color: rgb(0, 0, 0);\">Slowakei</span></a></li> <li><a href=\"http://busine%20ss-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://busine ss-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NLU2VpNjZ8fDQ5\"><span style=\"color: rgb(0, 0, 0);\">Slowakei</span></a></li>
<li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NBVWVpYTd8fDNh\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NBVWVpYTd8fDNh\"><span style=\"color: rgb(0, 0, 0);\">Ukraine</span></a></li> </ul> </td> </tr> </tbody> </table> </td> </tr> <tr> <td> </td> </tr> </tbody> </table> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td style=\"border-top-style:solid; border-top-width:3px;\"> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td> </td> </tr> </tbody> </table> </td> </tr> </tbody> </table> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td align=\"left\" valign=\"top\" style=\"font-size:15px;color:#222222;\"> <p>Anwendungsmöglichkeiten für Geschäftskontakte<br> <br> </p><ul> <li> <li><a href=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NBVWVpYTd8fDNh\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NBVWVpYTd8fDNh\"><span style=\"color: rgb(0, 0, 0);\">Ukraine</span></a></li> </ul> </td> </tr> </tbody> </table> </td> </tr> <tr> <td> </td> </tr> </tbody> </table> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td style=\"border-top-style:solid; border-top-width:3px;\"> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td> </td> </tr> </tbody> </table> </td> </tr> </tbody> </table> <table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"> <tbody> <tr> <td align=\"left\" valign=\"top\" style=\"font-size:15px;color:#222222;\"> <p>Anwendungsmöglichkeiten für Geschäftskontakte<br> <br> </p><ul> <li>
<i>Newsletter senden</i> - Senden von Werbung per E-Mail (besonders effizient).</li> <i>Newsletter senden</i> - Senden von Werbung per E-Mail (besonders effizient).</li>
<li> <li>

View file

@ -54,9 +54,9 @@ class HtmlSanitizerTest < ActiveSupport::TestCase
assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), ' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-') assert_equal(HtmlSanitizer.strict(' <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-'), ' +ADw-SCRIPT+AD4-alert(\'XSS\');+ADw-/SCRIPT+AD4-')
assert_equal(HtmlSanitizer.strict('<SCRIPT a=">" SRC="httx://xss.rocks/xss.js"></SCRIPT>'), '') assert_equal(HtmlSanitizer.strict('<SCRIPT a=">" SRC="httx://xss.rocks/xss.js"></SCRIPT>'), '')
assert_equal(HtmlSanitizer.strict('<A HREF="h assert_equal(HtmlSanitizer.strict('<A HREF="h
tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="http://66.000146.0x7.147/">XSS</a>') tt p://6 6.000146.0x7.147/">XSS</A>'), '<a href="htt%20%20p://6%206.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="htt p://6 6.000146.0x7.147/">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="h assert_equal(HtmlSanitizer.strict('<A HREF="h
tt p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="http://66.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="http://66.000146.0x7.147/">XSS</a>') tt p://6 6.000146.0x7.147/">XSS</A>', true), '<a href="htt%20%20p://6%206.000146.0x7.147/" rel="nofollow noreferrer noopener" target="_blank" title="htt p://6 6.000146.0x7.147/">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank" title="//www.google.com/">XSS</a>') assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>'), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank" title="//www.google.com/">XSS</a>')
assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank" title="//www.google.com/">XSS</a>') assert_equal(HtmlSanitizer.strict('<A HREF="//www.google.com/">XSS</A>', true), '<a href="//www.google.com/" rel="nofollow noreferrer noopener" target="_blank" title="//www.google.com/">XSS</a>')
assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X') assert_equal(HtmlSanitizer.strict('<form id="test"></form><button form="test" formaction="javascript:alert(1)">X</button>'), 'X')
@ -113,7 +113,9 @@ test 123
assert_equal(HtmlSanitizer.strict('<table><tr style=" Font-size:0%"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>') assert_equal(HtmlSanitizer.strict('<table><tr style=" Font-size:0%"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>')
assert_equal(HtmlSanitizer.strict('<table><tr style="font-size:0%;display: none;"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>') assert_equal(HtmlSanitizer.strict('<table><tr style="font-size:0%;display: none;"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>')
assert_equal(HtmlSanitizer.strict('<table><tr style="font-size:0%;visibility:hidden;"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>') assert_equal(HtmlSanitizer.strict('<table><tr style="font-size:0%;visibility:hidden;"><td>123</td></tr></table>'), '<table><tr><td>123</td></tr></table>')
assert_equal(HtmlSanitizer.strict('<a href="/some/path%20test.pdf">test</a>'), '<a href="/some/path%20test.pdf">test</a>')
assert_equal(HtmlSanitizer.strict('<a href="https://somehost.domain/path%20test.pdf">test</a>'), '<a href="https://somehost.domain/path%20test.pdf" rel="nofollow noreferrer noopener" target="_blank" title="https://somehost.domain/path test.pdf">test</a>')
assert_equal(HtmlSanitizer.strict('<a href="https://somehost.domain/zaihan%20test">test</a>'), '<a href="https://somehost.domain/zaihan%20test" rel="nofollow noreferrer noopener" target="_blank" title="https://somehost.domain/zaihan test">test</a>')
end end
end end