Fixed HTML sanitizer handling of non-UTF-8 content in href attributes.
This commit is contained in:
parent
1a1e1a2d7b
commit
0fd5c971fc
4 changed files with 3873 additions and 16 deletions
|
@ -29,12 +29,7 @@ satinize html string based on whiltelist
|
|||
|
||||
# remove tag, insert quoted content
|
||||
if tags_quote_content.include?(node.name)
|
||||
string = node.content
|
||||
string.gsub!('&', '&')
|
||||
string.gsub!('<', '<')
|
||||
string.gsub!('>', '>')
|
||||
string.gsub!('"', '"')
|
||||
string.gsub!(' ', ' ')
|
||||
string = html_decode(node.content)
|
||||
text = Nokogiri::XML::Text.new(string, node.document)
|
||||
node.add_next_sibling(text)
|
||||
node.remove
|
||||
|
@ -155,8 +150,7 @@ satinize html string based on whiltelist
|
|||
text = Nokogiri::XML::Text.new(')', node.document)
|
||||
node.add_next_sibling(text)
|
||||
else
|
||||
text = Nokogiri::XML::Text.new(cleanup_target(node['href']), node.document)
|
||||
node.content = text
|
||||
node.content = cleanup_target(node['href'])
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -286,8 +280,7 @@ cleanup html string:
|
|||
text = Nokogiri::XML::Text.new(')', node.document)
|
||||
node.add_next_sibling(text)
|
||||
else
|
||||
text = Nokogiri::XML::Text.new(cleanup_target(node['href']), node.document)
|
||||
node.content = text
|
||||
node.content = cleanup_target(node['href'])
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -352,13 +345,20 @@ cleanup html string:
|
|||
end
|
||||
end
|
||||
|
||||
# Decodes the handful of HTML entities the sanitizer cares about
# (&amp; &lt; &gt; &quot; &nbsp;) back into their literal characters.
#
# The input is not modified; a new string is returned.
#
# NOTE(review): `&amp;` is decoded first, so doubly-escaped input such as
# `&amp;lt;` ends up as `<`. The replacement order is kept as-is.
#
# @param string [String] text that may contain HTML entities
# @return [String] copy of +string+ with the entities above replaced
def self.html_decode(string)
  string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
end
|
||||
|
||||
def self.cleanup_target(string)
|
||||
URI.unescape(string).gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '')
|
||||
string = URI.unescape(string).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
|
||||
string.gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '')
|
||||
end
|
||||
|
||||
def self.url_same?(url_new, url_old)
|
||||
url_new = URI.unescape(url_new.to_s).downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
||||
url_old = URI.unescape(url_old.to_s).downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
||||
url_new = URI.unescape(url_new.to_s).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?').downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
||||
url_old = URI.unescape(url_old.to_s).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?').downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
||||
url_new = html_decode(url_new).sub('/?', '?')
|
||||
url_old = html_decode(url_old).sub('/?', '?')
|
||||
return true if url_new == url_old
|
||||
return true if "http://#{url_new}" == url_old
|
||||
return true if "http://#{url_old}" == url_new
|
||||
|
@ -437,5 +437,6 @@ satinize style of img tags
|
|||
private_class_method :cleanup_target
|
||||
private_class_method :add_link
|
||||
private_class_method :url_same?
|
||||
private_class_method :html_decode
|
||||
|
||||
end
|
||||
|
|
3812
test/fixtures/mail47.box
vendored
Normal file
3812
test/fixtures/mail47.box
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -595,6 +595,26 @@ Men-----------------------'
|
|||
result = '<a href="http://example.com/" rel="nofollow" target="_blank">http://example.com</a>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<a href=\"http://example.com/?abc=123&123=abc\" class=\"abc\n\"\n>http://example.com?abc=123&123=abc</a>"
|
||||
result = '<a href="http://example.com/?abc=123&123=abc" rel="nofollow" target="_blank">http://example.com?abc=123&123=abc</a>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<a href=\"http://example.com/?abc=123&123=abc\" class=\"abc\n\"\n>http://example.com?abc=123&123=abc</a>"
|
||||
result = '<a href="http://example.com/?abc=123&123=abc" rel="nofollow" target="_blank">http://example.com?abc=123&123=abc</a>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<a href=\"http://example.com/?abc=123&123=abc\" class=\"abc\n\"\n><img src=\"cid:123\"></a>"
|
||||
result = '<a href="http://example.com/?abc=123&123=abc" rel="nofollow" target="_blank">http://example.com/?abc=123&123=abc</a>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap">https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap</a></p>'
|
||||
result = '<p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap" rel="nofollow" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap</a></p>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<p>https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</p>'
|
||||
result = '<p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap" rel="nofollow" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</a></p>'
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div>http://example.com</div>"
|
||||
result = "<div><a href=\"http://example.com\" rel=\"nofollow\" target=\"_blank\">http://example.com</a>\n</div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
@ -616,7 +636,6 @@ Men-----------------------'
|
|||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
|
||||
#result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\"><span lang=\"EN-US\">http://facebook.de/examplesrbog</span></a>"
|
||||
result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\">http://facebook.de/examplesrbog</a>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
|
@ -641,8 +660,8 @@ abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donne
|
|||
result = "<div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = "<div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Mit freundlichem Gruß<span class=\"Apple-converted-space\"> </span><br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.<o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"mailto:john.smith@example.com\" style=color: blue; text-decoration: underline; \">john.smith@example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=http://www.example.com\" style=\"color: blue; text-decoration: underline; \">www.example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div>"
|
||||
result = "<div>Mit freundlichem Gruß<br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.</div><div>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br>\n</div><div>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</div><div>john.smith@example.com</div><div><a href=\"http://www.example.com\" rel=\"nofollow\" target=\"_blank\">http://www.example.com</a> (<a href=\"http://www.example.com%22\" rel=\"nofollow\" target=\"_blank\">http://www.example.com\"</a>)</div>"
|
||||
html = "<div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Mit freundlichem Gruß<span class=\"Apple-converted-space\"> </span><br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.<o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"mailto:john.smith@example.com\" style=color: blue; text-decoration: underline; \">john.smith@example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"http://www.example.com\" style=\"color: blue; text-decoration: underline; \">www.example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div>"
|
||||
result = "<div>Mit freundlichem Gruß<br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.</div><div>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br>\n</div><div>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</div><div>john.smith@example.com</div><div><a href=\"http://www.example.com\" rel=\"nofollow\" target=\"_blank\">www.example.com</a></div>"
|
||||
assert_equal(result, html.html2html_strict)
|
||||
|
||||
html = '<b >test</b>'
|
||||
|
|
|
@ -2266,6 +2266,31 @@ Some Text',
|
|||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
data: IO.binread('test/fixtures/mail47.box'),
|
||||
success: true,
|
||||
result: {
|
||||
0 => {
|
||||
priority: '2 normal',
|
||||
title: '-90%! Nur 3,90 statt 39,90 EUR: In-Ear-Stereo-Headset mit Bluetooth 4.1 und Magnetverschluss für Bob Max Example',
|
||||
},
|
||||
1 => {
|
||||
from: 'EXAMPLE HotPriceMail <anja.weber@example.de>',
|
||||
sender: 'Customer',
|
||||
type: 'email',
|
||||
},
|
||||
},
|
||||
verify: {
|
||||
users: [
|
||||
{
|
||||
firstname: 'EXAMPLE',
|
||||
lastname: 'HotPriceMail',
|
||||
fullname: 'EXAMPLE HotPriceMail',
|
||||
email: 'anja.weber@example.de',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
]
|
||||
assert_process(files)
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue