Fixed HTML sanitizer for non-UTF-8 content in href attributes.
This commit is contained in:
parent
1a1e1a2d7b
commit
0fd5c971fc
4 changed files with 3873 additions and 16 deletions
|
@ -29,12 +29,7 @@ sanitize html string based on whitelist
|
||||||
|
|
||||||
# remove tag, insert quoted content
|
# remove tag, insert quoted content
|
||||||
if tags_quote_content.include?(node.name)
|
if tags_quote_content.include?(node.name)
|
||||||
string = node.content
|
string = html_decode(node.content)
|
||||||
string.gsub!('&amp;', '&')
|
|
||||||
string.gsub!('&lt;', '<')
|
|
||||||
string.gsub!('&gt;', '>')
|
|
||||||
string.gsub!('&quot;', '"')
|
|
||||||
string.gsub!('&nbsp;', ' ')
|
|
||||||
text = Nokogiri::XML::Text.new(string, node.document)
|
text = Nokogiri::XML::Text.new(string, node.document)
|
||||||
node.add_next_sibling(text)
|
node.add_next_sibling(text)
|
||||||
node.remove
|
node.remove
|
||||||
|
@ -155,8 +150,7 @@ sanitize html string based on whitelist
|
||||||
text = Nokogiri::XML::Text.new(')', node.document)
|
text = Nokogiri::XML::Text.new(')', node.document)
|
||||||
node.add_next_sibling(text)
|
node.add_next_sibling(text)
|
||||||
else
|
else
|
||||||
text = Nokogiri::XML::Text.new(cleanup_target(node['href']), node.document)
|
node.content = cleanup_target(node['href'])
|
||||||
node.content = text
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -286,8 +280,7 @@ cleanup html string:
|
||||||
text = Nokogiri::XML::Text.new(')', node.document)
|
text = Nokogiri::XML::Text.new(')', node.document)
|
||||||
node.add_next_sibling(text)
|
node.add_next_sibling(text)
|
||||||
else
|
else
|
||||||
text = Nokogiri::XML::Text.new(cleanup_target(node['href']), node.document)
|
node.content = cleanup_target(node['href'])
|
||||||
node.content = text
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -352,13 +345,20 @@ cleanup html string:
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.html_decode(string)
|
||||||
|
string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
|
||||||
|
end
|
||||||
|
|
||||||
def self.cleanup_target(string)
|
def self.cleanup_target(string)
|
||||||
URI.unescape(string).gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '')
|
string = URI.unescape(string).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?')
|
||||||
|
string.gsub(/[[:space:]]|\t|\n|\r/, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '')
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.url_same?(url_new, url_old)
|
def self.url_same?(url_new, url_old)
|
||||||
url_new = URI.unescape(url_new.to_s).downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
url_new = URI.unescape(url_new.to_s).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?').downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
||||||
url_old = URI.unescape(url_old.to_s).downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
url_old = URI.unescape(url_old.to_s).encode('utf-8', 'binary', invalid: :replace, undef: :replace, replace: '?').downcase.gsub(%r{/$}, '').gsub(/[[:space:]]|\t|\n|\r/, '').strip
|
||||||
|
url_new = html_decode(url_new).sub('/?', '?')
|
||||||
|
url_old = html_decode(url_old).sub('/?', '?')
|
||||||
return true if url_new == url_old
|
return true if url_new == url_old
|
||||||
return true if "http://#{url_new}" == url_old
|
return true if "http://#{url_new}" == url_old
|
||||||
return true if "http://#{url_old}" == url_new
|
return true if "http://#{url_old}" == url_new
|
||||||
|
@ -437,5 +437,6 @@ sanitize style of img tags
|
||||||
private_class_method :cleanup_target
|
private_class_method :cleanup_target
|
||||||
private_class_method :add_link
|
private_class_method :add_link
|
||||||
private_class_method :url_same?
|
private_class_method :url_same?
|
||||||
|
private_class_method :html_decode
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
3812
test/fixtures/mail47.box
vendored
Normal file
3812
test/fixtures/mail47.box
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -595,6 +595,26 @@ Men-----------------------'
|
||||||
result = '<a href="http://example.com/" rel="nofollow" target="_blank">http://example.com</a>'
|
result = '<a href="http://example.com/" rel="nofollow" target="_blank">http://example.com</a>'
|
||||||
assert_equal(result, html.html2html_strict)
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
|
html = "<a href=\"http://example.com/?abc=123&123=abc\" class=\"abc\n\"\n>http://example.com?abc=123&123=abc</a>"
|
||||||
|
result = '<a href="http://example.com/?abc=123&123=abc" rel="nofollow" target="_blank">http://example.com?abc=123&123=abc</a>'
|
||||||
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
|
html = "<a href=\"http://example.com/?abc=123&123=abc\" class=\"abc\n\"\n>http://example.com?abc=123&123=abc</a>"
|
||||||
|
result = '<a href="http://example.com/?abc=123&123=abc" rel="nofollow" target="_blank">http://example.com?abc=123&123=abc</a>'
|
||||||
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
|
html = "<a href=\"http://example.com/?abc=123&123=abc\" class=\"abc\n\"\n><img src=\"cid:123\"></a>"
|
||||||
|
result = '<a href="http://example.com/?abc=123&123=abc" rel="nofollow" target="_blank">http://example.com/?abc=123&123=abc</a>'
|
||||||
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
|
html = '<p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap">https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap</a></p>'
|
||||||
|
result = '<p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap" rel="nofollow" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&#ldap</a></p>'
|
||||||
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
|
html = '<p>https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</p>'
|
||||||
|
result = '<p><a href="https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap" rel="nofollow" target="_blank">https://wiki.lab.example.com/doku.php?id=xxxx:start&a=1;#ldap</a></p>'
|
||||||
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
html = "<div>http://example.com</div>"
|
html = "<div>http://example.com</div>"
|
||||||
result = "<div><a href=\"http://example.com\" rel=\"nofollow\" target=\"_blank\">http://example.com</a>\n</div>"
|
result = "<div><a href=\"http://example.com\" rel=\"nofollow\" target=\"_blank\">http://example.com</a>\n</div>"
|
||||||
assert_equal(result, html.html2html_strict)
|
assert_equal(result, html.html2html_strict)
|
||||||
|
@ -616,7 +636,6 @@ Men-----------------------'
|
||||||
assert_equal(result, html.html2html_strict)
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
html = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
|
html = "<a href=\"http://facebook.de/examplesrbog\"><span lang=\"EN-US\" style='color:blue'>http://facebook.de/examplesrbog</span></a>"
|
||||||
#result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\"><span lang=\"EN-US\">http://facebook.de/examplesrbog</span></a>"
|
|
||||||
result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\">http://facebook.de/examplesrbog</a>"
|
result = "<a href=\"http://facebook.de/examplesrbog\" rel=\"nofollow\" target=\"_blank\">http://facebook.de/examplesrbog</a>"
|
||||||
assert_equal(result, html.html2html_strict)
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
|
@ -641,8 +660,8 @@ abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donne
|
||||||
result = "<div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"
|
result = "<div>abc<span class=\"js-signatureMarker\"></span><p> <b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"
|
||||||
assert_equal(result, html.html2html_strict)
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
html = "<div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Mit freundlichem Gruß<span class=\"Apple-converted-space\"> </span><br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.<o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"mailto:john.smith@example.com\" style=color: blue; text-decoration: underline; \">john.smith@example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=http://www.example.com\" style=\"color: blue; text-decoration: underline; \">www.example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div>"
|
html = "<div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Mit freundlichem Gruß<span class=\"Apple-converted-space\"> </span><br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.<o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"mailto:john.smith@example.com\" style=color: blue; text-decoration: underline; \">john.smith@example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div><div style=\"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, sans-serif; \"><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><a href=\"http://www.example.com\" style=\"color: blue; text-decoration: underline; \">www.example.com</a></span><span style=\"font-size: 10pt; font-family: Arial, sans-serif; \"><o:p></o:p></span></div>"
|
||||||
result = "<div>Mit freundlichem Gruß<br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.</div><div>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br>\n</div><div>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</div><div>john.smith@example.com</div><div><a href=\"http://www.example.com\" rel=\"nofollow\" target=\"_blank\">http://www.example.com</a> (<a href=\"http://www.example.com%22\" rel=\"nofollow\" target=\"_blank\">http://www.example.com\"</a>)</div>"
|
result = "<div>Mit freundlichem Gruß<br><br>John Smith<br>Service und Support<br><br>Example Service AG & Co.</div><div>Management OHG<br>Someware-Str. 4<br>xxxxx Someware<br><br>\n</div><div>Tel.: +49 001 7601 462<br>Fax: +49 001 7601 472</div><div>john.smith@example.com</div><div><a href=\"http://www.example.com\" rel=\"nofollow\" target=\"_blank\">www.example.com</a></div>"
|
||||||
assert_equal(result, html.html2html_strict)
|
assert_equal(result, html.html2html_strict)
|
||||||
|
|
||||||
html = '<b >test</b>'
|
html = '<b >test</b>'
|
||||||
|
|
|
@ -2266,6 +2266,31 @@ Some Text',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
data: IO.binread('test/fixtures/mail47.box'),
|
||||||
|
success: true,
|
||||||
|
result: {
|
||||||
|
0 => {
|
||||||
|
priority: '2 normal',
|
||||||
|
title: '-90%! Nur 3,90 statt 39,90 EUR: In-Ear-Stereo-Headset mit Bluetooth 4.1 und Magnetverschluss für Bob Max Example',
|
||||||
|
},
|
||||||
|
1 => {
|
||||||
|
from: 'EXAMPLE HotPriceMail <anja.weber@example.de>',
|
||||||
|
sender: 'Customer',
|
||||||
|
type: 'email',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
verify: {
|
||||||
|
users: [
|
||||||
|
{
|
||||||
|
firstname: 'EXAMPLE',
|
||||||
|
lastname: 'HotPriceMail',
|
||||||
|
fullname: 'EXAMPLE HotPriceMail',
|
||||||
|
email: 'anja.weber@example.de',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
assert_process(files)
|
assert_process(files)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue