Fixed endless loop in HtmlSanitizer.cleanup_structure under certain conditions.

This commit is contained in:
Martin Edenhofer 2017-03-21 09:02:15 +01:00
parent 7eebbc6f17
commit 8f3fe2b399
3 changed files with 52 additions and 6 deletions

View file

@ -335,7 +335,7 @@ class String
'(<br(|\/)>|<p>|<div>)[[:space:]]*<b>(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ):[[:space:]]*</b>', '(<br(|\/)>|<p>|<div>)[[:space:]]*<b>(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ):[[:space:]]*</b>',
'(<br>|<div>)[[:space:]]*<br>[[:space:]]*(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ):[[:space:]]+', '(<br>|<div>)[[:space:]]*<br>[[:space:]]*(Von|From|De|от|Z|Od|Ze|Fra|Van|Mistä|Από|Dal|から|Из|од|iz|Från|จาก|з|Từ):[[:space:]]+',
'<blockquote(|.+?)>[[:space:]]*<div>[[:space:]]*(On|Am|Le|El|Den|Dňa|W dniu|Il|Op|Dne|Dana)[[:space:]]', '<blockquote(|.+?)>[[:space:]]*<div>[[:space:]]*(On|Am|Le|El|Den|Dňa|W dniu|Il|Op|Dne|Dana)[[:space:]]',
'<div(|.+?)>[[:space:]]*<br>[[:space:]]*(On|Am|Le|El|Den|Dňa|W dniu|Il|Op|Dne|Dana)[[:space:]].+?<blockquote', '<div(|.+?)>[[:space:]]*<br>[[:space:]]*(On|Am|Le|El|Den|Dňa|W dniu|Il|Op|Dne|Dana)[[:space:]].{1,500}<blockquote',
] ]
map.each { |regexp| map.each { |regexp|
string.sub!(/#{regexp}/m) { |placeholder| string.sub!(/#{regexp}/m) { |placeholder|

View file

@ -237,15 +237,30 @@ cleanup html string:
remove_empty_nodes = %w(p div span small) remove_empty_nodes = %w(p div span small)
remove_empty_last_nodes = %w(b i u small) remove_empty_last_nodes = %w(b i u small)
scrubber = Loofah::Scrubber.new do |node| # remove last empty nodes and empty -not needed- parent nodes
scrubber_structure = Loofah::Scrubber.new do |node|
if remove_empty_last_nodes.include?(node.name) && node.children.size.zero? if remove_empty_last_nodes.include?(node.name) && node.children.size.zero?
node.remove node.remove
Loofah::Scrubber::STOP Loofah::Scrubber::STOP
end end
if remove_empty_nodes.include?(node.name) && node.children.size == 1 && remove_empty_nodes.include?(node.children.first.name)
if remove_empty_nodes.include?(node.name) && node.children.size == 1 && remove_empty_nodes.include?(node.children.first.name) # && node.children.first.text.blank? node.replace node.children.to_s
node.replace cleanup_structure(node.children.to_s) Loofah::Scrubber::STOP
end end
end
string = Loofah.fragment(string).scrub!(scrubber_structure).to_s
new_string = ''
done = true
while done
new_string = Loofah.fragment(string).scrub!(scrubber_structure).to_s
if string == new_string
done = false
end
string = new_string
end
scrubber_cleanup = Loofah::Scrubber.new do |node|
# remove mailto links # remove mailto links
if node['href'] if node['href']
@ -298,7 +313,7 @@ cleanup html string:
end end
end end
end end
Loofah.fragment(string).scrub!(scrubber).to_s Loofah.fragment(string).scrub!(scrubber_cleanup).to_s
end end
def self.add_link(content, urls, node) def self.add_link(content, urls, node)

View file

@ -1088,6 +1088,37 @@ christian.schaefer@example.com'
</div> </div>
</blockquote>' </blockquote>'
assert_equal(result, html.html2html_strict) assert_equal(result, html.html2html_strict)
html = "<div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div>Dear Bob<span style=\"line-height: 23.8px;\">:</span><span style=\"color: rgb(255, 255, 255); line-height: 1.7;\">Mr/Mrs</span></div><div><br></div><div><span style=\"line-height: 1.7;\">We&nbsp;are&nbsp;one&nbsp;of&nbsp;the&nbsp;leading&nbsp;manufacturer&nbsp;and&nbsp;supplier&nbsp;of&nbsp;</span>conduits and cars since 
3000.</div><div><br></div><div>Could you inform me the specification you need?</div><div><br></div><div>May I sent you our products catalogues for your reference?</div><div><br></div><div><img src=\"cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com\" orgwidth=\"1101\" orgheight=\"637\" data-image=\"1\" style=\"width: 722.7px; height: 418px; border: none;\"></div><div>Best regards!</div><div><br></div><div><b style=\"line-height: 1.7;\"><i><u><span lang=\"EL\" style=\"font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#17365D;\nmso-ansi-language:EL\">Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.</span></u></i></b></div><div style=\"position:relative;zoom:1\"><div>Bob Smith</div><div><div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) &nbsp;Fax: 0000-11-12345678&nbsp;</div><div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div><div>Web: www.example.com</div></div><div style=\"clear:both\"></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>"
result = '<div>
<div>Dear Bob:Mr/Mrs</div>
<div> </div>
<div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div>
<div> </div>
<div>Could you inform me the specification you need?</div>
<div> </div>
<div>May I sent you our products catalogues for your reference?</div>
<div> </div>
<div><img src="cid:5cb2783c%241%2415ae9b384c8%24Coremail%24zhanabcdzhao%24example.com" style="width: 722.7px; height: 418px;"></div>
<div>Best regards!</div>
<div> </div>
<div><b><i><u>Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</u></i></b></div>
<div>
<div>Bob Smith</div>
<div>
<div>Exp. &amp; Imp.</div>
<div>Town Example Electric Co., Ltd.</div>
<div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div>
<div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div>
<div>Web: <a href="http://www.example.com" rel="nofollow" target="_blank">http://www.example.com</a>
</div>
</div>
<div></div>
</div>
</div>'
assert_equal(result, html.html2html_strict)
end end
end end