Improved the HTML sanitizer's handling of <a> tags that have no href attribute.

This commit is contained in:
Martin Edenhofer 2017-03-14 10:39:51 +01:00
parent cd729a987f
commit 6d808ff3eb
2 changed files with 12 additions and 2 deletions

View file

@ -145,7 +145,10 @@ sanitize html string based on whitelist
# check if href is different to text
if external && node.name == 'a' && !url_same?(node['href'], node.text)
if node.children.empty? || node.children.first.class == Nokogiri::XML::Text
if node['href'].blank?
node.replace strict(node.children.to_s)
Loofah::Scrubber::STOP
elsif node.children.empty? || node.children.first.class == Nokogiri::XML::Text
text = Nokogiri::XML::Text.new("#{node['href']} (", node.document)
node.add_previous_sibling(text)
node['href'] = cleanup_target(node.text)
@ -257,7 +260,10 @@ cleanup html string:
# check if href is different to text
if node.name == 'a' && !url_same?(node['href'], node.text)
if node.children.empty? || node.children.first.class == Nokogiri::XML::Text
if node['href'].blank?
node.replace cleanup_structure(node.children.to_s)
Loofah::Scrubber::STOP
elsif node.children.empty? || node.children.first.class == Nokogiri::XML::Text
text = Nokogiri::XML::Text.new("#{node.text} (", node.document)
node.add_previous_sibling(text)
node.content = cleanup_target(node['href'])

View file

@ -620,6 +620,10 @@ Men-----------------------'
result = "Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders (<a href=\"http://newsletters.cylex.de/\" rel=\"nofollow\" target=\"_blank\">http://newsletters.cylex.de/</a>) in<br> Ihrer Lesezeichen-Symbolleiste zu ergänzen.<div> </div>"
assert_equal(result, html.html2html_strict)
html = '<a name="_MailEndCompose"><span style="font-size:11.0pt;font-family:&quot;Calibri&quot;,&quot;sans-serif&quot;;color:#44546A">Hello Mr Smith,<o:p></o:p></span></a>'
result = 'Hello Mr Smith,'
assert_equal(result, html.html2html_strict)
html = "<div>
abc<p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p></div>"
result = "<div>abc<span class=\"js-signatureMarker\"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's </p>\n</div>"