Stop wrapping <a> tags around URLs without a scheme/protocol (fixes #2139)

This commit is contained in:
Ryan Lue 2018-08-01 17:25:38 +08:00
parent 59b8ccde1f
commit e341d07d6b
3 changed files with 12 additions and 48 deletions

View file

@ -1,4 +1,5 @@
class HtmlSanitizer
# Lower-cased URI schemes passed to URI.extract when looking for plain-text
# URLs to wrap in <a> tags: every scheme registered with URI, minus 'mailto',
# plus 'tel'. Frozen so the shared list cannot be mutated at runtime.
LINKABLE_URL_SCHEMES = (URI.scheme_list.keys.map(&:downcase) - ['mailto'] + ['tel']).freeze
=begin
@ -25,23 +26,14 @@ sanitize html string based on whitelist
scrubber_link = Loofah::Scrubber.new do |node|
# check if text has urls which need to be clickable
if node&.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
if node.class == Nokogiri::XML::Text
urls = []
node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each do |match|
if match[0]
urls.push match[0].to_s.strip
end
end
node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each do |match|
if match[1]
urls.push match[1].to_s.strip
end
end
next if urls.blank?
add_link(node.content, urls, node)
end
# wrap plain-text URLs in <a> tags
if node.is_a?(Nokogiri::XML::Text) && node.ancestors.map(&:name).exclude?('a')
urls = URI.extract(node.content, LINKABLE_URL_SCHEMES)
.map { |u| u.sub(/[,.]$/, '') } # URI::extract captures trailing dots/commas
.reject { |u| u.match?(/^[^:]+:$/) } # URI::extract will match, e.g., 'tel:'
next if urls.blank?
add_link(node.content, urls, node)
end
# prepare links

View file

@ -6,7 +6,7 @@ subject: Ticket Templates
content_type: text/html
body: |-
<div>
<p>Hallo Martin,</p><p>&nbsp;</p><p>ich möchte mich gern für den Beta-Test für die Ticket Templates unter XXXX 2.4 anmelden.</p><p>&nbsp;</p><div> <p>&nbsp;</p><p>Mit freundlichen Grüßen</p><p>John Günther</p><p>&nbsp;</p><p><a href="http://www.GeoFachDatenServer.de" rel="nofollow noreferrer noopener" target="_blank" title="http://www.GeoFachDatenServer.de">example.com</a> profitieren Sie vom umfangreichen Daten-Netzwerk </p><p>&nbsp;</p><p>_ __ ___ ____________________________ ___ __ _</p><p>&nbsp;</p><p>Example GmbH</p><p>Some What</p><p>&nbsp;</p><p>Sitz: Someware-Straße 9, XXXXX Someware</p><p>&nbsp;</p><p>M: +49 (0) XXX XX XX 70</p><p>T: +49 (0) XXX XX XX 22</p><p>F: +49 (0) XXX XX XX 11</p><p>W: <a href="http://www.example.de" rel="nofollow noreferrer noopener" target="_blank">http://www.example.de</a></p><p>&nbsp;</p><p>Geschäftsführer: John Smith</p><p>HRB XXXXXX AG Someware</p><p>St.-Nr.: 112/107/05858</p><p>&nbsp;</p><p>ISO 9001:2008 Zertifiziert -Qualitätsstandard mit Zukunft</p><p>_ __ ___ ____________________________ ___ __ _</p><p>&nbsp;</p><p>Diese Information ist ausschließlich für den Adressaten bestimmt und kann vertrauliche oder gesetzlich geschützte Informationen enthalten. Wenn Sie nicht der bestimmungsgemäße Adressat sind, unterrichten Sie bitte den Absender und vernichten Sie diese Mail. Anderen als dem bestimmungsgemäßen Adressaten ist es untersagt, diese E-Mail zu lesen, zu speichern, weiterzuleiten oder ihren Inhalt auf welche Weise auch immer zu verwenden.</p></div><p>&nbsp;</p><div>
<p>Hallo Martin,</p><p>&nbsp;</p><p>ich möchte mich gern für den Beta-Test für die Ticket Templates unter XXXX 2.4 anmelden.</p><p>&nbsp;</p><div> <p>&nbsp;</p><p>Mit freundlichen Grüßen</p><p>John Günther</p><p>&nbsp;</p><p><a href="http://www.GeoFachDatenServer.de" rel="nofollow noreferrer noopener" target="_blank" title="http://www.GeoFachDatenServer.de">example.com</a> profitieren Sie vom umfangreichen Daten-Netzwerk </p><p>&nbsp;</p><p>_ __ ___ ____________________________ ___ __ _</p><p>&nbsp;</p><p>Example GmbH</p><p>Some What</p><p>&nbsp;</p><p>Sitz: Someware-Straße 9, XXXXX Someware</p><p>&nbsp;</p><p>M: +49 (0) XXX XX XX 70</p><p>T: +49 (0) XXX XX XX 22</p><p>F: +49 (0) XXX XX XX 11</p><p>W: www.example.de</p><p>&nbsp;</p><p>Geschäftsführer: John Smith</p><p>HRB XXXXXX AG Someware</p><p>St.-Nr.: 112/107/05858</p><p>&nbsp;</p><p>ISO 9001:2008 Zertifiziert -Qualitätsstandard mit Zukunft</p><p>_ __ ___ ____________________________ ___ __ _</p><p>&nbsp;</p><p>Diese Information ist ausschließlich für den Adressaten bestimmt und kann vertrauliche oder gesetzlich geschützte Informationen enthalten. Wenn Sie nicht der bestimmungsgemäße Adressat sind, unterrichten Sie bitte den Absender und vernichten Sie diese Mail. Anderen als dem bestimmungsgemäßen Adressaten ist es untersagt, diese E-Mail zu lesen, zu speichern, weiterzuleiten oder ihren Inhalt auf welche Weise auch immer zu verwenden.</p></div><p>&nbsp;</p><div>
<span class="js-signatureMarker"></span><p><b>Von:</b> Fritz Bauer [mailto:me@example.com] <br><b>Gesendet:</b> Donnerstag, 3. Mai 2012 11:51<br><b>An:</b> John Smith<br><b>Cc:</b> Smith, John Marian; johnel.fratczak@example.com; ole.brei@example.com; Günther John | Example GmbH; bkopon@example.com; john.heisterhagen@team.example.com; sven.rocked@example.com; michael.house@example.com; tgutzeit@example.com<br><b>Betreff:</b> Re: OTRS::XXX Erweiterung - Anhänge an CI's</p></div><p>&nbsp;</p><p>Hallo,</p><div> <p>&nbsp;</p></div><div>
<p>ich versuche an den Punkten anzuknüpfen.</p></div><div> <p>&nbsp;</p></div><div>
<p><b>a) LDAP Muster Konfigdatei</b></p></div><div> <p>&nbsp;</p></div><div>

View file

@ -616,31 +616,7 @@ Men-----------------------'
assert_equal(result, html.html2html_strict)
html = 'some text www.example.com some other text'
result = 'some text <a href="http://www.example.com" rel="nofollow noreferrer noopener" target="_blank">http://www.example.com</a> some other text'
assert_equal(result, html.html2html_strict)
html = 'some textwwwsome other text'
result = 'some textwwwsome other text'
assert_equal(result, html.html2html_strict)
html = 'some text wwwsome other text'
result = 'some text wwwsome other text'
assert_equal(result, html.html2html_strict)
html = 'some text www.some.dom other text'
result = 'some text <a href="http://www.some.dom" rel="nofollow noreferrer noopener" target="_blank">http://www.some.dom</a> other text'
assert_equal(result, html.html2html_strict)
html = 'www.some.dom other text'
result = '<a href="http://www.some.dom" rel="nofollow noreferrer noopener" target="_blank">http://www.some.dom</a> other text'
assert_equal(result, html.html2html_strict)
html = 'www.some.dom'
result = '<a href="http://www.some.dom" rel="nofollow noreferrer noopener" target="_blank">http://www.some.dom</a>'
assert_equal(result, html.html2html_strict)
html = 'web:www.some.dom other text'
result = 'web:<a href="http://www.some.dom" rel="nofollow noreferrer noopener" target="_blank">http://www.some.dom</a> other text'
result = 'some text www.example.com some other text'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com">http://what-different.example.com</a>'
@ -1003,11 +979,7 @@ html.html2html_strict
assert_equal(result, html.html2html_strict)
html = "<div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div style=\"line-height:1.7;color:#000000;font-size:14px;font-family:Arial\"><div>Dear Bob<span style=\"line-height: 23.8px;\">:</span><span style=\"color: rgb(255, 255, 255); line-height: 1.7;\">Mr/Mrs</span></div><div><br></div><div><span style=\"line-height: 1.7;\">We&nbsp;are&nbsp;one&nbsp;of&nbsp;the&nbsp;leading&nbsp;manufacturer&nbsp;and&nbsp;supplier&nbsp;of&nbsp;</span>conduits and cars since 
3000.</div><div><br></div><div>Could you inform me the specification you need?</div><div><br></div><div>May I sent you our products catalogues for your reference?</div><div><br></div><div><img src=\"cid:5cb2783c$1$15ae9b384c8$Coremail$zhanabcdzhao$example.com\" orgwidth=\"1101\" orgheight=\"637\" data-image=\"1\" style=\"width: 722.7px; height: 418px; border: none;\"></div><div>Best regards!</div><div><br></div><div><b style=\"line-height: 1.7;\"><i><u><span lang=\"EL\" style=\"font-size:11.0pt;font-family:&quot;Calibri&quot;,sans-serif;color:#17365D;\nmso-ansi-language:EL\">Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.</span></u></i></b></div><div style=\"position:relative;zoom:1\"><div>Bob Smith</div><div><div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) &nbsp;Fax: 0000-11-12345678&nbsp;</div><div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div><div>Web: www.example.com</div></div><div style=\"clear:both\"></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div>"
result = "<div>
<div>Dear Bob:Mr/Mrs</div><div>&nbsp;</div><div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div><div>&nbsp;</div><div>Could you inform me the specification you need?</div><div>&nbsp;</div><div>May I sent you our products catalogues for your reference?</div><div>&nbsp;</div><div><img src=\"cid:5cb2783c%241%2415ae9b384c8%24Coremail%24zhanabcdzhao%24example.com\" style=\"width: 722.7px; height: 418px;\"></div><div>Best regards!</div><div>&nbsp;</div><div><b><i><u>Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</u></i></b></div><div>
<div>Bob Smith</div><div>
<div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div><div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div><div>Web: <a href=\"http://www.example.com\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">http://www.example.com</a>
</div></div></div></div>"
result = "<div>\n<div>Dear Bob:Mr/Mrs</div><div>&nbsp;</div><div>We are one of the leading manufacturer and supplier of conduits and cars since 3000.</div><div>&nbsp;</div><div>Could you inform me the specification you need?</div><div>&nbsp;</div><div>May I sent you our products catalogues for your reference?</div><div>&nbsp;</div><div><img src=\"cid:5cb2783c%241%2415ae9b384c8%24Coremail%24zhanabcdzhao%24example.com\" style=\"width: 722.7px; height: 418px;\"></div><div>Best regards!</div><div>&nbsp;</div><div><b><i><u>Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.</u></i></b></div><div>\n<div>Bob Smith</div><div>\n<div>Exp. &amp; Imp.</div><div>Town Example Electric Co., Ltd.</div><div>Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678</div><div>Room1234, NO. 638, Smith Road, Town, 200000, Somewhere</div><div>Web: www.example.com</div></div></div></div>"
assert_equal(result, html.html2html_strict)
html = '<li><a style="font-size:15px; font-family:Arial;color:#0f7246" class="text_link" href="http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh"><span style="color: rgb(0, 0, 0);">Luxemburg</span></a></li>'