Fixed issue #951 - Auto detection of www urls fails.

This commit is contained in:
Martin Edenhofer 2017-04-13 14:47:12 +02:00
parent fb1d20c5e4
commit 5868fdd19f
3 changed files with 34 additions and 3 deletions

View file

@ -158,8 +158,15 @@ sanitize html string based on whitelist
if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a') if node && node.name != 'a' && node.parent && node.parent.name != 'a' && (!node.parent.parent || node.parent.parent.name != 'a')
if node.class == Nokogiri::XML::Text if node.class == Nokogiri::XML::Text
urls = [] urls = []
node.content.scan(%r{((http|https|ftp|tel)://.+?|(www..+?))([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match| node.content.scan(%r{((http|https|ftp|tel)://.+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)}mxi).each { |match|
urls.push match[0] if match[0]
urls.push match[0].to_s.strip
end
}
node.content.scan(/(^|:|;|\s)(www\..+?)([[:space:]]|\.[[:space:]]|,[[:space:]]|\.$|,$|\)|\(|$)/mxi).each { |match|
if match[1]
urls.push match[1].to_s.strip
end
} }
next if urls.empty? next if urls.empty?
add_link(node.content, urls, node) add_link(node.content, urls, node)

View file

@ -570,6 +570,30 @@ Men-----------------------'
result = 'some text <a href="http://www.example.com" rel="nofollow" target="_blank">http://www.example.com</a> some other text' result = 'some text <a href="http://www.example.com" rel="nofollow" target="_blank">http://www.example.com</a> some other text'
assert_equal(result, html.html2html_strict) assert_equal(result, html.html2html_strict)
html = 'some textwwwsome other text'
result = 'some textwwwsome other text'
assert_equal(result, html.html2html_strict)
html = 'some text wwwsome other text'
result = 'some text wwwsome other text'
assert_equal(result, html.html2html_strict)
html = 'some text www.some.dom other text'
result = 'some text <a href="http://www.some.dom" rel="nofollow" target="_blank">http://www.some.dom</a> other text'
assert_equal(result, html.html2html_strict)
html = 'www.some.dom other text'
result = '<a href="http://www.some.dom" rel="nofollow" target="_blank">http://www.some.dom</a> other text'
assert_equal(result, html.html2html_strict)
html = 'www.some.dom'
result = '<a href="http://www.some.dom" rel="nofollow" target="_blank">http://www.some.dom</a>'
assert_equal(result, html.html2html_strict)
html = 'web:www.some.dom other text'
result = 'web:<a href="http://www.some.dom" rel="nofollow" target="_blank">http://www.some.dom</a> other text'
assert_equal(result, html.html2html_strict)
html = '<a href="http://example.com">http://what-different.example.com</a>' html = '<a href="http://example.com">http://what-different.example.com</a>'
#result = 'http://example.com (<a href="http://what-different.example.com" rel="nofollow" target="_blank">http://what-different.example.com</a>)' #result = 'http://example.com (<a href="http://what-different.example.com" rel="nofollow" target="_blank">http://what-different.example.com</a>)'
result = '<a href="http://what-different.example.com" rel="nofollow" target="_blank">http://what-different.example.com</a> (<a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>)' result = '<a href="http://what-different.example.com" rel="nofollow" target="_blank">http://what-different.example.com</a> (<a href="http://example.com" rel="nofollow" target="_blank">http://example.com</a>)'

View file

@ -552,7 +552,7 @@ Newsletter abbestellen (<a href="http://newsletters.cylex.de/ref/www.cylex.de/si
}, },
{ {
data: IO.binread('test/fixtures/mail19.box'), data: IO.binread('test/fixtures/mail19.box'),
body_md5: '4355c52fdfd2adea0cda6814adb78ae3', body_md5: '40bf3f7f830c6ba7947deb9a2acfc5bc',
params: { params: {
from: '"我" <>', from: '"我" <>',
from_email: '"我" <>', from_email: '"我" <>',