Render umlauts and other multi-byte characters correctly in HtmlSanitizer::strict

This commit is contained in:
Ryan Lue 2018-06-07 18:38:09 +08:00
parent c17a29cd0f
commit 5583369d8b
3 changed files with 45 additions and 19 deletions

View file

@ -373,34 +373,29 @@ cleanup html string:
string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ') string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
end end
def self.cleanup_target(string, keep_spaces: false) def self.cleanup_target(string, **options)
string = CGI.unescape(string).utf8_encode(fallback: :read_as_sanitized_binary) cleaned_string = CGI.unescape(string).utf8_encode(fallback: :read_as_sanitized_binary)
blank_regex = if keep_spaces cleaned_string = cleaned_string.delete(' ') unless options[:keep_spaces]
/\t|\n|\r/ cleaned_string = cleaned_string.strip
else .delete("\t\n\r\u0000")
/[[:space:]]|\t|\n|\r/ .gsub(%r{/\*.*?\*/}, '')
end .gsub(/<!--.*?-->/, '')
cleaned_string = string.strip.gsub(blank_regex, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '').delete("\u0000") .gsub(/\[.+?\]/, '')
sanitize_attachment_disposition(cleaned_string) sanitize_attachment_disposition(cleaned_string)
end end
def self.sanitize_attachment_disposition(url) def self.sanitize_attachment_disposition(url)
uri = URI(url) uri = URI(url)
return url if uri.host != Setting.get('fqdn')
if uri.host == Setting.get('fqdn') && uri.query.present?
params = CGI.parse(uri.query || '') params = CGI.parse(uri.query || '')
if params.key?('disposition') .tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') }
params['disposition'] = 'attachment' uri.query = URI.encode_www_form(params)
end
uri.query = if params.blank?
nil
else
URI.encode_www_form(params)
end end
uri.to_s uri.to_s
rescue URI::InvalidURIError rescue URI::Error
url url
end end
@ -485,6 +480,7 @@ sanitize style of img tags
end end
private_class_method :cleanup_target private_class_method :cleanup_target
private_class_method :sanitize_attachment_disposition
private_class_method :add_link private_class_method :add_link
private_class_method :url_same? private_class_method :url_same?
private_class_method :html_decode private_class_method :html_decode

View file

@ -2776,6 +2776,34 @@ Some Text',
], ],
}, },
}, },
{
data: <<~RAW_MAIL.chomp,
From: me@example.com
To: customer@example.com
Subject: some subject
Content-Type: text/html; charset=us-ascii; format=flowed
<html>
<body>
<a href="mailto:testäöü@example.com">test</a>
</body>
</html>
RAW_MAIL
success: true,
result: {
0 => {
priority: '2 normal',
title: 'some subject',
},
1 => {
content_type: 'text/html',
body: 'testäöü@example.com',
sender: 'Customer',
type: 'email',
internal: false,
},
},
},
] ]
assert_process(files) assert_process(files)
end end

View file

@ -134,5 +134,7 @@ test 123
attachment_url_evil_other = "#{attachment_url}?disposition=some_other" attachment_url_evil_other = "#{attachment_url}?disposition=some_other"
assert_equal(HtmlSanitizer.strict("<a href=\"#{attachment_url_evil_other}\">Evil link</a>"), "<a href=\"#{attachment_url_good}\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"#{attachment_url_good}\">Evil link</a>") assert_equal(HtmlSanitizer.strict("<a href=\"#{attachment_url_evil_other}\">Evil link</a>"), "<a href=\"#{attachment_url_good}\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"#{attachment_url_good}\">Evil link</a>")
assert_equal(HtmlSanitizer.strict('<a href="mailto:testäöü@example.com">test</a>'), 'testäöü@example.com')
end end
end end