Render umlauts and other multi-byte characters correctly in HtmlSanitizer::strict

This commit is contained in:
Ryan Lue 2018-06-07 18:38:09 +08:00
parent c17a29cd0f
commit 5583369d8b
3 changed files with 45 additions and 19 deletions

View file

@ -373,34 +373,29 @@ cleanup html string:
string.gsub('&amp;', '&').gsub('&lt;', '<').gsub('&gt;', '>').gsub('&quot;', '"').gsub('&nbsp;', ' ')
end
def self.cleanup_target(string, keep_spaces: false)
string = CGI.unescape(string).utf8_encode(fallback: :read_as_sanitized_binary)
blank_regex = if keep_spaces
/\t|\n|\r/
else
/[[:space:]]|\t|\n|\r/
end
cleaned_string = string.strip.gsub(blank_regex, '').gsub(%r{/\*.*?\*/}, '').gsub(/<!--.*?-->/, '').gsub(/\[.+?\]/, '').delete("\u0000")
# Normalizes a target string (presumably a link/URL attribute value — confirm
# against callers) and returns whatever sanitize_attachment_disposition
# returns for the cleaned result.
#
# string  - the raw target; it is decoded with CGI.unescape first.
# options - keyword options; only :keep_spaces is read here. When it is
#           truthy, ASCII spaces inside the string are kept (leading and
#           trailing whitespace is still stripped afterwards).
def self.cleanup_target(string, **options)
# utf8_encode is a project helper not visible here; presumably it coerces the
# decoded string to valid UTF-8 using the named fallback — TODO confirm.
cleaned_string = CGI.unescape(string).utf8_encode(fallback: :read_as_sanitized_binary)
# Only the ASCII space character is deleted here, not every Unicode blank.
cleaned_string = cleaned_string.delete(' ') unless options[:keep_spaces]
# In order: trim surrounding whitespace, delete tabs / line breaks / NUL
# characters, then remove "/* ... */" sequences, "<!-- ... -->" sequences and
# "[...]" segments (each matched non-greedily).
cleaned_string = cleaned_string.strip
.delete("\t\n\r\u0000")
.gsub(%r{/\*.*?\*/}, '')
.gsub(/<!--.*?-->/, '')
.gsub(/\[.+?\]/, '')
# The cleaned value is handed to the sibling helper, whose result is returned.
sanitize_attachment_disposition(cleaned_string)
end
def self.sanitize_attachment_disposition(url)
uri = URI(url)
return url if uri.host != Setting.get('fqdn')
params = CGI.parse(uri.query || '')
if params.key?('disposition')
params['disposition'] = 'attachment'
if uri.host == Setting.get('fqdn') && uri.query.present?
params = CGI.parse(uri.query || '')
.tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') }
uri.query = URI.encode_www_form(params)
end
uri.query = if params.blank?
nil
else
URI.encode_www_form(params)
end
uri.to_s
rescue URI::InvalidURIError
rescue URI::Error
url
end
@ -485,6 +480,7 @@ satinize style of img tags
end
private_class_method :cleanup_target
private_class_method :sanitize_attachment_disposition
private_class_method :add_link
private_class_method :url_same?
private_class_method :html_decode

View file

@ -2776,6 +2776,34 @@ Some Text',
],
},
},
{
data: <<~RAW_MAIL.chomp,
From: me@example.com
To: customer@example.com
Subject: some subject
Content-Type: text/html; charset=us-ascii; format=flowed
<html>
<body>
<a href="mailto:testäöü@example.com">test</a>
</body>
</html>
RAW_MAIL
success: true,
result: {
0 => {
priority: '2 normal',
title: 'some subject',
},
1 => {
content_type: 'text/html',
body: 'testäöü@example.com',
sender: 'Customer',
type: 'email',
internal: false,
},
},
},
]
assert_process(files)
end

View file

@ -134,5 +134,7 @@ test 123
attachment_url_evil_other = "#{attachment_url}?disposition=some_other"
assert_equal(HtmlSanitizer.strict("<a href=\"#{attachment_url_evil_other}\">Evil link</a>"), "<a href=\"#{attachment_url_good}\" rel=\"nofollow noreferrer noopener\" target=\"_blank\" title=\"#{attachment_url_good}\">Evil link</a>")
assert_equal(HtmlSanitizer.strict('<a href="mailto:testäöü@example.com">test</a>'), 'testäöü@example.com')
end
end