From 5583369d8b59bfa7c3c5f617bdf900895993337a Mon Sep 17 00:00:00 2001 From: Ryan Lue Date: Thu, 7 Jun 2018 18:38:09 +0800 Subject: [PATCH] Render umlauts and other multi-byte characters correctly in HtmlSanitizer::strict --- lib/html_sanitizer.rb | 34 ++++++++++++++------------------ test/unit/email_process_test.rb | 28 ++++++++++++++++++++++++++ test/unit/html_sanitizer_test.rb | 2 ++ 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/lib/html_sanitizer.rb b/lib/html_sanitizer.rb index 00fbfd896..ecb76c383 100644 --- a/lib/html_sanitizer.rb +++ b/lib/html_sanitizer.rb @@ -373,34 +373,29 @@ cleanup html string: string.gsub('&', '&').gsub('<', '<').gsub('>', '>').gsub('"', '"').gsub(' ', ' ') end - def self.cleanup_target(string, keep_spaces: false) - string = CGI.unescape(string).utf8_encode(fallback: :read_as_sanitized_binary) - blank_regex = if keep_spaces - /\t|\n|\r/ - else - /[[:space:]]|\t|\n|\r/ - end - cleaned_string = string.strip.gsub(blank_regex, '').gsub(%r{/\*.*?\*/}, '').gsub(//, '').gsub(/\[.+?\]/, '').delete("\u0000") + def self.cleanup_target(string, **options) + cleaned_string = CGI.unescape(string).utf8_encode(fallback: :read_as_sanitized_binary) + cleaned_string = cleaned_string.delete(' ') unless options[:keep_spaces] + cleaned_string = cleaned_string.strip + .delete("\t\n\r\u0000") + .gsub(%r{/\*.*?\*/}, '') + .gsub(//, '') + .gsub(/\[.+?\]/, '') + sanitize_attachment_disposition(cleaned_string) end def self.sanitize_attachment_disposition(url) uri = URI(url) - return url if uri.host != Setting.get('fqdn') - params = CGI.parse(uri.query || '') - if params.key?('disposition') - params['disposition'] = 'attachment' + if uri.host == Setting.get('fqdn') && uri.query.present? + params = CGI.parse(uri.query || '') + .tap { |p| p.merge!('disposition' => 'attachment') if p.include?('disposition') } + uri.query = URI.encode_www_form(params) end - uri.query = if params.blank? - nil - else - URI.encode_www_form(params) - end - uri.to_s - rescue URI::InvalidURIError + rescue URI::Error url end @@ -485,6 +480,7 @@ satinize style of img tags end private_class_method :cleanup_target + private_class_method :sanitize_attachment_disposition private_class_method :add_link private_class_method :url_same? private_class_method :html_decode diff --git a/test/unit/email_process_test.rb b/test/unit/email_process_test.rb index f1f29b676..e4b934f20 100644 --- a/test/unit/email_process_test.rb +++ b/test/unit/email_process_test.rb @@ -2776,6 +2776,34 @@ Some Text', ], }, }, + { + data: <<~RAW_MAIL.chomp, + From: me@example.com + To: customer@example.com + Subject: some subject + Content-Type: text/html; charset=us-ascii; format=flowed + + + + test + + + RAW_MAIL + success: true, + result: { + 0 => { + priority: '2 normal', + title: 'some subject', + }, + 1 => { + content_type: 'text/html', + body: 'testäöü@example.com', + sender: 'Customer', + type: 'email', + internal: false, + }, + }, + }, ] assert_process(files) end diff --git a/test/unit/html_sanitizer_test.rb b/test/unit/html_sanitizer_test.rb index 9c95a0dad..03b040da2 100644 --- a/test/unit/html_sanitizer_test.rb +++ b/test/unit/html_sanitizer_test.rb @@ -134,5 +134,7 @@ test 123 attachment_url_evil_other = "#{attachment_url}?disposition=some_other" assert_equal(HtmlSanitizer.strict("Evil link"), "Evil link") + + assert_equal(HtmlSanitizer.strict('test'), 'testäöü@example.com') end end