From 84ff6252576976c42f89ffefe87c3c1e223aa835 Mon Sep 17 00:00:00 2001 From: Billy Zhou Date: Wed, 30 Jan 2019 07:12:12 +0100 Subject: [PATCH] Fixed #2456 - parsing utf8 encoded email subject --- app/models/channel/email_parser.rb | 38 +----------------------- spec/models/channel/email_parser_spec.rb | 10 +++++++ test/data/mail/mail006.yml | 4 +-- test/data/mail/mail018.yml | 2 +- test/data/mail/mail075.yml | 4 +-- test/data/mail/mail077.box | 22 ++++++++++++++ test/data/mail/mail077.yml | 2 ++ 7 files changed, 40 insertions(+), 42 deletions(-) create mode 100644 test/data/mail/mail077.box create mode 100644 test/data/mail/mail077.yml diff --git a/app/models/channel/email_parser.rb b/app/models/channel/email_parser.rb index a3b34f9a4..bd9669770 100644 --- a/app/models/channel/email_parser.rb +++ b/app/models/channel/email_parser.rb @@ -508,7 +508,7 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again h['x-any-recipient'] = h.values.select(&:present?).join(', ') h['message_id'] = imported_fields['message-id'] - h['subject'] = imported_fields['subject']&.sub(/^=\?us-ascii\?Q\?(.+)\s*\?=\s*$/, '\1') + h['subject'] = Mail::Encodings.value_decode(imported_fields['subject']) begin h['date'] = Time.zone.parse(mail.date.to_s) || imported_fields['date'] rescue @@ -794,42 +794,6 @@ module Mail end end - # workaround to parse subjects with 2 different encodings correctly (e. g. quoted-printable see test/fixtures/mail9.box) - module Encodings - def self.value_decode(str) - # Optimization: If there's no encoded-words in the string, just return it - return str if !str.index('=?') - - str = str.gsub(/\?=(\s*)=\?/, '?==?') # Remove whitespaces between 'encoded-word's - - # Split on white-space boundaries with capture, so we capture the white-space as well - str.split(/([ \t])/).map do |text| - if text.index('=?') .nil? - text - else - # Join QP encoded-words that are adjacent to avoid decoding partial chars - # text.gsub!(/\?\=\=\?.+?\?[Qq]\?/m, '') if text =~ /\?==\?/ - - # Search for occurences of quoted strings or plain strings - text.scan(/( # Group around entire regex to include it in matches - \=\?[^?]+\?([QB])\?[^?]+?\?\= # Quoted String with subgroup for encoding method - | # or - .+?(?=\=\?|$) # Plain String - )/xmi).map do |matches| - string, method = *matches - if method == 'b' || method == 'B' # rubocop:disable Style/MultipleComparison - b_value_decode(string) - elsif method == 'q' || method == 'Q' # rubocop:disable Style/MultipleComparison - q_value_decode(string) - else - string - end - end - end - end.join('') - end - end - # issue#348 - IMAP mail fetching stops because of broken spam email (e. g. broken Content-Transfer-Encoding value see test/fixtures/mail43.box) # https://github.com/zammad/zammad/issues/348 class Body diff --git a/spec/models/channel/email_parser_spec.rb b/spec/models/channel/email_parser_spec.rb index c7c65efbd..2c95880ab 100644 --- a/spec/models/channel/email_parser_spec.rb +++ b/spec/models/channel/email_parser_spec.rb @@ -163,5 +163,15 @@ RSpec.describe Channel::EmailParser, type: :model do .to start_with( '' +from: '"Hans BÄKO Schönland" ' from_email: me@bogen.net -from_display_name: Hans BÄKOSchönland +from_display_name: Hans BÄKO Schönland subject: 'utf8: 使って / ISO-8859-1: Priorität" / cp-1251: Сергей Углицких' content_type: text/html body: '

this is a test



' +from: '"Hans BÄKO Schönland" ' from_email: me@bogen.net -from_display_name: Hans BÄKOSchönland +from_display_name: Hans BÄKO Schönland to: Namedyński (hans@example.com) subject: test email body: |- diff --git a/test/data/mail/mail077.box b/test/data/mail/mail077.box new file mode 100644 index 000000000..699a2edd9 --- /dev/null +++ b/test/data/mail/mail077.box @@ -0,0 +1,22 @@ +From martin@example.com Thu May 3 12:04:29 2012 +Return-Path: +X-Original-To: info@example.com +Delivered-To: box@samba.example.com +Received: from me.home (1-2-1-1.adsl.highway.example.com [1.2.1.1]) + by samba.example.com (Postfix) with ESMTPSA id C96F8500D3D + for ; Thu, 3 May 2012 12:04:28 +0100 (BST) +Subject: =?UTF-8?Q?Personal=C3=A4nderung?= +From: +Content-Type: text/plain; + charset=iso-8859-1 +Content-Transfer-Encoding: quoted-printable +Date: Fri, 4 May 2012 14:01:03 +0200 +Message-Id: +To: metest@znuny.com + +=E4=F6=FC=DF ad asd + +-Martin + +-- +Old programmers never die. They just branch to a new address. diff --git a/test/data/mail/mail077.yml b/test/data/mail/mail077.yml new file mode 100644 index 000000000..f6a4a1226 --- /dev/null +++ b/test/data/mail/mail077.yml @@ -0,0 +1,2 @@ +--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess +subject: Personaländerung