Fixed #2456 - parsing of UTF-8 encoded email subjects

This commit is contained in:
Billy Zhou 2019-01-30 07:12:12 +01:00
parent 4a67fcd10b
commit 84ff625257
7 changed files with 40 additions and 42 deletions

View file

@ -508,7 +508,7 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
h['x-any-recipient'] = h.values.select(&:present?).join(', ')
h['message_id'] = imported_fields['message-id']
h['subject'] = imported_fields['subject']&.sub(/^=\?us-ascii\?Q\?(.+)\s*\?=\s*$/, '\1')
h['subject'] = Mail::Encodings.value_decode(imported_fields['subject'])
begin
h['date'] = Time.zone.parse(mail.date.to_s) || imported_fields['date']
rescue
@ -794,42 +794,6 @@ module Mail
end
end
# Workaround to parse subjects with 2 different encodings correctly
# (e. g. quoted-printable, see test/fixtures/mail9.box).
module Encodings
  # Decodes every encoded-word ("=?charset?Q?...?=" / "=?charset?B?...?=")
  # found in +str+ and leaves all other text untouched.
  #
  # The actual decoding is delegated to +b_value_decode+ and
  # +q_value_decode+, which are not defined in this block.
  # NOTE(review): presumably they come from the mail gem's Mail::Encodings,
  # which this module reopens - confirm.
  #
  # @param str [String, nil] raw header value
  # @return [String, nil] the decoded value; +str+ itself (including nil)
  #   when it contains no encoded-word marker
  def self.value_decode(str)
    # Optimization: without an encoded-word marker there is nothing to do.
    # The nil check keeps a missing header from raising NoMethodError.
    return str if str.nil? || !str.index('=?')

    # Remove whitespace between adjacent encoded-words
    str = str.gsub(/\?=(\s*)=\?/, '?==?')

    # Split on blanks with a capture group, so the blanks themselves are
    # kept as separate elements and survive the final join.
    str.split(/([ \t])/).map do |text|
      next text if text.index('=?').nil?

      # Search for occurrences of quoted strings or plain strings
      text.scan(/(                         # Group around entire regex to include it in matches
        \=\?[^?]+\?([QB])\?[^?]+?\?\=      # Quoted String with subgroup for encoding method
        |                                  # or
        .+?(?=\=\?|$)                      # Plain String
      )/xmi).map do |string, method|
        # +method+ is nil for plain strings; the regex is case-insensitive,
        # so both upper- and lower-case encoding markers can show up here.
        case method
        when 'b', 'B' then b_value_decode(string)
        when 'q', 'Q' then q_value_decode(string)
        else string
        end
      end
    end.join('') # Array#join also flattens the nested per-chunk arrays
  end
end
# issue#348 - IMAP mail fetching stops because of broken spam email (e. g. broken Content-Transfer-Encoding value see test/fixtures/mail43.box)
# https://github.com/zammad/zammad/issues/348
class Body

View file

@ -163,5 +163,15 @@ RSpec.describe Channel::EmailParser, type: :model do
.to start_with( '<a href="https://zammad.com/"' )
end
end
# Checks that encoded-word header values are decoded to plain strings.
context 'Mail::Encodings.value_decode' do
  # Quoted-printable encoded-word with the us-ascii charset
  it 'decode us-ascii encoded strings' do
    decoded = Mail::Encodings.value_decode('=?us-ascii?Q?Test?=')

    expect(decoded).to eql('Test')
  end

  # Quoted-printable encoded-word with a multi-byte UTF-8 character (=C3=A4)
  it 'decode utf-8 encoded strings' do
    decoded = Mail::Encodings.value_decode('=?UTF-8?Q?Personal=C3=A4nderung?=')

    expect(decoded).to eql('Personaländerung')
  end
end
end
end

View file

@ -1,7 +1,7 @@
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
from: '"Hans BÄKOSchönland" <me@bogen.net>'
from: '"Hans BÄKO Schönland" <me@bogen.net>'
from_email: me@bogen.net
from_display_name: Hans BÄKOSchönland
from_display_name: Hans BÄKO Schönland
subject: 'utf8: 使って / ISO-8859-1: Priorität" / cp-1251: Сергей Углицких'
content_type: text/html
body: '<p>this is a test</p><br><hr> <a href="http://localhost/8HMZENUS/2737??PS="

View file

@ -3,7 +3,7 @@ from: postmaster@example.com
from_email: postmaster@example.com
from_display_name: ''
to: sales@znuny.org
subject: Benachrichtung zum =?unicode-1-1-utf-7?Q?+ANw-bermittlungsstatus (Fehlgeschlagen)?=
subject: Benachrichtung zum +ANw-bermittlungsstatus (Fehlgeschlagen)
body: |+
Dies ist eine automatisch erstellte Benachrichtigung +APw-ber den Zustellstatus.

View file

@ -1,7 +1,7 @@
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
from: '"Hans BÄKOSchönland" <me@bogen.net>'
from: '"Hans BÄKO Schönland" <me@bogen.net>'
from_email: me@bogen.net
from_display_name: Hans BÄKOSchönland
from_display_name: Hans BÄKO Schönland
to: Namedyński (hans@example.com)
subject: test email
body: |-

View file

@ -0,0 +1,22 @@
From martin@example.com Thu May 3 12:04:29 2012
Return-Path: <martin@example.com>
X-Original-To: info@example.com
Delivered-To: box@samba.example.com
Received: from me.home (1-2-1-1.adsl.highway.example.com [1.2.1.1])
by samba.example.com (Postfix) with ESMTPSA id C96F8500D3D
for <info@example.com>; Thu, 3 May 2012 12:04:28 +0100 (BST)
Subject: =?UTF-8?Q?Personal=C3=A4nderung?=
From: <John.Smith@example.com>
Content-Type: text/plain;
charset=iso-8859-1
Content-Transfer-Encoding: quoted-printable
Date: Fri, 4 May 2012 14:01:03 +0200
Message-Id: <BC182994-03FA-4DC5-8202-98CBFACA0887@example.com>
To: metest@znuny.com
=E4=F6=FC=DF ad asd
-Martin
--
Old programmers never die. They just branch to a new address.

View file

@ -0,0 +1,2 @@
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
subject: Personaländerung