Fixed #2456 - parsing utf8 encoded email subject
This commit is contained in:
parent
4a67fcd10b
commit
84ff625257
7 changed files with 40 additions and 42 deletions
|
@ -508,7 +508,7 @@ process unprocessable_mails (tmp/unprocessable_mail/*.eml) again
|
||||||
|
|
||||||
h['x-any-recipient'] = h.values.select(&:present?).join(', ')
|
h['x-any-recipient'] = h.values.select(&:present?).join(', ')
|
||||||
h['message_id'] = imported_fields['message-id']
|
h['message_id'] = imported_fields['message-id']
|
||||||
h['subject'] = imported_fields['subject']&.sub(/^=\?us-ascii\?Q\?(.+)\s*\?=\s*$/, '\1')
|
h['subject'] = Mail::Encodings.value_decode(imported_fields['subject'])
|
||||||
begin
|
begin
|
||||||
h['date'] = Time.zone.parse(mail.date.to_s) || imported_fields['date']
|
h['date'] = Time.zone.parse(mail.date.to_s) || imported_fields['date']
|
||||||
rescue
|
rescue
|
||||||
|
@ -794,42 +794,6 @@ module Mail
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# workaround to parse subjects with 2 different encodings correctly (e. g. quoted-printable see test/fixtures/mail9.box)
|
|
||||||
module Encodings
|
|
||||||
def self.value_decode(str)
|
|
||||||
# Optimization: If there's no encoded-words in the string, just return it
|
|
||||||
return str if !str.index('=?')
|
|
||||||
|
|
||||||
str = str.gsub(/\?=(\s*)=\?/, '?==?') # Remove whitespaces between 'encoded-word's
|
|
||||||
|
|
||||||
# Split on white-space boundaries with capture, so we capture the white-space as well
|
|
||||||
str.split(/([ \t])/).map do |text|
|
|
||||||
if text.index('=?') .nil?
|
|
||||||
text
|
|
||||||
else
|
|
||||||
# Join QP encoded-words that are adjacent to avoid decoding partial chars
|
|
||||||
# text.gsub!(/\?\=\=\?.+?\?[Qq]\?/m, '') if text =~ /\?==\?/
|
|
||||||
|
|
||||||
# Search for occurrences of quoted strings or plain strings
|
|
||||||
text.scan(/( # Group around entire regex to include it in matches
|
|
||||||
\=\?[^?]+\?([QB])\?[^?]+?\?\= # Quoted String with subgroup for encoding method
|
|
||||||
| # or
|
|
||||||
.+?(?=\=\?|$) # Plain String
|
|
||||||
)/xmi).map do |matches|
|
|
||||||
string, method = *matches
|
|
||||||
if method == 'b' || method == 'B' # rubocop:disable Style/MultipleComparison
|
|
||||||
b_value_decode(string)
|
|
||||||
elsif method == 'q' || method == 'Q' # rubocop:disable Style/MultipleComparison
|
|
||||||
q_value_decode(string)
|
|
||||||
else
|
|
||||||
string
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end.join('')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# issue#348 - IMAP mail fetching stops because of broken spam email (e. g. broken Content-Transfer-Encoding value see test/fixtures/mail43.box)
|
# issue#348 - IMAP mail fetching stops because of broken spam email (e. g. broken Content-Transfer-Encoding value see test/fixtures/mail43.box)
|
||||||
# https://github.com/zammad/zammad/issues/348
|
# https://github.com/zammad/zammad/issues/348
|
||||||
class Body
|
class Body
|
||||||
|
|
|
@ -163,5 +163,15 @@ RSpec.describe Channel::EmailParser, type: :model do
|
||||||
.to start_with( '<a href="https://zammad.com/"' )
|
.to start_with( '<a href="https://zammad.com/"' )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'Mail::Encodings.value_decode' do
|
||||||
|
it 'decode us-ascii encoded strings' do
|
||||||
|
expect( Mail::Encodings.value_decode('=?us-ascii?Q?Test?=') ).to eql( 'Test' )
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'decode utf-8 encoded strings' do
|
||||||
|
expect( Mail::Encodings.value_decode('=?UTF-8?Q?Personal=C3=A4nderung?=') ).to eql( 'Personaländerung' )
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
from: '"Hans BÄKOSchönland" <me@bogen.net>'
|
from: '"Hans BÄKO Schönland" <me@bogen.net>'
|
||||||
from_email: me@bogen.net
|
from_email: me@bogen.net
|
||||||
from_display_name: Hans BÄKOSchönland
|
from_display_name: Hans BÄKO Schönland
|
||||||
subject: 'utf8: 使って / ISO-8859-1: Priorität" / cp-1251: Сергей Углицких'
|
subject: 'utf8: 使って / ISO-8859-1: Priorität" / cp-1251: Сергей Углицких'
|
||||||
content_type: text/html
|
content_type: text/html
|
||||||
body: '<p>this is a test</p><br><hr> <a href="http://localhost/8HMZENUS/2737??PS="
|
body: '<p>this is a test</p><br><hr> <a href="http://localhost/8HMZENUS/2737??PS="
|
||||||
|
|
|
@ -3,7 +3,7 @@ from: postmaster@example.com
|
||||||
from_email: postmaster@example.com
|
from_email: postmaster@example.com
|
||||||
from_display_name: ''
|
from_display_name: ''
|
||||||
to: sales@znuny.org
|
to: sales@znuny.org
|
||||||
subject: Benachrichtung zum =?unicode-1-1-utf-7?Q?+ANw-bermittlungsstatus (Fehlgeschlagen)?=
|
subject: Benachrichtung zum +ANw-bermittlungsstatus (Fehlgeschlagen)
|
||||||
body: |+
|
body: |+
|
||||||
Dies ist eine automatisch erstellte Benachrichtigung +APw-ber den Zustellstatus.
|
Dies ist eine automatisch erstellte Benachrichtigung +APw-ber den Zustellstatus.
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
from: '"Hans BÄKOSchönland" <me@bogen.net>'
|
from: '"Hans BÄKO Schönland" <me@bogen.net>'
|
||||||
from_email: me@bogen.net
|
from_email: me@bogen.net
|
||||||
from_display_name: Hans BÄKOSchönland
|
from_display_name: Hans BÄKO Schönland
|
||||||
to: Namedyński (hans@example.com)
|
to: Namedyński (hans@example.com)
|
||||||
subject: test email
|
subject: test email
|
||||||
body: |-
|
body: |-
|
||||||
|
|
22
test/data/mail/mail077.box
Normal file
22
test/data/mail/mail077.box
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
From martin@example.com Thu May 3 12:04:29 2012
|
||||||
|
Return-Path: <martin@example.com>
|
||||||
|
X-Original-To: info@example.com
|
||||||
|
Delivered-To: box@samba.example.com
|
||||||
|
Received: from me.home (1-2-1-1.adsl.highway.example.com [1.2.1.1])
|
||||||
|
by samba.example.com (Postfix) with ESMTPSA id C96F8500D3D
|
||||||
|
for <info@example.com>; Thu, 3 May 2012 12:04:28 +0100 (BST)
|
||||||
|
Subject: =?UTF-8?Q?Personal=C3=A4nderung?=
|
||||||
|
From: <John.Smith@example.com>
|
||||||
|
Content-Type: text/plain;
|
||||||
|
charset=iso-8859-1
|
||||||
|
Content-Transfer-Encoding: quoted-printable
|
||||||
|
Date: Fri, 4 May 2012 14:01:03 +0200
|
||||||
|
Message-Id: <BC182994-03FA-4DC5-8202-98CBFACA0887@example.com>
|
||||||
|
To: metest@znuny.com
|
||||||
|
|
||||||
|
=E4=F6=FC=DF ad asd
|
||||||
|
|
||||||
|
-Martin
|
||||||
|
|
||||||
|
--
|
||||||
|
Old programmers never die. They just branch to a new address.
|
2
test/data/mail/mail077.yml
Normal file
2
test/data/mail/mail077.yml
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
--- !ruby/hash:ActiveSupport::HashWithIndifferentAccess
|
||||||
|
subject: Personaländerung
|
Loading…
Reference in a new issue