diff --git a/app/models/channel/email_parser.rb b/app/models/channel/email_parser.rb index d61131fa8..387d1c0de 100644 --- a/app/models/channel/email_parser.rb +++ b/app/models/channel/email_parser.rb @@ -74,31 +74,42 @@ class Channel::EmailParser mail = Mail.new(msg) # set all headers - mail.header.fields.select(&:name).each { |field| + mail.header.fields.each { |field| # full line, encode, ready for storage begin - data[field.name.to_s.downcase.to_sym] = Encode.conv('utf8', field.to_s) + value = Encode.conv('utf8', field.to_s) + if value.blank? + value = field.raw_value + end + data[field.name.to_s.downcase.to_sym] = value rescue => e - data[field.name.to_s.downcase.to_sym] = e.message + data[field.name.to_s.downcase.to_sym] = field.raw_value end # if we need to access the lines by objects later again data["raw-#{field.name.downcase}".to_sym] = field } + # verify content, ignore recipients with non email address + ['to', 'cc', 'delivered-to', 'x-original-to', 'envelope-to'].each { |field| + next if data[field.to_sym].blank? + next if data[field.to_sym] =~ /@/ + data[field.to_sym] = '' + } + # get sender from = nil ['from', 'reply-to', 'return-path'].each { |item| - next if !mail[item.to_sym] - from = mail[item.to_sym].value + next if data[item.to_sym].blank? + from = data[item.to_sym] break if from } # set x-any-recipient data['x-any-recipient'.to_sym] = '' ['to', 'cc', 'delivered-to', 'x-original-to', 'envelope-to'].each { |item| - next if !mail[item.to_sym] + next if data[item.to_sym].blank? if data['x-any-recipient'.to_sym] != '' data['x-any-recipient'.to_sym] += ', ' end @@ -690,6 +701,14 @@ end module Mail + # workaround to get content of no parseable headers - in most cases with non 7 bit ascii signs + class Field + def raw_value + value = Encode.conv('utf8', @raw_value) + value.sub(/^.+?:(\s|)/, '') + end + end + # workaround to parse subjects with 2 different encodings correctly (e. g. quoted-printable see test/fixtures/mail9.box) module Encodings def self.value_decode(str) diff --git a/test/fixtures/mail46.box b/test/fixtures/mail46.box new file mode 100644 index 000000000..fb17bbdb0 --- /dev/null +++ b/test/fixtures/mail46.box @@ -0,0 +1,130 @@ +Received: from localhost by bob.example.io + with SpamAssassin (version 3.4.0); + Wed, 15 Mar 2017 13:00:46 +0100 +From: "ÎäÀ¼³É" +To: +Subject: ת·¢£ºÕûÌåÌáÉýÆóÒµ·þÎñˮƽ +Date: Wed, 15 Mar 2017 20:00:44 +0800 +Message-Id: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on bob.example.io +X-Spam-Flag: YES +X-Spam-Level: ********* +X-Spam-Status: Yes, score=9.4 required=5.0 tests=HELO_DYNAMIC_IPADDR, + HTML_MESSAGE,MIME_HTML_MOSTLY,MPART_ALT_DIFF,RCVD_IN_BRBL_LASTEXT, + RCVD_IN_DNSWL_BLOCKED,RCVD_IN_PBL,RDNS_NONE,SUBJECT_NEEDS_ENCODING, + SUBJ_ILLEGAL_CHARS autolearn=no autolearn_force=no version=3.4.0 +Content-Type: multipart/mixed; boundary="----------=_58C92CEE.B3199ED3" + +This is a multi-part message in MIME format. + +------------=_58C92CEE.B3199ED3 +Content-Type: text/plain; charset=iso-8859-1 +Content-Disposition: inline +Content-Transfer-Encoding: 8bit + +Software zur Erkennung von "Spam" auf dem Rechner + + bob.example.io + +hat die eingegangene E-mail als mögliche "Spam"-Nachricht identifiziert. +Die ursprüngliche Nachricht wurde an diesen Bericht angehängt, so dass +Sie sie anschauen können (falls es doch eine legitime E-Mail ist) oder +ähnliche unerwünschte Nachrichten in Zukunft markieren können. +Bei Fragen zu diesem Vorgang wenden Sie sich bitte an + + the administrator of that system + +Vorschau: ¡°Ladies and gentlemen,¡± he said loudly, waving for quiet. + ¡°What an extraordinary moment this is! The perfect moment for me to make + a little announcement I've been sitting on for some time! [...] + +Inhaltsanalyse im Detail: (9.4 Punkte, 5.0 benötigt) + +Pkte Regelname Beschreibung +---- ---------------------- -------------------------------------------------- + 0.0 RCVD_IN_DNSWL_BLOCKED RBL: ADMINISTRATOR NOTICE: The query to DNSWL + was blocked. See + http://wiki.apache.org/spamassassin/DnsBlocklists#dnsbl-block + for more information. + [127.0.0.1 listed in list.dnswl.org] + 1.6 RCVD_IN_BRBL_LASTEXT RBL: No description available. + [127.0.0.1 listed in bb.barracudacentral.org] + 1.0 RCVD_IN_PBL RBL: Received via a relay in Spamhaus PBL + [127.0.0.1 listed in zen.spamhaus.org] + 0.0 MIME_HTML_MOSTLY BODY: Mehrteilige MIME-Nachricht überwiegend in HTML + 0.0 HTML_MESSAGE BODY: Nachricht enthält HTML + 0.7 MPART_ALT_DIFF BODY: Nachrichtentext im Text- und HTML-Format + unterscheiden sich + 1.6 RDNS_NONE Delivered to internal network by a host with no rDNS + 1.1 SUBJ_ILLEGAL_CHARS Betreff enthält zu viele ungültige Zeichen + 0.1 SUBJECT_NEEDS_ENCODING Subject is encoded but does not specify the + encoding + 3.2 HELO_DYNAMIC_IPADDR HELO-Rechnername verdächtig (IP-Adresse 1) + +Die ursprüngliche Nachricht enthielt nicht ausschließlich Klartext +(plain text) und kann eventuell eine Gefahr für einige E-Mail-Programme +darstellen (falls sie z.B. einen Computervirus enthält). +Möchten Sie die Nachricht dennoch ansehen, ist es wahrscheinlich +sicherer, sie zuerst in einer Datei zu speichern und diese Datei danach +mit einem Texteditor zu öffnen. + + +------------=_58C92CEE.B3199ED3 +Content-Type: message/rfc822; x-spam-type=original +Content-Description: original message before SpamAssassin +Content-Disposition: attachment +Content-Transfer-Encoding: 8bit + +Return-Path: +Delivered-To: info@example.de +Received: from E3C9C040C60E4E4.yinksoft.com (unknown [127.0.0.1]) + by mail.example.io (Postfix) with SMTP id 830916E566 + for ; Wed, 15 Mar 2017 13:00:40 +0100 (CET) +Date: Wed, 15 Mar 2017 20:00:44 +0800 +Subject: ת·¢£ºÕûÌåÌáÉýÆóÒµ·þÎñˮƽ +From: "ÎäÀ¼³É" +Reply-To: "ÎäÀ¼³É" +To: +Message-ID: +Content-Type: multipart/alternative; + boundary="----=_SAKbound_20_0044_20170315_46FB33A1.1A9BCC2C" +Content-Transfer-Encoding: quoted-printable +X-Priority: 3 +Author: yinksoft + +---- Original mail message ----- +·¢¼þÈË:»ôÕôÓå +ÊÕ¼þÈË: +·¢ËÍʱ¼ä:1988-08-12 + +MIME-Version: 1.0 + +This is a multi-part message in MIME format. + +------=_SAKbound_20_0044_20170315_46FB33A1.1A9BCC2C +Content-Type: text/plain; + charset="gb2312" +Content-Transfer-Encoding: quoted-printable + + +------=_SAKbound_20_0044_20170315_46FB33A1.1A9BCC2C +Content-Type: text/html; + charset="gb2312" +Content-Transfer-Encoding: quoted-printable + + +

=A1=B0Ladies and gentlemen,=A1=B1 he said loudly, waving for q= +uiet. =A1=B0What an extraordinary moment this is! The perfect moment fo= +r me to make a little announcement I've been sitting on for some time!<= +/P> +

+ + +------=_SAKbound_20_0044_20170315_46FB33A1.1A9BCC2C-- + + + +------------=_58C92CEE.B3199ED3-- + diff --git a/test/unit/email_parser_test.rb b/test/unit/email_parser_test.rb index c7ddb01ce..9eab1da9f 100644 --- a/test/unit/email_parser_test.rb +++ b/test/unit/email_parser_test.rb @@ -555,7 +555,7 @@ Newsletter abbestellen (', - from_email: '"=?GB2312?B?ztI=?=" <>', + from_email: '"我" <>', from_display_name: '', subject: '《欧美简讯》', to: '377861373 <377861373@qq.com>', diff --git a/test/unit/email_process_test.rb b/test/unit/email_process_test.rb index a4b1b9afc..09d681a26 100644 --- a/test/unit/email_process_test.rb +++ b/test/unit/email_process_test.rb @@ -124,7 +124,7 @@ Some Textäöü".encode('ISO-8859-1'), result: { 0 => { priority: '2 normal', - title: '-', # should be äöü some subject, but can not be parsed from mime tools + title: 'äöü some subject', }, 1 => { body: 'Some Textäöü', @@ -2171,7 +2171,32 @@ Some Text', email: 'abuse@domain.com', }, ], - } + }, + }, + { + data: IO.binread('test/fixtures/mail46.box'), + success: true, + result: { + 0 => { + priority: '2 normal', + title: 'ת·¢£ºÕûÌåÌáÉýÆóÒµ·þÎñˮƽ', + }, + 1 => { + from: '"ÎäÀ¼³É" ', + sender: 'Customer', + type: 'email', + }, + }, + verify: { + users: [ + { + firstname: 'ÎäÀ¼³É', + lastname: '', + fullname: 'ÎäÀ¼³É', + email: 'glopelf7121@example.com', + }, + ], + }, }, ] assert_process(files)