Improved email parsing.
This commit is contained in:
parent
63feba3e57
commit
21b244bc08
5 changed files with 303 additions and 6 deletions
|
@ -2,7 +2,7 @@ require 'mail'
|
||||||
require 'iconv'
|
require 'iconv'
|
||||||
class Channel::EmailParser
|
class Channel::EmailParser
|
||||||
def conv (charset, string)
|
def conv (charset, string)
|
||||||
if charset == 'US-ASCII' || charset == 'ASCII-8BIT'
|
if !charset || charset == 'US-ASCII' || charset == 'ASCII-8BIT'
|
||||||
charset = 'LATIN1'
|
charset = 'LATIN1'
|
||||||
end
|
end
|
||||||
return string if charset.downcase == 'utf8' || charset.downcase == 'utf-8'
|
return string if charset.downcase == 'utf8' || charset.downcase == 'utf-8'
|
||||||
|
@ -25,7 +25,8 @@ class Channel::EmailParser
|
||||||
data[:from_email] = Mail::Address.new( mail[:from].value ).address
|
data[:from_email] = Mail::Address.new( mail[:from].value ).address
|
||||||
data[:from_local] = Mail::Address.new( mail[:from].value ).local
|
data[:from_local] = Mail::Address.new( mail[:from].value ).local
|
||||||
data[:from_domain] = Mail::Address.new( mail[:from].value ).domain
|
data[:from_domain] = Mail::Address.new( mail[:from].value ).domain
|
||||||
data[:from_display_name] = Mail::Address.new( mail[:from].value ).display_name
|
data[:from_display_name] = Mail::Address.new( mail[:from].value ).display_name ||
|
||||||
|
( Mail::Address.new( mail[:from].value ).comments && Mail::Address.new( mail[:from].value ).comments[0] )
|
||||||
|
|
||||||
# do extra decoding because we needed to use field.value
|
# do extra decoding because we needed to use field.value
|
||||||
data[:from_display_name] = Mail::Field.new( 'X-From', data[:from_display_name] ).to_s
|
data[:from_display_name] = Mail::Field.new( 'X-From', data[:from_display_name] ).to_s
|
||||||
|
@ -36,17 +37,50 @@ class Channel::EmailParser
|
||||||
# body
|
# body
|
||||||
# plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
|
# plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
|
||||||
# html_part = message.html_part ? message.html_part.body.decoded : nil
|
# html_part = message.html_part ? message.html_part.body.decoded : nil
|
||||||
|
data[:attachments] = []
|
||||||
if mail.multipart?
|
if mail.multipart?
|
||||||
data[:plain_part] = mail.text_part.body.decoded
|
data[:plain_part] = mail.text_part.body.decoded
|
||||||
data[:plain_part] = conv( mail.text_part.charset || 'LATIN1', data[:plain_part] )
|
data[:plain_part] = conv( mail.text_part.charset, data[:plain_part] )
|
||||||
else
|
else
|
||||||
data[:plain_part] = mail.body.decoded
|
|
||||||
data[:plain_part] = conv( mail.body.charset || 'LATIN1', data[:plain_part] )
|
# text part
|
||||||
|
if !mail.mime_type || mail.mime_type.to_s == '' || mail.mime_type.to_s.downcase == 'text/plain'
|
||||||
|
data[:plain_part] = mail.body.decoded
|
||||||
|
data[:plain_part] = conv( mail.charset, data[:plain_part] )
|
||||||
|
else
|
||||||
|
|
||||||
|
# html part
|
||||||
|
filename = '-no name-'
|
||||||
|
if mail.mime_type.to_s.downcase == 'text/html'
|
||||||
|
filename = 'html-email'
|
||||||
|
data[:plain_part] = mail.body.decoded
|
||||||
|
data[:plain_part] = conv( mail.charset, data[:plain_part] )
|
||||||
|
data[:plain_part] = html2ascii( data[:plain_part] )
|
||||||
|
|
||||||
|
# any other attachments
|
||||||
|
else
|
||||||
|
data[:plain_part] = 'no visible content'
|
||||||
|
end
|
||||||
|
|
||||||
|
# add body as attachment
|
||||||
|
headers_store = {}
|
||||||
|
if mail.mime_type
|
||||||
|
headers_store['Mime-Type'] = mail.mime_type
|
||||||
|
end
|
||||||
|
if mail.charset
|
||||||
|
headers_store['Charset'] = mail.charset
|
||||||
|
end
|
||||||
|
attachment = {
|
||||||
|
:data => mail.body.decoded,
|
||||||
|
:filename => mail.filename || filename,
|
||||||
|
:preferences => headers_store
|
||||||
|
}
|
||||||
|
data[:attachments].push attachment
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# attachments
|
# attachments
|
||||||
if mail.attachments
|
if mail.attachments
|
||||||
data[:attachments] = []
|
|
||||||
mail.attachments.each do |attachment|
|
mail.attachments.each do |attachment|
|
||||||
|
|
||||||
# get file preferences
|
# get file preferences
|
||||||
|
@ -232,4 +266,57 @@ class Channel::EmailParser
|
||||||
# return new objects
|
# return new objects
|
||||||
return ticket, article, user
|
return ticket, article, user
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Convert an HTML fragment or document into a plain-text approximation.
#
# Steps, in order:
# 1. every opening <a ... href="..."> tag becomes a "[n]" marker; the link
#    targets are collected and appended as a numbered list at the end,
# 2. leading whitespace of every line (including blank lines) is removed,
# 3. line endings are normalised to "\n",
# 4. all remaining tags are dropped,
# 5. numeric character references ("&#8211;", "&#x3d;") are decoded;
#    named entities such as "&amp;" are left untouched,
# 6. runs of blank lines are collapsed.
#
# string - HTML markup; nil is treated as an empty string. The argument
#          itself is not modified.
#
# Returns a new String containing the plain-text rendering.
def html2ascii(string)
  text = string.to_s.dup

  # find <a href=....> and replace it with [x]; the closing quote has to be
  # the same character as the opening one (back-reference \1)
  links = []
  text.gsub!(/<a\s.*?href=(["'])(.+?)\1.*?>/i) do
    links << Regexp.last_match(2)
    "[#{links.size}]"
  end

  # remove leading whitespace and empty lines
  text.gsub!(/^\s*/, '')

  # fix some bad line endings from opera and others
  # (no /s flag here: in Ruby /s selects the Windows-31J encoding and makes
  # the match raise on non-ASCII UTF-8 input)
  text.gsub!(/\n\r|\r\r\n|\r\n/, "\n")

  # strip all other tags; /m is Ruby's "dot matches newline" option, so tags
  # that span several lines are removed as well
  text.gsub!(/<.+?>/m, '')

  # decode numeric html entities like "&#8211;" (decimal) and "&#x3d;" (hex)
  # in a single pass, so already decoded output is never decoded a second time
  text.gsub!(/&\#(?:[xX]([0-9a-fA-F]+)|(\d+));?/) do |entity|
    hex_digits = Regexp.last_match(1)
    dec_digits = Regexp.last_match(2)
    codepoint  = hex_digits ? hex_digits.hex : dec_digits.to_i
    begin
      codepoint.chr(Encoding::UTF_8)
    rescue RangeError
      # not a valid Unicode code point - keep the entity as it was written
      entity
    end
  end

  # collapse runs of empty lines
  text.gsub!(/^\s*\n\s*\n/, "\n")

  # add extracted links (only if there are any)
  unless links.empty?
    text << "\n\n"
    links.each_with_index do |link, index|
      text << "[#{index + 1}] #{link}\n"
    end
  end

  text
end
|
||||||
end
|
end
|
38
test/fixtures/mail4.box
vendored
Normal file
38
test/fixtures/mail4.box
vendored
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
From k.guenther@example.com Mon May 7 15:08:10 2012
|
||||||
|
Return-Path: <k.guenther@example.com>
|
||||||
|
X-Original-To: support@example.com
|
||||||
|
Delivered-To: box@samba.example.com
|
||||||
|
X-Greylist: delayed 355 seconds by postgrey-1.32 at samba; Mon, 07 May 2012 15:08:09 BST
|
||||||
|
Received: from smtprelay05.example.com (smtprelay05.example.com [8.6.3.9])
|
||||||
|
by samba.example.com (Postfix) with ESMTP id 011F9500D3D
|
||||||
|
for <support@example.com>; Mon, 7 May 2012 15:08:09 +0100 (BST)
|
||||||
|
Received: from [1.1.0.7] (helo=exchange.df.eu)
|
||||||
|
by smtprelay05.example.com with esmtps (TLSv1:RC4-MD5:128)
|
||||||
|
(Exim 4.68)
|
||||||
|
(envelope-from <k.guenther@example.com>)
|
||||||
|
id 1SROW2-0007tk-QP
|
||||||
|
for support@example.com; Mon, 07 May 2012 16:02:18 +0200
|
||||||
|
Received: from ECCR04PUBLIC.exchange.local ([1.1.2.4]) by
|
||||||
|
efe04.exchange.local ([1.1.0.7]) with mapi; Mon, 7 May 2012 15:58:33 +0200
|
||||||
|
From: =?utf-8?B?R8O8bnRoZXIgS2F0amEgfCBFeGFtcGxlIEdtYkg=?=
|
||||||
|
<k.guenther@example.com>
|
||||||
|
To: Martin Edenhofer via Znuny Team <support@example.com>
|
||||||
|
Date: Mon, 7 May 2012 15:58:32 +0200
|
||||||
|
Subject: AW: Ticket Templates [Ticket#11168]
|
||||||
|
Thread-Topic: Ticket Templates [Ticket#11168]
|
||||||
|
Thread-Index: Ac0sGqTnvktNHx1lQoaTDcVI7lUxJQAPqvXA
|
||||||
|
Message-ID: <F799DA4E63A20B4EBE9D5A412196D71D3CADBEA04E@ECCR04PUBLIC.exchange.local>
|
||||||
|
References: <F799DA4E63A20B4EBE9D5A412196D71D3CADBE9DF6@ECCR04PUBLIC.exchange.local>
|
||||||
|
<20120507062840.265.107538@portal.example.com>
|
||||||
|
In-Reply-To: <20120507062840.265.107538@portal.example.com>
|
||||||
|
Accept-Language: de-DE
|
||||||
|
Content-Language: de-DE
|
||||||
|
X-MS-Has-Attach:
|
||||||
|
X-MS-TNEF-Correlator:
|
||||||
|
acceptlanguage: de-DE
|
||||||
|
Content-Type: text/plain; charset="utf-8"
|
||||||
|
Content-Transfer-Encoding: base64
|
||||||
|
MIME-Version: 1.0
|
||||||
|
|
||||||
|
SGFsbG8gS2F0amEsCgpzdXBlciEgSWNoIGZyZXUgbWljaCEKCldpciB3w7xyZGVuIGdlcm5lIGRpZSBQcsOkc2VudGF0aW9uL0VpbmbDvGhydW5nIGluIGRpZSBUaWNrZXQgVGVtcGxhdGVzIHBlciBTY3JlZW5zaGFyaW5nIG9kZXIgenVtaW5kZXN0IHBlciBUZWxlZm9uIG1hY2hlbi4KCk3DtmdsaWNoZSBUZXJtaW5lOgpvIERvLCAxMC4wNS4yMDEyIDE1OjAwLTE2OjAwCm8gRnIsICAxMS4wNS4yMDEyIDEzOjAwLTE0OjAwCm8gRGksICAxNS4wNS4yMDEyIDE3OjAwLTE4OjAwCgrDnGJlciBGZWVkYmFjayB3w7xyZGUgaWNoIG1pY2ggZnJldWVuIQoKUFM6IFp1ciBiZXNzZXJlbiDDnGJlcnNpY2h0IGhhYmUgaWNoIGVpbiBUaWNrZXQgZXJzdGVsbHQuIDopIEltIEZvb3RlciBzaW5kIHVuc2VyZSBnZXNjaMOkZnRsaWNoZW4gS29udGFrdGRhdGVuIChmYWxscyBkaWVzZSBpcmdlbmR3YW5uIGVpbm1hbCBiZW7DtnRpZ3Qgd2VyZGVuIHNvbGx0ZW4pLCBtZWhyIGRhenUgaW4gZWluIHBhYXIgVGFnZW4uCgpMaWViZSBHcsO8w59lIQoKIC1NYXJ0aW4KCgo
|
||||||
|
|
76
test/fixtures/mail5.box
vendored
Normal file
76
test/fixtures/mail5.box
vendored
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
From marc.smith@example.com Mon May 7 07:45:48 2012
|
||||||
|
Return-Path: <marc.smith@example.com>
|
||||||
|
X-Original-To: support@znuny.com
|
||||||
|
Delivered-To: box@samba.example.com
|
||||||
|
Received: from mailout-de.example.com (mailout-de.example.com [2.1.6.2])
|
||||||
|
by samba.example.com (Postfix) with SMTP id F1C9E500D3D
|
||||||
|
for <support@znuny.com>; Mon, 7 May 2012 07:45:47 +0100 (BST)
|
||||||
|
Received: (qmail invoked by alias); 07 May 2012 06:45:48 -0000
|
||||||
|
Received: from unknown (EHLO [1.2.1.2]) [7.3.2.1]
|
||||||
|
by mail.example.com (mp072) with SMTP; 07 May 2012 08:45:48 +0200
|
||||||
|
X-Authenticated: #69078992
|
||||||
|
X-Provags-ID: V01U2FsdGVkX1+IkUVPK6GIbZ2ezhmZfpCU0OVlFkuyPGDNsL0V5H
|
||||||
|
FxvJdecWb4ibKL
|
||||||
|
Message-ID: <4FA76F9A.3060602@example.com>
|
||||||
|
Date: Mon, 07 May 2012 08:45:46 +0200
|
||||||
|
From: marc.smith@example.com (Marc Smith)
|
||||||
|
User-Agent: Mozilla/5.0 (Windows NT 6.0; WOW64; rv:12.0) Gecko/20120428 Thunderbird/12.0.1
|
||||||
|
MIME-Version: 1.0
|
||||||
|
To: Martin Edenhofer via Znuny Team <support@znuny.com>
|
||||||
|
Subject: Re: XXXX Betatest Ticket Templates [Ticket#11162]
|
||||||
|
References: <20120507061007.259.822311@portal.znuny.com>
|
||||||
|
In-Reply-To: <20120507061007.259.822311@portal.znuny.com>
|
||||||
|
Content-Type: text/plain; charset=UTF-8; format=flowed
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
X-Y-GMX-Trusted: 0
|
||||||
|
Status: RO
|
||||||
|
Content-Length: 1418
|
||||||
|
Lines: 46
|
||||||
|
|
||||||
|
Am 07.05.2012 08:10, schrieb Martin Edenhofer via Znuny Team:
|
||||||
|
> Hallo Marc,
|
||||||
|
>
|
||||||
|
> super! Ich freu mich!
|
||||||
|
>
|
||||||
|
> Wir würden gerne die Präsentation/Einführung in die Ticket Templates per Screensharing oder zumindest per Telefon machen.
|
||||||
|
>
|
||||||
|
> Mögliche Termine:
|
||||||
|
> o Do, 10.05.2012 11:00-12:00
|
||||||
|
> o Fr, 11.05.2012 09:00-10:00
|
||||||
|
> o Di, 15.05.2012 14:00-15:00
|
||||||
|
>
|
||||||
|
> Über Feedback würde ich mich freuen!
|
||||||
|
>
|
||||||
|
> PS: Zur besseren Übersicht habe ich ein Ticket erstellt. :) Im Footer sind unsere geschäftlichen Kontaktdaten (falls diese irgendwann einmal benötigt werden sollten), mehr dazu in ein paar Tagen.
|
||||||
|
>
|
||||||
|
> Liebe Grüße!
|
||||||
|
>
|
||||||
|
> -Martin
|
||||||
|
>
|
||||||
|
> --
|
||||||
|
> Martin Edenhofer
|
||||||
|
>
|
||||||
|
> Znuny GmbH // Marienstraße 11 // 10117 Berlin // Germany
|
||||||
|
>
|
||||||
|
> P: +49 (0) 30 60 98 54 18-0
|
||||||
|
> F: +49 (0) 30 60 98 54 18-8
|
||||||
|
>
|
||||||
|
> Location: Berlin - HRB 139852 B Amtsgericht Berlin-Charlottenburg
|
||||||
|
> Managing Director: Martin Edenhofer
|
||||||
|
Hallo Martin,
|
||||||
|
|
||||||
|
John und ich könnten leider nur am Freitag, da wir Donnerstag und nächste
|
||||||
|
Woche bereits Termine haben.
|
||||||
|
|
||||||
|
Wir würden uns dann den Freitag vormerken...;-)
|
||||||
|
|
||||||
|
N Screensharing ist bei uns leider nicht so ohne Probleme möglich, bzw.
|
||||||
|
wir könnten einen PC aufsetzen mit nem seperaten Internetzugang auf dem
|
||||||
|
wir ne VM vorbereiten könnten, da wir von dem "Internet PC" nicht auf
|
||||||
|
unser XXXX zugreifen können. Falls ihr sonst noch irgendwas benötigt
|
||||||
|
einfach kurz ne Rückmeldung...;-)
|
||||||
|
|
||||||
|
Grüße aus Bonn
|
||||||
|
|
||||||
|
John & Marc
|
||||||
|
|
32
test/fixtures/mail6.box
vendored
Normal file
32
test/fixtures/mail6.box
vendored
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
From me@bogen.net Sat Sep 13 16:50:43 2003
|
||||||
|
Return-Path: <me@bogen.net>
|
||||||
|
Received: from airoma.example (law10-f30.law10.airoma.example [4.4.4.4]) by esanta.edenhofer.de (Postfix) with ESMTP id 2307484296 for <demo@exampel.com>; Sat, 13 Sep 2003 16:50:43 +0200 (CEST)
|
||||||
|
Received: from mail pickup service by airoma.example with Mc SMTPSVC; Sat, 13 Sep 2003 07:37:26 -0700
|
||||||
|
Received: from 11.11.11.11 by lw10fd.law10.com with HTTP; Sat, 13 Sep 2003 14:37:26 GMT
|
||||||
|
X-Originating-Ip: [5.5.5.5]
|
||||||
|
X-Originating-Email: [me@example.com]
|
||||||
|
From: =?Windows-1252?Q?Hans_B=C4KO?= =?iso-8859-15?q?Sch=F6nland?= <me@bogen.net>
|
||||||
|
To: =?iso-8859-2?Q?Namedy=F1ski?= (hans@example.com)
|
||||||
|
Subject: utf8: =?UTF-8?Q?=E4=BD=BF=E3=81=A3=E3=81=A6?= / ISO-8859-1: =?iso-8859-1?Q?Priorit=E4t=22_?= / cp-1251: =?windows-1251?B?0eXw4+XpINPj6+j26uj1?=
|
||||||
|
Date: Sat, 13 Sep 2003 10:37:26 -0400
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/html; charset="iso-8859-15"; format=flowed
|
||||||
|
Message-Id: <Law10-F30dRmhKuTqtA00018823@coolair.example>
|
||||||
|
X-Originalarrivaltime: 13 Sep 2003 14:37:26.0630 (UTC) FILETIME=[8D57B860:01C37A04]
|
||||||
|
|
||||||
|
<html><div style='background-color:'><P>this is a test</P></div><br clear=all><hr> <a href="http://localhost/8HMZENUS/2737??PS=">Compare Cable, DSL or Satellite plans: As low as $2.95. </a>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<br>
|
||||||
|
Test1:–
|
||||||
|
<br>
|
||||||
|
Test2:&
|
||||||
|
<br>
|
||||||
|
Test3:∋
|
||||||
|
<br>
|
||||||
|
Test4:&
|
||||||
|
<br>
|
||||||
|
Test5:=
|
||||||
|
|
||||||
|
</html>
|
|
@ -35,6 +35,70 @@ class EmailParserTest < ActiveSupport::TestCase
|
||||||
:subject => 'Ticket Templates',
|
:subject => 'Ticket Templates',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
:data => IO.read('test/fixtures/mail4.box'),
|
||||||
|
:body_md5 => '2f2c3a5c233dbd9658ab37d39469b7d0',
|
||||||
|
:params => {
|
||||||
|
:from => '"Günther Katja | Example GmbH" <k.guenther@example.com>',
|
||||||
|
:from_email => 'k.guenther@example.com',
|
||||||
|
:from_display_name => 'Günther Katja | Example GmbH',
|
||||||
|
:subject => 'AW: Ticket Templates [Ticket#11168]',
|
||||||
|
:plain_part => "Hallo Katja,
|
||||||
|
|
||||||
|
super! Ich freu mich!
|
||||||
|
|
||||||
|
Wir würden gerne die Präsentation/Einführung in die Ticket Templates per Screensharing oder zumindest per Telefon machen.
|
||||||
|
|
||||||
|
Mögliche Termine:
|
||||||
|
o Do, 10.05.2012 15:00-16:00
|
||||||
|
o Fr, 11.05.2012 13:00-14:00
|
||||||
|
o Di, 15.05.2012 17:00-18:00
|
||||||
|
|
||||||
|
Über Feedback würde ich mich freuen!
|
||||||
|
|
||||||
|
PS: Zur besseren Übersicht habe ich ein Ticket erstellt. :) Im Footer sind unsere geschäftlichen Kontaktdaten (falls diese irgendwann einmal benötigt werden sollten), mehr dazu in ein paar Tagen.
|
||||||
|
|
||||||
|
Liebe Grüße!
|
||||||
|
|
||||||
|
-Martin
|
||||||
|
",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
:data => IO.read('test/fixtures/mail5.box'),
|
||||||
|
:body_md5 => '51364a306362f513f53f2bbea7820f37',
|
||||||
|
:params => {
|
||||||
|
:from => 'marc.smith@example.com (Marc Smith)',
|
||||||
|
:from_email => 'marc.smith@example.com',
|
||||||
|
:from_display_name => 'Marc Smith',
|
||||||
|
:subject => 'Re: XXXX Betatest Ticket Templates [Ticket#11162]',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
:data => IO.read('test/fixtures/mail6.box'),
|
||||||
|
:body_md5 => '1fc492b8d762d82f861dbb70b7cf7610',
|
||||||
|
:params => {
|
||||||
|
:from => '"Hans BÄKOSchönland" <me@bogen.net>',
|
||||||
|
:from_email => 'me@bogen.net',
|
||||||
|
:from_display_name => 'Hans BÄKOSchönland',
|
||||||
|
:subject => 'utf8: 使って / ISO-8859-1: Priorität" / cp-1251: Сергей Углицких',
|
||||||
|
:plain_part => "this is a test [1]Compare Cable, DSL or Satellite plans: As low as $2.95.
|
||||||
|
|
||||||
|
Test1:8
|
||||||
|
|
||||||
|
Test2:&
|
||||||
|
|
||||||
|
Test3:∋
|
||||||
|
|
||||||
|
Test4:&
|
||||||
|
|
||||||
|
Test5:=
|
||||||
|
|
||||||
|
|
||||||
|
[1] http://localhost/8HMZENUS/2737??PS=
|
||||||
|
"
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
files.each { |file|
|
files.each { |file|
|
||||||
|
|
Loading…
Reference in a new issue