Improved email parsing.
This commit is contained in:
parent
63feba3e57
commit
21b244bc08
5 changed files with 303 additions and 6 deletions
|
@ -2,7 +2,7 @@ require 'mail'
|
|||
require 'iconv'
|
||||
class Channel::EmailParser
|
||||
def conv (charset, string)
|
||||
if charset == 'US-ASCII' || charset == 'ASCII-8BIT'
|
||||
if !charset || charset == 'US-ASCII' || charset == 'ASCII-8BIT'
|
||||
charset = 'LATIN1'
|
||||
end
|
||||
return string if charset.downcase == 'utf8' || charset.downcase == 'utf-8'
|
||||
|
@ -25,7 +25,8 @@ class Channel::EmailParser
|
|||
data[:from_email] = Mail::Address.new( mail[:from].value ).address
|
||||
data[:from_local] = Mail::Address.new( mail[:from].value ).local
|
||||
data[:from_domain] = Mail::Address.new( mail[:from].value ).domain
|
||||
data[:from_display_name] = Mail::Address.new( mail[:from].value ).display_name
|
||||
data[:from_display_name] = Mail::Address.new( mail[:from].value ).display_name ||
|
||||
( Mail::Address.new( mail[:from].value ).comments && Mail::Address.new( mail[:from].value ).comments[0] )
|
||||
|
||||
# do extra decoding because we needed to use field.value
|
||||
data[:from_display_name] = Mail::Field.new( 'X-From', data[:from_display_name] ).to_s
|
||||
|
@ -36,17 +37,50 @@ class Channel::EmailParser
|
|||
# body
|
||||
# plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
|
||||
# html_part = message.html_part ? message.html_part.body.decoded : nil
|
||||
data[:attachments] = []
|
||||
if mail.multipart?
|
||||
data[:plain_part] = mail.text_part.body.decoded
|
||||
data[:plain_part] = conv( mail.text_part.charset || 'LATIN1', data[:plain_part] )
|
||||
data[:plain_part] = conv( mail.text_part.charset, data[:plain_part] )
|
||||
else
|
||||
data[:plain_part] = mail.body.decoded
|
||||
data[:plain_part] = conv( mail.body.charset || 'LATIN1', data[:plain_part] )
|
||||
|
||||
# text part
|
||||
if !mail.mime_type || mail.mime_type.to_s == '' || mail.mime_type.to_s.downcase == 'text/plain'
|
||||
data[:plain_part] = mail.body.decoded
|
||||
data[:plain_part] = conv( mail.charset, data[:plain_part] )
|
||||
else
|
||||
|
||||
# html part
|
||||
filename = '-no name-'
|
||||
if mail.mime_type.to_s.downcase == 'text/html'
|
||||
filename = 'html-email'
|
||||
data[:plain_part] = mail.body.decoded
|
||||
data[:plain_part] = conv( mail.charset, data[:plain_part] )
|
||||
data[:plain_part] = html2ascii( data[:plain_part] )
|
||||
|
||||
# any other attachments
|
||||
else
|
||||
data[:plain_part] = 'no visible content'
|
||||
end
|
||||
|
||||
# add body as attachment
|
||||
headers_store = {}
|
||||
if mail.mime_type
|
||||
headers_store['Mime-Type'] = mail.mime_type
|
||||
end
|
||||
if mail.charset
|
||||
headers_store['Charset'] = mail.charset
|
||||
end
|
||||
attachment = {
|
||||
:data => mail.body.decoded,
|
||||
:filename => mail.filename || filename,
|
||||
:preferences => headers_store
|
||||
}
|
||||
data[:attachments].push attachment
|
||||
end
|
||||
end
|
||||
|
||||
# attachments
|
||||
if mail.attachments
|
||||
data[:attachments] = []
|
||||
mail.attachments.each do |attachment|
|
||||
|
||||
# get file preferences
|
||||
|
@ -232,4 +266,57 @@ class Channel::EmailParser
|
|||
# return new objects
|
||||
return ticket, article, user
|
||||
end
|
||||
|
||||
# Converts an HTML fragment into a plain-text approximation.
#
# Processing steps, in order:
# 1. every <a ... href="..."> opening tag is replaced by a numbered
#    marker "[n]" and the link target is remembered
# 2. leading whitespace of every line (and thereby blank lines) is dropped
# 3. line endings are normalised to "\n"
# 4. all remaining tags are stripped (tags may span several lines)
# 5. numeric character references (&#8211; / &#x3d;) are decoded; named
#    entities such as &amp; are left untouched
# 6. runs of blank lines are collapsed
# 7. the collected links are appended as "[n] url" lines, separated from
#    the text by an empty line (only if at least one link was found)
#
# string - the HTML source; it is not modified (nil is treated as '')
#
# Returns a new String with the plain-text rendering.
def html2ascii(string)

  # work on a copy so the caller's (possibly frozen) string stays untouched
  text = string.to_s.dup

  # find <a href=....> and replace it with [x]
  links = []
  text.gsub!( /<a\s.*?href=("|')(.+?)("|').*?>/ix ) do
    links.push $2
    "[#{links.length}]"
  end

  # remove leading whitespace of each line (this also drops empty lines)
  text.gsub!( /^\s*/m, '' )

  # fix some bad stuff from opera and others
  # (no /s flag: in Ruby it selects the Windows-31J encoding and would
  # raise on non-ASCII UTF-8 input)
  text.gsub!( /(\n\r|\r\r\n|\r\n)/, "\n" )

  # strip all other tags; /m is Ruby's "dot matches newline", so tags
  # whose attributes wrap over several lines are removed as well
  text.gsub!( /<.+?>/m, '' )

  # decode numeric html entities like "&#8211;" and "&#x3d;" in a single
  # pass, so the output of one replacement is never decoded a second time
  target_encoding = text.encoding
  text.gsub!( /&\#(?:[xX]([0-9a-fA-F]+)|(\d+));?/ ) do |entity|
    hex_digits = $1
    dec_digits = $2
    codepoint  = hex_digits ? hex_digits.hex : dec_digits.to_i
    begin
      codepoint.chr(Encoding::UTF_8).encode(target_encoding)
    rescue RangeError, EncodingError
      # invalid code point or not representable in the text's encoding:
      # keep the entity as it was written
      entity
    end
  end

  # remove empty lines
  text.gsub!( /^\s*\n\s*\n/m, "\n" )

  # add extracted links
  unless links.empty?
    text << "\n\n"
    links.each_with_index do |link, index|
      text << "[#{index + 1}] #{link}\n"
    end
  end

  return text
end
|
||||
end
|
38
test/fixtures/mail4.box
vendored
Normal file
38
test/fixtures/mail4.box
vendored
Normal file
|
@ -0,0 +1,38 @@
|
|||
From k.guenther@example.com Mon May 7 15:08:10 2012
|
||||
Return-Path: <k.guenther@example.com>
|
||||
X-Original-To: support@example.com
|
||||
Delivered-To: box@samba.example.com
|
||||
X-Greylist: delayed 355 seconds by postgrey-1.32 at samba; Mon, 07 May 2012 15:08:09 BST
|
||||
Received: from smtprelay05.example.com (smtprelay05.example.com [8.6.3.9])
|
||||
by samba.example.com (Postfix) with ESMTP id 011F9500D3D
|
||||
for <support@example.com>; Mon, 7 May 2012 15:08:09 +0100 (BST)
|
||||
Received: from [1.1.0.7] (helo=exchange.df.eu)
|
||||
by smtprelay05.example.com with esmtps (TLSv1:RC4-MD5:128)
|
||||
(Exim 4.68)
|
||||
(envelope-from <k.guenther@example.com>)
|
||||
id 1SROW2-0007tk-QP
|
||||
for support@example.com; Mon, 07 May 2012 16:02:18 +0200
|
||||
Received: from ECCR04PUBLIC.exchange.local ([1.1.2.4]) by
|
||||
efe04.exchange.local ([1.1.0.7]) with mapi; Mon, 7 May 2012 15:58:33 +0200
|
||||
From: =?utf-8?B?R8O8bnRoZXIgS2F0amEgfCBFeGFtcGxlIEdtYkg=?=
|
||||
<k.guenther@example.com>
|
||||
To: Martin Edenhofer via Znuny Team <support@example.com>
|
||||
Date: Mon, 7 May 2012 15:58:32 +0200
|
||||
Subject: AW: Ticket Templates [Ticket#11168]
|
||||
Thread-Topic: Ticket Templates [Ticket#11168]
|
||||
Thread-Index: Ac0sGqTnvktNHx1lQoaTDcVI7lUxJQAPqvXA
|
||||
Message-ID: <F799DA4E63A20B4EBE9D5A412196D71D3CADBEA04E@ECCR04PUBLIC.exchange.local>
|
||||
References: <F799DA4E63A20B4EBE9D5A412196D71D3CADBE9DF6@ECCR04PUBLIC.exchange.local>
|
||||
<20120507062840.265.107538@portal.example.com>
|
||||
In-Reply-To: <20120507062840.265.107538@portal.example.com>
|
||||
Accept-Language: de-DE
|
||||
Content-Language: de-DE
|
||||
X-MS-Has-Attach:
|
||||
X-MS-TNEF-Correlator:
|
||||
acceptlanguage: de-DE
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: base64
|
||||
MIME-Version: 1.0
|
||||
|
||||
SGFsbG8gS2F0amEsCgpzdXBlciEgSWNoIGZyZXUgbWljaCEKCldpciB3w7xyZGVuIGdlcm5lIGRpZSBQcsOkc2VudGF0aW9uL0VpbmbDvGhydW5nIGluIGRpZSBUaWNrZXQgVGVtcGxhdGVzIHBlciBTY3JlZW5zaGFyaW5nIG9kZXIgenVtaW5kZXN0IHBlciBUZWxlZm9uIG1hY2hlbi4KCk3DtmdsaWNoZSBUZXJtaW5lOgpvIERvLCAxMC4wNS4yMDEyIDE1OjAwLTE2OjAwCm8gRnIsICAxMS4wNS4yMDEyIDEzOjAwLTE0OjAwCm8gRGksICAxNS4wNS4yMDEyIDE3OjAwLTE4OjAwCgrDnGJlciBGZWVkYmFjayB3w7xyZGUgaWNoIG1pY2ggZnJldWVuIQoKUFM6IFp1ciBiZXNzZXJlbiDDnGJlcnNpY2h0IGhhYmUgaWNoIGVpbiBUaWNrZXQgZXJzdGVsbHQuIDopIEltIEZvb3RlciBzaW5kIHVuc2VyZSBnZXNjaMOkZnRsaWNoZW4gS29udGFrdGRhdGVuIChmYWxscyBkaWVzZSBpcmdlbmR3YW5uIGVpbm1hbCBiZW7DtnRpZ3Qgd2VyZGVuIHNvbGx0ZW4pLCBtZWhyIGRhenUgaW4gZWluIHBhYXIgVGFnZW4uCgpMaWViZSBHcsO8w59lIQoKIC1NYXJ0aW4KCgo
|
||||
|
76
test/fixtures/mail5.box
vendored
Normal file
76
test/fixtures/mail5.box
vendored
Normal file
|
@ -0,0 +1,76 @@
|
|||
From marc.smith@example.com Mon May 7 07:45:48 2012
|
||||
Return-Path: <marc.smith@example.com>
|
||||
X-Original-To: support@znuny.com
|
||||
Delivered-To: box@samba.example.com
|
||||
Received: from mailout-de.example.com (mailout-de.example.com [2.1.6.2])
|
||||
by samba.example.com (Postfix) with SMTP id F1C9E500D3D
|
||||
for <support@znuny.com>; Mon, 7 May 2012 07:45:47 +0100 (BST)
|
||||
Received: (qmail invoked by alias); 07 May 2012 06:45:48 -0000
|
||||
Received: from unknown (EHLO [1.2.1.2]) [7.3.2.1]
|
||||
by mail.example.com (mp072) with SMTP; 07 May 2012 08:45:48 +0200
|
||||
X-Authenticated: #69078992
|
||||
X-Provags-ID: V01U2FsdGVkX1+IkUVPK6GIbZ2ezhmZfpCU0OVlFkuyPGDNsL0V5H
|
||||
FxvJdecWb4ibKL
|
||||
Message-ID: <4FA76F9A.3060602@example.com>
|
||||
Date: Mon, 07 May 2012 08:45:46 +0200
|
||||
From: marc.smith@example.com (Marc Smith)
|
||||
User-Agent: Mozilla/5.0 (Windows NT 6.0; WOW64; rv:12.0) Gecko/20120428 Thunderbird/12.0.1
|
||||
MIME-Version: 1.0
|
||||
To: Martin Edenhofer via Znuny Team <support@znuny.com>
|
||||
Subject: Re: XXXX Betatest Ticket Templates [Ticket#11162]
|
||||
References: <20120507061007.259.822311@portal.znuny.com>
|
||||
In-Reply-To: <20120507061007.259.822311@portal.znuny.com>
|
||||
Content-Type: text/plain; charset=UTF-8; format=flowed
|
||||
Content-Transfer-Encoding: 8bit
|
||||
X-Y-GMX-Trusted: 0
|
||||
Status: RO
|
||||
Content-Length: 1418
|
||||
Lines: 46
|
||||
|
||||
Am 07.05.2012 08:10, schrieb Martin Edenhofer via Znuny Team:
|
||||
> Hallo Marc,
|
||||
>
|
||||
> super! Ich freu mich!
|
||||
>
|
||||
> Wir würden gerne die Präsentation/Einführung in die Ticket Templates per Screensharing oder zumindest per Telefon machen.
|
||||
>
|
||||
> Mögliche Termine:
|
||||
> o Do, 10.05.2012 11:00-12:00
|
||||
> o Fr, 11.05.2012 09:00-10:00
|
||||
> o Di, 15.05.2012 14:00-15:00
|
||||
>
|
||||
> Über Feedback würde ich mich freuen!
|
||||
>
|
||||
> PS: Zur besseren Übersicht habe ich ein Ticket erstellt. :) Im Footer sind unsere geschäftlichen Kontaktdaten (falls diese irgendwann einmal benötigt werden sollten), mehr dazu in ein paar Tagen.
|
||||
>
|
||||
> Liebe Grüße!
|
||||
>
|
||||
> -Martin
|
||||
>
|
||||
> --
|
||||
> Martin Edenhofer
|
||||
>
|
||||
> Znuny GmbH // Marienstraße 11 // 10117 Berlin // Germany
|
||||
>
|
||||
> P: +49 (0) 30 60 98 54 18-0
|
||||
> F: +49 (0) 30 60 98 54 18-8
|
||||
>
|
||||
> Location: Berlin - HRB 139852 B Amtsgericht Berlin-Charlottenburg
|
||||
> Managing Director: Martin Edenhofer
|
||||
Hallo Martin,
|
||||
|
||||
John und ich könnten leider nur am Freitag, da wir Donnerstag und nächste
|
||||
Woche bereits Termine haben.
|
||||
|
||||
Wir würden uns dann den Freitag vormerken...;-)
|
||||
|
||||
N Screensharing ist bei uns leider nicht so ohne Probleme möglich, bzw.
|
||||
wir könnten einen PC aufsetzen mit nem seperaten Internetzugang auf dem
|
||||
wir ne VM vorbereiten könnten, da wir von dem "Internet PC" nicht auf
|
||||
unser XXXX zugreifen können. Falls ihr sonst noch irgendwas benötigt
|
||||
einfach kurz ne Rückmeldung...;-)
|
||||
|
||||
Grüße aus Bonn
|
||||
|
||||
John & Marc
|
||||
|
32
test/fixtures/mail6.box
vendored
Normal file
32
test/fixtures/mail6.box
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
From me@bogen.net Sat Sep 13 16:50:43 2003
|
||||
Return-Path: <me@bogen.net>
|
||||
Received: from airoma.example (law10-f30.law10.airoma.example [4.4.4.4]) by esanta.edenhofer.de (Postfix) with ESMTP id 2307484296 for <demo@exampel.com>; Sat, 13 Sep 2003 16:50:43 +0200 (CEST)
|
||||
Received: from mail pickup service by airoma.example with Mc SMTPSVC; Sat, 13 Sep 2003 07:37:26 -0700
|
||||
Received: from 11.11.11.11 by lw10fd.law10.com with HTTP; Sat, 13 Sep 2003 14:37:26 GMT
|
||||
X-Originating-Ip: [5.5.5.5]
|
||||
X-Originating-Email: [me@example.com]
|
||||
From: =?Windows-1252?Q?Hans_B=C4KO?= =?iso-8859-15?q?Sch=F6nland?= <me@bogen.net>
|
||||
To: =?iso-8859-2?Q?Namedy=F1ski?= (hans@example.com)
|
||||
Subject: utf8: =?UTF-8?Q?=E4=BD=BF=E3=81=A3=E3=81=A6?= / ISO-8859-1: =?iso-8859-1?Q?Priorit=E4t=22_?= / cp-1251: =?windows-1251?B?0eXw4+XpINPj6+j26uj1?=
|
||||
Date: Sat, 13 Sep 2003 10:37:26 -0400
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/html; charset="iso-8859-15"; format=flowed
|
||||
Message-Id: <Law10-F30dRmhKuTqtA00018823@coolair.example>
|
||||
X-Originalarrivaltime: 13 Sep 2003 14:37:26.0630 (UTC) FILETIME=[8D57B860:01C37A04]
|
||||
|
||||
<html><div style='background-color:'><P>this is a test</P></div><br clear=all><hr> <a href="http://localhost/8HMZENUS/2737??PS=">Compare Cable, DSL or Satellite plans: As low as $2.95. </a>
|
||||
|
||||
<br>
|
||||
|
||||
<br>
|
||||
Test1:–
|
||||
<br>
|
||||
Test2:&
|
||||
<br>
|
||||
Test3:∋
|
||||
<br>
|
||||
Test4:&
|
||||
<br>
|
||||
Test5:=
|
||||
|
||||
</html>
|
|
@ -35,6 +35,70 @@ class EmailParserTest < ActiveSupport::TestCase
|
|||
:subject => 'Ticket Templates',
|
||||
},
|
||||
},
|
||||
{
|
||||
:data => IO.read('test/fixtures/mail4.box'),
|
||||
:body_md5 => '2f2c3a5c233dbd9658ab37d39469b7d0',
|
||||
:params => {
|
||||
:from => '"Günther Katja | Example GmbH" <k.guenther@example.com>',
|
||||
:from_email => 'k.guenther@example.com',
|
||||
:from_display_name => 'Günther Katja | Example GmbH',
|
||||
:subject => 'AW: Ticket Templates [Ticket#11168]',
|
||||
:plain_part => "Hallo Katja,
|
||||
|
||||
super! Ich freu mich!
|
||||
|
||||
Wir würden gerne die Präsentation/Einführung in die Ticket Templates per Screensharing oder zumindest per Telefon machen.
|
||||
|
||||
Mögliche Termine:
|
||||
o Do, 10.05.2012 15:00-16:00
|
||||
o Fr, 11.05.2012 13:00-14:00
|
||||
o Di, 15.05.2012 17:00-18:00
|
||||
|
||||
Über Feedback würde ich mich freuen!
|
||||
|
||||
PS: Zur besseren Übersicht habe ich ein Ticket erstellt. :) Im Footer sind unsere geschäftlichen Kontaktdaten (falls diese irgendwann einmal benötigt werden sollten), mehr dazu in ein paar Tagen.
|
||||
|
||||
Liebe Grüße!
|
||||
|
||||
-Martin
|
||||
",
|
||||
},
|
||||
},
|
||||
{
|
||||
:data => IO.read('test/fixtures/mail5.box'),
|
||||
:body_md5 => '51364a306362f513f53f2bbea7820f37',
|
||||
:params => {
|
||||
:from => 'marc.smith@example.com (Marc Smith)',
|
||||
:from_email => 'marc.smith@example.com',
|
||||
:from_display_name => 'Marc Smith',
|
||||
:subject => 'Re: XXXX Betatest Ticket Templates [Ticket#11162]',
|
||||
},
|
||||
},
|
||||
{
|
||||
:data => IO.read('test/fixtures/mail6.box'),
|
||||
:body_md5 => '1fc492b8d762d82f861dbb70b7cf7610',
|
||||
:params => {
|
||||
:from => '"Hans BÄKOSchönland" <me@bogen.net>',
|
||||
:from_email => 'me@bogen.net',
|
||||
:from_display_name => 'Hans BÄKOSchönland',
|
||||
:subject => 'utf8: 使って / ISO-8859-1: Priorität" / cp-1251: Сергей Углицких',
|
||||
:plain_part => "this is a test [1]Compare Cable, DSL or Satellite plans: As low as $2.95.
|
||||
|
||||
Test1:8
|
||||
|
||||
Test2:&
|
||||
|
||||
Test3:∋
|
||||
|
||||
Test4:&
|
||||
|
||||
Test5:=
|
||||
|
||||
|
||||
[1] http://localhost/8HMZENUS/2737??PS=
|
||||
"
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
files.each { |file|
|
||||
|
|
Loading…
Reference in a new issue