Improved email parsing (charset handling).

This commit is contained in:
Martin Edenhofer 2012-05-04 13:33:05 +02:00
parent 77ef83f9b3
commit 2a1e9dd65d
5 changed files with 308 additions and 53 deletions

View file

@ -1,24 +1,79 @@
require 'mail'
require 'iconv'
class Channel::EmailParser
def parse (channel, msg)
# Converts +string+ from +charset+ to UTF-8 using Iconv.
#
# charset - name of the source character set as announced by the mail
# string  - text to convert; nil is returned unchanged
#
# Returns the converted string, or nil when +string+ is nil.
# Errors raised by Iconv (e.g. for an unknown charset) are not rescued.
def conv(charset, string)
  # nothing to convert, e.g. a multipart mail without a text part
  return string if string.nil?

  # Charset labels in mail headers are case-insensitive and frequently
  # lowercase ("us-ascii"), so compare without regard to case.
  # US-ASCII is converted as LATIN1 - presumably so that 8-bit bytes in
  # mails wrongly declared as ASCII still convert (TODO confirm).
  charset = 'LATIN1' if charset.to_s.upcase == 'US-ASCII'

  Iconv.conv('UTF8', charset, string)
end
# Parses a raw email into a plain hash of values converted with conv.
#
# msg - raw message source, handed to Mail.new
#
# Returns a Hash with the keys :from_email, :from_display_name, :from, :to,
# :cc, :subject, :message_id and :plain_part, plus :attachments when the
# mail exposes an attachment list. Header values that are missing are nil.
# Each attachment is a Hash with :data, :filename and :preferences.
def parse(msg)
  data = {}
  mail = Mail.new(msg)

  # headers - parse the From header once and reuse it for both values
  from = Mail::Address.new(mail[:from].value)
  data[:from_email]        = from.address
  data[:from_display_name] = from.display_name
  %w[from to cc subject].each do |key|
    field = mail[key]
    data[key.to_sym] = field ? conv(field.charset || 'LATIN1', field.to_s) : nil
  end

  # message id
  message_id = mail['message_id']
  data[:message_id] = message_id ? message_id.to_s : nil

  # body - for multipart mails only the text part is used (nil if absent)
  plain_part =
    if mail.multipart?
      mail.text_part ? mail.text_part.body.decoded : nil
    else
      mail.body.decoded
    end

  # A mail without a text part has nothing to convert; keep nil instead of
  # passing it on to conv.
  # NOTE(review): the charset is taken from the top-level body even when the
  # text comes from text_part - confirm this is intended.
  data[:plain_part] =
    if plain_part.nil?
      nil
    else
      conv(mail.body.charset || 'LATIN1', plain_part)
    end

  # attachments
  if mail.attachments
    data[:attachments] = mail.attachments.map do |attachment|
      # get file preferences
      headers = {}
      attachment.header.fields.each { |f| headers[f.name] = f.value }

      preferences = { 'Mime-Type' => attachment.mime_type }
      preferences['Charset'] = attachment.charset if attachment.charset
      ['Content-ID', 'Content-Type'].each do |item|
        preferences[item] = headers[item] if headers[item]
      end

      {
        :data        => attachment.body.decoded,
        :filename    => attachment.filename,
        :preferences => preferences
      }
    end
  end

  data
end
def process(channel, msg)
mail = parse( msg )
# use transaction
ActiveRecord::Base.transaction do
user = User.where( :email => from_email ).first
user = User.where( :email => mail[:from_email] ).first
if !user then
puts 'create user...'
roles = Role.where( :name => 'Customer' )
user = User.create(
:login => from_email,
:firstname => from_display_name,
:login => mail[:from_email],
:firstname => mail[:from_display_name],
:lastname => '',
:email => from_email,
:email => mail[:from_email],
:password => '',
:active => true,
:roles => roles,
@ -29,16 +84,9 @@ class Channel::EmailParser
# set current user
UserInfo.current_user_id = user.id
# Converts +string+ from +charset+ to UTF-8 using Iconv.
#
# charset - name of the source character set as announced by the mail
# string  - text to convert; nil is returned unchanged
#
# Returns the converted string, or nil when +string+ is nil.
# Errors raised by Iconv (e.g. for an unknown charset) are not rescued.
def conv(charset, string)
  # nothing to convert, e.g. a multipart mail without a text part
  return string if string.nil?

  # Charset labels in mail headers are case-insensitive and frequently
  # lowercase ("us-ascii"), so compare without regard to case.
  # US-ASCII is converted as LATIN1 - presumably so that 8-bit bytes in
  # mails wrongly declared as ASCII still convert (TODO confirm).
  charset = 'LATIN1' if charset.to_s.upcase == 'US-ASCII'

  Iconv.conv('UTF8', charset, string)
end
# get ticket# from subject
ticket = Ticket.number_check( mail[:subject].value )
ticket = Ticket.number_check( mail[:subject] )
# set ticket state to open if not new
if ticket
ticket_state = Ticket::State.find( ticket.ticket_state_id )
@ -54,7 +102,7 @@ class Channel::EmailParser
ticket = Ticket.create(
:group_id => channel[:group_id],
:customer_id => user.id,
:title => conv(mail['subject'].charset || 'LATIN1', mail['subject'].to_s),
:title => mail[:subject],
:ticket_state_id => Ticket::State.where(:name => 'new').first.id,
:ticket_priority_id => Ticket::Priority.where(:name => '2 normal').first.id,
:created_by_id => user.id
@ -62,19 +110,17 @@ class Channel::EmailParser
end
# import mail
plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded
# html_part = message.html_part ? message.html_part.body.decoded : nil
article = Ticket::Article.create(
:created_by_id => user.id,
:ticket_id => ticket.id,
:ticket_article_type_id => Ticket::Article::Type.where(:name => 'email').first.id,
:ticket_article_sender_id => Ticket::Article::Sender.where(:name => 'Customer').first.id,
:body => conv(mail.body.charset || 'LATIN1', plain_part),
:from => mail['from'] ? conv(mail['from'].charset || 'LATIN1', mail['from'].to_s) : nil,
:to => mail['to'] ? conv(mail['to'].charset || 'LATIN1', mail['to'].to_s) : nil,
:cc => mail['cc'] ? conv(mail['cc'].charset || 'LATIN1', mail['cc'].to_s) : nil,
:subject => mail['subject'] ? conv(mail['subject'].charset || 'LATIN1', mail['subject'].to_s) : nil,
:message_id => mail['message_id'] ? mail['message_id'].to_s : nil,
:body => mail[:plain_part],
:from => mail[:from],
:to => mail[:to],
:cc => mail[:cc],
:subject => mail[:subject],
:message_id => mail[:message_id],
:internal => false
)
@ -88,35 +134,18 @@ class Channel::EmailParser
)
# store attachments
if mail.attachments
mail.attachments.each do |attachment|
# get file preferences
headers = {}
attachment.header.fields.each do |f|
headers[f.name] = f.value
end
headers_store = {}
headers_store['Mime-Type'] = attachment.mime_type
if attachment.charset
headers_store['Charset'] = attachment.charset
end
['Content-ID', 'Content-Type'].each do |item|
if headers[item]
headers_store[item] = headers[item]
end
end
# store file
if mail[:attachments]
mail[:attachments].each do |attachment|
Store.add(
:object => 'Ticket::Article',
:o_id => article.id,
:data => attachment.body.decoded,
:filename => attachment.filename,
:preferences => headers_store
:data => attachment[:data],
:filename => attachment[:filename],
:preferences => attachment[:preferences]
)
end
end
return ticket, article, user
end
# execute ticket events

View file

@ -6,7 +6,7 @@ class Channel::IMAP < Channel::EmailParser
def fetch (channel)
puts "fetching imap (#{channel[:options][:host]}/#{channel[:options][:user]})"
imap = Net::IMAP.new(channel[:options][:host], 993, true )
imap = Net::IMAP.new(channel[:options][:host], 993, true, nil, false )
imap.authenticate('LOGIN', channel[:options][:user], channel[:options][:password])
imap.select('INBOX')
count = 0
@ -18,7 +18,7 @@ class Channel::IMAP < Channel::EmailParser
# puts msg.to_s
# delete email from server after article was created
if parse(channel, msg)
if process(channel, msg)
imap.store(message_id, "+FLAGS", [:Deleted])
end
end

View file

@ -16,7 +16,7 @@ class Channel::POP3 < Channel::EmailParser
puts " - message #{count.to_s}/#{count_all.to_s}"
# delete email from server after article was created
if parse(channel, m.pop)
if process(channel, m.pop)
m.delete
end
end

194
test/fixtures/mail1.box vendored Normal file
View file

@ -0,0 +1,194 @@
From martin@example.com Thu May 3 12:04:29 2012
Return-Path: <martin@example.com>
X-Original-To: info@example.com
Delivered-To: box@samba.example.com
Received: from me.home (1-2-1-1.adsl.highway.example.com [1.2.1.1])
by samba.example.com (Postfix) with ESMTPSA id C96F8500D3D
for <info@example.com>; Thu, 3 May 2012 12:04:28 +0100 (BST)
Subject: =?iso-8859-1?Q?CI_Daten_f=FCr_PublicView_?=
Mime-Version: 1.0 (Apple Message framework v1257)
Content-Type: multipart/alternative; boundary="Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394"
From: <John.Smith@example.com>
Resent-From: Martin Test <martin@example.com>
Date: Thu, 3 May 2012 11:36:43 +0200
Resent-Date: Thu, 3 May 2012 13:04:31 +0200
Resent-To: info@example.com
Message-Id: <053EA3703574649ABDAF24D43A05604F327A130@MEMASFRK004.example.com>
To: <martin@example.com>
X-Mailer: Apple Mail (2.1257)
Status: RO
X-Status:
X-Keywords:
X-UID: 82
--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=iso-8859-1
Hallo Martin,
=20
wie besprochen hier noch die Daten f=FCr die Intranetseite:
=20
Schriftart/-gr=F6=DFe: Verdana 11 Pt wenn von Browser nicht unterst=FCtzt =
oder nicht vorhanden wird Arial 11 Pt genommen
Schriftfarbe: Schwarz
Farbe f=FCr die Balken in der Grafik: D7DDE9 (Blau)
=20
Wenn noch was fehlt oder du was brauchst sag mir Bescheid.
=20
Mit freundlichem Gru=DF=20
John Smith
Service und Support
Example Service AG & Co.
Management OHG
Someware-Str. 4
xxxxx Someware
Tel.: +49 001 000 46
Fax: +49 001 000 47
john.smith@example.com
www.example.com
OHG mit Sitz in Someware
AG: Someware - HRA XXX
Gesch=E4ftsf=FChrung: Tilman Test, Klaus J=FCrgen Test,
Bernhard Test, Ulrich Test
USt-IdNr. DE 1010101010
Pers=F6nlich haftende gesch=E4ftsf=FChrende Gesellschafterin:
Marie Test Example Stiftung, Someware
Vorstand: Rolf Test
Pers=F6nlich haftende Gesellschafterin:
Example Service AG, Someware
AG: Someware - HRB xxx
Vorstand: Marie Test
=20=
--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html;
charset=iso-8859-1
<html><head><base href=3D"x-msg://2849/"></head><body style=3D"word-wrap: =
break-word; -webkit-nbsp-mode: space; -webkit-line-break: =
after-white-space; "><span class=3D"Apple-style-span" =
style=3D"border-collapse: separate; font-family: Helvetica; font-style: =
normal; font-variant: normal; font-weight: normal; letter-spacing: =
normal; line-height: normal; orphans: 2; text-align: -webkit-auto; =
text-indent: 0px; text-transform: none; white-space: normal; widows: 2; =
word-spacing: 0px; -webkit-border-horizontal-spacing: 0px; =
-webkit-border-vertical-spacing: 0px; =
-webkit-text-decorations-in-effect: none; -webkit-text-size-adjust: =
auto; -webkit-text-stroke-width: 0px; font-size: medium; "><div =
lang=3D"DE" link=3D"blue" vlink=3D"purple"><div class=3D"Section1" =
style=3D"page: Section1; "><div style=3D"margin-top: 0cm; margin-right: =
0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
font-family: Calibri, sans-serif; "><span style=3D"font-size: 10pt; =
font-family: Arial, sans-serif; ">Hallo =
Martin,<o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
10pt; font-family: Arial, sans-serif; =
"><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
10pt; font-family: Arial, sans-serif; ">wie besprochen hier noch die =
Daten f=FCr die Intranetseite:<o:p></o:p></span></div><div =
style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
sans-serif; "><span style=3D"font-size: 10pt; font-family: Arial, =
sans-serif; "><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: =
0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
font-size: 11pt; font-family: Calibri, sans-serif; "><span =
style=3D"font-size: 10pt; font-family: Arial, sans-serif; =
">Schriftart/-gr=F6=DFe: Verdana 11 Pt wenn von Browser nicht =
unterst=FCtzt oder nicht vorhanden wird Arial 11 Pt =
genommen<o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
10pt; font-family: Arial, sans-serif; ">Schriftfarbe: =
Schwarz<o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
10pt; font-family: Arial, sans-serif; ">Farbe f=FCr die Balken in der =
Grafik: D7DDE9 (Blau)<o:p></o:p></span></div><div style=3D"margin-top: =
0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
font-size: 11pt; font-family: Calibri, sans-serif; "><span =
style=3D"font-size: 10pt; font-family: Arial, sans-serif; =
"><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
10pt; font-family: Arial, sans-serif; ">Wenn noch was fehlt oder du was =
brauchst sag mir Bescheid.<o:p></o:p></span></div><div =
style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
sans-serif; "><span style=3D"font-size: 10pt; font-family: Arial, =
sans-serif; "><o:p>&nbsp;</o:p></span></div><div style=3D"margin-top: =
0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
font-size: 11pt; font-family: Calibri, sans-serif; "><span =
style=3D"font-size: 10pt; font-family: Arial, sans-serif; ">Mit =
freundlichem Gru=DF<span =
class=3D"Apple-converted-space">&nbsp;</span><br><br>John =
Smith<br>Service und Support<br><br>Example Service AG &amp; =
Co.<o:p></o:p></span></div><div style=3D"margin-top: 0cm; margin-right: =
0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
font-family: Calibri, sans-serif; "><span style=3D"font-size: 10pt; =
font-family: Arial, sans-serif; ">Management OHG<br>Someware-Str. =
4<br>xxxxx Someware<br><br></span><span style=3D"font-size: 10pt; =
font-family: Arial, sans-serif; "><o:p></o:p></span></div><div =
style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
sans-serif; "><span style=3D"font-size: 10pt; font-family: Arial, =
sans-serif; ">Tel.: +49 001 7601 462<br>Fax: +49 001 7601 =
472</span><span style=3D"font-size: 10pt; font-family: Arial, =
sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; "><span style=3D"font-size: =
10pt; font-family: Arial, sans-serif; "><a =
href=3D"mailto:john.smith@example.com" style=3D"color: blue; =
text-decoration: underline; ">john.smith@example.com</a></span><span =
style=3D"font-size: 10pt; font-family: Arial, sans-serif; =
"><o:p></o:p></span></div><div style=3D"margin-top: 0cm; margin-right: =
0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
font-family: Calibri, sans-serif; "><span style=3D"font-size: 10pt; =
font-family: Arial, sans-serif; "><a href=3D"http://www.example.com" =
style=3D"color: blue; text-decoration: underline; =
">www.example.com</a></span><span style=3D"font-size: 10pt; font-family: =
Arial, sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: =
0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
font-size: 11pt; font-family: Calibri, sans-serif; "><span =
style=3D"font-size: 8pt; font-family: Arial, sans-serif; "><br>OHG mit =
Sitz in Someware<br>AG: Someware - HRA 4158<br>Gesch=E4ftsf=FChrung: =
Tilman Test, Klaus J=FCrgen Test,</span><span style=3D"font-size: 8pt; =
font-family: Arial, sans-serif; "><o:p></o:p></span></div><div =
style=3D"margin-top: 0cm; margin-right: 0cm; margin-left: 0cm; =
margin-bottom: 0.0001pt; font-size: 11pt; font-family: Calibri, =
sans-serif; "><span style=3D"font-size: 8pt; font-family: Arial, =
sans-serif; ">Bernhard Test, Ulrich Test<br>USt-IdNr. DE =
1010101010<br><br>Pers=F6nlich haftende gesch=E4ftsf=FChrende =
Gesellschafterin:</span><span style=3D"font-size: 8pt; font-family: =
Arial, sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: =
0cm; margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; =
font-size: 11pt; font-family: Calibri, sans-serif; "><span =
style=3D"font-size: 8pt; font-family: Arial, sans-serif; ">Marie =
Test Example Stiftung, Someware<br>Vorstand: Rolf =
Test<br><br>Pers=F6nlich haftende Gesellschafterin:</span><span =
style=3D"font-size: 8pt; font-family: Arial, sans-serif; =
"><o:p></o:p></span></div><div style=3D"margin-top: 0cm; margin-right: =
0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: 11pt; =
font-family: Calibri, sans-serif; "><span style=3D"font-size: 8pt; =
font-family: Arial, sans-serif; ">Example Service AG, =
Someware<br>AG: Someware - HRB xxx<br>Vorstand: Marie =
Test</span><span style=3D"font-size: 8pt; font-family: Arial, =
sans-serif; "><o:p></o:p></span></div><div style=3D"margin-top: 0cm; =
margin-right: 0cm; margin-left: 0cm; margin-bottom: 0.0001pt; font-size: =
11pt; font-family: Calibri, sans-serif; =
"><o:p>&nbsp;</o:p></div></div></div></span></body></html>=
--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394--

View file

@ -0,0 +1,32 @@
# encoding: utf-8
require 'test_helper'
# Checks Channel::EmailParser#parse against stored mail fixtures.
class EmailParserTest < ActiveSupport::TestCase
  test 'parse' do
    # One entry per fixture: the raw mail, the md5 of the expected plain text
    # body and the expected values of selected keys in the parse result.
    fixtures = [
      {
        :data     => IO.read('test/fixtures/mail1.box'),
        :body_md5 => 'fb6ed5070ffbb821b67b15b83239e1db',
        :params   => {
          :from              => 'John.Smith@example.com',
          :from_email        => 'John.Smith@example.com',
          :from_display_name => nil,
          :subject           => 'CI Daten für PublicView ',
        },
      },
    ]

    fixtures.each do |fixture|
      parsed = Channel::EmailParser.new.parse(fixture[:data])

      # compare the body by checksum rather than by full text
      body_md5 = Digest::MD5.hexdigest(parsed[:plain_part])
      assert_equal(fixture[:body_md5], body_md5)

      # every expected value must show up under the same key in the result
      fixture[:params].each do |key, expected|
        assert_equal(expected, parsed[key.to_sym])
      end
    end
  end
end