From 2a1e9dd65d79ebe522b4a02e8f52527724568723 Mon Sep 17 00:00:00 2001 From: Martin Edenhofer Date: Fri, 4 May 2012 13:33:05 +0200 Subject: [PATCH] Improved email parsing (charset handling). --- app/models/channel/email_parser.rb | 129 +++++++++++-------- app/models/channel/imap.rb | 4 +- app/models/channel/pop3.rb | 2 +- test/fixtures/mail1.box | 194 +++++++++++++++++++++++++++++ test/unit/email_parser_test.rb | 32 +++++ 5 files changed, 308 insertions(+), 53 deletions(-) create mode 100644 test/fixtures/mail1.box create mode 100644 test/unit/email_parser_test.rb diff --git a/app/models/channel/email_parser.rb b/app/models/channel/email_parser.rb index 6a3af4c2e..28f95dc7c 100644 --- a/app/models/channel/email_parser.rb +++ b/app/models/channel/email_parser.rb @@ -1,24 +1,79 @@ require 'mail' - +require 'iconv' class Channel::EmailParser - - def parse (channel, msg) + def conv (charset, string) + if charset == 'US-ASCII' then + charset = 'LATIN1' + end + Iconv.conv("UTF8", charset, string) + end + + def parse (msg) + data = {} mail = Mail.new( msg ) - from_email = Mail::Address.new( mail[:from].value ).address - from_display_name = Mail::Address.new( mail[:from].value ).display_name + + # headers + data[:from_email] = Mail::Address.new( mail[:from].value ).address + data[:from_display_name] = Mail::Address.new( mail[:from].value ).display_name + ['from', 'to', 'cc', 'subject'].each {|key| + data[key.to_sym] = mail[key] ? conv( mail[key].charset || 'LATIN1', mail[key].to_s) : nil + } + + # message id + data[:message_id] = mail['message_id'] ? mail['message_id'].to_s : nil + + # body + # plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded + # html_part = message.html_part ? message.html_part.body.decoded : nil + data[:plain_part] = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded + data[:plain_part] = conv( mail.body.charset || 'LATIN1', data[:plain_part] ) + + # attachments + if mail.attachments + data[:attachments] = [] + mail.attachments.each do |attachment| + + # get file preferences + headers = {} + attachment.header.fields.each do |f| + headers[f.name] = f.value + end + headers_store = {} + headers_store['Mime-Type'] = attachment.mime_type + if attachment.charset + headers_store['Charset'] = attachment.charset + end + ['Content-ID', 'Content-Type'].each do |item| + if headers[item] + headers_store[item] = headers[item] + end + end + attachment = { + :data => attachment.body.decoded, + :filename => attachment.filename, + :preferences => headers_store + } + data[:attachments].push attachment + end + end + return data + end + + def process(channel, msg) + mail = parse( msg ) # use transaction ActiveRecord::Base.transaction do - user = User.where( :email => from_email ).first + user = User.where( :email => mail[:from_email] ).first if !user then puts 'create user...' roles = Role.where( :name => 'Customer' ) user = User.create( - :login => from_email, - :firstname => from_display_name, + :login => mail[:from_email], + :firstname => mail[:from_display_name], :lastname => '', - :email => from_email, + :email => mail[:from_email], :password => '', :active => true, :roles => roles, @@ -29,16 +84,9 @@ class Channel::EmailParser # set current user UserInfo.current_user_id = user.id - def conv (charset, string) - if charset == 'US-ASCII' then - charset = 'LATIN1' - end - Iconv.conv("UTF8", charset, string) - end - # get ticket# from subject - ticket = Ticket.number_check( mail[:subject].value ) - + ticket = Ticket.number_check( mail[:subject] ) + # set ticket state to open if not new if ticket ticket_state = Ticket::State.find( ticket.ticket_state_id ) @@ -54,7 +102,7 @@ class Channel::EmailParser ticket = Ticket.create( :group_id => channel[:group_id], :customer_id => user.id, - :title => conv(mail['subject'].charset || 'LATIN1', mail['subject'].to_s), + :title => mail[:subject], :ticket_state_id => Ticket::State.where(:name => 'new').first.id, :ticket_priority_id => Ticket::Priority.where(:name => '2 normal').first.id, :created_by_id => user.id @@ -62,19 +110,17 @@ class Channel::EmailParser end # import mail - plain_part = mail.multipart? ? (mail.text_part ? mail.text_part.body.decoded : nil) : mail.body.decoded - # html_part = message.html_part ? message.html_part.body.decoded : nil article = Ticket::Article.create( :created_by_id => user.id, :ticket_id => ticket.id, :ticket_article_type_id => Ticket::Article::Type.where(:name => 'email').first.id, :ticket_article_sender_id => Ticket::Article::Sender.where(:name => 'Customer').first.id, - :body => conv(mail.body.charset || 'LATIN1', plain_part), - :from => mail['from'] ? conv(mail['from'].charset || 'LATIN1', mail['from'].to_s) : nil, - :to => mail['to'] ? conv(mail['to'].charset || 'LATIN1', mail['to'].to_s) : nil, - :cc => mail['cc'] ? conv(mail['cc'].charset || 'LATIN1', mail['cc'].to_s) : nil, - :subject => mail['subject'] ? conv(mail['subject'].charset || 'LATIN1', mail['subject'].to_s) : nil, - :message_id => mail['message_id'] ? mail['message_id'].to_s : nil, + :body => mail[:plain_part], + :from => mail[:from], + :to => mail[:to], + :cc => mail[:cc], + :subject => mail[:subject], + :message_id => mail[:message_id], :internal => false ) @@ -88,35 +134,18 @@ class Channel::EmailParser ) # store attachments - if mail.attachments - mail.attachments.each do |attachment| - - # get file preferences - headers = {} - attachment.header.fields.each do |f| - headers[f.name] = f.value - end - headers_store = {} - headers_store['Mime-Type'] = attachment.mime_type - if attachment.charset - headers_store['Charset'] = attachment.charset - end - ['Content-ID', 'Content-Type'].each do |item| - if headers[item] - headers_store[item] = headers[item] - end - end - - # store file + if mail[:attachments] + mail[:attachments].each do |attachment| Store.add( :object => 'Ticket::Article', :o_id => article.id, - :data => attachment.body.decoded, - :filename => attachment.filename, - :preferences => headers_store + :data => attachment[:data], + :filename => attachment[:filename], + :preferences => attachment[:preferences] ) end end + return ticket, article, user end # execute ticket events diff --git a/app/models/channel/imap.rb b/app/models/channel/imap.rb index 408de5881..1e851af24 100644 --- a/app/models/channel/imap.rb +++ b/app/models/channel/imap.rb @@ -6,7 +6,7 @@ class Channel::IMAP < Channel::EmailParser def fetch (channel) puts "fetching imap (#{channel[:options][:host]}/#{channel[:options][:user]})" - imap = Net::IMAP.new(channel[:options][:host], 993, true ) + imap = Net::IMAP.new(channel[:options][:host], 993, true, nil, false ) imap.authenticate('LOGIN', channel[:options][:user], channel[:options][:password]) imap.select('INBOX') count = 0 @@ -18,7 +18,7 @@ class Channel::IMAP < Channel::EmailParser # puts msg.to_s # delete email from server after article was created - if parse(channel, msg) + if process(channel, msg) imap.store(message_id, "+FLAGS", [:Deleted]) end end diff --git a/app/models/channel/pop3.rb b/app/models/channel/pop3.rb index 86dfc3f30..f974bfaa1 100644 --- a/app/models/channel/pop3.rb +++ b/app/models/channel/pop3.rb @@ -16,7 +16,7 @@ class Channel::POP3 < Channel::EmailParser puts " - message #{count.to_s}/#{count_all.to_s}" # delete email from server after article was created - if parse(channel, m.pop) + if process(channel, m.pop) m.delete end end diff --git a/test/fixtures/mail1.box b/test/fixtures/mail1.box new file mode 100644 index 000000000..4ca220f9e --- /dev/null +++ b/test/fixtures/mail1.box @@ -0,0 +1,194 @@ +From martin@example.com Thu May 3 12:04:29 2012 +Return-Path: +X-Original-To: info@example.com +Delivered-To: box@samba.example.com +Received: from me.home (1-2-1-1.adsl.highway.example.com [1.2.1.1]) + by samba.example.com (Postfix) with ESMTPSA id C96F8500D3D + for ; Thu, 3 May 2012 12:04:28 +0100 (BST) +Subject: =?iso-8859-1?Q?CI_Daten_f=FCr_PublicView_?= +Mime-Version: 1.0 (Apple Message framework v1257) +Content-Type: multipart/alternative; boundary="Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394" +From: +Resent-From: Martin Test +Date: Thu, 3 May 2012 11:36:43 +0200 +Resent-Date: Thu, 3 May 2012 13:04:31 +0200 +Resent-To: info@example.com +Message-Id: <053EA3703574649ABDAF24D43A05604F327A130@MEMASFRK004.example.com> +To: +X-Mailer: Apple Mail (2.1257) +Status: RO +X-Status: +X-Keywords: +X-UID: 82 + + +--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/plain; + charset=iso-8859-1 + +Hallo Martin, +=20 +wie besprochen hier noch die Daten f=FCr die Intranetseite: +=20 +Schriftart/-gr=F6=DFe: Verdana 11 Pt wenn von Browser nicht unterst=FCtzt = +oder nicht vorhanden wird Arial 11 Pt genommen +Schriftfarbe: Schwarz +Farbe f=FCr die Balken in der Grafik: D7DDE9 (Blau) +=20 +Wenn noch was fehlt oder du was brauchst sag mir Bescheid. +=20 +Mit freundlichem Gru=DF=20 + +John Smith +Service und Support + +Example Service AG & Co. +Management OHG +Someware-Str. 4 +xxxxx Someware + +Tel.: +49 001 000 46 +Fax: +49 001 000 47 +john.smith@example.com +www.example.com + +OHG mit Sitz in Someware +AG: Someware - HRA XXX +Gesch=E4ftsf=FChrung: Tilman Test, Klaus J=FCrgen Test, +Bernhard Test, Ulrich Test +USt-IdNr. DE 1010101010 + +Pers=F6nlich haftende gesch=E4ftsf=FChrende Gesellschafterin: +Marie Test Example Stiftung, Someware +Vorstand: Rolf Test + +Pers=F6nlich haftende Gesellschafterin: +Example Service AG, Someware +AG: Someware - HRB xxx +Vorstand: Marie Test +=20= + +--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394 +Content-Transfer-Encoding: quoted-printable +Content-Type: text/html; + charset=iso-8859-1 + +
Hallo = +Martin,
 
wie besprochen hier noch die = +Daten f=FCr die Intranetseite:
 
Schriftart/-gr=F6=DFe: Verdana 11 Pt wenn von Browser nicht = +unterst=FCtzt oder nicht vorhanden wird Arial 11 Pt = +genommen
Schriftfarbe: = +Schwarz
Farbe f=FCr die Balken in der = +Grafik: D7DDE9 (Blau)
 
Wenn noch was fehlt oder du was = +brauchst sag mir Bescheid.
 
Mit = +freundlichem Gru=DF 

John = +Smith
Service und Support

Example Service AG & = +Co.
Management OHG
Someware-Str. = +4
xxxxx Someware

Tel.: +49 001 7601 462
Fax: +49 001 7601 = +472

OHG mit = +Sitz in Someware
AG: Someware - HRA 4158
Gesch=E4ftsf=FChrung: = +Tilman Test, Klaus J=FCrgen Test,
Bernhard Test, Ulrich Test
USt-IdNr. DE = +1010101010

Pers=F6nlich haftende gesch=E4ftsf=FChrende = +Gesellschafterin:
Marie = +Test Example Stiftung, Someware
Vorstand: Rolf = +Test

Pers=F6nlich haftende Gesellschafterin:
Example Service AG, = +Someware
AG: Someware - HRB xxx
Vorstand: Marie = +Test
 
= + +--Apple-Mail=_EB2F27C4-F4CD-40C9-82F1-D115D4FFA394-- + diff --git a/test/unit/email_parser_test.rb b/test/unit/email_parser_test.rb new file mode 100644 index 000000000..e2a47b0c8 --- /dev/null +++ b/test/unit/email_parser_test.rb @@ -0,0 +1,32 @@ +# encoding: utf-8 +require 'test_helper' + +class EmailParserTest < ActiveSupport::TestCase + test 'parse' do + files = [ + { + :data => IO.read('test/fixtures/mail1.box'), + :body_md5 => 'fb6ed5070ffbb821b67b15b83239e1db', + :params => { + :from => 'John.Smith@example.com', + :from_email => 'John.Smith@example.com', + :from_display_name => nil, + :subject => 'CI Daten für PublicView ', + }, + }, + ] + + files.each { |file| + + parser = Channel::EmailParser.new + data = parser.parse( file[:data] ) + + # create md5 of body + md5 = Digest::MD5.hexdigest( data[:plain_part] ) + assert_equal( file[:body_md5], md5 ) + file[:params].each { |key, value| + assert_equal( file[:params][key.to_sym], data[key.to_sym] ) + } + } + end +end \ No newline at end of file