2015-10-07 18:42:29 +00:00
|
|
|
module SignatureDetection
|
|
|
|
|
|
|
|
=begin

try to detect a signature which is shared by a list of messages (e. g. the
first articles of several tickets of the same customer)

  messages = [
    {
      content:      'some content',
      content_type: 'text/plain',
    },
  ]

  signature = SignatureDetection.find_signature(messages)

returns

  signature = '...signature possible match...'

or

  signature = nil # less than two messages or no common block found

how it works

* messages with a content_type matching text/html are converted with
  String#html2text(true), all other content is used as it is (a missing
  content or content_type is treated as empty plain text)
* every message is diffed (Diffy) against every later message of the list
* per pair the first block of unchanged lines is taken if the line which
  ends the block is more than 4 lines behind the first line of the block,
  only the first 10 lines of such a block are used
* the block which was found for the most pairs is returned (without the
  trailing new line)

=end

  def self.find_signature(messages)

    # the diff works on plain text, so convert html content first
    string_list = messages.map do |message|
      content = message[:content].to_s
      next content if !message[:content_type].to_s.match?(%r{text/html}i)

      content.html2text(true)
    end

    # hash with possible signature and count of matches in string list
    possible_signatures = Hash.new(0)

    # compare every string with all strings behind it in the list
    string_list.each_with_index do |string1_content, main_string_index|
      string_list[(main_string_index + 1)..-1].each do |string2_content|

        # diff strings and split the result by new line
        diff_result_array = Diffy::Diff.new(string1_content, string2_content).to_s.split("\n")
        last_diff_index   = diff_result_array.length - 1

        # start index of the current block without difference
        match_block = nil

        diff_result_array.each_with_index do |line, diff_string_index|
          match_block ||= diff_string_index

          # a block ends with a line starting with
          # + = new content incoming
          # - = removed content
          # \ = end of file
          # or with the last line of the diff result
          changed_line = line.start_with?('\\', '+', '-')
          next if !changed_line && diff_string_index != last_diff_index

          # the ending line has to be more than 4 lines behind the block start
          if diff_string_index - match_block > 4

            # a "+" or "\" line is not part of the block, a "-" line and an
            # unchanged last line stay in the block (kept from the initial
            # implementation)
            match_block_total = line.start_with?('\\', '+') ? diff_string_index - 1 : diff_string_index

            # use only the first 10 lines and drop the diff marker column
            match_content = diff_result_array[match_block..match_block_total]
                            .first(10)
                            .map { |match_line| "#{match_line[1..-1]}\n" }
                            .join

            # count the match of the signature to rank the signature
            possible_signatures[match_content] += 1

            # only the first block of each pair is used
            break
          end

          match_block = nil
        end
      end
    end

    # return the possible signature with the highest count
    best_content, = possible_signatures.max_by { |_content, score| score }
    return if !best_content

    best_content.chomp
  end
|
|
|
|
|
|
|
|
=begin

this function will search for a signature string in a string (e. g. the body
of an article) and return the line number which belongs to the start of the
signature

  signature_line = SignatureDetection.find_signature_line(signature, message, content_type)

a message with a content_type matching text/html is converted with
String#html2text(true) before the search, a missing content_type is handled
as plain text

returns

  signature_line = 123

or

  signature_line = nil # signature not found (or signature/message missing)

the number is calculated as: count of lines of the text from the beginning up
to (and including) the first character of the signature, plus 1

=end

  def self.find_signature_line(signature, string, content_type)

    # nothing to search for / nothing to search in
    return if !signature || !string

    if content_type.to_s.match?(%r{text/html}i)
      string = string.html2text(true)
    end

    # try to find the char position of the signature
    search_position = string.index(signature)
    return if search_position.nil?

    # count lines up to signature
    string[0..search_position].split("\n").length + 1
  end
|
|
|
|
|
|
|
|
=begin

find the signature line of an article, based on the signature which is stored
in the preferences of the given user

  signature_line = SignatureDetection.find_signature_line_by_article(user, article)

the lookup itself is done by SignatureDetection.find_signature_line with the
body and the content_type of the article

returns

  signature_line = 123

or

  signature_line = nil # no signature stored for the user or signature not found

=end

  def self.find_signature_line_by_article(user, article)
    known_signature = user.preferences[:signature_detection]

    # without a stored signature there is nothing to look for
    return unless known_signature

    SignatureDetection.find_signature_line(known_signature, article.body, article.content_type)
  end
|
|
|
|
|
|
|
|
=begin

this function will search for a signature string in the articles of a given
user_id

  signature = SignatureDetection.by_user_id(user_id)

used articles: the first article of each of the 5 newest tickets (highest id)
which are created by the user and have 'email' as create article type and
'Customer' as create article sender

returns

  signature = '...signature possible match...'

or

  signature = nil # result of find_signature if nothing was detected

=end

  def self.by_user_id(user_id)
    email_type      = Ticket::Article::Type.lookup(name: 'email')
    customer_sender = Ticket::Article::Sender.lookup(name: 'Customer')

    recent_tickets = Ticket.where(
      created_by_id:            user_id,
      create_article_type_id:   email_type.id,
      create_article_sender_id: customer_sender.id
    ).limit(5).order(id: :desc)

    # collect content and content type of the first article of each ticket
    samples = recent_tickets.each_with_object([]) do |ticket, list|
      first_article = ticket.articles.first
      next if !first_article

      list.push(
        content:      first_article.body,
        content_type: first_article.content_type,
      )
    end

    find_signature(samples)
  end
|
2015-10-08 06:33:01 +00:00
|
|
|
|
|
|
|
=begin

rebuild the signature detection for each active user (users are processed in
descending order of their id, each one via rebuild_user)

  SignatureDetection.rebuild_all_user

returns

  true

=end

  def self.rebuild_all_user
    active_users = User.select('id').where(active: true).order(id: :desc)
    active_users.each { |user_row| rebuild_user(user_row.id) }

    true
  end
|
|
|
|
|
|
|
|
=begin

rebuild the signature detection for a user: detect the signature with
by_user_id and store it in user.preferences[:signature_detection]

  SignatureDetection.rebuild_user(user_id)

returns

  true # a new signature was written to the preferences and user.save was called
       # (the result of user.save is not checked)

or

  nil # no signature detected or the detected signature is already stored

=end

  def self.rebuild_user(user_id)
    detected = by_user_id(user_id)
    return unless detected

    user = User.find(user_id)

    # nothing to do if the stored signature is already up to date
    return if user.preferences[:signature_detection] == detected

    user.preferences[:signature_detection] = detected
    user.save

    true
  end
|
|
|
|
|
2015-10-08 08:42:13 +00:00
|
|
|
=begin

rebuild the signature line for all articles of type 'email' (processed in
descending order of their id)

  SignatureDetection.rebuild_all_articles

for each article the signature stored in the preferences of the user who
created the article is searched in the article (find_signature_line), a found
line which differs from article.preferences[:signature_detection] is written
to the article and the article is saved

returns

  true

=end

  def self.rebuild_all_articles
    email_type = Ticket::Article::Type.lookup(name: 'email')
    id_rows    = Ticket::Article.select('id').where(type_id: email_type.id).order(id: :desc)

    id_rows.each do |id_row|

      # only the id was selected, so load the complete article
      article = Ticket::Article.find(id_row.id)
      author  = User.find(article.created_by_id)

      # skip articles of users without a detected signature
      known_signature = author.preferences[:signature_detection]
      next unless known_signature

      detected_line = find_signature_line(known_signature, article.body, article.content_type)
      next unless detected_line

      # skip articles which already have this line stored
      next if article.preferences[:signature_detection] == detected_line

      article.preferences[:signature_detection] = detected_line
      article.save
    end

    true
  end
|
|
|
|
|
2015-10-07 18:42:29 +00:00
|
|
|
end
|