Added a proof-of-concept backend that detects signatures using the diff module 'diffy', to reduce redundancy in article views.

This commit is contained in:
Rolf Schmidt 2015-10-07 20:42:29 +02:00
parent 52e7ab2e1c
commit 7a7d9471c9
6 changed files with 198 additions and 45 deletions

View file

@ -62,6 +62,8 @@ gem 'browser'
gem 'eventmachine' gem 'eventmachine'
gem 'em-websocket' gem 'em-websocket'
gem 'diffy'
# Gems used only for develop/test and not required # Gems used only for develop/test and not required
# in production environments by default. # in production environments by default.
group :development, :test do group :development, :test do

View file

@ -76,6 +76,7 @@ GEM
delayed_job_active_record (4.1.0) delayed_job_active_record (4.1.0)
activerecord (>= 3.0, < 5) activerecord (>= 3.0, < 5)
delayed_job (>= 3.0, < 5) delayed_job (>= 3.0, < 5)
diffy (3.0.7)
docile (1.1.5) docile (1.1.5)
eco (1.0.0) eco (1.0.0)
coffee-script coffee-script
@ -302,6 +303,7 @@ DEPENDENCIES
coffee-script-source coffee-script-source
daemons daemons
delayed_job_active_record delayed_job_active_record
diffy
eco eco
em-websocket em-websocket
eventmachine eventmachine

122
lib/signature_detection.rb Normal file
View file

@ -0,0 +1,122 @@
module SignatureDetection

  # An unchanged run of diff lines only becomes a signature candidate if the
  # line that terminates the run is more than this many indexes behind the
  # first line of the run.
  MIN_BLOCK_DISTANCE = 5

=begin

try to detect the signature in a list of strings (e.g. the bodies of several
articles of the same sender)

  signature = SignatureDetection.find_signature(string_list)

returns

  signature = '...signature possible match...'

or

  signature = nil # no candidate found (also for nil or fewer than two strings)

Every string is diffed against every string after it (each pair once). Long
unchanged blocks of each diff are collected as candidates and counted. The
candidate with the highest count is returned without its trailing newline;
if several candidates share the highest count, the one found first wins.

=end

  def self.find_signature(string_list)

    # candidate signature => how often it was found
    possible_signatures = Hash.new(0)

    # combination(2) yields the pairs in the order [0,1], [0,2] .. [1,2] ..
    Array(string_list).combination(2) do |string1_content, string2_content|

      # diff both strings and split the diff result by new line
      diff_result       = Diffy::Diff.new(string1_content, string2_content)
      diff_result_array = diff_result.to_s.split("\n")

      # count every unchanged block of this diff to rank the signature
      unchanged_blocks(diff_result_array).each do |match_content|
        possible_signatures[match_content] += 1
      end
    end

    # pick the candidate with the highest rating
    best_match, _score = possible_signatures.max_by { |_content, score| score }
    return if best_match.nil?

    best_match.chomp
  end

=begin

collect the long unchanged blocks of a diff result

  blocks = unchanged_blocks(diff_result_array)

returns

  blocks = ["line 1\nline 2\n...", ...] # every line without its diff marker, each followed by "\n"

A block is closed by a line starting with

  + = new content incoming
  - = removed content
  \ = end of file marker

or by the last line of the diff result.

=end

  def self.unchanged_blocks(diff_result_array)
    blocks      = []
    match_block = nil # index of the first line of the current block
    last_index  = diff_result_array.length - 1

    diff_result_array.each_with_index do |line, diff_string_index|

      # open a new block if none is open
      match_block ||= diff_string_index

      # keep collecting until a changed line or the last line is reached
      changed = line.start_with?('\\', '+', '-')
      next if !changed && diff_string_index != last_index

      if diff_string_index - match_block > MIN_BLOCK_DISTANCE

        # a closing "+" or "\" line is not part of the block; an unchanged
        # last line is.
        # NOTE(review): a closing "-" line is kept as well (behaviour of the
        # original implementation), although that line only exists in the
        # first string - confirm this is intended.
        match_block_total = diff_string_index
        match_block_total -= 1 if line.start_with?('\\', '+')

        # strip the diff marker (first char) of every line of the block
        match_content = ''
        diff_result_array[match_block..match_block_total].each do |block_line|
          match_content << "#{block_line[1..-1]}\n"
        end
        blocks << match_content
      end

      # the next line starts a new block
      match_block = nil
    end

    blocks
  end
  private_class_method :unchanged_blocks

=begin

this function will search for a signature string in a string (e.g. article)
and return the line number of the signature start

  signature_line = SignatureDetection.find_signature_line(signature, string)

returns

  signature_line = 123 # 1-based number of the line which contains the first char of the signature

or

  signature_line = nil # signature not found, or signature/string is missing

=end

  def self.find_signature_line(signature, string)
    return if signature.nil? || signature == '' || string.nil?

    # try to find the char position of the signature
    search_position = string.index(signature)
    return if search_position.nil?

    # count the new lines in front of the signature; the exclusive range and
    # String#count also take blank lines directly in front of the match into
    # account (split("\n") would silently drop them)
    string[0...search_position].count("\n") + 1
  end
end

View file

@ -1,5 +1,7 @@
Hi, Hi,
123
uns liegt die fachliche Anforderung vor, dass eine Agent-AddNote-Benachrichtigung für die beiden o. g. TicketHistory-Typen versendet werden soll. uns liegt die fachliche Anforderung vor, dass eine Agent-AddNote-Benachrichtigung für die beiden o. g. TicketHistory-Typen versendet werden soll.
Das Modul Custom/Kernel/System/Ticket/Article.pm sieht diese Benachrichtigungen nach meinem Verständnis bisher nicht vor. Dafür wäre doch eine Codeerweiterung erforderlich, oder? Das Modul Custom/Kernel/System/Ticket/Article.pm sieht diese Benachrichtigungen nach meinem Verständnis bisher nicht vor. Dafür wäre doch eine Codeerweiterung erforderlich, oder?

View file

@ -1,7 +1,21 @@
Hi Martin, Hi Martin,
123
ich benötige von Dir eine Aufwandschätzung für ein Upgrade von x.1 auf x.5 (wir hatten schon mal diesbezüglich informiert, jetzt wollen die Entscheider Zahlen sehen). ich benötige von Dir eine Aufwandschätzung für ein Upgrade von x.1 auf x.5 (wir hatten schon mal diesbezüglich informiert, jetzt wollen die Entscheider Zahlen sehen).
asd
fa
sdf
a
sdf
asd
f
as
df
asd
f
Vielen Dank! Vielen Dank!
Mit freundlichen Grüßen Mit freundlichen Grüßen

View file

@ -6,61 +6,72 @@ class EmailSignaturDetectionTest < ActiveSupport::TestCase
test 'test case I - sender a' do test 'test case I - sender a' do
# fixtures of sender a # fixtures of sender a
fixture_files = [ fixture_files = {
'email_signature_detection/client_a_1.txt', 'email_signature_detection/client_a_1.txt' => { line: 10 },
'email_signature_detection/client_a_2.txt', 'email_signature_detection/client_a_2.txt' => { line: 20 },
'email_signature_detection/client_a_3.txt', 'email_signature_detection/client_a_3.txt' => { line: 6 },
]
# detect signature
match_structure = ''
# tests
# 'email_signature_detection/client_a_1.txt'
result_should = {
line: 9
} }
# 'email_signature_detection/client_a_2.txt' fixture_files_string_list = []
result_should = {
line: 7
}
# 'email_signature_detection/client_a_3.txt' fixture_files.keys.each do |filepath|
result_should = {
line: 7 file_content = ''
}
assert(true) file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r')
while (line = file.gets)
file_content += line
end
file.close
fixture_files[filepath][:content] = file_content
fixture_files_string_list.push(file_content)
end
signature = SignatureDetection.find_signature(fixture_files_string_list)
expected_signature = "\nMit freundlichen Grüßen\n\nBob Smith\nBerechtigungen und dez. Department\n________________________________\n\nMusik AG\nBerechtigungen und dez. Department (ITPBM)\nKastanien 2\n12345 Hornhausen\nTel.: +49 911 6760\nFax: +49 911 85 6760\nMobil: +49 173 911\nE-Mail: Bob.Smith@music.com\nhttp://www.music.com\n\nMusik AG | Kastanien 2 | 12345 Hornhausen\nSitz der AG: Hornhausen, HRB xxxxx | USt.-ID: DE 111222333444\nVorstand: Marc Smith, Weber Huber\nAufsichtsrat: Max Mix (Vors.)"
assert_equal(expected_signature, signature)
fixture_files.keys.each do |filepath|
expected_signature_position = fixture_files[filepath][:line]
assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content]))
end
end end
test 'test case II - sender b' do test 'test case II - sender b' do
# fixtures of sender a fixture_files = {
fixture_files = [ 'email_signature_detection/client_b_1.txt' => { line: 26 },
'email_signature_detection/client_b_1.txt', 'email_signature_detection/client_b_2.txt' => { line: 4 },
'email_signature_detection/client_b_2.txt', 'email_signature_detection/client_b_3.txt' => { line: 6 },
'email_signature_detection/client_b_3.txt',
]
# detect signature
match_structure = ''
# tests
# 'email_signature_detection/client_b_1.txt'
result_should = {
line: 27
} }
# 'email_signature_detection/client_b_2.txt' fixture_files_string_list = []
result_should = {
line: 5
}
# 'email_signature_detection/client_b_3.txt' fixture_files.keys.each do |filepath|
result_should = {
line: 7 file_content = ''
}
assert(true) file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r')
while (line = file.gets)
file_content += line
end
file.close
fixture_files[filepath][:content] = file_content
fixture_files_string_list.push(file_content)
end
signature = SignatureDetection.find_signature(fixture_files_string_list)
expected_signature = "\nFreundliche Grüße\n\nGünter Lässig\nLokale Daten\n\nMusic GmbH\nBaustraße 123, 12345 Max City\nTelefon 0123 5432114\nTelefax 0123 5432139\nE-Mail Günter.Lässig@example.com<mailto:Günter.Lässig@example.com>\n\nExample. Zusammen für eine bessere Welt.\n[cid:image001.png@01CE92A6.EC495B60]<http://www.example.com/>\n\n[cid:image002.png@01CE92A6.EC495B60]<http://www.facebook.com/example.com>\n\n[cid:image003.png@01CE92A6.EC495B60]<http://twitter.com/example>\n\n[cid:image004.png@01CE92A6.EC495B60]<https://www.xing.com/companies/example/neu-example>\n\n[cid:image005.jpg@01CE92A6.EC495B60]<http://www.youtube.com/example>\n\n[cid:image006.png@01CE92A6.EC495B60]<http://www.example.com/no_cache/privatkunden/aktuelles/news-presse/newsletter.html>\n\nSitz der Gesellschaft: Max City, Amtsgericht Max City HRB Nr. 1234\nGeschäftsführer: Bob Smith\nVorsitzender des Aufsichtsrats: Alex Marx"
assert_equal(expected_signature, signature)
fixture_files.keys.each do |filepath|
expected_signature_position = fixture_files[filepath][:line]
assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content]))
end
end end
end end