Added a proof-of-concept backend that detects signatures using the diff gem 'diffy', to reduce redundancy in article views.
This commit is contained in:
parent
52e7ab2e1c
commit
7a7d9471c9
6 changed files with 198 additions and 45 deletions
2
Gemfile
2
Gemfile
|
@ -62,6 +62,8 @@ gem 'browser'
|
||||||
gem 'eventmachine'
|
gem 'eventmachine'
|
||||||
gem 'em-websocket'
|
gem 'em-websocket'
|
||||||
|
|
||||||
|
gem 'diffy'
|
||||||
|
|
||||||
# Gems used only for develop/test and not required
|
# Gems used only for develop/test and not required
|
||||||
# in production environments by default.
|
# in production environments by default.
|
||||||
group :development, :test do
|
group :development, :test do
|
||||||
|
|
|
@ -76,6 +76,7 @@ GEM
|
||||||
delayed_job_active_record (4.1.0)
|
delayed_job_active_record (4.1.0)
|
||||||
activerecord (>= 3.0, < 5)
|
activerecord (>= 3.0, < 5)
|
||||||
delayed_job (>= 3.0, < 5)
|
delayed_job (>= 3.0, < 5)
|
||||||
|
diffy (3.0.7)
|
||||||
docile (1.1.5)
|
docile (1.1.5)
|
||||||
eco (1.0.0)
|
eco (1.0.0)
|
||||||
coffee-script
|
coffee-script
|
||||||
|
@ -302,6 +303,7 @@ DEPENDENCIES
|
||||||
coffee-script-source
|
coffee-script-source
|
||||||
daemons
|
daemons
|
||||||
delayed_job_active_record
|
delayed_job_active_record
|
||||||
|
diffy
|
||||||
eco
|
eco
|
||||||
em-websocket
|
em-websocket
|
||||||
eventmachine
|
eventmachine
|
||||||
|
|
122
lib/signature_detection.rb
Normal file
122
lib/signature_detection.rb
Normal file
|
@ -0,0 +1,122 @@
|
||||||
|
module SignatureDetection

  # A run of unchanged diff lines is only considered a signature candidate when
  # the distance between its first line and the line that ends it is greater
  # than this value.
  MATCH_BLOCK_THRESHOLD = 5

=begin

try to detect the signature in a list of articles, for example

  signature = SignatureDetection.find_signature(string_list)

returns

  signature = '...signature possible match...'

or, if no candidate block was found (e.g. fewer than two strings were given)

  signature = nil

=end

  def self.find_signature(string_list)

    # candidate signature => number of string pairs it was found in
    possible_signatures = Hash.new(0)

    # compare every string with every later string of the list
    string_list.combination(2) do |string1_content, string2_content|

      # diff both strings and split the diff result by new line
      diff_lines = Diffy::Diff.new(string1_content, string2_content).to_s.split("\n")
      last_index = diff_lines.length - 1

      # index of the first line of the current block without a difference
      block_start = nil

      diff_lines.each_with_index do |line, index|

        # start a new block if none is open
        block_start ||= index

        # a block ends at a line starting with
        # + = new content incoming
        # - = removed content
        # \ = end of file marker
        # or at the last line of the diff result
        next unless line.start_with?('\\', '+', '-') || index == last_index

        if index - block_start > MATCH_BLOCK_THRESHOLD

          # an ending "+" or "\" line is not part of the block; any other
          # ending line (a "-" line or the last line of the diff) is kept
          block_end = line.start_with?('\\', '+') ? index - 1 : index

          # build the candidate from the block, dropping the diff prefix
          # character of every line
          match_content = diff_lines[block_start..block_end].map { |block_line| "#{block_line[1..-1]}\n" }.join

          # count the matches of the candidate to rank it
          possible_signatures[match_content] += 1
        end

        block_start = nil
      end
    end

    # rank the candidates by number of matches and return the highest rated one
    best_content, _score = possible_signatures.sort { |a, b| b[1] <=> a[1] }.first
    return if best_content.nil?

    best_content.chomp
  end

=begin

this function will search for a signature string in a string (e.g. article) and return the line number of the signature start

  signature_line = SignatureDetection.find_signature_line(signature, string)

returns

  signature_line = 123

or, if the signature is not part of the string (or one of the arguments is nil)

  signature_line = nil

The line number is 1-based. Blank lines directly in front of the match are not
counted, because String#split drops trailing empty strings.

=end

  def self.find_signature_line(signature, string)

    # find_signature returns nil if nothing was detected, so accept nil here
    return if signature.nil? || string.nil?

    # try to find the char position of the signature
    search_position = string.index(signature)

    return if search_position.nil?

    # count the lines in front of the signature; the range is exclusive so that
    # the first character of the signature itself is not counted as a line
    string[0...search_position].split("\n").length + 1
  end
end
|
|
@ -1,5 +1,7 @@
|
||||||
Hi,
|
Hi,
|
||||||
|
|
||||||
|
123
|
||||||
|
|
||||||
uns liegt die fachliche Anforderung vor, dass eine Agent-AddNote-Benachrichtigung für die beiden o. g. TicketHistory-Typen versendet werden soll.
|
uns liegt die fachliche Anforderung vor, dass eine Agent-AddNote-Benachrichtigung für die beiden o. g. TicketHistory-Typen versendet werden soll.
|
||||||
|
|
||||||
Das Modul Custom/Kernel/System/Ticket/Article.pm sieht diese Benachrichtigungen nach meinem Verständnis bisher nicht vor. Dafür wäre doch eine Codeerweiterung erforderlich, oder?
|
Das Modul Custom/Kernel/System/Ticket/Article.pm sieht diese Benachrichtigungen nach meinem Verständnis bisher nicht vor. Dafür wäre doch eine Codeerweiterung erforderlich, oder?
|
||||||
|
|
|
@ -1,7 +1,21 @@
|
||||||
Hi Martin,
|
Hi Martin,
|
||||||
|
|
||||||
|
123
|
||||||
|
|
||||||
ich benötige von Dir eine Aufwandschätzung für ein Upgrade von x.1 auf x.5 (wir hatten schon mal diesbezüglich informiert, jetzt wollen die Entscheider Zahlen sehen).
|
ich benötige von Dir eine Aufwandschätzung für ein Upgrade von x.1 auf x.5 (wir hatten schon mal diesbezüglich informiert, jetzt wollen die Entscheider Zahlen sehen).
|
||||||
|
|
||||||
|
asd
|
||||||
|
fa
|
||||||
|
sdf
|
||||||
|
a
|
||||||
|
sdf
|
||||||
|
asd
|
||||||
|
f
|
||||||
|
as
|
||||||
|
df
|
||||||
|
asd
|
||||||
|
f
|
||||||
|
|
||||||
Vielen Dank!
|
Vielen Dank!
|
||||||
|
|
||||||
Mit freundlichen Grüßen
|
Mit freundlichen Grüßen
|
||||||
|
|
|
@ -6,61 +6,72 @@ class EmailSignaturDetectionTest < ActiveSupport::TestCase
|
||||||
test 'test case I - sender a' do
|
test 'test case I - sender a' do
|
||||||
|
|
||||||
# fixtures of sender a
|
# fixtures of sender a
|
||||||
fixture_files = [
|
fixture_files = {
|
||||||
'email_signature_detection/client_a_1.txt',
|
'email_signature_detection/client_a_1.txt' => { line: 10 },
|
||||||
'email_signature_detection/client_a_2.txt',
|
'email_signature_detection/client_a_2.txt' => { line: 20 },
|
||||||
'email_signature_detection/client_a_3.txt',
|
'email_signature_detection/client_a_3.txt' => { line: 6 },
|
||||||
]
|
|
||||||
|
|
||||||
# detect signature
|
|
||||||
match_structure = ''
|
|
||||||
|
|
||||||
# tests
|
|
||||||
# 'email_signature_detection/client_a_1.txt'
|
|
||||||
result_should = {
|
|
||||||
line: 9
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 'email_signature_detection/client_a_2.txt'
|
fixture_files_string_list = []
|
||||||
result_should = {
|
|
||||||
line: 7
|
|
||||||
}
|
|
||||||
|
|
||||||
# 'email_signature_detection/client_a_3.txt'
|
fixture_files.keys.each do |filepath|
|
||||||
result_should = {
|
|
||||||
line: 7
|
file_content = ''
|
||||||
}
|
|
||||||
assert(true)
|
file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r')
|
||||||
|
while (line = file.gets)
|
||||||
|
file_content += line
|
||||||
|
end
|
||||||
|
file.close
|
||||||
|
|
||||||
|
fixture_files[filepath][:content] = file_content
|
||||||
|
fixture_files_string_list.push(file_content)
|
||||||
|
end
|
||||||
|
|
||||||
|
signature = SignatureDetection.find_signature(fixture_files_string_list)
|
||||||
|
expected_signature = "\nMit freundlichen Grüßen\n\nBob Smith\nBerechtigungen und dez. Department\n________________________________\n\nMusik AG\nBerechtigungen und dez. Department (ITPBM)\nKastanien 2\n12345 Hornhausen\nTel.: +49 911 6760\nFax: +49 911 85 6760\nMobil: +49 173 911\nE-Mail: Bob.Smith@music.com\nhttp://www.music.com\n\nMusik AG | Kastanien 2 | 12345 Hornhausen\nSitz der AG: Hornhausen, HRB xxxxx | USt.-ID: DE 111222333444\nVorstand: Marc Smith, Weber Huber\nAufsichtsrat: Max Mix (Vors.)"
|
||||||
|
assert_equal(expected_signature, signature)
|
||||||
|
|
||||||
|
fixture_files.keys.each do |filepath|
|
||||||
|
expected_signature_position = fixture_files[filepath][:line]
|
||||||
|
|
||||||
|
assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content]))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
test 'test case II - sender b' do
|
test 'test case II - sender b' do
|
||||||
|
|
||||||
# fixtures of sender a
|
fixture_files = {
|
||||||
fixture_files = [
|
'email_signature_detection/client_b_1.txt' => { line: 26 },
|
||||||
'email_signature_detection/client_b_1.txt',
|
'email_signature_detection/client_b_2.txt' => { line: 4 },
|
||||||
'email_signature_detection/client_b_2.txt',
|
'email_signature_detection/client_b_3.txt' => { line: 6 },
|
||||||
'email_signature_detection/client_b_3.txt',
|
|
||||||
]
|
|
||||||
|
|
||||||
# detect signature
|
|
||||||
match_structure = ''
|
|
||||||
|
|
||||||
# tests
|
|
||||||
# 'email_signature_detection/client_b_1.txt'
|
|
||||||
result_should = {
|
|
||||||
line: 27
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 'email_signature_detection/client_b_2.txt'
|
fixture_files_string_list = []
|
||||||
result_should = {
|
|
||||||
line: 5
|
|
||||||
}
|
|
||||||
|
|
||||||
# 'email_signature_detection/client_b_3.txt'
|
fixture_files.keys.each do |filepath|
|
||||||
result_should = {
|
|
||||||
line: 7
|
file_content = ''
|
||||||
}
|
|
||||||
assert(true)
|
file = File.new("#{Rails.root}/test/fixtures/#{filepath}", 'r')
|
||||||
|
while (line = file.gets)
|
||||||
|
file_content += line
|
||||||
|
end
|
||||||
|
file.close
|
||||||
|
|
||||||
|
fixture_files[filepath][:content] = file_content
|
||||||
|
fixture_files_string_list.push(file_content)
|
||||||
|
end
|
||||||
|
|
||||||
|
signature = SignatureDetection.find_signature(fixture_files_string_list)
|
||||||
|
expected_signature = "\nFreundliche Grüße\n\nGünter Lässig\nLokale Daten\n\nMusic GmbH\nBaustraße 123, 12345 Max City\nTelefon 0123 5432114\nTelefax 0123 5432139\nE-Mail Günter.Lässig@example.com<mailto:Günter.Lässig@example.com>\n\nExample. Zusammen für eine bessere Welt.\n[cid:image001.png@01CE92A6.EC495B60]<http://www.example.com/>\n\n[cid:image002.png@01CE92A6.EC495B60]<http://www.facebook.com/example.com>\n\n[cid:image003.png@01CE92A6.EC495B60]<http://twitter.com/example>\n\n[cid:image004.png@01CE92A6.EC495B60]<https://www.xing.com/companies/example/neu-example>\n\n[cid:image005.jpg@01CE92A6.EC495B60]<http://www.youtube.com/example>\n\n[cid:image006.png@01CE92A6.EC495B60]<http://www.example.com/no_cache/privatkunden/aktuelles/news-presse/newsletter.html>\n\nSitz der Gesellschaft: Max City, Amtsgericht Max City HRB Nr. 1234\nGeschäftsführer: Bob Smith\nVorsitzender des Aufsichtsrats: Alex Marx"
|
||||||
|
assert_equal(expected_signature, signature)
|
||||||
|
|
||||||
|
fixture_files.keys.each do |filepath|
|
||||||
|
expected_signature_position = fixture_files[filepath][:line]
|
||||||
|
|
||||||
|
assert_equal(expected_signature_position, SignatureDetection.find_signature_line(signature, fixture_files[filepath][:content]))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue