Improved html2text.

This commit is contained in:
Martin Edenhofer 2016-01-13 11:03:34 +01:00
parent 6bb8c7b6eb
commit 7434971416
2 changed files with 41 additions and 34 deletions

View file

@ -103,6 +103,9 @@ class String
# remove style tags with content # remove style tags with content
string.gsub!( %r{<style(|\s.+?)>(.+?)</style>}im, '') string.gsub!( %r{<style(|\s.+?)>(.+?)</style>}im, '')
# insert spaces on [A-z]\n[A-z]
string.gsub!( /([A-z])\n([A-z])/m, '\1 \2' )
# remove empty lines # remove empty lines
string.gsub!( /^\s*/m, '' ) string.gsub!( /^\s*/m, '' )

View file

@ -7,123 +7,127 @@ class AaaStringTest < ActiveSupport::TestCase
modul = 'test' modul = 'test'
result = 'test' result = 'test'
modul.to_filename modul.to_filename
assert_equal( result, modul ) assert_equal(result, modul)
modul = 'Some::File' modul = 'Some::File'
result = 'Some::File' result = 'Some::File'
modul.to_filename modul.to_filename
assert_equal( result, modul ) assert_equal(result, modul)
end end
test 'to_filename function' do test 'to_filename function' do
modul = 'test' modul = 'test'
result = 'test' result = 'test'
assert_equal( result, modul.to_filename ) assert_equal(result, modul.to_filename)
modul = 'Some::File' modul = 'Some::File'
result = 'some/file' result = 'some/file'
assert_equal( result, modul.to_filename ) assert_equal(result, modul.to_filename)
end end
test 'to_classname ref' do test 'to_classname ref' do
modul = 'test' modul = 'test'
result = 'test' result = 'test'
modul.to_filename modul.to_filename
assert_equal( result, modul ) assert_equal(result, modul)
modul = 'some/file' modul = 'some/file'
result = 'some/file' result = 'some/file'
modul.to_filename modul.to_filename
assert_equal( result, modul ) assert_equal(result, modul)
end end
test 'to_classname function' do test 'to_classname function' do
modul = 'test' modul = 'test'
result = 'Test' result = 'Test'
assert_equal( result, modul.to_classname ) assert_equal(result, modul.to_classname)
modul = 'some/file' modul = 'some/file'
result = 'Some::File' result = 'Some::File'
assert_equal( result, modul.to_classname ) assert_equal(result, modul.to_classname)
modul = 'some/files' modul = 'some/files'
result = 'Some::Files' result = 'Some::Files'
assert_equal( result, modul.to_classname ) assert_equal(result, modul.to_classname)
modul = 'some_test/files' modul = 'some_test/files'
result = 'SomeTest::Files' result = 'SomeTest::Files'
assert_equal( result, modul.to_classname ) assert_equal(result, modul.to_classname)
end end
test 'html2text ref' do test 'html2text ref' do
html = 'test' html = 'test'
result = 'test' result = 'test'
html.html2text html.html2text
assert_equal( result, html ) assert_equal(result, html)
html = '<div>test</div>' html = '<div>test</div>'
result = '<div>test</div>' result = '<div>test</div>'
html.html2text html.html2text
assert_equal( result, html ) assert_equal(result, html)
end end
test 'html2text function' do test 'html2text function' do
html = 'test' html = 'test'
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = ' test ' html = ' test '
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "\n\n test \n\n\n" html = "\n\n test \n\n\n"
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = '<div>test</div>' html = '<div>test</div>'
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = '<div>test<br></div>' html = '<div>test<br></div>'
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<div>test<br><br><br>\n<br>\n<br>\n</div>" html = "<div>test<br><br><br>\n<br>\n<br>\n</div>"
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<pre>test\n\ntest</pre>" html = "<pre>test\n\ntest</pre>"
result = "test\ntest" result = "test\ntest"
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<code>test\n\ntest</code>" html = "<code>test\n\ntest</code>"
result = "test\ntest" result = "test\ntest"
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = '<table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>' html = '<table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>'
result = "test col \ntest 4711" result = "test col \ntest 4711"
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<p><span>Was\nsoll verbessert werden:</span></p>"
result = 'Was soll verbessert werden:'
assert_equal(result, html.html2text)
html = "<!-- some comment --> html = "<!-- some comment -->
<div> <div>
test<br><br><br>\n<br>\n<br>\n test<br><br><br>\n<br>\n<br>\n
</div>" </div>"
result = 'test' result = 'test'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "\n<div><a href=\"http://zammad.org\">Best Tool of the World</a> html = "\n<div><a href=\"http://zammad.org\">Best Tool of the World</a>
some other text</div> some other text</div>
<div>" <div>"
result = "[1] Best Tool of the Worldsome other text\n\n\n[1] http://zammad.org" result = "[1] Best Tool of the Worldsome other text\n\n\n[1] http://zammad.org"
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<!-- some comment --> html = "<!-- some comment -->
<div> <div>
test<br><br><br>\n<hr/>\n<br>\n test<br><br><br>\n<hr/>\n<br>\n
</div>" </div>"
result = "test\n\n___" result = "test\n\n___"
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = ' line&nbsp;1<br> html = ' line&nbsp;1<br>
you<br/> you<br/>
@ -131,12 +135,12 @@ you<br/>
should = 'line 1 should = 'line 1
you you
-----&' -----&'
assert_equal( should, html.html2text ) assert_equal( should, html.html2text)
html = ' <ul><li>#1</li><li>#2</li></ul>' html = ' <ul><li>#1</li><li>#2</li></ul>'
should = '* #1 should = '* #1
* #2' * #2'
assert_equal( should, html.html2text ) assert_equal( should, html.html2text)
html = '<!DOCTYPE html> html = '<!DOCTYPE html>
<html> <html>
@ -151,7 +155,7 @@ you
> >
> Thank you for installing Zammad. > Thank you for installing Zammad.
>' >'
assert_equal( should, html.html2text ) assert_equal( should, html.html2text)
html = ' <style type="text/css"> html = ' <style type="text/css">
body { body {
@ -186,7 +190,7 @@ ont-size: 12px;;
</style><p>some other content</p>' </style><p>some other content</p>'
should = 'some other content' should = 'some other content'
assert_equal( should, html.html2text ) assert_equal( should, html.html2text)
html = ' IT-Infrastruktur</span><br> html = ' IT-Infrastruktur</span><br>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
@ -253,7 +257,7 @@ div.wordsection1
<o:idmap v:ext="edit" data="1" /> <o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->' </o:shapelayout></xml><![endif]-->'
should = 'IT-Infrastruktur' should = 'IT-Infrastruktur'
assert_equal( should, html.html2text ) assert_equal( should, html.html2text)
html = "<h1>some head</h1> html = "<h1>some head</h1>
some content some content
@ -268,7 +272,7 @@ some content
> line 2 > line 2
some text later' some text later'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<h1>some head</h1> html = "<h1>some head</h1>
some content some content
@ -283,7 +287,7 @@ some content
> line 2 > line 2
some text later' some text later'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<h1>some head</h1> html = "<h1>some head</h1>
some content some content
@ -298,7 +302,7 @@ some content
> >
> line 2 > line 2
some text later' some text later'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<p>Best regards,</p> html = "<p>Best regards,</p>
<p><i>Your Team Team</i></p> <p><i>Your Team Team</i></p>
@ -312,7 +316,7 @@ P.S.: You receive this e-mail because you are listed in our database as person w
[1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx' [1] http://www.teamviewer.example/en/company/unsubscribe.aspx?id=1009645&ident=xxx'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
html = "<div><br>Dave and leaned her html = "<div><br>Dave and leaned her
days adam.</div><span style=\"color:#F7F3FF; font-size:8px\">Maybe we days adam.</div><span style=\"color:#F7F3FF; font-size:8px\">Maybe we
@ -325,7 +329,7 @@ Maybe we want any help me that.
Next morning charlie saw at their father. Next morning charlie saw at their father.
Well as though adam took out here. Melvin will be more money. Called him into this one last thing. Well as though adam took out here. Melvin will be more money. Called him into this one last thing.
Men-----------------------' Men-----------------------'
assert_equal( result, html.html2text ) assert_equal(result, html.html2text)
end end