Improved html2text.
This commit is contained in:
parent
2824280a30
commit
e9eae11262
4 changed files with 43 additions and 33 deletions
|
@ -124,7 +124,7 @@ class String
|
|||
string.gsub!(%r{<style(|\s.+?)>(.+?)</style>}im, '')
|
||||
|
||||
# remove empty lines
|
||||
string.gsub!(/^\s*/m, '')
|
||||
string.gsub!(/^[[:space:]]*/m, '')
|
||||
if strict
|
||||
string.gsub!(%r{< \s* (/*) \s* (b|i|ul|ol|li|u|h1|h2|h3|hr) (\s*|\s+[^>]*) >}mxi, '######\1\2######')
|
||||
end
|
||||
|
@ -138,7 +138,7 @@ class String
|
|||
}
|
||||
|
||||
# insert spaces on [A-z]\n[A-z]
|
||||
string.gsub!(/([A-z])\n([A-z])/m, '\1 \2')
|
||||
string.gsub!(/([A-z])[[:space:]]([A-z])/m, '\1 \2')
|
||||
|
||||
# remove all new lines
|
||||
string.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, '')
|
||||
|
@ -219,18 +219,16 @@ class String
|
|||
end
|
||||
|
||||
# remove trailing empty spaces
|
||||
string.gsub!(/\s+\n$/, "\n")
|
||||
|
||||
# remove multiple empty lines
|
||||
string.gsub!(/\n\n\n/, "\n\n")
|
||||
|
||||
string.strip!
|
||||
string.gsub!(/[[:blank:]]+$/, '')
|
||||
|
||||
# add extracted links
|
||||
if link_list != ''
|
||||
string += "\n\n\n" + link_list
|
||||
end
|
||||
|
||||
# collapse multiple empty lines
|
||||
string.gsub!(/\n\n\n/, "\n\n")
|
||||
|
||||
string.strip
|
||||
end
|
||||
|
||||
|
|
|
@ -93,6 +93,18 @@ class AaaStringTest < ActiveSupport::TestCase
|
|||
result = 'test'
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = "<div>test<br><br> <br> \n<br> \n<br> \n</div>"
|
||||
result = 'test'
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = "<div>test<br><br> <br> \n<br> \n<br> \n</div>"
|
||||
result = 'test'
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = "<div>test<br><br> <br> \n<br> \n<br> \n</div> "
|
||||
result = 'test'
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = "<pre>test\n\ntest</pre>"
|
||||
result = "test\ntest"
|
||||
assert_equal(result, html.html2text)
|
||||
|
@ -102,7 +114,7 @@ class AaaStringTest < ActiveSupport::TestCase
|
|||
assert_equal(result, html.html2text)
|
||||
|
||||
html = '<table><tr><td>test</td><td>col</td></td></tr><tr><td>test</td><td>4711</td></tr></table>'
|
||||
result = "test col \ntest 4711"
|
||||
result = "test col\ntest 4711"
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = "<p><span>Was\nsoll verbessert werden:</span></p>"
|
||||
|
@ -299,7 +311,7 @@ some text later'
|
|||
result = 'some head
|
||||
some content
|
||||
> line 1
|
||||
>
|
||||
>
|
||||
> line 2
|
||||
some text later'
|
||||
assert_equal(result, html.html2text)
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue