Improved html2text to remove HTML comments.
This commit is contained in:
parent
a881e21f10
commit
1c1398d60c
2 changed files with 16 additions and 0 deletions
|
@ -88,6 +88,9 @@ class String
|
|||
string = string.chars.select(&:valid_encoding?).join
|
||||
end
|
||||
|
||||
# remove HTML comments (the /m flag lets a comment span multiple lines)
|
||||
string.gsub!(/<!--.+?-->/m, '')
|
||||
|
||||
# find <a href=....> and replace it with [x]
|
||||
link_list = ''
|
||||
counter = 0
|
||||
|
|
|
@ -141,6 +141,19 @@ class AaaStringTest < ActiveSupport::TestCase
|
|||
result = "test\n\n___"
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = "Ihr RZ-Team<br />
|
||||
<br />
|
||||
<!--[if gte mso 9]><xml> <o:DocumentProperties> <o:Author>test</o:Author> =
|
||||
<o:Template>A75DB76E.dotm</o:Template> <o:LastAuthor>test</o:LastAuthor> =
|
||||
<o:Revision>5</o:Revision> <o:Created>2011-05-18T07:08:00Z</o:Created> <=
|
||||
o:LastSaved>2011-07-04T17:59:00Z</o:LastSaved> <o:Pages>1</o:Pages> <o:Wo=
|
||||
rds>189</o:Words> <o:Characters>1192</o:Characters> <o:Lines>9</o:Lines> =
|
||||
<o:Paragraphs>2</o:Paragraphs> <o:CharactersWithSpaces>1379</o:Characters=
|
||||
WithSpaces> <o:Version>11.5606</o:Version> </o:DocumentProperties></xml><!=
|
||||
[endif]-->"
|
||||
result = 'Ihr RZ-Team'
|
||||
assert_equal(result, html.html2text)
|
||||
|
||||
html = ' line 1<br>
|
||||
you<br/>
|
||||
-----&'
|
||||
|
|
Loading…
Reference in a new issue