Improved performance of the HTML utils (do all replacements on a copy of the string and write the result back once at the end); added Word markup removal.
This commit is contained in:
parent
1741bc1bd1
commit
9643e2f214
2 changed files with 102 additions and 72 deletions
|
@ -100,47 +100,62 @@ class App.Utils
|
|||
|
||||
# textWithoutTags = App.Utils.htmlRemoveTags( html )
|
||||
@htmlRemoveTags: (html) ->
|
||||
htmlTmp = $( '<div>' + html.html() + '</div>' )
|
||||
|
||||
# remove comments
|
||||
@_removeComments( htmlTmp )
|
||||
|
||||
# remove Word markup
|
||||
htmlTmp = @_removeWordMarkup( htmlTmp )
|
||||
|
||||
# remove tags, keep content
|
||||
html.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( ->
|
||||
htmlTmp.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( ->
|
||||
$(@).contents()
|
||||
)
|
||||
|
||||
# remove tags & content
|
||||
html.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, br, hr, img, input, select, button, style, applet, canvas, script, frame, iframe').remove()
|
||||
htmlTmp.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, br, hr, img, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe').remove()
|
||||
|
||||
html
|
||||
html.html(htmlTmp)
|
||||
|
||||
# htmlOnlyWithRichtext = App.Utils.htmlRemoveRichtext( html )
|
||||
@htmlRemoveRichtext: (html) ->
|
||||
htmlTmp = $( '<div>' + html.html() + '</div>' )
|
||||
|
||||
# remove comments
|
||||
@_removeComments( html )
|
||||
@_removeComments( htmlTmp )
|
||||
|
||||
# remove style and class
|
||||
@_removeAttributes( html )
|
||||
@_removeAttributes( htmlTmp )
|
||||
|
||||
# remove Word markup
|
||||
htmlTmp = @_removeWordMarkup( htmlTmp )
|
||||
|
||||
# remove tags, keep content
|
||||
html.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( ->
|
||||
htmlTmp.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( ->
|
||||
$(@).contents()
|
||||
)
|
||||
|
||||
# remove tags & content
|
||||
html.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, hr, img, input, select, button, style, applet, canvas, script, frame, iframe').remove()
|
||||
htmlTmp.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, hr, img, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe').remove()
|
||||
|
||||
html
|
||||
html.html(htmlTmp)
|
||||
|
||||
# cleanHtmlWithRichText = App.Utils.htmlCleanup( html )
|
||||
@htmlCleanup: (html) ->
|
||||
htmlTmp = $( '<div>' + html.html() + '</div>' )
|
||||
|
||||
# remove comments
|
||||
@_removeComments( html )
|
||||
@_removeComments( htmlTmp )
|
||||
|
||||
# remove style and class
|
||||
@_removeAttributes( html )
|
||||
@_removeAttributes( htmlTmp )
|
||||
|
||||
# remove Word markup
|
||||
htmlTmp = @_removeWordMarkup( htmlTmp )
|
||||
|
||||
# remove tags, keep content
|
||||
html.find('a, font, small, time').replaceWith( ->
|
||||
htmlTmp.find('a, font, small, time').replaceWith( ->
|
||||
$(@).contents()
|
||||
)
|
||||
|
||||
|
@ -148,31 +163,32 @@ class App.Utils
|
|||
# New type of the tag
|
||||
replacementTag = 'div';
|
||||
|
||||
# Replace all a tags with the type of replacementTag
|
||||
html.find('h1, h2, h3, h4, h5, h6, textarea').each( ->
|
||||
# Replace all x tags with the type of replacementTag
|
||||
htmlTmp.find('h1, h2, h3, h4, h5, h6, textarea').each( ->
|
||||
outer = this.outerHTML;
|
||||
|
||||
# Replace opening tag
|
||||
regex = new RegExp('<' + this.tagName, 'i');
|
||||
newTag = outer.replace(regex, '<' + replacementTag);
|
||||
regex = new RegExp('<' + this.tagName, 'i')
|
||||
newTag = outer.replace(regex, '<' + replacementTag)
|
||||
|
||||
# Replace closing tag
|
||||
regex = new RegExp('</' + this.tagName, 'i');
|
||||
newTag = newTag.replace(regex, '</' + replacementTag);
|
||||
regex = new RegExp('</' + this.tagName, 'i')
|
||||
newTag = newTag.replace(regex, '</' + replacementTag)
|
||||
|
||||
$(@).replaceWith(newTag);
|
||||
$(@).replaceWith(newTag)
|
||||
)
|
||||
|
||||
# remove tags & content
|
||||
html.find('form, font, hr, img, input, select, button, style, applet, canvas, script, frame, iframe').remove()
|
||||
htmlTmp.find('form, font, hr, img, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe').remove()
|
||||
|
||||
html
|
||||
html.html(htmlTmp)
|
||||
|
||||
@_removeAttributes: (html) ->
|
||||
html.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, h1, h2, h3, h4, h5, h6')
|
||||
html.find('*')
|
||||
.removeAttr( 'style' )
|
||||
.removeAttr( 'class' )
|
||||
.removeAttr( 'title' )
|
||||
.removeAttr( 'lang' )
|
||||
html
|
||||
|
||||
@_removeComments: (html) ->
|
||||
|
@ -182,6 +198,14 @@ class App.Utils
|
|||
)
|
||||
html
|
||||
|
||||
# htmlWithoutWordMarkup = App.Utils._removeWordMarkup( html )
#
# Strips MS Word namespace tags from the serialized markup of the first
# element in the given jQuery object and returns a NEW jQuery object parsed
# from the cleaned string (the passed-in object is not modified).
#
# Removed tags (opening and closing form, tag only - inner content is kept):
#   <w:xxx> / </w:xxx>  - "w:" prefix followed by exactly three letters
#   <o:x>   / </o:x>    - "o:" prefix followed by exactly one letter
#
# If the jQuery object is empty it is returned unchanged.
@_removeWordMarkup: (html) ->
  element = html.get(0)

  # nothing to serialize for an empty set - hand the input back as is
  return html unless element?

  markup = element.outerHTML

  # The "g" flag is required: without it String::replace only removes the
  # first match, leaving closing tags and any further Word tags in place.
  markup = markup.replace(/<\/?w:[A-Za-z]{3}>/g, '')
  markup = markup.replace(/<\/?o:[A-Za-z]>/g, '')

  $(markup)
|
||||
|
||||
# signatureNeeded = App.Utils.signatureCheck( message, signature )
|
||||
@signatureCheck: (message, signature) ->
|
||||
messageText = $( '<div>' + message + '</div>' ).text().trim()
|
||||
|
|
|
@ -212,43 +212,47 @@ test( "htmlEscape", function() {
|
|||
test( "htmlRemoveTags", function() {
|
||||
|
||||
var source = "<div>test</div>"
|
||||
var should = "test"
|
||||
var should = "<div>test</div>"
|
||||
var result = App.Utils.htmlRemoveTags( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div>test<!-- some comment --></div>"
|
||||
should = "<div>test</div>"
|
||||
result = App.Utils.htmlRemoveTags( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<a href=\"some_link\">some link to somewhere</a>"
|
||||
should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlRemoveTags( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><a href=\"some_link\">some link to somewhere</a></div>"
|
||||
should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlRemoveTags( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><a href=\"some_link\">some link to somewhere</a><input value=\"should not be shown\"></div>"
|
||||
should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlRemoveTags( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><a href=\"some_link\">some link to somewhere</a> <div><hr></div> <span>123</span> <img src=\"some_image\"/></div>"
|
||||
should = "some link to somewhere 123 "
|
||||
should = "<div>some link to somewhere 123 </div>"
|
||||
result = App.Utils.htmlRemoveTags( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><form class=\"xxx\">test 123</form></div>"
|
||||
should = "test 123"
|
||||
should = "<div>test 123</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><textarea class=\"xxx\">test 123</textarea></div>"
|
||||
should = "test 123"
|
||||
should = "<div>test 123</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><font size=\"3\" color=\"red\">This is some text!</font></div>"
|
||||
//should = "<div>This is some text!</div>"
|
||||
should = "This is some text!"
|
||||
should = "<div>This is some text!</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
});
|
||||
|
@ -257,145 +261,147 @@ test( "htmlRemoveTags", function() {
|
|||
test( "htmlRemoveRichtext", function() {
|
||||
|
||||
var source = "<div><!--test comment--><a href=\"test\">test</a></div>"
|
||||
var should = "test"
|
||||
var should = "<div>test</div>"
|
||||
var result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><!--[if !supportLists]--><span lang=\"DE\">1.1.1<span> </span></span><!--[endif]--><span lang=\"DE\">Description</span></div>"
|
||||
should = "<div><span>1.1.1<span> </span></span><span>Description</span></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<a href=\"some_link\">some link to somewhere</a>"
|
||||
should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><a href=\"some_link\"></a> test </div>"
|
||||
should = " test "
|
||||
should = "<div> test </div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><b></b> test </div>"
|
||||
should = " test "
|
||||
should = "<div> test </div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div><b></b> test </div></div>"
|
||||
should = "<div> test </div>"
|
||||
should = "<div><div> test </div></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div><b></b> test <input value=\"should not be shown\"></div></div>"
|
||||
should = "<div> test </div>"
|
||||
should = "<div><div> test </div></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div><b></b> test </div><span>123</span></div>"
|
||||
should = "<div> test </div><span>123</span>"
|
||||
should = "<div><div> test </div><span>123</span></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div class=\"xxx\"><b></b> test </div></div>"
|
||||
should = "<div> test </div>"
|
||||
source = "<div><div class=\"xxx\" title=\"some title\" lang=\"en\"><b></b> test </div></div>"
|
||||
should = "<div><div> test </div></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><textarea class=\"xxx\"> test </textarea></div>"
|
||||
//should = "<div> test </div>"
|
||||
should = " test "
|
||||
should = "<div> test </div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><br></div>"
|
||||
should = "<br>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div class=\"xxx\"><br></div></div>"
|
||||
should = "<div><br></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div class=\"xxx\"><br></div></div>"
|
||||
should = "<div><div><br></div></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><form class=\"xxx\">test 123</form></div>"
|
||||
//should = "<div>test 123</div>"
|
||||
should = "test 123"
|
||||
should = "<div>test 123</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><font size=\"3\" color=\"red\">This is some text!</font></div>"
|
||||
//should = "<div>This is some text!</div>"
|
||||
should = "This is some text!"
|
||||
should = "<div>This is some text!</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
|
||||
});
|
||||
|
||||
// htmlCleanup
|
||||
test( "htmlCleanup", function() {
|
||||
|
||||
var source = "<div><!--test comment--><a href=\"test\">test</a></div>"
|
||||
var should = "test"
|
||||
var should = "<div>test</div>"
|
||||
var result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<a href=\"some_link\">some link to somewhere</a>"
|
||||
should = "some link to somewhere"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><h1>some link to somewhere</h1></a>"
|
||||
//should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><h1>some link to somewhere</h1></div>"
|
||||
should = "<div><div>some link to somewhere</div></div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><small>some link to somewhere</small></a>"
|
||||
//should = "<div>some link to somewhere</div>"
|
||||
should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><time>some link to somewhere</time></a>"
|
||||
//should = "<div>some link to somewhere</div>"
|
||||
should = "some link to somewhere"
|
||||
should = "<div>some link to somewhere</div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><h1>some link to somewhere</h1><p><hr></p></div>"
|
||||
should = "<div>some link to somewhere</div><p></p><p></p>"
|
||||
source = "<div><h1>some h1 for somewhere</h1><p><hr></p></div>"
|
||||
should = "<div><div>some h1 for somewhere</div><p></p><p></p></div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><br></div>"
|
||||
should = "<br>"
|
||||
should = "<div><br></div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><div class=\"xxx\"><br></div></div>"
|
||||
should = "<div><br></div>"
|
||||
should = "<div><div><br></div></div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><form class=\"xxx\">test 123</form></div>"
|
||||
//should = "<div>test 123<br></div>"
|
||||
should = "test 123"
|
||||
should = "<div>test 123</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><form class=\"xxx\">test 123</form> some other value</div>"
|
||||
//should = "<div>ttest 123 some other value</div>"
|
||||
should = "test 123 some other value"
|
||||
should = "<div>test 123 some other value</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><form class=\"xxx\">test 123</form> some other value<input value=\"should not be shown\"></div>"
|
||||
//should = "<div>test 123 some other value</div>"
|
||||
should = "test 123 some other value"
|
||||
should = "<div>test 123 some other value</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><font size=\"3\" color=\"red\">This is some text!</font></div>"
|
||||
//should = "<div>This is some text!</div>"
|
||||
should = "This is some text!"
|
||||
should = "<div>This is some text!</div>"
|
||||
result = App.Utils.htmlRemoveRichtext( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
source = "<div><p>some link to somewhere from word<w:sdt>abc</w:sdt></p><o:p></o:p></a>"
|
||||
should = "<div><p>some link to somewhere from wordabc</p></div>"
|
||||
result = App.Utils.htmlCleanup( $(source) )
|
||||
equal( result.html(), should, source )
|
||||
|
||||
});
|
||||
|
||||
// wrap
|
||||
|
|
Loading…
Reference in a new issue