From 9643e2f2149c5b76526559621452b2373b831492 Mon Sep 17 00:00:00 2001 From: Martin Edenhofer Date: Wed, 5 Aug 2015 11:40:50 +0200 Subject: [PATCH] Improved performance of html utils (do replacement in a copy of string, replace it later once), added word markup removal. --- .../app/lib/app_post/utils.js.coffee | 66 +++++++---- public/assets/tests/html-utils.js | 108 +++++++++--------- 2 files changed, 102 insertions(+), 72 deletions(-) diff --git a/app/assets/javascripts/app/lib/app_post/utils.js.coffee b/app/assets/javascripts/app/lib/app_post/utils.js.coffee index a3096c1bc..064d7bf6a 100644 --- a/app/assets/javascripts/app/lib/app_post/utils.js.coffee +++ b/app/assets/javascripts/app/lib/app_post/utils.js.coffee @@ -100,47 +100,62 @@ class App.Utils # textWithoutTags = App.Utils.htmlRemoveTags( html ) @htmlRemoveTags: (html) -> + htmlTmp = $( '
' + html.html() + '
' ) + + # remove comments + @_removeComments( htmlTmp ) + + # remove word markup + htmlTmp = @_removeWordMarkup( htmlTmp ) # remove tags, keep content - html.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( -> + htmlTmp.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( -> $(@).contents() ) # remove tags & content - html.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, br, hr, img, input, select, button, style, applet, canvas, script, frame, iframe').remove() + htmlTmp.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, br, hr, img, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe').remove() - html + html.html(htmlTmp) # htmlOnlyWithRichtext = App.Utils.htmlRemoveRichtext( html ) @htmlRemoveRichtext: (html) -> + htmlTmp = $( '
' + html.html() + '
' ) # remove comments - @_removeComments( html ) + @_removeComments( htmlTmp ) # remove style and class - @_removeAttributes( html ) + @_removeAttributes( htmlTmp ) + + # remove word markup + htmlTmp = @_removeWordMarkup( htmlTmp ) # remove tags, keep content - html.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( -> + htmlTmp.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6').replaceWith( -> $(@).contents() ) # remove tags & content - html.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, hr, img, input, select, button, style, applet, canvas, script, frame, iframe').remove() + htmlTmp.find('li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, form, textarea, font, address, table, thead, tbody, tr, td, h1, h2, h3, h4, h5, h6, hr, img, input, select, button, style, applet, embed, noframes, canvas, script, frame, iframe').remove() - html + html.html(htmlTmp) # cleanHtmlWithRichText = App.Utils.htmlCleanup( html ) @htmlCleanup: (html) -> + htmlTmp = $( '
' + html.html() + '
' ) # remove comments - @_removeComments( html ) + @_removeComments( htmlTmp ) # remove style and class - @_removeAttributes( html ) + @_removeAttributes( htmlTmp ) + + # remove word markup + htmlTmp = @_removeWordMarkup( htmlTmp ) # remove tags, keep content - html.find('a, font, small, time').replaceWith( -> + htmlTmp.find('a, font, small, time').replaceWith( -> $(@).contents() ) @@ -148,31 +163,32 @@ class App.Utils # New type of the tag replacementTag = 'div'; - # Replace all a tags with the type of replacementTag - html.find('h1, h2, h3, h4, h5, h6, textarea').each( -> + # Replace all x tags with the type of replacementTag + htmlTmp.find('h1, h2, h3, h4, h5, h6, textarea').each( -> outer = this.outerHTML; # Replace opening tag - regex = new RegExp('<' + this.tagName, 'i'); - newTag = outer.replace(regex, '<' + replacementTag); + regex = new RegExp('<' + this.tagName, 'i') + newTag = outer.replace(regex, '<' + replacementTag) # Replace closing tag - regex = new RegExp(' - html.find('div, span, p, li, ul, ol, a, b, u, i, label, small, strong, strike, pre, code, center, blockquote, h1, h2, h3, h4, h5, h6') + html.find('*') .removeAttr( 'style' ) .removeAttr( 'class' ) .removeAttr( 'title' ) + .removeAttr( 'lang' ) html @_removeComments: (html) -> @@ -182,6 +198,14 @@ class App.Utils ) html + @_removeWordMarkup: (html) -> + htmlTmp = html.get(0).outerHTML + regex = new RegExp('<(/w|w)\:[A-Za-z]{3}>') + htmlTmp = htmlTmp.replace(regex, '') + regex = new RegExp('<(/o|o)\:[A-Za-z]{1}>') + htmlTmp = htmlTmp.replace(regex, '') + $(htmlTmp) + # signatureNeeded = App.Utils.signatureCheck( message, signature ) @signatureCheck: (message, signature) -> messageText = $( '
' + message + '
' ).text().trim() diff --git a/public/assets/tests/html-utils.js b/public/assets/tests/html-utils.js index 18e42afc1..b603ee1b0 100644 --- a/public/assets/tests/html-utils.js +++ b/public/assets/tests/html-utils.js @@ -212,43 +212,47 @@ test( "htmlEscape", function() { test( "htmlRemoveTags", function() { var source = "
test
" - var should = "test" + var should = "
test
" var result = App.Utils.htmlRemoveTags( $(source) ) equal( result.html(), should, source ) + source = "
test
" + should = "
test
" + result = App.Utils.htmlRemoveTags( $(source) ) + equal( result.html(), should, source ) + source = "some link to somewhere" - should = "some link to somewhere" + should = "
some link to somewhere
" result = App.Utils.htmlRemoveTags( $(source) ) equal( result.html(), should, source ) source = "
some link to somewhere
" - should = "some link to somewhere" + should = "
some link to somewhere
" result = App.Utils.htmlRemoveTags( $(source) ) equal( result.html(), should, source ) source = "
some link to somewhere
" - should = "some link to somewhere" + should = "
some link to somewhere
" result = App.Utils.htmlRemoveTags( $(source) ) equal( result.html(), should, source ) source = "
some link to somewhere

123
" - should = "some link to somewhere 123 " + should = "
some link to somewhere 123
" result = App.Utils.htmlRemoveTags( $(source) ) equal( result.html(), should, source ) source = "
test 123
" - should = "test 123" + should = "
test 123
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
" - should = "test 123" + should = "
test 123
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
This is some text!
" - //should = "
This is some text!
" - should = "This is some text!" + should = "
This is some text!
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) }); @@ -257,145 +261,147 @@ test( "htmlRemoveTags", function() { test( "htmlRemoveRichtext", function() { var source = "
test
" - var should = "test" + var should = "
test
" var result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) + source = "
1.1.1     Description
" + should = "
1.1.1     Description
" + result = App.Utils.htmlRemoveRichtext( $(source) ) + equal( result.html(), should, source ) + source = "some link to somewhere" - should = "some link to somewhere" + should = "
some link to somewhere
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test
" - should = " test " + should = "
test
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test
" - should = " test " + should = "
test
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test
" - should = "
test
" + should = "
test
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test
" - should = "
test
" + should = "
test
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test
123
" - should = "
test
123" + should = "
test
123
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) - source = "
test
" - should = "
test
" + source = "
test
" + should = "
test
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
" - //should = "
test
" - should = " test " + should = "
test
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "

" - should = "
" - result = App.Utils.htmlRemoveRichtext( $(source) ) - equal( result.html(), should, source ) - - source = "

" should = "

" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) + source = "

" + should = "

" + result = App.Utils.htmlRemoveRichtext( $(source) ) + equal( result.html(), should, source ) + source = "
test 123
" - //should = "
test 123
" - should = "test 123" + should = "
test 123
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
This is some text!
" - //should = "
This is some text!
" - should = "This is some text!" + should = "
This is some text!
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) - }); // htmlCleanup test( "htmlCleanup", function() { var source = "
test
" - var should = "test" + var should = "
test
" var result = App.Utils.htmlCleanup( $(source) ) equal( result.html(), should, source ) source = "some link to somewhere" - should = "some link to somewhere" - result = App.Utils.htmlCleanup( $(source) ) - equal( result.html(), should, source ) - - source = "

some link to somewhere

" + //should = "some link to somewhere" should = "
some link to somewhere
" result = App.Utils.htmlCleanup( $(source) ) equal( result.html(), should, source ) + source = "

some link to somewhere

" + should = "
some link to somewhere
" + result = App.Utils.htmlCleanup( $(source) ) + equal( result.html(), should, source ) + source = "
some link to somewhere" - //should = "
some link to somewhere
" - should = "some link to somewhere" + should = "
some link to somewhere
" result = App.Utils.htmlCleanup( $(source) ) equal( result.html(), should, source ) source = "
" - //should = "
some link to somewhere
" - should = "some link to somewhere" + should = "
some link to somewhere
" result = App.Utils.htmlCleanup( $(source) ) equal( result.html(), should, source ) - source = "

some link to somewhere


" - should = "
some link to somewhere

" + source = "

some h1 for somewhere


" + should = "
some h1 for somewhere

" result = App.Utils.htmlCleanup( $(source) ) equal( result.html(), should, source ) source = "

" - should = "
" + should = "

" result = App.Utils.htmlCleanup( $(source) ) equal( result.html(), should, source ) source = "

" - should = "

" + should = "

" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test 123
" //should = "
test 123
" - should = "test 123" + should = "
test 123
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test 123
some other value
" - //should = "
ttest 123 some other value
" - should = "test 123 some other value" + should = "
test 123 some other value
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
test 123
some other value
" - //should = "
test 123 some other value
" - should = "test 123 some other value" + should = "
test 123 some other value
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) source = "
This is some text!
" - //should = "
This is some text!
" - should = "This is some text!" + should = "
This is some text!
" result = App.Utils.htmlRemoveRichtext( $(source) ) equal( result.html(), should, source ) + source = "

some link to somewhere from wordabc

" + should = "

some link to somewhere from wordabc

" + result = App.Utils.htmlCleanup( $(source) ) + equal( result.html(), should, source ) + }); // wrap