From 060051b710900acd85d6a223e3c423516dcd2b75 Mon Sep 17 00:00:00 2001 From: Martin Edenhofer Date: Sat, 29 Apr 2017 11:44:35 +0200 Subject: [PATCH] Improved html sanitizer for layout option / empty lines. --- config/initializers/html_sanitizer.rb | 28 +++++++++++-------- lib/core_ext/string.rb | 6 ++-- test/unit/aaa_string_test.rb | 21 +++++++++----- test/unit/email_parser_test.rb | 40 +++++++++++++-------------- test/unit/email_process_test.rb | 2 +- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/config/initializers/html_sanitizer.rb b/config/initializers/html_sanitizer.rb index 6866152ca..98520ca92 100644 --- a/config/initializers/html_sanitizer.rb +++ b/config/initializers/html_sanitizer.rb @@ -57,10 +57,11 @@ Rails.application.config.html_sanitizer_css_properties_whitelist = { color ), 'table' => %w( - background-color color - padding margin + background-color color font-size vertical-align + margin margin-top margin-right margin-bottom margin-left + padding padding-top padding-right padding-bottom padding-left text-align - border border-collapse border-style + border border-collapse border-style border-spacing border-top-width border-right-width @@ -73,10 +74,11 @@ Rails.application.config.html_sanitizer_css_properties_whitelist = { border-left-color ), 'th' => %w( - background-color color - padding margin + background-color color font-size vertical-align + margin margin-top margin-right margin-bottom margin-left + padding padding-top padding-right padding-bottom padding-left text-align - border border-collapse border-style + border border-collapse border-style border-spacing border-top-width border-right-width @@ -89,10 +91,11 @@ Rails.application.config.html_sanitizer_css_properties_whitelist = { border-left-color ), 'tr' => %w( - background-color color - padding margin + background-color color font-size vertical-align + margin margin-top margin-right margin-bottom margin-left + padding padding-top padding-right padding-bottom padding-left text-align - border border-collapse border-style + border border-collapse border-style border-spacing border-top-width border-right-width @@ -105,10 +108,11 @@ Rails.application.config.html_sanitizer_css_properties_whitelist = { border-left-color ), 'td' => %w( - background-color color - padding margin + background-color color font-size vertical-align + margin margin-top margin-right margin-bottom margin-left + padding padding-top padding-right padding-bottom padding-left text-align - border border-collapse border-style + border border-collapse border-style border-spacing border-top-width border-right-width diff --git a/lib/core_ext/string.rb b/lib/core_ext/string.rb index 5a8bf0a3b..6552f3e99 100644 --- a/lib/core_ext/string.rb +++ b/lib/core_ext/string.rb @@ -302,13 +302,15 @@ class String string.gsub!(/######SIGNATURE_MARKER######/, '') return string.chomp end - string.gsub!(%r{(

[[:space:]]*

([[:space:]]*)){2,}}im, '

\2') + string.gsub!(%r{(

[[:space:]]*

([[:space:]]*)){2,}}im, '

 

\2') string.gsub!(%r\
[[:space:]]*(([[:space:]]*)){2,}\im, '

\3') string.gsub!(%r\[[:space:]]*(
[[:space:]]*){3,}[[:space:]]*
\im, '

') - string.gsub!(%r\
[[:space:]]*(
[[:space:]]*){1,}[[:space:]]*
\im, '
') + string.gsub!(%r\
[[:space:]]*(
[[:space:]]*){1,}[[:space:]]*
\im, '
 
') + string.gsub!(%r\
[[:space:]]*(
[[:space:]]*{1,}
[[:space:]]*){2,}
\im, '
 
') string.gsub!(%r\

[[:space:]]*

([[:space:]]*){2,}[[:space:]]*\im, '


') string.gsub!(%r{

[[:space:]]*

([[:space:]]*)+

[[:space:]]*

}im, '

') string.gsub!(%r\(
[[:space:]]*
[[:space:]]*){2,}\im, '
') + string.gsub!(%r{
 
[[:space:]]*(
 
){1,}}im, '
 
') string.gsub!(/(
[[:space:]]*){3,}/im, '

') string.gsub!(%r\([[:space:]]*){3,}\im, '

') string.gsub!(%r{

[[:space:]]+

}im, '

 

') diff --git a/test/unit/aaa_string_test.rb b/test/unit/aaa_string_test.rb index 8530c2a2a..20d7766d0 100644 --- a/test/unit/aaa_string_test.rb +++ b/test/unit/aaa_string_test.rb @@ -466,22 +466,30 @@ Men-----------------------' result = "
test 123
" assert_equal(result, html.html2html_strict) + html = "
" + result = "
" + assert_equal(result, html.html2html_strict) + + html = "

" + result = "
 
" + assert_equal(result, html.html2html_strict) + html = "

" result = "

 

" assert_equal(result, html.html2html_strict) html = "
" - result = "
\n
" + result = "
 
" assert_equal(result, html.html2html_strict) html = "
" - result = "
\n
" + result = "
 
" assert_equal(result, html.html2html_strict) html = "


" - result = "
" + result = "
 
" assert_equal(result, html.html2html_strict) html = '
@@ -787,7 +795,7 @@ html.html2html_strict assert_equal(result, html.html2html_strict) html = '



' - result = '
' + result = '
 
' assert_equal(result, html.html2html_strict) html = '

abc

' @@ -914,7 +922,7 @@ html.html2html_strict assert_equal(result, html.html2html_strict) html = '
Wir brauchen also die Instanz example.zammad.com, kann die aber nicht mehr nutzen.

Bitte um Freischaltung.


' - result = '
Wir brauchen also die Instanz example.zammad.com, kann die aber nicht mehr nutzen.
Bitte um Freischaltung.
' + result = '
Wir brauchen also die Instanz example.zammad.com, kann die aber nicht mehr nutzen.
 
Bitte um Freischaltung.
 
' assert_equal(result, html.html2html_strict) html = '

oh jeee … Zauberwort vergessen ;-) Können Sie mir @@ -947,7 +955,7 @@ html.html2html_strict html = "

Dear Bob:Mr/Mrs

We are one of the leading manufacturer and supplier of conduits and cars since 3000.

Could you inform me the specification you need?

May I sent you our products catalogues for your reference?

Best regards!

Welcome to our booth B11/1 Hall 13 during SOMEWHERE\n9999.
Bob Smith
Exp. & Imp.
Town Example Electric Co., Ltd.
Tel: 0000-11-12345678 (Ext-220)  Fax: 0000-11-12345678 
Room1234, NO. 638, Smith Road, Town, 200000, Somewhere
Web: www.example.com
" result = "
-
Dear Bob:Mr/Mrs
We are one of the leading manufacturer and supplier of conduits and cars since 3000.
Could you inform me the specification you need?
May I sent you our products catalogues for your reference?
Best regards!
Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.
+
Dear Bob:Mr/Mrs
 
We are one of the leading manufacturer and supplier of conduits and cars since 3000.
 
Could you inform me the specification you need?
 
May I sent you our products catalogues for your reference?
 
Best regards!
 
Welcome to our booth B11/1 Hall 13 during SOMEWHERE 9999.
Bob Smith
Exp. & Imp.
Town Example Electric Co., Ltd.
Tel: 0000-11-12345678 (Ext-220) Fax: 0000-11-12345678
Room1234, NO. 638, Smith Road, Town, 200000, Somewhere
" @@ -955,7 +963,6 @@ html.html2html_strict html = '
  • Luxemburg
  • ' result = '
  • Luxemburg (http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnY25pLGUsdXJ0b3NVTGVpNWZ8fGZh)
  • ' -html.html2html_strict assert_equal(result, html.html2html_strict) end diff --git a/test/unit/email_parser_test.rb b/test/unit/email_parser_test.rb index 5f837cca3..1031ea657 100644 --- a/test/unit/email_parser_test.rb +++ b/test/unit/email_parser_test.rb @@ -321,7 +321,7 @@ Managing Director: Martin Edenhofer }, { data: IO.binread('test/fixtures/mail11.box'), - body_md5: 'fe7f9431109785573b9e68888815aca0', + body_md5: 'b211c9c28282ad0dd3fccbbf37d9928d', attachments: [ { md5: '08660cd33ce8c64b95bcf0207ff6c4d6', @@ -336,12 +336,12 @@ Managing Director: Martin Edenhofer subject: 'Eine schöne Adventszeit für ZNUNY GMBH - ENTERPRISE SERVICES FÜR OTRS', to: 'enjoy_us@znuny.com', content_type: 'text/html', - body: " + body: "

    -http://newsletters.cylex.de/ref/www.cylex.de/sid-105/uid-4134001/lid-2/http://web2.cylex.de/advent2012?b2b

    Lieber CYLEX Eintragsinhaber,

    das Jahr neigt sich dem Ende und die besinnliche Zeit beginnt laut Kalender mit dem
    1. Advent. Und wie immer wird es in der vorweihnachtlichen Zeit meist beruflich und privat
    so richtig schön hektisch.

    Um Ihre Weihnachtsstimmung in Schwung zu bringen kommen wir nun mit unserem Adventskalender ins Spiel. Denn 24 Tage werden Sie unsere netten Geschichten, Rezepte und Gewinnspiele sowie ausgesuchte Geschenktipps und Einkaufsgutscheine online begleiten. Damit lässt sich Ihre Freude auf das Fest garantiert mit jedem Tag steigern.

    +http://newsletters.cylex.de/ref/www.cylex.de/sid-105/uid-4134001/lid-2/http://web2.cylex.de/advent2012?b2b

    Lieber CYLEX Eintragsinhaber,

    das Jahr neigt sich dem Ende und die besinnliche Zeit beginnt laut Kalender mit dem
    1. Advent. Und wie immer wird es in der vorweihnachtlichen Zeit meist beruflich und privat
    so richtig schön hektisch.

    Um Ihre Weihnachtsstimmung in Schwung zu bringen kommen wir nun mit unserem Adventskalender ins Spiel. Denn 24 Tage werden Sie unsere netten Geschichten, Rezepte und Gewinnspiele sowie ausgesuchte Geschenktipps und Einkaufsgutscheine online begleiten. Damit lässt sich Ihre Freude auf das Fest garantiert mit jedem Tag steigern.

    @@ -356,7 +356,7 @@ Managing Director: Martin Edenhofer P.S. Damit Sie keinen Tag versäumen, empfehlen wir Ihnen den Link des Adventkalenders (http://newsletters.cylex.de/ref/www.cylex.de/sid-105/uid-4134001/lid-3/http://web2.cylex.de/advent2012?b2b) in
    Ihrer Lesezeichen-Symbolleiste zu ergänzen.

     

    -
    Einen gemütlichen Start in die Adventszeit wünscht Ihnen
    +
    Impressum
    S.C. CYLEX INTERNATIONAL S.N.C.
    Sat. Palota 119/A RO 417516 Palota Romania
    Tel.: +49 208/62957-0 |
    Geschäftsführer: Francisc Osvald
    Handelsregister: J05/1591/2009
    USt.IdNr.: RO26332771
    @@ -496,7 +496,7 @@ Managing Director: Martin Edenhofer }, { data: IO.binread('test/fixtures/mail19.box'), - body_md5: 'a02be0f16aa3727768710046c709a368', + body_md5: '812cd2870e97d806eef0ca5a44823333', params: { from: '"我" <>', from_email: '"我" <>', @@ -507,7 +507,7 @@ Managing Director: Martin Edenhofer }, { data: IO.binread('test/fixtures/mail20.box'), - body_md5: 'e66aa7a952ff92ca5b499e7572186054', + body_md5: '7cdfb67ce7bf914fa0a5b85f0a365fdc', params: { from: 'Health and Care-Mall ', from_email: 'drugs-cheapest8@sicor.com', @@ -516,7 +516,7 @@ Managing Director: Martin Edenhofer to: 'info2@znuny.com', body: "________________________________________________________________________Yeah but even when they. Beth liî ed her neck as well
    - +
    @@ -596,7 +596,7 @@ Managing Director: Martin Edenhofer }, { data: IO.binread('test/fixtures/mail21.box'), - body_md5: 'e29237e323306473380dbaeb34ce0189', + body_md5: '380ca2bca1d7e013abd4109459a06fac', params: { from: 'Viagra Super Force Online ', from_email: 'pharmacy_affordable1@ertelecom.ru', @@ -669,14 +669,14 @@ end }, { data: IO.binread('test/fixtures/mail26.box'), - body_md5: '471b7ec5f102fd49e5c442c50e9f4725', + body_md5: '48c2843d219a7430bc84533d67719e95', params: { from: 'gate ', from_email: 'team@support.gate.de', from_display_name: 'gate', subject: 'Ihre Rechnung als PDF-Dokument', to: 'Martin Edenhofer ', - body: "Ihre Rechnung als PDF-Dokument
    óû5aHw5³½IΨµÁxG⌊o8KHCmς9-Ö½23QgñV6UAD¿ùAX←t¨Lf7⊕®Ir²r½TLA5pYJhjV gPnãM36V®E89RUDΤÅ©ÈI9æsàCΘYEϒAfg∗bT¡1∫rIoiš¦O5oUIN±IsæSعPp Ÿÿq1FΧ⇑eGOz⌈F³R98y§ 74”lTr8r§HÐæuØEÛPËq VmkfB∫SKNElst4S∃Á8üTðG°í lY9åPu×8>RÒ¬⊕ΜIÙzÙCC4³ÌQEΡºSè!XgŒs.
    + body: "Ihre Rechnung als PDF-Dokument
    @@ -795,7 +795,7 @@ end }, { data: IO.binread('test/fixtures/mail36.box'), - body_md5: 'cb392b03439b840c21e45504d9a24c3a', + body_md5: '3c58aeb003a55cafb0893d69676b4316', params: { from: 'Martin Smith ', from_email: 'm.Smith@example.com', @@ -803,14 +803,14 @@ end subject: 'Fw: Zugangsdaten', to: 'Martin Edenhofer ', body: "
    -
    --
    don't cry - work! (Rainald Goetz)
    +
     
    --
    don't cry - work! (Rainald Goetz)
    Gesendet: Mittwoch, 03. Februar 2016 um 12:43 Uhr
    Von: \"Martin Smith\" <m.Smith@example.com>
    An: linuxhotel@example.com
    Betreff: Fw: Zugangsdaten
    -
    --
    don't cry - work! (Rainald Goetz)
    +
     
    --
    don't cry - work! (Rainald Goetz)
    Gesendet: Freitag, 22. Januar 2016 um 11:52 Uhr
    Von: \"Martin Edenhofer\" <me@example.com>
    @@ -938,7 +938,7 @@ end }, { data: IO.binread('test/fixtures/mail43.box'), - body_md5: '1f51ef40494cac193c40ef18a1549432', + body_md5: 'a3f7ff5e1876fdbf051c38649b4c9668', params: { from: 'Paula ', from_email: 'databases.en@example.com', @@ -947,12 +947,12 @@ end to: 'info@example.ch', cc: nil, body: "


    -
    http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnaWVpLGUzZHx4bnxlZWY=
    Geben Sie diese Information an den Direktor oder den für Marketing und Umsatzsteigerung verantwortlichen Mitarbeiter Ihrer Firma weiter! -
    +

    Hallo,

    Bestellen Sie online bei:

    company-catalogs.com (http://business-catalogs.example.com/ODtpbGs5MWIzbjUyYzExLTA4Yy06Mmg7N3AvL3R0bmFvY3B0LXlhbW9sc2Nhb3NnYy5lL3RpbXJlZi9lbS9ycnJuaWFpZXMsdGxnaWVpLGUzZHx4bnxlZWY=)

    Für weitere Informationen:

    E-Mail: databases.en@example.com
    Telefon: +370-52-071554 (languages: EN, PL, RU, LT)



    Unsubscribe from newsletter: Click here (http://business-catalogs.example.com/c2JudXVlcmNic2I4MWk7MTgxOTMyNS1jMmMtNzA=)", diff --git a/test/unit/email_process_test.rb b/test/unit/email_process_test.rb index ab88885c9..7c91adcdf 100644 --- a/test/unit/email_process_test.rb +++ b/test/unit/email_process_test.rb @@ -223,7 +223,7 @@ Some Text", content_type: 'text/html', body: "_________________________________________________________________________________Please beth saw his head
    - +
    9õhH3ÿoIÚõ´GÿiH±6u-û◊NQ4ùäU¹awAq¹JLZμÒIicgT1ζ2Y7⊆t 63‘Mñ36EßÝ→DAå†I048CvJ9A↑3iTc4ÉIΥvXO50ñNÁFJSð­r 154F1HPOÀ£CRxZp tLîT9öXH1b3Es±W mNàBg3õEbPŒSúfτTóY4 sUÖPÒζΔRFkcIÕ1™CÓZ3EΛRq!Cass is good to ask what that