From 182be906552cf990a222584de51c0b72e46ce85a Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 29 Dec 2020 01:08:55 +0800 Subject: [PATCH] Fix bug of link query order on markdown render (#14156) (#14171) * Fix bug of link query order on markdown render * Fix bluemonday bug and fix one wrong test Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: 6543 <6543@obermui.de> --- go.mod | 2 + go.sum | 5 +- modules/markup/html_test.go | 2 +- .../microcosm-cc/bluemonday/sanitize.go | 97 +++++++++++++++---- vendor/modules.txt | 3 +- 5 files changed, 84 insertions(+), 25 deletions(-) diff --git a/go.mod b/go.mod index 58ae69967..87f82c0e6 100644 --- a/go.mod +++ b/go.mod @@ -124,3 +124,5 @@ require ( ) replace github.com/hashicorp/go-version => github.com/6543/go-version v1.2.4 + +replace github.com/microcosm-cc/bluemonday => github.com/lunny/bluemonday v1.0.5-0.20201227154428-ca34796141e8 diff --git a/go.sum b/go.sum index 975e6585f..04a652f2a 100644 --- a/go.sum +++ b/go.sum @@ -598,6 +598,8 @@ github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/lib/pq v1.7.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lib/pq v1.8.1-0.20200908161135-083382b7e6fc h1:ERSU1OvZ6MdWhHieo2oT7xwR/HCksqKdgK6iYPU5pHI= github.com/lib/pq v1.8.1-0.20200908161135-083382b7e6fc/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/lunny/bluemonday v1.0.5-0.20201227154428-ca34796141e8 h1:1omo92DLtxQu6VwVPSZAmduHaK5zssed6cvkHyl1XOg= +github.com/lunny/bluemonday v1.0.5-0.20201227154428-ca34796141e8/go.mod h1:8iwZnFn2CDDNZ0r6UXhF4xawGvzaqzCRa1n3/lO3W2w= github.com/lunny/dingtalk_webhook v0.0.0-20171025031554-e3534c89ef96 h1:uNwtsDp7ci48vBTTxDuwcoTXz4lwtDTe7TjCQ0noaWY= github.com/lunny/dingtalk_webhook v0.0.0-20171025031554-e3534c89ef96/go.mod h1:mmIfjCSQlGYXmJ95jFN84AkQFnVABtKuJL8IrzwvUKQ= github.com/lunny/log v0.0.0-20160921050905-7887c61bf0de h1:nyxwRdWHAVxpFcDThedEgQ07DbcRc5xgNObtbTp76fk= @@ -649,8 +651,6 @@ github.com/mgechev/revive v1.0.3-0.20200921231451-246eac737dc7 h1:ydVkpU/M4/c45y github.com/mgechev/revive v1.0.3-0.20200921231451-246eac737dc7/go.mod h1:no/hfevHbndpXR5CaJahkYCfM/FFpmM/dSOwFGU7Z1o= github.com/mholt/archiver/v3 v3.3.0 h1:vWjhY8SQp5yzM9P6OJ/eZEkmi3UAbRrxCq48MxjAzig= github.com/mholt/archiver/v3 v3.3.0/go.mod h1:YnQtqsp+94Rwd0D/rk5cnLrxusUBUXg+08Ebtr1Mqao= -github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912 h1:hJde9rA24hlTcAYSwJoXpDUyGtfKQ/jsofw+WaDqGrI= -github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912/go.mod h1:8iwZnFn2CDDNZ0r6UXhF4xawGvzaqzCRa1n3/lO3W2w= github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= github.com/minio/md5-simd v1.1.0 h1:QPfiOqlZH+Cj9teu0t9b1nTBfPbyTl16Of5MeuShdK4= github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw= @@ -937,7 +937,6 @@ golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200709230013-948cd5f35899/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a h1:vclmkQCjlDX5OydZ9wv8rBCcS0QyQY66Mpf/7BZbInM= golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201217014255-9d1352758620 h1:3wPMTskHO3+O6jqTEXyFcsnuxMQOqYSaHsDxcbUXpqA= golang.org/x/crypto v0.0.0-20201217014255-9d1352758620/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index a018d7484..a3c273e62 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -142,7 +142,7 @@ func TestRender_links(t *testing.T) { `

ftp://gitea.com/file.txt

`) test( "magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download", - `

magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download

`) + `

magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download

`) // Test that should *not* be turned into URL test( diff --git a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go index 771fc2c2f..a58333aa6 100644 --- a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go +++ b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go @@ -122,22 +122,79 @@ func escapeUrlComponent(val string) string { return w.String() } -func sanitizedUrl(val string) (string, error) { +// Query represents a query +type Query struct { + Key string + Value string +} + +func parseQuery(query string) (values []Query, err error) { + for query != "" { + key := query + if i := strings.IndexAny(key, "&;"); i >= 0 { + key, query = key[:i], key[i+1:] + } else { + query = "" + } + if key == "" { + continue + } + value := "" + if i := strings.Index(key, "="); i >= 0 { + key, value = key[:i], key[i+1:] + } + key, err1 := url.QueryUnescape(key) + if err1 != nil { + if err == nil { + err = err1 + } + continue + } + value, err1 = url.QueryUnescape(value) + if err1 != nil { + if err == nil { + err = err1 + } + continue + } + values = append(values, Query{ + Key: key, + Value: value, + }) + } + return values, err +} + +func encodeQueries(queries []Query) string { + var b strings.Builder + for i, query := range queries { + b.WriteString(url.QueryEscape(query.Key)) + b.WriteString("=") + b.WriteString(url.QueryEscape(query.Value)) + if i < len(queries)-1 { + b.WriteString("&") + } + } + return b.String() +} + +func sanitizedURL(val string) (string, error) { u, err := url.Parse(val) if err != nil { return "", err } - // sanitize the url query params - sanitizedQueryValues := make(url.Values, 0) - queryValues := u.Query() - for k, vals := range queryValues { - sk := html.EscapeString(k) - for _, v := range vals { - sv := escapeUrlComponent(v) - sanitizedQueryValues.Set(sk, sv) - } + + // we use parseQuery but not u.Query to keep the order not change because + // url.Values is a map which has a random order. + queryValues, err := parseQuery(u.RawQuery) + if err != nil { + return "", err } - u.RawQuery = sanitizedQueryValues.Encode() + // sanitize the url query params + for i, query := range queryValues { + queryValues[i].Key = html.EscapeString(query.Key) + } + u.RawQuery = encodeQueries(queryValues) // u.String() will also sanitize host/scheme/user/pass return u.String(), nil } @@ -158,7 +215,7 @@ func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) { tokenBuff.WriteString(html.EscapeString(attr.Val)) continue } - u, err := sanitizedUrl(u) + u, err := sanitizedURL(u) if err == nil { tokenBuff.WriteString(u) } else { @@ -390,10 +447,10 @@ func (p *Policy) sanitizeAttrs( hasStylePolicies = true } // no specific element policy found, look for a pattern match - if !hasStylePolicies{ - for k, v := range p.elsMatchingAndStyles{ + if !hasStylePolicies { + for k, v := range p.elsMatchingAndStyles { if k.MatchString(elementName) { - if len(v) > 0{ + if len(v) > 0 { hasStylePolicies = true break } @@ -669,14 +726,14 @@ func (p *Policy) sanitizeAttrs( func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute { sps := p.elsAndStyles[elementName] - if len(sps) == 0{ + if len(sps) == 0 { sps = map[string]stylePolicy{} // check for any matching elements, if we don't already have a policy found // if multiple matches are found they will be overwritten, it's best // to not have overlapping matchers - for regex, policies :=range p.elsMatchingAndStyles{ - if regex.MatchString(elementName){ - for k, v := range policies{ + for regex, policies := range p.elsMatchingAndStyles { + if regex.MatchString(elementName) { + for k, v := range policies { sps[k] = v } } @@ -874,7 +931,7 @@ func removeUnicode(value string) string { return substitutedValue } -func (p *Policy) matchRegex(elementName string ) (map[string]attrPolicy, bool) { +func (p *Policy) matchRegex(elementName string) (map[string]attrPolicy, bool) { aps := make(map[string]attrPolicy, 0) matched := false for regex, attrs := range p.elsMatchingAndAttrs { diff --git a/vendor/modules.txt b/vendor/modules.txt index 1bbba9cfe..be206facd 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -566,7 +566,7 @@ github.com/mgechev/revive/rule # github.com/mholt/archiver/v3 v3.3.0 ## explicit github.com/mholt/archiver/v3 -# github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912 +# github.com/microcosm-cc/bluemonday v1.0.3-0.20191119130333-0a75d7616912 => github.com/lunny/bluemonday v1.0.5-0.20201227154428-ca34796141e8 ## explicit github.com/microcosm-cc/bluemonday # github.com/minio/md5-simd v1.1.0 @@ -979,3 +979,4 @@ xorm.io/xorm/names xorm.io/xorm/schemas xorm.io/xorm/tags # github.com/hashicorp/go-version => github.com/6543/go-version v1.2.4 +# github.com/microcosm-cc/bluemonday => github.com/lunny/bluemonday v1.0.5-0.20201227154428-ca34796141e8