From 6d4172987e971345ce5572b85749aff4d740e086 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sun, 12 Dec 2021 01:21:36 +0800 Subject: [PATCH] Fix markdown URL parsing (#17924) Co-authored-by: Lunny Xiao Co-authored-by: zeripath --- modules/markup/html.go | 67 +++++++++++++++++-- modules/markup/html_internal_test.go | 39 +++++------ modules/markup/html_test.go | 96 +++++++++++++++------------- 3 files changed, 127 insertions(+), 75 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index 746830720d..c47ecc165f 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -41,16 +41,21 @@ var ( // While fast, this is also incorrect and lead to false positives. // TODO: fix invalid linking issue + // valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/] + // sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length - // so that abbreviated hash links can be used as well. This matches git and github useability. + // so that abbreviated hash links can be used as well. This matches git and GitHub usability. sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`) // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) - // anySHA1Pattern allows to split url containing SHA into parts - anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`) + // anySHA1Pattern splits url containing SHA into parts + anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`) + + // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" + comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(\.\.\.?)([0-9a-f]{40})?(#[-+~_%.a-zA-Z0-9]+)?`) validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) @@ -65,7 +70,7 @@ var ( blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) // EmojiShortCodeRegex find emoji by alias like :smile: - EmojiShortCodeRegex = regexp.MustCompile(`:[\w\+\-]+:`) + EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) ) // CSS class for action keywords (e.g. "closes: #1") @@ -152,6 +157,7 @@ type processor func(ctx *RenderContext, node *html.Node) var defaultProcessors = []processor{ fullIssuePatternProcessor, + comparePatternProcessor, fullSha1PatternProcessor, shortLinkProcessor, linkProcessor, @@ -178,6 +184,7 @@ func PostProcess( var commitMessageProcessors = []processor{ fullIssuePatternProcessor, + comparePatternProcessor, fullSha1PatternProcessor, linkProcessor, mentionProcessor, @@ -208,6 +215,7 @@ func RenderCommitMessage( var commitMessageSubjectProcessors = []processor{ fullIssuePatternProcessor, + comparePatternProcessor, fullSha1PatternProcessor, linkProcessor, mentionProcessor, @@ -920,12 +928,57 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) { if hash != "" { text += " (" + hash + ")" } - replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) node = node.NextSibling.NextSibling } } +func comparePatternProcessor(ctx *RenderContext, node *html.Node) { + if ctx.Metas == nil { + return + } + + next := node.NextSibling + for node != nil && node != next { + m := comparePattern.FindStringSubmatchIndex(node.Data) + if m == nil { + return + } + + urlFull := node.Data[m[0]:m[1]] + text1 := base.ShortSha(node.Data[m[2]:m[3]]) + textDots := base.ShortSha(node.Data[m[4]:m[5]]) + text2 := base.ShortSha(node.Data[m[6]:m[7]]) + + hash := "" + if m[9] > 0 { + hash = node.Data[m[8]:m[9]][1:] + } + + start := m[0] + end := m[1] + + // If url ends in '.', it's very likely that it is not part of the + // actual url but used to finish a sentence. + if strings.HasSuffix(urlFull, ".") { + end-- + urlFull = urlFull[:len(urlFull)-1] + if hash != "" { + hash = hash[:len(hash)-1] + } else if text2 != "" { + text2 = text2[:len(text2)-1] + } + } + + text := text1 + textDots + text2 + if hash != "" { + text += " (" + hash + ")" + } + replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) + node = node.NextSibling.NextSibling + } +} + // emojiShortCodeProcessor for rendering text like :smile: into emoji func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { start := 0 @@ -1038,8 +1091,8 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { continue } - replaceContent(node, m[2], m[3], - createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit")) + link := util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash) + replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit")) start = 0 node = node.NextSibling.NextSibling } diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go index f9ef90744b..a79b982473 100644 --- a/modules/markup/html_internal_test.go +++ b/modules/markup/html_internal_test.go @@ -15,9 +15,9 @@ import ( "github.com/stretchr/testify/assert" ) -const AppURL = "http://localhost:3000/" -const Repo = "gogits/gogs" -const AppSubURL = AppURL + Repo + "/" +const TestAppURL = "http://localhost:3000/" +const TestOrgRepo = "gogits/gogs" +const TestRepoURL = TestAppURL + TestOrgRepo + "/" // alphanumLink an HTML link to an alphanumeric-style issue func alphanumIssueLink(baseURL, class, name string) string { @@ -52,7 +52,7 @@ var alphanumericMetas = map[string]string{ "style": IssueNameStyleAlphanumeric, } -// these values should match the Repo const above +// these values should match the TestOrgRepo const above var localMetas = map[string]string{ "user": "gogits", "repo": "gogs", @@ -90,8 +90,7 @@ func TestRender_IssueIndexPattern(t *testing.T) { } func TestRender_IssueIndexPattern2(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL // numeric: render inputs with valid mentions test := func(s, expectedFmt, marker string, indices ...int) { @@ -108,7 +107,7 @@ func TestRender_IssueIndexPattern2(t *testing.T) { links := make([]interface{}, len(indices)) for i, index := range indices { - links[i] = numericIssueLink(util.URLJoin(setting.AppSubURL, path), "ref-issue", index, marker) + links[i] = numericIssueLink(util.URLJoin(TestRepoURL, path), "ref-issue", index, marker) } expectedNil := fmt.Sprintf(expectedFmt, links...) testRenderIssueIndexPattern(t, s, expectedNil, &RenderContext{Metas: localMetas}) @@ -152,8 +151,7 @@ func TestRender_IssueIndexPattern2(t *testing.T) { } func TestRender_IssueIndexPattern3(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL // alphanumeric: render inputs without valid mentions test := func(s string) { @@ -178,8 +176,7 @@ func TestRender_IssueIndexPattern3(t *testing.T) { } func TestRender_IssueIndexPattern4(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL // alphanumeric: render inputs with valid mentions test := func(s, expectedFmt string, names ...string) { @@ -197,7 +194,7 @@ func TestRender_IssueIndexPattern4(t *testing.T) { func testRenderIssueIndexPattern(t *testing.T, input, expected string, ctx *RenderContext) { if ctx.URLPrefix == "" { - ctx.URLPrefix = AppSubURL + ctx.URLPrefix = TestAppURL } var buf strings.Builder @@ -207,13 +204,12 @@ func testRenderIssueIndexPattern(t *testing.T, input, expected string, ctx *Rend } func TestRender_AutoLink(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL test := func(input, expected string) { var buffer strings.Builder err := PostProcess(&RenderContext{ - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, Metas: localMetas, }, strings.NewReader(input), &buffer) assert.Equal(t, err, nil) @@ -221,7 +217,7 @@ func TestRender_AutoLink(t *testing.T) { buffer.Reset() err = PostProcess(&RenderContext{ - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, Metas: localMetas, IsWiki: true, }, strings.NewReader(input), &buffer) @@ -230,11 +226,11 @@ func TestRender_AutoLink(t *testing.T) { } // render valid issue URLs - test(util.URLJoin(setting.AppSubURL, "issues", "3333"), - numericIssueLink(util.URLJoin(setting.AppSubURL, "issues"), "ref-issue", 3333, "#")) + test(util.URLJoin(TestRepoURL, "issues", "3333"), + numericIssueLink(util.URLJoin(TestRepoURL, "issues"), "ref-issue", 3333, "#")) // render valid commit URLs - tmp := util.URLJoin(AppSubURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae") + tmp := util.URLJoin(TestRepoURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae") test(tmp, "d8a994ef24") tmp += "#diff-2" test(tmp, "d8a994ef24 (diff-2)") @@ -245,13 +241,12 @@ func TestRender_AutoLink(t *testing.T) { } func TestRender_FullIssueURLs(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL test := func(input, expected string) { var result strings.Builder err := postProcess(&RenderContext{ - URLPrefix: AppSubURL, + URLPrefix: TestRepoURL, Metas: localMetas, }, []processor{fullIssuePatternProcessor}, strings.NewReader(input), &result) assert.NoError(t, err) diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 3eb2df00a9..3fd9ef9f83 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -25,13 +25,11 @@ var localMetas = map[string]string{ } func TestRender_Commits(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - + setting.AppURL = TestAppURL test := func(input, expected string) { buffer, err := RenderString(&RenderContext{ Filename: ".md", - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, Metas: localMetas, }, input) assert.NoError(t, err) @@ -39,15 +37,30 @@ func TestRender_Commits(t *testing.T) { } var sha = "65f1bf27bc3bf70f64657658635e66094edbcb4d" - var commit = util.URLJoin(AppSubURL, "commit", sha) - var subtree = util.URLJoin(commit, "src") - var tree = strings.ReplaceAll(subtree, "/commit/", "/tree/") + var repo = TestRepoURL + var commit = util.URLJoin(repo, "commit", sha) + var tree = util.URLJoin(repo, "tree", sha, "src") + + var file = util.URLJoin(repo, "commit", sha, "example.txt") + var fileWithExtra = file + ":" + var fileWithHash = file + "#L2" + var fileWithHasExtra = file + "#L2:" + var commitCompare = util.URLJoin(repo, "compare", sha+"..."+sha) + var commitCompareWithHash = commitCompare + "#L2" test(sha, `

65f1bf27bc

`) test(sha[:7], `

65f1bf2

`) test(sha[:39], `

65f1bf27bc

`) test(commit, `

65f1bf27bc

`) test(tree, `

65f1bf27bc/src

`) + + test(file, `

65f1bf27bc/example.txt

`) + test(fileWithExtra, `

65f1bf27bc/example.txt:

`) + test(fileWithHash, `

65f1bf27bc/example.txt (L2)

`) + test(fileWithHasExtra, `

65f1bf27bc/example.txt (L2):

`) + test(commitCompare, `

65f1bf27bc...65f1bf27bc

`) + test(commitCompareWithHash, `

65f1bf27bc...65f1bf27bc (L2)

`) + test("commit "+sha, `

commit 65f1bf27bc

`) test("/home/gitea/"+sha, "

/home/gitea/"+sha+"

") test("deadbeef", `

deadbeef

`) @@ -61,8 +74,7 @@ func TestRender_Commits(t *testing.T) { } func TestRender_CrossReferences(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL test := func(input, expected string) { buffer, err := RenderString(&RenderContext{ @@ -76,21 +88,20 @@ func TestRender_CrossReferences(t *testing.T) { test( "gogits/gogs#12345", - `

gogits/gogs#12345

`) + `

gogits/gogs#12345

`) test( "go-gitea/gitea#12345", - `

go-gitea/gitea#12345

`) + `

go-gitea/gitea#12345

`) test( "/home/gitea/go-gitea/gitea#12345", `

/home/gitea/go-gitea/gitea#12345

`) } func TestMisc_IsSameDomain(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579" - var commit = util.URLJoin(AppSubURL, "commit", sha) + var commit = util.URLJoin(TestRepoURL, "commit", sha) assert.True(t, IsSameDomain(commit)) assert.False(t, IsSameDomain("http://google.com/ncr")) @@ -98,13 +109,12 @@ func TestMisc_IsSameDomain(t *testing.T) { } func TestRender_links(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL test := func(input, expected string) { buffer, err := RenderString(&RenderContext{ Filename: "a.md", - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) @@ -198,13 +208,12 @@ func TestRender_links(t *testing.T) { } func TestRender_email(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL test := func(input, expected string) { res, err := RenderString(&RenderContext{ Filename: "a.md", - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res)) @@ -255,15 +264,14 @@ func TestRender_email(t *testing.T) { } func TestRender_emoji(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - setting.StaticURLPrefix = AppURL + setting.AppURL = TestAppURL + setting.StaticURLPrefix = TestAppURL test := func(input, expected string) { expected = strings.ReplaceAll(expected, "&", "&") buffer, err := RenderString(&RenderContext{ Filename: "a.md", - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, }, input) assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) @@ -320,9 +328,8 @@ func TestRender_emoji(t *testing.T) { } func TestRender_ShortLinks(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - tree := util.URLJoin(AppSubURL, "src", "master") + setting.AppURL = TestAppURL + tree := util.URLJoin(TestRepoURL, "src", "master") test := func(input, expected, expectedWiki string) { buffer, err := markdown.RenderString(&RenderContext{ @@ -331,7 +338,7 @@ func TestRender_ShortLinks(t *testing.T) { assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) buffer, err = markdown.RenderString(&RenderContext{ - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, Metas: localMetas, IsWiki: true, }, input) @@ -339,7 +346,7 @@ func TestRender_ShortLinks(t *testing.T) { assert.Equal(t, strings.TrimSpace(expectedWiki), strings.TrimSpace(buffer)) } - rawtree := util.URLJoin(AppSubURL, "raw", "master") + rawtree := util.URLJoin(TestRepoURL, "raw", "master") url := util.URLJoin(tree, "Link") otherURL := util.URLJoin(tree, "Other-Link") encodedURL := util.URLJoin(tree, "Link%3F") @@ -347,13 +354,13 @@ func TestRender_ShortLinks(t *testing.T) { otherImgurl := util.URLJoin(rawtree, "Link+Other.jpg") encodedImgurl := util.URLJoin(rawtree, "Link+%23.jpg") notencodedImgurl := util.URLJoin(rawtree, "some", "path", "Link+#.jpg") - urlWiki := util.URLJoin(AppSubURL, "wiki", "Link") - otherURLWiki := util.URLJoin(AppSubURL, "wiki", "Other-Link") - encodedURLWiki := util.URLJoin(AppSubURL, "wiki", "Link%3F") - imgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "Link.jpg") - otherImgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "Link+Other.jpg") - encodedImgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "Link+%23.jpg") - notencodedImgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "some", "path", "Link+#.jpg") + urlWiki := util.URLJoin(TestRepoURL, "wiki", "Link") + otherURLWiki := util.URLJoin(TestRepoURL, "wiki", "Other-Link") + encodedURLWiki := util.URLJoin(TestRepoURL, "wiki", "Link%3F") + imgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "Link.jpg") + otherImgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "Link+Other.jpg") + encodedImgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "Link+%23.jpg") + notencodedImgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "some", "path", "Link+#.jpg") favicon := "http://google.com/favicon.ico" test( @@ -427,9 +434,8 @@ func TestRender_ShortLinks(t *testing.T) { } func TestRender_RelativeImages(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL - tree := util.URLJoin(AppSubURL, "src", "master") + setting.AppURL = TestAppURL + tree := util.URLJoin(TestRepoURL, "src", "master") test := func(input, expected, expectedWiki string) { buffer, err := markdown.RenderString(&RenderContext{ @@ -439,7 +445,7 @@ func TestRender_RelativeImages(t *testing.T) { assert.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer)) buffer, err = markdown.RenderString(&RenderContext{ - URLPrefix: setting.AppSubURL, + URLPrefix: TestRepoURL, Metas: localMetas, IsWiki: true, }, input) @@ -447,8 +453,8 @@ func TestRender_RelativeImages(t *testing.T) { assert.Equal(t, strings.TrimSpace(expectedWiki), strings.TrimSpace(buffer)) } - rawwiki := util.URLJoin(AppSubURL, "wiki", "raw") - mediatree := util.URLJoin(AppSubURL, "media", "master") + rawwiki := util.URLJoin(TestRepoURL, "wiki", "raw") + mediatree := util.URLJoin(TestRepoURL, "media", "master") test( ``, @@ -462,8 +468,7 @@ func TestRender_RelativeImages(t *testing.T) { } func Test_ParseClusterFuzz(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL var localMetas = map[string]string{ "user": "go-gitea", @@ -493,8 +498,7 @@ func Test_ParseClusterFuzz(t *testing.T) { } func TestIssue16020(t *testing.T) { - setting.AppURL = AppURL - setting.AppSubURL = AppSubURL + setting.AppURL = TestAppURL var localMetas = map[string]string{ "user": "go-gitea",