Fix markdown URL parsing (#17924)

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
wxiaoguang 2021-12-12 01:21:36 +08:00 committed by GitHub
parent 379a5241c6
commit 6d4172987e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 127 additions and 75 deletions

View file

@ -41,16 +41,21 @@ var (
// While fast, this is also incorrect and lead to false positives.
// TODO: fix invalid linking issue
// valid chars in encoded path and parameter: [-+~_%.a-zA-Z0-9/]
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
// so that abbreviated hash links can be used as well. This matches git and github useability.
// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`)
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anySHA1Pattern allows to split url containing SHA into parts
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
// anySHA1Pattern splits url containing SHA into parts
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40})(\.\.\.?)([0-9a-f]{40})?(#[-+~_%.a-zA-Z0-9]+)?`)
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
@ -65,7 +70,7 @@ var (
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
// EmojiShortCodeRegex find emoji by alias like :smile:
EmojiShortCodeRegex = regexp.MustCompile(`:[\w\+\-]+:`)
EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
)
// CSS class for action keywords (e.g. "closes: #1")
@ -152,6 +157,7 @@ type processor func(ctx *RenderContext, node *html.Node)
var defaultProcessors = []processor{
fullIssuePatternProcessor,
comparePatternProcessor,
fullSha1PatternProcessor,
shortLinkProcessor,
linkProcessor,
@ -178,6 +184,7 @@ func PostProcess(
var commitMessageProcessors = []processor{
fullIssuePatternProcessor,
comparePatternProcessor,
fullSha1PatternProcessor,
linkProcessor,
mentionProcessor,
@ -208,6 +215,7 @@ func RenderCommitMessage(
var commitMessageSubjectProcessors = []processor{
fullIssuePatternProcessor,
comparePatternProcessor,
fullSha1PatternProcessor,
linkProcessor,
mentionProcessor,
@ -920,12 +928,57 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
if hash != "" {
text += " (" + hash + ")"
}
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
node = node.NextSibling.NextSibling
}
}
func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil {
return
}
next := node.NextSibling
for node != nil && node != next {
m := comparePattern.FindStringSubmatchIndex(node.Data)
if m == nil {
return
}
urlFull := node.Data[m[0]:m[1]]
text1 := base.ShortSha(node.Data[m[2]:m[3]])
textDots := base.ShortSha(node.Data[m[4]:m[5]])
text2 := base.ShortSha(node.Data[m[6]:m[7]])
hash := ""
if m[9] > 0 {
hash = node.Data[m[8]:m[9]][1:]
}
start := m[0]
end := m[1]
// If url ends in '.', it's very likely that it is not part of the
// actual url but used to finish a sentence.
if strings.HasSuffix(urlFull, ".") {
end--
urlFull = urlFull[:len(urlFull)-1]
if hash != "" {
hash = hash[:len(hash)-1]
} else if text2 != "" {
text2 = text2[:len(text2)-1]
}
}
text := text1 + textDots + text2
if hash != "" {
text += " (" + hash + ")"
}
replaceContent(node, start, end, createCodeLink(urlFull, text, "compare"))
node = node.NextSibling.NextSibling
}
}
// emojiShortCodeProcessor for rendering text like :smile: into emoji
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
start := 0
@ -1038,8 +1091,8 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
continue
}
replaceContent(node, m[2], m[3],
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
link := util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash)
replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit"))
start = 0
node = node.NextSibling.NextSibling
}

View file

@ -15,9 +15,9 @@ import (
"github.com/stretchr/testify/assert"
)
const AppURL = "http://localhost:3000/"
const Repo = "gogits/gogs"
const AppSubURL = AppURL + Repo + "/"
const TestAppURL = "http://localhost:3000/"
const TestOrgRepo = "gogits/gogs"
const TestRepoURL = TestAppURL + TestOrgRepo + "/"
// alphanumLink an HTML link to an alphanumeric-style issue
func alphanumIssueLink(baseURL, class, name string) string {
@ -52,7 +52,7 @@ var alphanumericMetas = map[string]string{
"style": IssueNameStyleAlphanumeric,
}
// these values should match the Repo const above
// these values should match the TestOrgRepo const above
var localMetas = map[string]string{
"user": "gogits",
"repo": "gogs",
@ -90,8 +90,7 @@ func TestRender_IssueIndexPattern(t *testing.T) {
}
func TestRender_IssueIndexPattern2(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
// numeric: render inputs with valid mentions
test := func(s, expectedFmt, marker string, indices ...int) {
@ -108,7 +107,7 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
links := make([]interface{}, len(indices))
for i, index := range indices {
links[i] = numericIssueLink(util.URLJoin(setting.AppSubURL, path), "ref-issue", index, marker)
links[i] = numericIssueLink(util.URLJoin(TestRepoURL, path), "ref-issue", index, marker)
}
expectedNil := fmt.Sprintf(expectedFmt, links...)
testRenderIssueIndexPattern(t, s, expectedNil, &RenderContext{Metas: localMetas})
@ -152,8 +151,7 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
}
func TestRender_IssueIndexPattern3(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
// alphanumeric: render inputs without valid mentions
test := func(s string) {
@ -178,8 +176,7 @@ func TestRender_IssueIndexPattern3(t *testing.T) {
}
func TestRender_IssueIndexPattern4(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
// alphanumeric: render inputs with valid mentions
test := func(s, expectedFmt string, names ...string) {
@ -197,7 +194,7 @@ func TestRender_IssueIndexPattern4(t *testing.T) {
func testRenderIssueIndexPattern(t *testing.T, input, expected string, ctx *RenderContext) {
if ctx.URLPrefix == "" {
ctx.URLPrefix = AppSubURL
ctx.URLPrefix = TestAppURL
}
var buf strings.Builder
@ -207,13 +204,12 @@ func testRenderIssueIndexPattern(t *testing.T, input, expected string, ctx *Rend
}
func TestRender_AutoLink(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
test := func(input, expected string) {
var buffer strings.Builder
err := PostProcess(&RenderContext{
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
Metas: localMetas,
}, strings.NewReader(input), &buffer)
assert.Equal(t, err, nil)
@ -221,7 +217,7 @@ func TestRender_AutoLink(t *testing.T) {
buffer.Reset()
err = PostProcess(&RenderContext{
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
Metas: localMetas,
IsWiki: true,
}, strings.NewReader(input), &buffer)
@ -230,11 +226,11 @@ func TestRender_AutoLink(t *testing.T) {
}
// render valid issue URLs
test(util.URLJoin(setting.AppSubURL, "issues", "3333"),
numericIssueLink(util.URLJoin(setting.AppSubURL, "issues"), "ref-issue", 3333, "#"))
test(util.URLJoin(TestRepoURL, "issues", "3333"),
numericIssueLink(util.URLJoin(TestRepoURL, "issues"), "ref-issue", 3333, "#"))
// render valid commit URLs
tmp := util.URLJoin(AppSubURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae")
tmp := util.URLJoin(TestRepoURL, "commit", "d8a994ef243349f321568f9e36d5c3f444b99cae")
test(tmp, "<a href=\""+tmp+"\" class=\"commit\"><code class=\"nohighlight\">d8a994ef24</code></a>")
tmp += "#diff-2"
test(tmp, "<a href=\""+tmp+"\" class=\"commit\"><code class=\"nohighlight\">d8a994ef24 (diff-2)</code></a>")
@ -245,13 +241,12 @@ func TestRender_AutoLink(t *testing.T) {
}
func TestRender_FullIssueURLs(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
test := func(input, expected string) {
var result strings.Builder
err := postProcess(&RenderContext{
URLPrefix: AppSubURL,
URLPrefix: TestRepoURL,
Metas: localMetas,
}, []processor{fullIssuePatternProcessor}, strings.NewReader(input), &result)
assert.NoError(t, err)

View file

@ -25,13 +25,11 @@ var localMetas = map[string]string{
}
func TestRender_Commits(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
test := func(input, expected string) {
buffer, err := RenderString(&RenderContext{
Filename: ".md",
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
Metas: localMetas,
}, input)
assert.NoError(t, err)
@ -39,15 +37,30 @@ func TestRender_Commits(t *testing.T) {
}
var sha = "65f1bf27bc3bf70f64657658635e66094edbcb4d"
var commit = util.URLJoin(AppSubURL, "commit", sha)
var subtree = util.URLJoin(commit, "src")
var tree = strings.ReplaceAll(subtree, "/commit/", "/tree/")
var repo = TestRepoURL
var commit = util.URLJoin(repo, "commit", sha)
var tree = util.URLJoin(repo, "tree", sha, "src")
var file = util.URLJoin(repo, "commit", sha, "example.txt")
var fileWithExtra = file + ":"
var fileWithHash = file + "#L2"
var fileWithHasExtra = file + "#L2:"
var commitCompare = util.URLJoin(repo, "compare", sha+"..."+sha)
var commitCompareWithHash = commitCompare + "#L2"
test(sha, `<p><a href="`+commit+`" rel="nofollow"><code>65f1bf27bc</code></a></p>`)
test(sha[:7], `<p><a href="`+commit[:len(commit)-(40-7)]+`" rel="nofollow"><code>65f1bf2</code></a></p>`)
test(sha[:39], `<p><a href="`+commit[:len(commit)-(40-39)]+`" rel="nofollow"><code>65f1bf27bc</code></a></p>`)
test(commit, `<p><a href="`+commit+`" rel="nofollow"><code>65f1bf27bc</code></a></p>`)
test(tree, `<p><a href="`+tree+`" rel="nofollow"><code>65f1bf27bc/src</code></a></p>`)
test(file, `<p><a href="`+file+`" rel="nofollow"><code>65f1bf27bc/example.txt</code></a></p>`)
test(fileWithExtra, `<p><a href="`+file+`" rel="nofollow"><code>65f1bf27bc/example.txt</code></a>:</p>`)
test(fileWithHash, `<p><a href="`+fileWithHash+`" rel="nofollow"><code>65f1bf27bc/example.txt (L2)</code></a></p>`)
test(fileWithHasExtra, `<p><a href="`+fileWithHash+`" rel="nofollow"><code>65f1bf27bc/example.txt (L2)</code></a>:</p>`)
test(commitCompare, `<p><a href="`+commitCompare+`" rel="nofollow"><code>65f1bf27bc...65f1bf27bc</code></a></p>`)
test(commitCompareWithHash, `<p><a href="`+commitCompareWithHash+`" rel="nofollow"><code>65f1bf27bc...65f1bf27bc (L2)</code></a></p>`)
test("commit "+sha, `<p>commit <a href="`+commit+`" rel="nofollow"><code>65f1bf27bc</code></a></p>`)
test("/home/gitea/"+sha, "<p>/home/gitea/"+sha+"</p>")
test("deadbeef", `<p>deadbeef</p>`)
@ -61,8 +74,7 @@ func TestRender_Commits(t *testing.T) {
}
func TestRender_CrossReferences(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
test := func(input, expected string) {
buffer, err := RenderString(&RenderContext{
@ -76,21 +88,20 @@ func TestRender_CrossReferences(t *testing.T) {
test(
"gogits/gogs#12345",
`<p><a href="`+util.URLJoin(AppURL, "gogits", "gogs", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogits/gogs#12345</a></p>`)
`<p><a href="`+util.URLJoin(TestAppURL, "gogits", "gogs", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogits/gogs#12345</a></p>`)
test(
"go-gitea/gitea#12345",
`<p><a href="`+util.URLJoin(AppURL, "go-gitea", "gitea", "issues", "12345")+`" class="ref-issue" rel="nofollow">go-gitea/gitea#12345</a></p>`)
`<p><a href="`+util.URLJoin(TestAppURL, "go-gitea", "gitea", "issues", "12345")+`" class="ref-issue" rel="nofollow">go-gitea/gitea#12345</a></p>`)
test(
"/home/gitea/go-gitea/gitea#12345",
`<p>/home/gitea/go-gitea/gitea#12345</p>`)
}
func TestMisc_IsSameDomain(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
var sha = "b6dd6210eaebc915fd5be5579c58cce4da2e2579"
var commit = util.URLJoin(AppSubURL, "commit", sha)
var commit = util.URLJoin(TestRepoURL, "commit", sha)
assert.True(t, IsSameDomain(commit))
assert.False(t, IsSameDomain("http://google.com/ncr"))
@ -98,13 +109,12 @@ func TestMisc_IsSameDomain(t *testing.T) {
}
func TestRender_links(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
test := func(input, expected string) {
buffer, err := RenderString(&RenderContext{
Filename: "a.md",
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
}, input)
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer))
@ -198,13 +208,12 @@ func TestRender_links(t *testing.T) {
}
func TestRender_email(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
test := func(input, expected string) {
res, err := RenderString(&RenderContext{
Filename: "a.md",
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
}, input)
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res))
@ -255,15 +264,14 @@ func TestRender_email(t *testing.T) {
}
func TestRender_emoji(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.StaticURLPrefix = AppURL
setting.AppURL = TestAppURL
setting.StaticURLPrefix = TestAppURL
test := func(input, expected string) {
expected = strings.ReplaceAll(expected, "&", "&amp;")
buffer, err := RenderString(&RenderContext{
Filename: "a.md",
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
}, input)
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer))
@ -320,9 +328,8 @@ func TestRender_emoji(t *testing.T) {
}
func TestRender_ShortLinks(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
tree := util.URLJoin(AppSubURL, "src", "master")
setting.AppURL = TestAppURL
tree := util.URLJoin(TestRepoURL, "src", "master")
test := func(input, expected, expectedWiki string) {
buffer, err := markdown.RenderString(&RenderContext{
@ -331,7 +338,7 @@ func TestRender_ShortLinks(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer))
buffer, err = markdown.RenderString(&RenderContext{
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
Metas: localMetas,
IsWiki: true,
}, input)
@ -339,7 +346,7 @@ func TestRender_ShortLinks(t *testing.T) {
assert.Equal(t, strings.TrimSpace(expectedWiki), strings.TrimSpace(buffer))
}
rawtree := util.URLJoin(AppSubURL, "raw", "master")
rawtree := util.URLJoin(TestRepoURL, "raw", "master")
url := util.URLJoin(tree, "Link")
otherURL := util.URLJoin(tree, "Other-Link")
encodedURL := util.URLJoin(tree, "Link%3F")
@ -347,13 +354,13 @@ func TestRender_ShortLinks(t *testing.T) {
otherImgurl := util.URLJoin(rawtree, "Link+Other.jpg")
encodedImgurl := util.URLJoin(rawtree, "Link+%23.jpg")
notencodedImgurl := util.URLJoin(rawtree, "some", "path", "Link+#.jpg")
urlWiki := util.URLJoin(AppSubURL, "wiki", "Link")
otherURLWiki := util.URLJoin(AppSubURL, "wiki", "Other-Link")
encodedURLWiki := util.URLJoin(AppSubURL, "wiki", "Link%3F")
imgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "Link.jpg")
otherImgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "Link+Other.jpg")
encodedImgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "Link+%23.jpg")
notencodedImgurlWiki := util.URLJoin(AppSubURL, "wiki", "raw", "some", "path", "Link+#.jpg")
urlWiki := util.URLJoin(TestRepoURL, "wiki", "Link")
otherURLWiki := util.URLJoin(TestRepoURL, "wiki", "Other-Link")
encodedURLWiki := util.URLJoin(TestRepoURL, "wiki", "Link%3F")
imgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "Link.jpg")
otherImgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "Link+Other.jpg")
encodedImgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "Link+%23.jpg")
notencodedImgurlWiki := util.URLJoin(TestRepoURL, "wiki", "raw", "some", "path", "Link+#.jpg")
favicon := "http://google.com/favicon.ico"
test(
@ -427,9 +434,8 @@ func TestRender_ShortLinks(t *testing.T) {
}
func TestRender_RelativeImages(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
tree := util.URLJoin(AppSubURL, "src", "master")
setting.AppURL = TestAppURL
tree := util.URLJoin(TestRepoURL, "src", "master")
test := func(input, expected, expectedWiki string) {
buffer, err := markdown.RenderString(&RenderContext{
@ -439,7 +445,7 @@ func TestRender_RelativeImages(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(buffer))
buffer, err = markdown.RenderString(&RenderContext{
URLPrefix: setting.AppSubURL,
URLPrefix: TestRepoURL,
Metas: localMetas,
IsWiki: true,
}, input)
@ -447,8 +453,8 @@ func TestRender_RelativeImages(t *testing.T) {
assert.Equal(t, strings.TrimSpace(expectedWiki), strings.TrimSpace(buffer))
}
rawwiki := util.URLJoin(AppSubURL, "wiki", "raw")
mediatree := util.URLJoin(AppSubURL, "media", "master")
rawwiki := util.URLJoin(TestRepoURL, "wiki", "raw")
mediatree := util.URLJoin(TestRepoURL, "media", "master")
test(
`<img src="Link">`,
@ -462,8 +468,7 @@ func TestRender_RelativeImages(t *testing.T) {
}
func Test_ParseClusterFuzz(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
var localMetas = map[string]string{
"user": "go-gitea",
@ -493,8 +498,7 @@ func Test_ParseClusterFuzz(t *testing.T) {
}
func TestIssue16020(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
setting.AppURL = TestAppURL
var localMetas = map[string]string{
"user": "go-gitea",