Fix panic bug in handling multiple references in commit (#13486)

* Fix panic bug in handling multiple references in commit The issue lay in determining the position of matches on a second run round a commit message in FindAllIssueReferences. Fix #13483 Signed-off-by: Andrew Thornton <art27@cantab.net> * Extract function and make testable Signed-off-by: Andrew Thornton <art27@cantab.net> * Fix the comment Signed-off-by: Andrew Thornton <art27@cantab.net> * cleaning up the comments a bit more Signed-off-by: Andrew Thornton <art27@cantab.net>
2020-11-09 22:57:47 +00:00 · 2020-11-09 22:57:47 +00:00 · 77e5081a2e
commit 77e5081a2e
parent ffa712e783
2 changed files with 95 additions and 29 deletions
--- a/modules/references/references.go
+++ b/modules/references/references.go
@ -235,40 +235,78 @@ func findAllIssueReferencesMarkdown(content string) []*rawReference {
 	return findAllIssueReferencesBytes(bcontent, links)
 }

+func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
+	// We will iterate through the content, rewrite and simplify full references.
+	//
+	// We want to transform something like:
+	//
+	// this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
+	// https://ourgitea.com/git/owner/repo/pulls/123456789
+	//
+	// Into something like:
+	//
+	// this is a #123456789, foo
+	// !123456789
+
+	pos := 0
+	for {
+		// re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
+		match := re.FindSubmatchIndex((*contentBytes)[pos:])
+		if match == nil {
+			break
+		}
+		// match is a bunch of indices into the content from pos onwards so
+		// to simplify things let's just add pos to all of the indices in match
+		for i := range match {
+			match[i] += pos
+		}
+
+		// match[0]-match[1] is whole string
+		// match[2]-match[3] is preamble
+
+		// move the position to the end of the preamble
+		pos = match[3]
+
+		// match[4]-match[5] is owner/repo
+		// now copy the owner/repo to end of the preamble
+		endPos := pos + match[5] - match[4]
+		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])
+
+		// move the current position to the end of the newly copied owner/repo
+		pos = endPos
+
+		// Now set the issue/pull marker:
+		//
+		// match[6]-match[7] == 'issues'
+		(*contentBytes)[pos] = '#'
+		if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
+			(*contentBytes)[pos] = '!'
+		}
+		pos++
+
+		// Then add the issue/pull number
+		//
+		// match[8]-match[9] is the number
+		endPos = pos + match[9] - match[8]
+		copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])
+
+		// Now copy what's left at the end of the string to the new end position
+		copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
+		// now we reset the length
+
+		// our new section has length endPos - match[3]
+		// our old section has length match[9] - match[3]
+		(*contentBytes) = (*contentBytes)[:len((*contentBytes))-match[9]+endPos]
+		pos = endPos
+	}
+}
+
 // FindAllIssueReferences returns a list of unvalidated references found in a string.
 func FindAllIssueReferences(content string) []IssueReference {
 	// Need to convert fully qualified html references to local system to #/! short codes
 	contentBytes := []byte(content)
 	if re := getGiteaIssuePullPattern(); re != nil {
-		pos := 0
-		for {
-			match := re.FindSubmatchIndex(contentBytes[pos:])
-			if match == nil {
-				break
-			}
-			// match[0]-match[1] is whole string
-			// match[2]-match[3] is preamble
-			pos += match[3]
-			// match[4]-match[5] is owner/repo
-			endPos := pos + match[5] - match[4]
-			copy(contentBytes[pos:endPos], contentBytes[match[4]:match[5]])
-			pos = endPos
-			// match[6]-match[7] == 'issues'
-			contentBytes[pos] = '#'
-			if string(contentBytes[match[6]:match[7]]) == "pulls" {
-				contentBytes[pos] = '!'
-			}
-			pos++
-			// match[8]-match[9] is the number
-			endPos = pos + match[9] - match[8]
-			copy(contentBytes[pos:endPos], contentBytes[match[8]:match[9]])
-			copy(contentBytes[endPos:], contentBytes[match[9]:])
-			// now we reset the length
-			// our new section has length endPos - match[3]
-			// our old section has length match[9] - match[3]
-			contentBytes = contentBytes[:len(contentBytes)-match[9]+endPos]
-			pos = endPos
-		}
+		convertFullHTMLReferencesToShortRefs(re, &contentBytes)
 	} else {
 		log.Debug("No GiteaIssuePullPattern pattern")
 	}
--- a/modules/references/references_test.go
+++ b/modules/references/references_test.go
@ -5,6 +5,7 @@
 package references

 import (
+	"regexp"
 	"testing"

 	"code.gitea.io/gitea/modules/setting"
@ -29,6 +30,26 @@ type testResult struct {
 	TimeLog        string
 }

+func TestConvertFullHTMLReferencesToShortRefs(t *testing.T) {
+	re := regexp.MustCompile(`(\s|^|\(|\[)` +
+		regexp.QuoteMeta("https://ourgitea.com/git/") +
+		`([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
+		`((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
+	test := `this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
+https://ourgitea.com/git/owner/repo/pulls/123456789
+  And https://ourgitea.com/git/owner/repo/pulls/123
+`
+	expect := `this is a owner/repo#123456789, foo
+owner/repo!123456789
+  And owner/repo!123
+`
+
+	contentBytes := []byte(test)
+	convertFullHTMLReferencesToShortRefs(re, &contentBytes)
+	result := string(contentBytes)
+	assert.EqualValues(t, expect, result)
+}
+
 func TestFindAllIssueReferences(t *testing.T) {

 	fixtures := []testFixture{
@ -106,6 +127,13 @@ func TestFindAllIssueReferences(t *testing.T) {
 				{202, "user4", "repo5", "202", true, XRefActionNone, nil, nil, ""},
 			},
 		},
+		{
+			"This http://gitea.com:3000/user4/repo5/pulls/202 yes. http://gitea.com:3000/user4/repo5/pulls/203 no",
+			[]testResult{
+				{202, "user4", "repo5", "202", true, XRefActionNone, nil, nil, ""},
+				{203, "user4", "repo5", "203", true, XRefActionNone, nil, nil, ""},
+			},
+		},
 		{
 			"This http://GiTeA.COM:3000/user4/repo6/pulls/205 yes.",
 			[]testResult{