Fix a panic in NotifyCreateIssueComment (caused by string truncation) (#17928)

* Fix a panic in NotifyCreateIssueComment (caused by string truncation) * more unit tests * refactor * fix some edge cases * use SplitStringAtByteN for comment content
2021-12-09 13:41:17 +08:00 · 2021-12-09 13:41:17 +08:00 · c7e23401a3
commit c7e23401a3
parent 183175263d
4 changed files with 104 additions and 17 deletions
--- a/models/repo_archiver.go
+++ b/models/repo_archiver.go
@ -6,7 +6,6 @@ package models
 import (
 	"code.gitea.io/gitea/models/db"
 	repo_model "code.gitea.io/gitea/models/repo"
 )
--- a/modules/notification/action/action.go
+++ b/modules/notification/action/action.go
@ -15,6 +15,7 @@ import (
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/notification/base"
 	"code.gitea.io/gitea/modules/repository"
 	"code.gitea.io/gitea/modules/util"
 )
 type actionNotifier struct {
@ -100,14 +101,15 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m
 		IsPrivate: issue.Repo.IsPrivate,
 	}
-	content := ""
+	truncatedContent, truncatedRight := util.SplitStringAtByteN(comment.Content, 200)
-
+	if truncatedRight != "" {
-	if len(comment.Content) > 200 {
+		// in case the content is in a Latin family language, we remove the last broken word.
-		content = comment.Content[:strings.LastIndex(comment.Content[0:200], " ")] + "…"
+		lastSpaceIdx := strings.LastIndex(truncatedContent, " ")
-	} else {
+		if lastSpaceIdx != -1 && (len(truncatedContent)-lastSpaceIdx < 15) {
-		content = comment.Content
+			truncatedContent = truncatedContent[:lastSpaceIdx] + "…"
 		}
-	act.Content = fmt.Sprintf("%d|%s", issue.Index, content)
+	}
 	act.Content = fmt.Sprintf("%d|%s", issue.Index, truncatedContent)
 	if issue.IsPull {
 		act.OpType = models.ActionCommentPull
--- a/modules/util/truncate.go
+++ b/modules/util/truncate.go
@ -6,20 +6,23 @@ package util
 import "unicode/utf8"
 // in UTF8 "…" is 3 bytes so doesn't really gain us anything...
 const utf8Ellipsis = "…"
 const asciiEllipsis = "..."
 // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.)
 func SplitStringAtByteN(input string, n int) (left, right string) {
 	if len(input) <= n {
-		left = input
+		return input, ""
 		return
 	}
 	if !utf8.ValidString(input) {
-		left = input[:n-3] + "..."
+		if n-3 < 0 {
-		right = "..." + input[n-3:]
+			return input, ""
-		return
+		}
 		return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
 	}
 	// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
 	end := 0
 	for end <= n-3 {
 		_, size := utf8.DecodeRuneInString(input[end:])
@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) {
 		end += size
 	}
-	left = input[:end] + "…"
+	return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
-	right = "…" + input[end:]
+}
-	return
+
 // SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.)
 func SplitStringAtRuneN(input string, n int) (left, right string) {
 	if !utf8.ValidString(input) {
 		if len(input) <= n || n-3 < 0 {
 			return input, ""
 		}
 		return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
 	}
 	if utf8.RuneCountInString(input) <= n {
 		return input, ""
 	}
 	count := 0
 	end := 0
 	for count < n-1 {
 		_, size := utf8.DecodeRuneInString(input[end:])
 		end += size
 		count++
 	}
 	return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
 }
--- a/modules/util/truncate_test.go
+++ b/modules/util/truncate_test.go
@ -0,0 +1,61 @@
 // Copyright 2021 The Gitea Authors. All rights reserved.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.
 package util
 import (
 	"testing"
 	"github.com/stretchr/testify/assert"
 )
 func TestSplitString(t *testing.T) {
 	type testCase struct {
 		input    string
 		n        int
 		leftSub  string
 		ellipsis string
 	}
 	test := func(tc []*testCase, f func(input string, n int) (left, right string)) {
 		for _, c := range tc {
 			l, r := f(c.input, c.n)
 			if c.ellipsis != "" {
 				assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
 				assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):])
 			} else {
 				assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
 				assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "")
 			}
 		}
 	}
 	tc := []*testCase{
 		{"abc123xyz", 0, "", utf8Ellipsis},
 		{"abc123xyz", 1, "", utf8Ellipsis},
 		{"abc123xyz", 4, "a", utf8Ellipsis},
 		{"啊bc123xyz", 4, "", utf8Ellipsis},
 		{"啊bc123xyz", 6, "啊", utf8Ellipsis},
 		{"啊bc", 5, "啊bc", ""},
 		{"啊bc", 6, "啊bc", ""},
 		{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
 		{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
 		{"\xef\x03", 1, "\xef\x03", ""},
 	}
 	test(tc, SplitStringAtByteN)
 	tc = []*testCase{
 		{"abc123xyz", 0, "", utf8Ellipsis},
 		{"abc123xyz", 1, "", utf8Ellipsis},
 		{"abc123xyz", 4, "abc", utf8Ellipsis},
 		{"啊bc123xyz", 4, "啊bc", utf8Ellipsis},
 		{"啊bc123xyz", 6, "啊bc12", utf8Ellipsis},
 		{"啊bc", 3, "啊bc", ""},
 		{"啊bc", 4, "啊bc", ""},
 		{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
 		{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
 		{"\xef\x03", 1, "\xef\x03", ""},
 	}
 	test(tc, SplitStringAtRuneN)
 }