diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
index acd3bebb9f..6832207c0f 100644
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@@ -40,9 +40,11 @@ var (
// NewContext loads custom highlight map from local config
func NewContext() {
once.Do(func() {
- keys := setting.Cfg.Section("highlight.mapping").Keys()
- for i := range keys {
- highlightMapping[keys[i].Name()] = keys[i].Value()
+ if setting.Cfg != nil {
+ keys := setting.Cfg.Section("highlight.mapping").Keys()
+ for i := range keys {
+ highlightMapping[keys[i].Name()] = keys[i].Value()
+ }
}
// The size 512 is simply a conservative rule of thumb
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index 6e8c149dab..6dd237bbc8 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -15,7 +15,6 @@ import (
"io"
"net/url"
"os"
- "regexp"
"sort"
"strings"
"time"
@@ -40,7 +39,7 @@ import (
"golang.org/x/text/transform"
)
-// DiffLineType represents the type of a DiffLine.
+// DiffLineType represents the type of DiffLine.
type DiffLineType uint8
// DiffLineType possible values.
@@ -51,7 +50,7 @@ const (
DiffLineSection
)
-// DiffFileType represents the type of a DiffFile.
+// DiffFileType represents the type of DiffFile.
type DiffFileType uint8
// DiffFileType possible values.
@@ -100,12 +99,12 @@ type DiffLineSectionInfo struct {
// BlobExcerptChunkSize represent max lines of excerpt
const BlobExcerptChunkSize = 20
-// GetType returns the type of a DiffLine.
+// GetType returns the type of DiffLine.
func (d *DiffLine) GetType() int {
return int(d.Type)
}
-// CanComment returns whether or not a line can get commented
+// CanComment returns whether a line can get commented
func (d *DiffLine) CanComment() bool {
return len(d.Comments) == 0 && d.Type != DiffLineSection
}
@@ -191,287 +190,13 @@ var (
codeTagSuffix = []byte(``)
)
-var (
- unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
- trailingSpanRegex = regexp.MustCompile(`]?$`)
- entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
-)
-
-// shouldWriteInline represents combinations where we manually write inline changes
-func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
- if true &&
- diff.Type == diffmatchpatch.DiffEqual ||
- diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd ||
- diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel {
- return true
- }
- return false
-}
-
-func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
- // Create a new array to store our fixed up blocks
- fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))
-
- // semantically label some numbers
- const insert, delete, equal = 0, 1, 2
-
- // record the positions of the last type of each block in the fixedup blocks
- last := []int{-1, -1, -1}
- operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}
-
- // create a writer for insert and deletes
- toWrite := []strings.Builder{
- {},
- {},
- }
-
- // make some flags for insert and delete
- unfinishedTag := []bool{false, false}
- unfinishedEnt := []bool{false, false}
-
- // store stores the provided text in the writer for the typ
- store := func(text string, typ int) {
- (&(toWrite[typ])).WriteString(text)
- }
-
- // hasStored returns true if there is stored content
- hasStored := func(typ int) bool {
- return (&toWrite[typ]).Len() > 0
- }
-
- // stored will return that content
- stored := func(typ int) string {
- return (&toWrite[typ]).String()
- }
-
- // empty will empty the stored content
- empty := func(typ int) {
- (&toWrite[typ]).Reset()
- }
-
- // pop will remove the stored content appending to a diff block for that typ
- pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
- if hasStored(typ) {
- if last[typ] > last[equal] {
- fixedup[last[typ]].Text += stored(typ)
- } else {
- fixedup = append(fixedup, diffmatchpatch.Diff{
- Type: operation[typ],
- Text: stored(typ),
- })
- }
- empty(typ)
- }
- return fixedup
- }
-
- // Now we walk the provided diffs and check the type of each block in turn
- for _, diff := range diffs {
-
- typ := delete // flag for handling insert or delete typs
- switch diff.Type {
- case diffmatchpatch.DiffEqual:
- // First check if there is anything stored
- if hasStored(insert) || hasStored(delete) {
- // There are two reasons for storing content:
- // 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
- if unfinishedEnt[insert] || unfinishedEnt[delete] {
- // we look for a ';' to finish an entity
- idx := strings.IndexRune(diff.Text, ';')
- if idx >= 0 {
- // if we find a ';' store the preceding content to both insert and delete
- store(diff.Text[:idx+1], insert)
- store(diff.Text[:idx+1], delete)
-
- // and remove it from this block
- diff.Text = diff.Text[idx+1:]
-
- // reset the ent flags
- unfinishedEnt[insert] = false
- unfinishedEnt[delete] = false
- } else {
- // otherwise store it all on insert and delete
- store(diff.Text, insert)
- store(diff.Text, delete)
- // and empty this block
- diff.Text = ""
- }
- }
- // 2. Unfinished Tag
- if unfinishedTag[insert] || unfinishedTag[delete] {
- // we look for a '>' to finish a tag
- idx := strings.IndexRune(diff.Text, '>')
- if idx >= 0 {
- store(diff.Text[:idx+1], insert)
- store(diff.Text[:idx+1], delete)
- diff.Text = diff.Text[idx+1:]
- unfinishedTag[insert] = false
- unfinishedTag[delete] = false
- } else {
- store(diff.Text, insert)
- store(diff.Text, delete)
- diff.Text = ""
- }
- }
-
- // If we've completed the required tag/entities
- if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
- // pop off the stack
- fixedup = pop(insert, fixedup)
- fixedup = pop(delete, fixedup)
- }
-
- // If that has left this diff block empty then shortcut
- if len(diff.Text) == 0 {
- continue
- }
- }
-
- // check if this block ends in an unfinished tag?
- idx := unfinishedtagRegex.FindStringIndex(diff.Text)
- if idx != nil {
- unfinishedTag[insert] = true
- unfinishedTag[delete] = true
- } else {
- // otherwise does it end in an unfinished entity?
- idx = entityRegex.FindStringIndex(diff.Text)
- if idx != nil {
- unfinishedEnt[insert] = true
- unfinishedEnt[delete] = true
- }
- }
-
- // If there is an unfinished component
- if idx != nil {
- // Store the fragment
- store(diff.Text[idx[0]:], insert)
- store(diff.Text[idx[0]:], delete)
- // and remove it from this block
- diff.Text = diff.Text[:idx[0]]
- }
-
- // If that hasn't left the block empty
- if len(diff.Text) > 0 {
- // store the position of the last equal block and store it in our diffs
- last[equal] = len(fixedup)
- fixedup = append(fixedup, diff)
- }
- continue
- case diffmatchpatch.DiffInsert:
- typ = insert
- fallthrough
- case diffmatchpatch.DiffDelete:
- // First check if there is anything stored for this type
- if hasStored(typ) {
- // if there is prepend it to this block, empty the storage and reset our flags
- diff.Text = stored(typ) + diff.Text
- empty(typ)
- unfinishedEnt[typ] = false
- unfinishedTag[typ] = false
- }
-
- // check if this block ends in an unfinished tag
- idx := unfinishedtagRegex.FindStringIndex(diff.Text)
- if idx != nil {
- unfinishedTag[typ] = true
- } else {
- // otherwise does it end in an unfinished entity
- idx = entityRegex.FindStringIndex(diff.Text)
- if idx != nil {
- unfinishedEnt[typ] = true
- }
- }
-
- // If there is an unfinished component
- if idx != nil {
- // Store the fragment
- store(diff.Text[idx[0]:], typ)
- // and remove it from this block
- diff.Text = diff.Text[:idx[0]]
- }
-
- // If that hasn't left the block empty
- if len(diff.Text) > 0 {
- // if the last block of this type was after the last equal block
- if last[typ] > last[equal] {
- // store this blocks content on that block
- fixedup[last[typ]].Text += diff.Text
- } else {
- // otherwise store the position of the last block of this type and store the block
- last[typ] = len(fixedup)
- fixedup = append(fixedup, diff)
- }
- }
- continue
- }
- }
-
- // pop off any remaining stored content
- fixedup = pop(insert, fixedup)
- fixedup = pop(delete, fixedup)
-
- return fixedup
-}
-
-func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline {
+func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
buf := bytes.NewBuffer(nil)
- match := ""
-
- diffs = fixupBrokenSpans(diffs)
-
+ // restore the line wrapper tags and , if necessary
+ for _, tag := range lineWrapperTags {
+ buf.WriteString(tag)
+ }
for _, diff := range diffs {
- if shouldWriteInline(diff, lineType) {
- if len(match) > 0 {
- diff.Text = match + diff.Text
- match = ""
- }
- // Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
- // Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
- // before writing so we don't try insert added/removed code spans in the middle of one of those
- // and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
- // a line ending with an incomplete HTML entity or partial/opening .
-
- // EX:
- // diffs[{Type: dmp.DiffDelete, Text: "language}]
-
- // After first iteration
- // diffs[{Type: dmp.DiffDelete, Text: "language"}, //write out
- // {Type: dmp.DiffEqual, Text: ",}]
-
- // After second iteration
- // {Type: dmp.DiffEqual, Text: ""}, // write out
- // {Type: dmp.DiffDelete, Text: ",}]
-
- // Final
- // {Type: dmp.DiffDelete, Text: ",}]
- // end up writing ,
- // Instead of lass="p",
-
- m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
- if m != nil {
- match = diff.Text[m[0]:m[1]]
- diff.Text = strings.TrimSuffix(diff.Text, match)
- }
- m = entityRegex.FindStringSubmatchIndex(diff.Text)
- if m != nil {
- match = diff.Text[m[0]:m[1]]
- diff.Text = strings.TrimSuffix(diff.Text, match)
- }
- // Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
- if strings.HasPrefix(diff.Text, "") {
- buf.WriteString("")
- diff.Text = strings.TrimPrefix(diff.Text, "")
- }
- // If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
- // The previous/next diff section will contain the rest of the tag that is missing here
- if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") {
- buf.WriteString(diff.Text)
- continue
- }
- }
switch {
case diff.Type == diffmatchpatch.DiffEqual:
buf.WriteString(diff.Text)
@@ -485,7 +210,10 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT
buf.Write(codeTagSuffix)
}
}
- return DiffInlineWithUnicodeEscape(template.HTML(buf.String()))
+ for range lineWrapperTags {
+ buf.WriteString("")
+ }
+ return buf.String()
}
// GetLine gets a specific line by type (add or del) and file line number
@@ -597,10 +325,12 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content)
}
- diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
- diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
-
- return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
+ hcd := newHighlightCodeDiff()
+ diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
+ // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
+ // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
+ diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
+ return DiffInlineWithUnicodeEscape(template.HTML(diffHTML))
}
// DiffFile represents a file diff.
@@ -1289,7 +1019,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
if char == '"' {
fmt.Fscanf(rd, "%q ", &name)
if len(name) == 0 {
- log.Error("Reader has no file name: %v", rd)
+ log.Error("Reader has no file name: reader=%+v", rd)
return "", true
}
@@ -1311,7 +1041,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
}
}
if len(name) < 2 {
- log.Error("Unable to determine name from reader: %v", rd)
+ log.Error("Unable to determine name from reader: reader=%+v", rd)
return "", true
}
return name[2:], ambiguity
diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go
index caca0e91d8..e88d831759 100644
--- a/services/gitdiff/gitdiff_test.go
+++ b/services/gitdiff/gitdiff_test.go
@@ -7,7 +7,6 @@ package gitdiff
import (
"fmt"
- "html/template"
"strconv"
"strings"
"testing"
@@ -17,93 +16,27 @@ import (
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
- "code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/setting"
dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
- "gopkg.in/ini.v1"
)
-func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
- if s1 != string(s2) {
- t.Errorf("Did not receive expected results:\nExpected: %s\nActual: %s", s1, s2)
- }
-}
-
func TestDiffToHTML(t *testing.T) {
- setting.Cfg = ini.Empty()
- assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{
+ assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{
{Type: dmp.DiffEqual, Text: "foo "},
{Type: dmp.DiffInsert, Text: "bar"},
{Type: dmp.DiffDelete, Text: " baz"},
{Type: dmp.DiffEqual, Text: " biz"},
- }, DiffLineAdd).Content)
+ }, DiffLineAdd))
- assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{
+ assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{
{Type: dmp.DiffEqual, Text: "foo "},
{Type: dmp.DiffDelete, Text: "bar"},
{Type: dmp.DiffInsert, Text: " baz"},
{Type: dmp.DiffEqual, Text: " biz"},
- }, DiffLineDel).Content)
-
- assertEqual(t, "if !nohl && (lexer != nil || r.GuessLanguage) {", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffEqual, Text: "if !nohl && (lexer != nil"},
- {Type: dmp.DiffInsert, Text: " || r.GuessLanguage)"},
- {Type: dmp.DiffEqual, Text: " {"},
- }, DiffLineAdd).Content)
-
- assertEqual(t, "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\", ge.BaseURL, ge.Owner, ge.Repo, from, milestoneID, time.Now().Format("2006-01-02"))", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffEqual, Text: "tagURL := fmt.Sprintf("## [%s](%s/%s/%s/%s?q=&type=all&state=closed&milestone=%d) - %s", ge.Milestone\""},
- {Type: dmp.DiffInsert, Text: "f\">getGiteaTagURL(client"},
- {Type: dmp.DiffEqual, Text: ", ge.BaseURL, ge.Owner, ge.Repo, "},
- {Type: dmp.DiffDelete, Text: "from, milestoneID, time.Now().Format("2006-01-02")"},
- {Type: dmp.DiffInsert, Text: "ge.Milestone, from, milestoneID"},
- {Type: dmp.DiffEqual, Text: ")"},
- }, DiffLineDel).Content)
-
- assertEqual(t, "r.WrapperRenderer(w, language, true, attrs, false)", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffEqual, Text: "r.WrapperRenderer(w, "},
- {Type: dmp.DiffDelete, Text: "language, true, attrs"},
- {Type: dmp.DiffEqual, Text: ", false)"},
- }, DiffLineDel).Content)
-
- assertEqual(t, "language, true, attrs, false)", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffInsert, Text: "language, true, attrs"},
- {Type: dmp.DiffEqual, Text: ", false)"},
- }, DiffLineAdd).Content)
-
- assertEqual(t, "print("// ", sys.argv)", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffEqual, Text: "print"},
- {Type: dmp.DiffInsert, Text: "("},
- {Type: dmp.DiffEqual, Text: ""// ", sys.argv"},
- {Type: dmp.DiffInsert, Text: ")"},
- }, DiffLineAdd).Content)
-
- assertEqual(t, "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffEqual, Text: "sh "},
- {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins""},
- {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c "%u" .gitignore) jenkins'"},
- {Type: dmp.DiffEqual, Text: ";"},
- }, DiffLineAdd).Content)
-
- assertEqual(t, " <h4 class="release-list-title df ac">", diffToHTML("", []dmp.Diff{
- {Type: dmp.DiffEqual, Text: " <h"},
- {Type: dmp.DiffInsert, Text: "4 class="},
- {Type: dmp.DiffEqual, Text: "3"},
- {Type: dmp.DiffInsert, Text: "4;release-list-title df ac""},
- {Type: dmp.DiffEqual, Text: ">"},
- }, DiffLineAdd).Content)
+ }, DiffLineDel))
}
func TestParsePatch_skipTo(t *testing.T) {
@@ -592,7 +525,6 @@ index 0000000..6bb8f39
if err != nil {
t.Errorf("ParsePatch failed: %s", err)
}
- println(result)
diff2 := `diff --git "a/A \\ B" "b/A \\ B"
--- "a/A \\ B"
@@ -712,18 +644,6 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
}
}
-func TestDiffToHTML_14231(t *testing.T) {
- setting.Cfg = ini.Empty()
- diffRecord := diffMatchPatch.DiffMain(highlight.Code("main.v", "", " run()\n"), highlight.Code("main.v", "", " run(db)\n"), true)
- diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
-
- expected := ` run(db)
-`
- output := diffToHTML("main.v", diffRecord, DiffLineAdd)
-
- assertEqual(t, expected, output.Content)
-}
-
func TestNoCrashes(t *testing.T) {
type testcase struct {
gitdiff string
diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go
new file mode 100644
index 0000000000..4ceada4d7e
--- /dev/null
+++ b/services/gitdiff/highlightdiff.go
@@ -0,0 +1,223 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gitdiff
+
+import (
+ "strings"
+
+ "code.gitea.io/gitea/modules/highlight"
+
+ "github.com/sergi/go-diff/diffmatchpatch"
+)
+
+// token is a html tag or entity, eg: "", "", "<"
+func extractHTMLToken(s string) (before, token, after string, valid bool) {
+ for pos1 := 0; pos1 < len(s); pos1++ {
+ if s[pos1] == '<' {
+ pos2 := strings.IndexByte(s[pos1:], '>')
+ if pos2 == -1 {
+ return "", "", s, false
+ }
+ return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true
+ } else if s[pos1] == '&' {
+ pos2 := strings.IndexByte(s[pos1:], ';')
+ if pos2 == -1 {
+ return "", "", s, false
+ }
+ return s[:pos1], s[pos1 : pos1+pos2+1], s[pos1+pos2+1:], true
+ }
+ }
+ return "", "", s, true
+}
+
+// highlightCodeDiff is used to do diff with highlighted HTML code.
+// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes.
+// The HTML tags and entities will be replaced by Unicode placeholders: "{TEXT}" => "\uE000{TEXT}\uE001"
+// These Unicode placeholders are friendly to the diff.
+// Then after diff, the placeholders in diff result will be recovered to the HTML tags and entities.
+// It's guaranteed that the tags in final diff result are paired correctly.
+type highlightCodeDiff struct {
+ placeholderBegin rune
+ placeholderMaxCount int
+ placeholderIndex int
+ placeholderTokenMap map[rune]string
+ tokenPlaceholderMap map[string]rune
+
+ placeholderOverflowCount int
+
+ lineWrapperTags []string
+}
+
+func newHighlightCodeDiff() *highlightCodeDiff {
+ return &highlightCodeDiff{
+ placeholderBegin: rune(0x100000), // Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD)
+ placeholderMaxCount: 64000,
+ placeholderTokenMap: map[rune]string{},
+ tokenPlaceholderMap: map[string]rune{},
+ }
+}
+
+// nextPlaceholder returns 0 if no more placeholder can be used
+// the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line
+// so the placeholderMaxCount is impossible to be exhausted in real cases.
+func (hcd *highlightCodeDiff) nextPlaceholder() rune {
+ for hcd.placeholderIndex < hcd.placeholderMaxCount {
+ r := hcd.placeholderBegin + rune(hcd.placeholderIndex)
+ hcd.placeholderIndex++
+ // only use non-existing (not used by code) rune as placeholders
+ if _, ok := hcd.placeholderTokenMap[r]; !ok {
+ return r
+ }
+ }
+ return 0 // no more available placeholder
+}
+
+func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool {
+ return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount)
+}
+
+func (hcd *highlightCodeDiff) collectUsedRunes(code string) {
+ for _, r := range code {
+ if hcd.isInPlaceholderRange(r) {
+ // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore.
+ hcd.placeholderTokenMap[r] = ""
+ }
+ }
+}
+
+func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff {
+ hcd.collectUsedRunes(codeA)
+ hcd.collectUsedRunes(codeB)
+
+ highlightCodeA := highlight.Code(filename, language, codeA)
+ highlightCodeB := highlight.Code(filename, language, codeB)
+
+ highlightCodeA = hcd.convertToPlaceholders(highlightCodeA)
+ highlightCodeB = hcd.convertToPlaceholders(highlightCodeB)
+
+ diffs := diffMatchPatch.DiffMain(highlightCodeA, highlightCodeB, true)
+ diffs = diffMatchPatch.DiffCleanupEfficiency(diffs)
+
+ for i := range diffs {
+ hcd.recoverOneDiff(&diffs[i])
+ }
+ return diffs
+}
+
+// convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes.
+func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
+ var tagStack []string
+ res := strings.Builder{}
+
+ firstRunForLineTags := hcd.lineWrapperTags == nil
+
+ var beforeToken, token string
+ var valid bool
+
+ // the standard chroma highlight HTML is " ... "
+ for {
+ beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode)
+ if !valid || token == "" {
+ break
+ }
+ // write the content before the token into result string, and consume the token in the string
+ res.WriteString(beforeToken)
+
+ // the line wrapper tags should be removed before diff
+ if strings.HasPrefix(token, `")
+ continue
+ }
+
+ var tokenInMap string
+ if strings.HasSuffix(token, "") { // for closing tag
+ if len(tagStack) == 0 {
+ break // invalid diff result, no opening tag but see closing tag
+ }
+ // make sure the closing tag in map is related to the open tag, to make the diff algorithm can match the opening/closing tags
+ // the closing tag will be recorded in the map by key "" for ""
+ tokenInMap = token + ""
+ tagStack = tagStack[:len(tagStack)-1]
+ } else if token[0] == '<' { // for opening tag
+ tokenInMap = token
+ tagStack = append(tagStack, token)
+ } else if token[0] == '&' { // for html entity
+ tokenInMap = token
+ } // else: impossible
+
+ // remember the placeholder and token in the map
+ placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap]
+ if !ok {
+ placeholder = hcd.nextPlaceholder()
+ if placeholder != 0 {
+ hcd.tokenPlaceholderMap[tokenInMap] = placeholder
+ hcd.placeholderTokenMap[placeholder] = tokenInMap
+ }
+ }
+
+ if placeholder != 0 {
+ res.WriteRune(placeholder) // use the placeholder to replace the token
+ } else {
+ // unfortunately, all private use runes has been exhausted, no more placeholder could be used, no more converting
+ // usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma.
+ hcd.placeholderOverflowCount++
+ if strings.HasPrefix(token, "&") {
+ // when the token is a html entity, something must be outputted even if there is no placeholder.
+ res.WriteRune(0xFFFD) // replacement character TODO: how to handle this case more gracefully?
+ res.WriteString(token[1:]) // still output the entity code part, otherwise there will be no diff result.
+ }
+ }
+ }
+
+ // write the remaining string
+ res.WriteString(htmlCode)
+ return res.String()
+}
+
+func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
+ sb := strings.Builder{}
+ var tagStack []string
+
+ for _, r := range diff.Text {
+ token, ok := hcd.placeholderTokenMap[r]
+ if !ok || token == "" {
+ sb.WriteRune(r) // if the rune is not a placeholder, write it as it is
+ continue
+ }
+ var tokenToRecover string
+ if strings.HasPrefix(token, "") { // for closing tag
+ // only get the tag itself, ignore the trailing comment (for how the comment is generated, see the code in `convert` function)
+ tokenToRecover = token[:strings.IndexByte(token, '>')+1]
+ if len(tagStack) == 0 {
+ continue // if no opening tag in stack yet, skip the closing tag
+ }
+ tagStack = tagStack[:len(tagStack)-1]
+ } else if token[0] == '<' { // for opening tag
+ tokenToRecover = token
+ tagStack = append(tagStack, token)
+ } else if token[0] == '&' { // for html entity
+ tokenToRecover = token
+ } // else: impossible
+ sb.WriteString(tokenToRecover)
+ }
+
+ if len(tagStack) > 0 {
+ // close all opening tags
+ for i := len(tagStack) - 1; i >= 0; i-- {
+ tagToClose := tagStack[i]
+ // get the closing tag "" from "" or ""
+ pos := strings.IndexAny(tagToClose, " >")
+ if pos != -1 {
+ sb.WriteString("" + tagToClose[1:pos] + ">")
+ } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag
+ }
+ }
+
+ diff.Text = sb.String()
+}
diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go
new file mode 100644
index 0000000000..1cd78bc942
--- /dev/null
+++ b/services/gitdiff/highlightdiff_test.go
@@ -0,0 +1,126 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gitdiff
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/sergi/go-diff/diffmatchpatch"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestDiffWithHighlight(t *testing.T) {
+ hcd := newHighlightCodeDiff()
+ diffs := hcd.diffWithHighlight(
+ "main.v", "",
+ " run('<>')\n",
+ " run(db)\n",
+ )
+
+ expected := ` run('<>')` + "\n"
+ output := diffToHTML(nil, diffs, DiffLineDel)
+ assert.Equal(t, expected, output)
+
+ expected = ` run(db)` + "\n"
+ output = diffToHTML(nil, diffs, DiffLineAdd)
+ assert.Equal(t, expected, output)
+
+ hcd = newHighlightCodeDiff()
+ hcd.placeholderTokenMap['O'] = ""
+ hcd.placeholderTokenMap['C'] = ""
+ diff := diffmatchpatch.Diff{}
+
+ diff.Text = "OC"
+ hcd.recoverOneDiff(&diff)
+ assert.Equal(t, "", diff.Text)
+
+ diff.Text = "O"
+ hcd.recoverOneDiff(&diff)
+ assert.Equal(t, "", diff.Text)
+
+ diff.Text = "C"
+ hcd.recoverOneDiff(&diff)
+ assert.Equal(t, "", diff.Text)
+}
+
+func TestDiffWithHighlightPlaceholder(t *testing.T) {
+ hcd := newHighlightCodeDiff()
+ diffs := hcd.diffWithHighlight(
+ "main.js", "",
+ "a='\U00100000'",
+ "a='\U0010FFFD''",
+ )
+ assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000])
+ assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD])
+
+ expected := fmt.Sprintf(`a='%s'`, "\U00100000")
+ output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel)
+ assert.Equal(t, expected, output)
+
+ hcd = newHighlightCodeDiff()
+ diffs = hcd.diffWithHighlight(
+ "main.js", "",
+ "a='\U00100000'",
+ "a='\U0010FFFD'",
+ )
+ expected = fmt.Sprintf(`a='%s'`, "\U0010FFFD")
+ output = diffToHTML(nil, diffs, DiffLineAdd)
+ assert.Equal(t, expected, output)
+}
+
+func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) {
+ hcd := newHighlightCodeDiff()
+ hcd.placeholderMaxCount = 0
+ diffs := hcd.diffWithHighlight(
+ "main.js", "",
+ "'",
+ ``,
+ )
+ output := diffToHTML(nil, diffs, DiffLineDel)
+ expected := fmt.Sprintf(`%s#39;`, "\uFFFD")
+ assert.Equal(t, expected, output)
+
+ hcd = newHighlightCodeDiff()
+ hcd.placeholderMaxCount = 0
+ diffs = hcd.diffWithHighlight(
+ "main.js", "",
+ "a < b",
+ "a > b",
+ )
+ output = diffToHTML(nil, diffs, DiffLineDel)
+ expected = fmt.Sprintf(`a %slt; b`, "\uFFFD")
+ assert.Equal(t, expected, output)
+
+ output = diffToHTML(nil, diffs, DiffLineAdd)
+ expected = fmt.Sprintf(`a %sgt; b`, "\uFFFD")
+ assert.Equal(t, expected, output)
+}
+
+func TestDiffWithHighlightTagMatch(t *testing.T) {
+ totalOverflow := 0
+ for i := 0; i < 100; i++ {
+ hcd := newHighlightCodeDiff()
+ hcd.placeholderMaxCount = i
+ diffs := hcd.diffWithHighlight(
+ "main.js", "",
+ "a='1'",
+ "b='2'",
+ )
+ totalOverflow += hcd.placeholderOverflowCount
+
+ output := diffToHTML(nil, diffs, DiffLineDel)
+ c1 := strings.Count(output, "