When creating line diffs do not split within an html entity (#13357) (#13375)

Backport #13357

* When creating line diffs do not split within an html entity

Fix #13342

Signed-off-by: Andrew Thornton <art27@cantab.net>

* Add test case

Signed-off-by: Andrew Thornton <art27@cantab.net>

* improve test

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: techknowlogick <techknowlogick@gitea.io>

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
zeripath 2020-10-31 19:30:23 +00:00 committed by GitHub
parent 52b4b984a5
commit 3f94dffca1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 114 additions and 0 deletions

View file

@ -290,6 +290,9 @@ func init() {
diffMatchPatch.DiffEditCost = 100 diffMatchPatch.DiffEditCost = 100
} }
var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`)
var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`)
// GetComputedInlineDiffFor computes inline diff for the given line. // GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML { func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
if setting.Git.DisableDiffHighlight { if setting.Git.DisableDiffHighlight {
@ -329,9 +332,90 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true) diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
// Now we need to clean up the split entities
diffRecord = unsplitEntities(diffRecord)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
} }
// unsplitEntities looks for broken up html entities. It relies on records being presimplified and the data being passed in being valid html
func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff {
// Unsplitting entities is simple...
//
// Iterate through all be the last records because if we're the last record then there's nothing we can do
for i := 0; i+1 < len(records); i++ {
record := &records[i]
// Look for an unterminated entity at the end of the line
unterminated := unterminatedEntityRE.FindString(record.Text)
if len(unterminated) == 0 {
continue
}
switch record.Type {
case diffmatchpatch.DiffEqual:
// If we're an diff equal we want to give this unterminated entity to our next delete and insert
record.Text = record.Text[0 : len(record.Text)-len(unterminated)]
records[i+1].Text = unterminated + records[i+1].Text
nextType := records[i+1].Type
if nextType == diffmatchpatch.DiffEqual {
continue
}
// if the next in line is a delete then we will want the thing after that to be an insert and so on.
oneAfterType := diffmatchpatch.DiffInsert
if nextType == diffmatchpatch.DiffInsert {
oneAfterType = diffmatchpatch.DiffDelete
}
if i+2 < len(records) && records[i+2].Type == oneAfterType {
records[i+2].Text = unterminated + records[i+2].Text
} else {
records = append(records[:i+2], append([]diffmatchpatch.Diff{
{
Type: oneAfterType,
Text: unterminated,
}}, records[i+2:]...)...)
}
case diffmatchpatch.DiffDelete:
fallthrough
case diffmatchpatch.DiffInsert:
// if we're an insert or delete we want to claim the terminal bit of the entity from the next equal in line
targetType := diffmatchpatch.DiffInsert
if record.Type == diffmatchpatch.DiffInsert {
targetType = diffmatchpatch.DiffDelete
}
next := &records[i+1]
if next.Type == diffmatchpatch.DiffEqual {
// if the next is an equal we need to snaffle the entity end off the start and add an delete/insert
if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 {
record.Text += terminal
next.Text = next.Text[len(terminal):]
records = append(records[:i+2], append([]diffmatchpatch.Diff{
{
Type: targetType,
Text: unterminated,
}}, records[i+2:]...)...)
}
} else if next.Type == targetType {
// if the next is an insert we need to snaffle the entity end off the one after that and add it to both.
if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual {
if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 {
record.Text += terminal
next.Text += terminal
records[i+2].Text = records[i+2].Text[len(terminal):]
}
}
}
}
}
return records
}
// DiffFile represents a file diff. // DiffFile represents a file diff.
type DiffFile struct { type DiffFile struct {
Name string Name string

View file

@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/models" "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"github.com/sergi/go-diff/diffmatchpatch"
dmp "github.com/sergi/go-diff/diffmatchpatch" dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"gopkg.in/ini.v1" "gopkg.in/ini.v1"
@ -26,6 +27,35 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
} }
} }
func TestUnsplitEntities(t *testing.T) {
left := "sh &#34;useradd -u 111 jenkins&#34;"
right := "sh &#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;"
diffRecord := diffMatchPatch.DiffMain(left, right, true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
// Now we need to clean up the split entities
diffRecord = unsplitEntities(diffRecord)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
leftRecombined := ""
rightRecombined := ""
for _, record := range diffRecord {
assert.False(t, unterminatedEntityRE.MatchString(record.Text), "")
switch record.Type {
case diffmatchpatch.DiffDelete:
leftRecombined += record.Text
case diffmatchpatch.DiffInsert:
rightRecombined += record.Text
default:
leftRecombined += record.Text
rightRecombined += record.Text
}
}
assert.EqualValues(t, left, leftRecombined)
assert.EqualValues(t, right, rightRecombined)
}
func TestDiffToHTML(t *testing.T) { func TestDiffToHTML(t *testing.T) {
setting.Cfg = ini.Empty() setting.Cfg = ini.Empty()
assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{ assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{