Convert EOL to UNIX-style to render MD properly (#8925)

* Convert EOL to UNIX-style to render MD properly

* Update modules/markup/markdown/markdown.go

Co-Authored-By: zeripath <art27@cantab.net>

* Fix lint optimization

* Check for empty content before conversion

* Update modules/util/util.go

Co-Authored-By: zeripath <art27@cantab.net>

* Improved checks and tests

* Add paragraph render test

* Improve speed even more, improve tests

* Small improvement by @gary-kim

* Fix test for DOS

* More improvements

* Restart CI
This commit is contained in:
guillep2k 2019-11-12 23:27:11 -03:00 committed by Antoine GIRARD
parent cda8de2004
commit 7b97e04555
4 changed files with 120 additions and 1 deletions

View file

@ -157,7 +157,8 @@ func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte {
exts |= blackfriday.HardLineBreak exts |= blackfriday.HardLineBreak
} }
body = blackfriday.Run(body, blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts)) // Need to normalize EOL to UNIX LF to have consistent results in rendering
body = blackfriday.Run(util.NormalizeEOL(body), blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts))
return markup.SanitizeBytes(body) return markup.SanitizeBytes(body)
} }

View file

@ -294,3 +294,25 @@ func TestTotal_RenderString(t *testing.T) {
assert.Equal(t, testCases[i+1], line) assert.Equal(t, testCases[i+1], line)
} }
} }
// TestRender_RenderParagraphs checks that RenderRaw emits the same number of
// paragraphs for a given text regardless of whether its lines are terminated
// UNIX-style (LF), classic-Mac-style (CR) or DOS-style (CRLF).
func TestRender_RenderParagraphs(t *testing.T) {
	// test renders str once per EOL convention and asserts that every result
	// contains exactly cnt occurrences of "<p".
	test := func(t *testing.T, str string, cnt int) {
		variants := []struct {
			name string
			eol  string
		}{
			{"unix", "\n"},
			{"mac", "\r"},
			{"dos", "\r\n"},
		}
		for _, v := range variants {
			input := []byte(strings.ReplaceAll(str, "\n", v.eol))
			res := string(RenderRaw(input, "", false))
			// The expected value goes first so that testify labels the two
			// numbers correctly when the assertion fails.
			assert.Equal(t, cnt, strings.Count(res, "<p"), "%s EOL, input %q", v.name, str)
		}
	}

	test(t, "\nOne\nTwo\nThree", 1)
	test(t, "\n\nOne\nTwo\nThree", 1)
	test(t, "\n\nOne\nTwo\nThree\n\n\n", 1)
	test(t, "A\n\nB\nC\n", 2)
	test(t, "A\n\n\nB\nC\n", 2)
}

View file

@ -5,6 +5,7 @@
package util package util
import ( import (
"bytes"
"strings" "strings"
) )
@ -63,3 +64,39 @@ func Min(a, b int) int {
func IsEmptyString(s string) bool { func IsEmptyString(s string) bool {
return len(strings.TrimSpace(s)) == 0 return len(strings.TrimSpace(s)) == 0
} }
// NormalizeEOL converts Windows (CRLF) and classic Mac (CR) line endings in
// input to UNIX (LF) line endings.
//
// When input contains no CR at all it is returned unchanged (the very same
// slice, no copy is made). Otherwise a newly allocated slice is returned and
// input itself is left untouched.
func NormalizeEOL(input []byte) []byte {
	cr := bytes.IndexByte(input, '\r')
	if cr < 0 {
		return input
	}

	// Each CR or CRLF collapses to a single LF, so the result can never be
	// longer than the input.
	out := make([]byte, 0, len(input))
	rest := input
	for cr >= 0 {
		// Keep everything before the CR and replace the CR itself by LF.
		out = append(out, rest[:cr]...)
		out = append(out, '\n')
		rest = rest[cr+1:]
		// An LF directly following the CR belongs to the same CRLF pair and
		// has already been accounted for.
		if len(rest) > 0 && rest[0] == '\n' {
			rest = rest[1:]
		}
		cr = bytes.IndexByte(rest, '\r')
	}
	return append(out, rest...)
}

View file

@ -5,6 +5,7 @@
package util package util
import ( import (
"strings"
"testing" "testing"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -94,3 +95,61 @@ func TestIsEmptyString(t *testing.T) {
assert.Equal(t, v.expected, IsEmptyString(v.s)) assert.Equal(t, v.expected, IsEmptyString(v.s))
} }
} }
// Test_NormalizeEOL verifies that NormalizeEOL maps DOS (CRLF) and Mac (CR)
// line endings to UNIX (LF), and that text which is already UNIX-style comes
// back with identical content.
func Test_NormalizeEOL(t *testing.T) {
	// Fixture with empty lines at the beginning, in the middle and at the end.
	blankEdges := []string{
		"",
		"This text starts with empty lines",
		"another",
		"",
		"",
		"",
		"Some other empty lines in the middle",
		"more.",
		"And more.",
		"Ends with empty lines too.",
		"",
		"",
		"",
	}
	// Fixture with empty lines in the middle only and no trailing EOL.
	noBlankEdges := []string{
		"This text does not start with empty lines",
		"another",
		"",
		"",
		"",
		"Some other empty lines in the middle",
		"more.",
		"And more.",
		"Ends without EOLtoo.",
	}

	// Joining with any of the three separators must normalize to the
	// LF-joined text.
	for _, lines := range [][]string{blankEdges, noBlankEdges} {
		want := []byte(strings.Join(lines, "\n"))
		for _, eol := range []string{"\r\n", "\r", "\n"} {
			joined := []byte(strings.Join(lines, eol))
			assert.Equal(t, want, NormalizeEOL(joined))
		}
	}

	// Inputs without any CR come back with the same content.
	for _, s := range []string{"one liner", "\n", "\ntwo liner", "two liner\n"} {
		assert.Equal(t, []byte(s), NormalizeEOL([]byte(s)))
	}
	assert.Equal(t, []byte{}, NormalizeEOL([]byte{}))

	// All three conventions mixed within one input.
	assert.Equal(t, []byte("mix\nand\nmatch\n."), NormalizeEOL([]byte("mix\r\nand\rmatch\n.")))
}