Run processors on whole of text (#16155)
There is an inefficiency in the design of our processors which means that the emoji and other processors run in O(n^2) time. This PR forces each processor to process the entirety of a text node before passing control back up. The fundamental inefficiency remains, but it should be significantly ameliorated. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
6ad5d0a306
commit
0db1048c3a
3 changed files with 414 additions and 316 deletions
|
@ -6,6 +6,7 @@
|
||||||
package emoji
|
package emoji
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
|
||||||
if n.writecount == 2 {
|
if n.writecount == 2 {
|
||||||
n.idx = n.pos
|
n.idx = n.pos
|
||||||
n.end = n.pos + len(p)
|
n.end = n.pos + len(p)
|
||||||
|
n.pos += len(p)
|
||||||
|
return len(p), io.EOF
|
||||||
}
|
}
|
||||||
n.pos += len(p)
|
n.pos += len(p)
|
||||||
return len(p), nil
|
return len(p), nil
|
||||||
|
@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
|
||||||
if n.writecount == 2 {
|
if n.writecount == 2 {
|
||||||
n.idx = n.pos
|
n.idx = n.pos
|
||||||
n.end = n.pos + len(s)
|
n.end = n.pos + len(s)
|
||||||
|
n.pos += len(s)
|
||||||
|
return len(s), io.EOF
|
||||||
}
|
}
|
||||||
n.pos += len(s)
|
n.pos += len(s)
|
||||||
return len(s), nil
|
return len(s), nil
|
||||||
|
|
|
@ -89,6 +89,7 @@ func isLinkStr(link string) bool {
|
||||||
return validLinksPattern.MatchString(link)
|
return validLinksPattern.MatchString(link)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: This function is not concurrent safe
|
||||||
func getIssueFullPattern() *regexp.Regexp {
|
func getIssueFullPattern() *regexp.Regexp {
|
||||||
if issueFullPattern == nil {
|
if issueFullPattern == nil {
|
||||||
issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
|
issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
|
||||||
|
@ -566,11 +567,16 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func mentionProcessor(ctx *RenderContext, node *html.Node) {
|
func mentionProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
|
start := 0
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next && start < len(node.Data) {
|
||||||
// We replace only the first mention; other mentions will be addressed later
|
// We replace only the first mention; other mentions will be addressed later
|
||||||
found, loc := references.FindFirstMentionBytes([]byte(node.Data))
|
found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
|
||||||
if !found {
|
if !found {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
loc.Start += start
|
||||||
|
loc.End += start
|
||||||
mention := node.Data[loc.Start:loc.End]
|
mention := node.Data[loc.Start:loc.End]
|
||||||
var teams string
|
var teams string
|
||||||
teams, ok := ctx.Metas["teams"]
|
teams, ok := ctx.Metas["teams"]
|
||||||
|
@ -582,10 +588,17 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
mentionOrgAndTeam := strings.Split(mention, "/")
|
mentionOrgAndTeam := strings.Split(mention, "/")
|
||||||
if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
|
if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
|
||||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
|
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
start = 0
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
return
|
start = loc.End
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
|
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
start = 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
|
@ -593,6 +606,8 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
|
m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
|
@ -672,7 +687,7 @@ func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
||||||
switch ext := filepath.Ext(link); ext {
|
switch ext := filepath.Ext(link); ext {
|
||||||
// fast path: empty string, ignore
|
// fast path: empty string, ignore
|
||||||
case "":
|
case "":
|
||||||
break
|
// leave image as false
|
||||||
case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
|
case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
|
||||||
image = true
|
image = true
|
||||||
}
|
}
|
||||||
|
@ -748,12 +763,17 @@ func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
|
||||||
linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
|
linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
|
||||||
}
|
}
|
||||||
replaceContent(node, m[0], m[1], linkNode)
|
replaceContent(node, m[0], m[1], linkNode)
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
|
func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
if ctx.Metas == nil {
|
if ctx.Metas == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
|
m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
|
@ -771,23 +791,25 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
// TODO if m[4]:m[5] is not nil, then link is to a comment,
|
// TODO if m[4]:m[5] is not nil, then link is to a comment,
|
||||||
// and we should indicate that in the text somehow
|
// and we should indicate that in the text somehow
|
||||||
replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
|
replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
orgRepoID := matchOrg + "/" + matchRepo + id
|
orgRepoID := matchOrg + "/" + matchRepo + id
|
||||||
replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
|
replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
|
||||||
}
|
}
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
if ctx.Metas == nil {
|
if ctx.Metas == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
found bool
|
found bool
|
||||||
ref *references.RenderizableReference
|
ref *references.RenderizableReference
|
||||||
)
|
)
|
||||||
|
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
_, exttrack := ctx.Metas["format"]
|
_, exttrack := ctx.Metas["format"]
|
||||||
alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
|
alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
|
||||||
|
|
||||||
|
@ -828,7 +850,8 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
|
|
||||||
if ref.Action == references.XRefActionNone {
|
if ref.Action == references.XRefActionNone {
|
||||||
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
|
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
|
||||||
return
|
node = node.NextSibling.NextSibling
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decorate action keywords if actionable
|
// Decorate action keywords if actionable
|
||||||
|
@ -846,6 +869,8 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
|
Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
|
||||||
}
|
}
|
||||||
replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
|
replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
|
||||||
|
node = node.NextSibling.NextSibling.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// fullSha1PatternProcessor renders SHA containing URLs
|
// fullSha1PatternProcessor renders SHA containing URLs
|
||||||
|
@ -853,6 +878,9 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
if ctx.Metas == nil {
|
if ctx.Metas == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
|
m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
|
@ -897,14 +925,23 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
}
|
}
|
||||||
|
|
||||||
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
|
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// emojiShortCodeProcessor for rendering text like :smile: into emoji
|
// emojiShortCodeProcessor for rendering text like :smile: into emoji
|
||||||
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data)
|
start := 0
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next && start < len(node.Data) {
|
||||||
|
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
m[0] += start
|
||||||
|
m[1] += start
|
||||||
|
|
||||||
|
start = m[1]
|
||||||
|
|
||||||
alias := node.Data[m[0]:m[1]]
|
alias := node.Data[m[0]:m[1]]
|
||||||
alias = strings.ReplaceAll(alias, ":", "")
|
alias = strings.ReplaceAll(alias, ":", "")
|
||||||
|
@ -914,25 +951,39 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
s := strings.Join(setting.UI.Reactions, " ") + "gitea"
|
s := strings.Join(setting.UI.Reactions, " ") + "gitea"
|
||||||
if strings.Contains(s, alias) {
|
if strings.Contains(s, alias) {
|
||||||
replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
|
replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
|
||||||
return
|
node = node.NextSibling.NextSibling
|
||||||
|
start = 0
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
return
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
|
replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
start = 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// emoji processor to match emoji and add emoji class
|
// emoji processor to match emoji and add emoji class
|
||||||
func emojiProcessor(ctx *RenderContext, node *html.Node) {
|
func emojiProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
m := emoji.FindEmojiSubmatchIndex(node.Data)
|
start := 0
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next && start < len(node.Data) {
|
||||||
|
m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
m[0] += start
|
||||||
|
m[1] += start
|
||||||
|
|
||||||
codepoint := node.Data[m[0]:m[1]]
|
codepoint := node.Data[m[0]:m[1]]
|
||||||
|
start = m[1]
|
||||||
val := emoji.FromCode(codepoint)
|
val := emoji.FromCode(codepoint)
|
||||||
if val != nil {
|
if val != nil {
|
||||||
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
|
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
start = 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -942,10 +993,17 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
|
if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
|
|
||||||
|
start := 0
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next && start < len(node.Data) {
|
||||||
|
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
m[2] += start
|
||||||
|
m[3] += start
|
||||||
|
|
||||||
hash := node.Data[m[2]:m[3]]
|
hash := node.Data[m[2]:m[3]]
|
||||||
// The regex does not lie, it matches the hash pattern.
|
// The regex does not lie, it matches the hash pattern.
|
||||||
// However, a regex cannot know if a hash actually exists or not.
|
// However, a regex cannot know if a hash actually exists or not.
|
||||||
|
@ -959,32 +1017,46 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
|
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
|
||||||
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
|
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
|
||||||
}
|
}
|
||||||
return
|
start = m[3]
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
replaceContent(node, m[2], m[3],
|
replaceContent(node, m[2], m[3],
|
||||||
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
|
createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
|
||||||
|
start = 0
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// emailAddressProcessor replaces raw email addresses with a mailto: link.
|
// emailAddressProcessor replaces raw email addresses with a mailto: link.
|
||||||
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
|
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
m := emailRegex.FindStringSubmatchIndex(node.Data)
|
m := emailRegex.FindStringSubmatchIndex(node.Data)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
mail := node.Data[m[2]:m[3]]
|
mail := node.Data[m[2]:m[3]]
|
||||||
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
|
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
|
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
|
||||||
// markdown.
|
// markdown.
|
||||||
func linkProcessor(ctx *RenderContext, node *html.Node) {
|
func linkProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
uri := node.Data[m[0]:m[1]]
|
uri := node.Data[m[0]:m[1]]
|
||||||
replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
|
replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func genDefaultLinkProcessor(defaultLink string) processor {
|
func genDefaultLinkProcessor(defaultLink string) processor {
|
||||||
|
@ -1008,12 +1080,17 @@ func genDefaultLinkProcessor(defaultLink string) processor {
|
||||||
|
|
||||||
// descriptionLinkProcessor creates links for DescriptionHTML
|
// descriptionLinkProcessor creates links for DescriptionHTML
|
||||||
func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
|
func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
|
next := node.NextSibling
|
||||||
|
for node != nil && node != next {
|
||||||
m := common.LinkRegex.FindStringIndex(node.Data)
|
m := common.LinkRegex.FindStringIndex(node.Data)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
uri := node.Data[m[0]:m[1]]
|
uri := node.Data[m[0]:m[1]]
|
||||||
replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
|
replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
|
||||||
|
node = node.NextSibling.NextSibling
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func createDescriptionLink(href, content string) *html.Node {
|
func createDescriptionLink(href, content string) *html.Node {
|
||||||
|
|
|
@ -464,3 +464,19 @@ func TestIssue16020(t *testing.T) {
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, data, res.String())
|
assert.Equal(t, data, res.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkEmojiPostprocess(b *testing.B) {
|
||||||
|
data := "🥰 "
|
||||||
|
for len(data) < 1<<16 {
|
||||||
|
data += data
|
||||||
|
}
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
var res strings.Builder
|
||||||
|
err := PostProcess(&RenderContext{
|
||||||
|
URLPrefix: "https://example.com",
|
||||||
|
Metas: localMetas,
|
||||||
|
}, strings.NewReader(data), &res)
|
||||||
|
assert.NoError(b, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Reference in a new issue