diff --git a/modules/emoji/emoji.go b/modules/emoji/emoji.go index 2a51e61fc..e4b9e5631 100644 --- a/modules/emoji/emoji.go +++ b/modules/emoji/emoji.go @@ -6,8 +6,10 @@ package emoji import ( + "sort" "strings" "sync" + "unicode/utf8" ) // Gemoji is a set of emoji data. @@ -48,6 +50,12 @@ func loadMap() { // process emoji codes and aliases codePairs := make([]string, 0) aliasPairs := make([]string, 0) + + // sort from largest to small so we match combined emoji first + sort.Slice(GemojiData, func(i, j int) bool { + return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji) + }) + for i, e := range GemojiData { if e.Emoji == "" || len(e.Aliases) == 0 { continue @@ -72,6 +80,7 @@ func loadMap() { codeReplacer = strings.NewReplacer(codePairs...) aliasReplacer = strings.NewReplacer(aliasPairs...) }) + } // FromCode retrieves the emoji data based on the provided unicode code (ie, @@ -117,3 +126,21 @@ func ReplaceAliases(s string) string { loadMap() return aliasReplacer.Replace(s) } + +// FindEmojiSubmatchIndex returns index pair of longest emoji in a string +func FindEmojiSubmatchIndex(s string) []int { + loadMap() + + // if rune and string length are the same then no emoji will be present + // similar performance when there is unicode present but almost 200% faster when not + if utf8.RuneCountInString(s) == len(s) { + return nil + } + for j := range GemojiData { + i := strings.Index(s, GemojiData[j].Emoji) + if i != -1 { + return []int{i, i + len(GemojiData[j].Emoji)} + } + } + return nil +} diff --git a/modules/markup/html.go b/modules/markup/html.go index 8fbfee6a5..41248654d 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -65,10 +65,6 @@ var ( // EmojiShortCodeRegex find emoji by alias like :smile: EmojiShortCodeRegex = regexp.MustCompile(`\:[\w\+\-]+\:{1}`) - - // find emoji literal: search all emoji hex range as many times as they appear as - // some emojis (skin color etc..) are just two or more chained together - emojiRegex = regexp.MustCompile(`[\x{1F000}-\x{1FFFF}|\x{2000}-\x{32ff}|\x{fe4e5}-\x{fe4ee}|\x{200D}|\x{FE0F}|\x{e0000}-\x{e007f}]+`) ) // CSS class for action keywords (e.g. "closes: #1") @@ -922,8 +918,7 @@ func emojiShortCodeProcessor(ctx *postProcessCtx, node *html.Node) { // emoji processor to match emoji and add emoji class func emojiProcessor(ctx *postProcessCtx, node *html.Node) { - m := emojiRegex.FindStringSubmatchIndex(node.Data) - + m := emoji.FindEmojiSubmatchIndex(node.Data) if m == nil { return } diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 65d2d327d..686057d11 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -263,7 +263,9 @@ func TestRender_emoji(t *testing.T) { test( "Some text with :smile: in the middle", `
Some text with 😄 in the middle
`) - + test( + "Some text with 😄😄 2 emoji next to each other", + `Some text with 😄😄 2 emoji next to each other
`) // should match nothing test( "2001:0db8:85a3:0000:0000:8a2e:0370:7334",