Improve SHA1 link detection (#6526)

This improves the SHA1 link detection so that it no longer picks up
extraneous non-whitespace characters at the end of the URL. A trailing '.'
is a special case handled in the code itself, because Go's regexp package
does not support lookahead.

Regex test cases: https://regex101.com/r/xUMlqh/3
This commit is contained in:
silverwind 2019-04-06 20:28:45 +02:00 committed by Lauris BH
parent 0bdd81df9d
commit 2242a9f82e
2 changed files with 39 additions and 24 deletions

View file

@ -54,7 +54,7 @@ var (
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anySHA1Pattern allows to split url containing SHA into parts
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
@ -594,31 +594,46 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
if m == nil {
return
}
// take out what's relevant
urlFull := node.Data[m[0]:m[1]]
hash := node.Data[m[2]:m[3]]
text := base.ShortSha(node.Data[m[2]:m[3]])
var subtree, line string
// optional, we do them depending on the length.
if m[7] > 0 {
line = node.Data[m[6]:m[7]]
}
// 3rd capture group matches an optional path
subpath := ""
if m[5] > 0 {
subtree = node.Data[m[4]:m[5]]
subpath = node.Data[m[4]:m[5]]
}
text := base.ShortSha(hash)
if subtree != "" {
text += "/" + subtree
}
if line != "" {
text += " ("
text += line
text += ")"
// 4th capture group matches an optional url hash
hash := ""
if m[7] > 0 {
hash = node.Data[m[6]:m[7]][1:]
}
replaceContent(node, m[0], m[1], createLink(urlFull, text))
start := m[0]
end := m[1]
// If url ends in '.', it's very likely that it is not part of the
// actual url but used to finish a sentence.
if strings.HasSuffix(urlFull, ".") {
end--
urlFull = urlFull[:len(urlFull)-1]
if hash != "" {
hash = hash[:len(hash)-1]
} else if subpath != "" {
subpath = subpath[:len(subpath)-1]
}
}
if subpath != "" {
text += subpath
}
if hash != "" {
text += " (" + hash + ")"
}
replaceContent(node, start, end, createLink(urlFull, text))
}
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that

View file

@ -273,12 +273,12 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
testCases := map[string][]string{
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
"test/unit/event.js",
"L2703",
"/test/unit/event.js",
"#L2703",
},
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
"test/unit/event.js",
"/test/unit/event.js",
"",
},
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
@ -288,13 +288,13 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
},
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
"0705be475092aede1eddae01319ec931fb9c65fc",
"src",
"/src",
"",
},
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"",
"diff-2",
"#diff-2",
},
}