Speed up enry.IsVendor
(#15213)
`enry.IsVendor` is rather slow because it simply iterates over every vendor regexp in turn. This PR adjusts the regexps and combines them into a single expression to make this process a little quicker. Related #15143 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
43fb4921e3
commit
ff460ca74d
6 changed files with 116 additions and 4 deletions
70
modules/analyze/vendor.go
Normal file
70
modules/analyze/vendor.go
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package analyze
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-enry/go-enry/v2/data"
|
||||||
|
)
|
||||||
|
|
||||||
|
var isVendorRegExp *regexp.Regexp
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
matchers := data.VendorMatchers
|
||||||
|
|
||||||
|
caretStrings := make([]string, 0, 10)
|
||||||
|
caretShareStrings := make([]string, 0, 10)
|
||||||
|
|
||||||
|
matcherStrings := make([]string, 0, len(matchers))
|
||||||
|
for _, matcher := range matchers {
|
||||||
|
str := matcher.String()
|
||||||
|
if str[0] == '^' {
|
||||||
|
caretStrings = append(caretStrings, str[1:])
|
||||||
|
} else if str[0:5] == "(^|/)" {
|
||||||
|
caretShareStrings = append(caretShareStrings, str[5:])
|
||||||
|
} else {
|
||||||
|
matcherStrings = append(matcherStrings, str)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(caretShareStrings)
|
||||||
|
sort.Strings(caretStrings)
|
||||||
|
sort.Strings(matcherStrings)
|
||||||
|
|
||||||
|
sb := &strings.Builder{}
|
||||||
|
sb.WriteString("(?:^(?:")
|
||||||
|
sb.WriteString(caretStrings[0])
|
||||||
|
for _, matcher := range caretStrings[1:] {
|
||||||
|
sb.WriteString(")|(?:")
|
||||||
|
sb.WriteString(matcher)
|
||||||
|
}
|
||||||
|
sb.WriteString("))")
|
||||||
|
sb.WriteString("|")
|
||||||
|
sb.WriteString("(?:(?:^|/)(?:")
|
||||||
|
sb.WriteString(caretShareStrings[0])
|
||||||
|
for _, matcher := range caretShareStrings[1:] {
|
||||||
|
sb.WriteString(")|(?:")
|
||||||
|
sb.WriteString(matcher)
|
||||||
|
}
|
||||||
|
sb.WriteString("))")
|
||||||
|
sb.WriteString("|")
|
||||||
|
sb.WriteString("(?:")
|
||||||
|
sb.WriteString(matcherStrings[0])
|
||||||
|
for _, matcher := range matcherStrings[1:] {
|
||||||
|
sb.WriteString(")|(?:")
|
||||||
|
sb.WriteString(matcher)
|
||||||
|
}
|
||||||
|
sb.WriteString(")")
|
||||||
|
combined := sb.String()
|
||||||
|
isVendorRegExp = regexp.MustCompile(combined)
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsVendor returns whether or not path is a vendor path.
|
||||||
|
func IsVendor(path string) bool {
|
||||||
|
return isVendorRegExp.MatchString(path)
|
||||||
|
}
|
42
modules/analyze/vendor_test.go
Normal file
42
modules/analyze/vendor_test.go
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package analyze
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestIsVendor(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
path string
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"cache/", true},
|
||||||
|
{"random/cache/", true},
|
||||||
|
{"cache", false},
|
||||||
|
{"dependencies/", true},
|
||||||
|
{"Dependencies/", true},
|
||||||
|
{"dependency/", false},
|
||||||
|
{"dist/", true},
|
||||||
|
{"dist", false},
|
||||||
|
{"random/dist/", true},
|
||||||
|
{"random/dist", false},
|
||||||
|
{"deps/", true},
|
||||||
|
{"configure", true},
|
||||||
|
{"a/configure", true},
|
||||||
|
{"config.guess", true},
|
||||||
|
{"config.guess/", false},
|
||||||
|
{".vscode/", true},
|
||||||
|
{"doc/_build/", true},
|
||||||
|
{"a/docs/_build/", true},
|
||||||
|
{"a/dasdocs/_build-vsdoc.js", true},
|
||||||
|
{"a/dasdocs/_build-vsdoc.j", false},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.path, func(t *testing.T) {
|
||||||
|
if got := IsVendor(tt.path); got != tt.want {
|
||||||
|
t.Errorf("IsVendor() = %v, want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -43,7 +43,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||||
|
|
||||||
sizes := make(map[string]int64)
|
sizes := make(map[string]int64)
|
||||||
err = tree.Files().ForEach(func(f *object.File) error {
|
err = tree.Files().ForEach(func(f *object.File) error {
|
||||||
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
|
if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
|
||||||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
|
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,7 +67,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
||||||
for _, f := range entries {
|
for _, f := range entries {
|
||||||
contentBuf.Reset()
|
contentBuf.Reset()
|
||||||
content = contentBuf.Bytes()
|
content = contentBuf.Bytes()
|
||||||
if f.Size() == 0 || enry.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
|
if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) ||
|
||||||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
|
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
@ -178,7 +178,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) {
|
||||||
|
|
||||||
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
|
func (b *BleveIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
|
||||||
// Ignore vendored files in code search
|
// Ignore vendored files in code search
|
||||||
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
|
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,7 +177,7 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
|
||||||
|
|
||||||
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
|
func (b *ElasticSearchIndexer) addUpdate(batchWriter *io.PipeWriter, batchReader *bufio.Reader, sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
|
||||||
// Ignore vendored files in code search
|
// Ignore vendored files in code search
|
||||||
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
|
if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Reference in a new issue