[PERFORMANCE] git check-attr on bare repo if supported

This commit is contained in:
oliverpool 2024-03-24 12:44:30 +01:00
parent d5a3f14063
commit 3c81f7478c
17 changed files with 450 additions and 386 deletions

View file

@ -36,6 +36,7 @@ var (
SupportProcReceive bool // >= 2.29 SupportProcReceive bool // >= 2.29
SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an experimental curiosity SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an experimental curiosity
InvertedGitFlushEnv bool // 2.43.1 InvertedGitFlushEnv bool // 2.43.1
SupportCheckAttrOnBare bool // >= 2.40
gitVersion *version.Version gitVersion *version.Version
) )
@ -187,6 +188,7 @@ func InitFull(ctx context.Context) (err error) {
} }
SupportProcReceive = CheckGitVersionAtLeast("2.29") == nil SupportProcReceive = CheckGitVersionAtLeast("2.29") == nil
SupportHashSha256 = CheckGitVersionAtLeast("2.42") == nil && !isGogit SupportHashSha256 = CheckGitVersionAtLeast("2.42") == nil && !isGogit
SupportCheckAttrOnBare = CheckGitVersionAtLeast("2.40") == nil
if SupportHashSha256 { if SupportHashSha256 {
SupportedObjectFormats = append(SupportedObjectFormats, Sha256ObjectFormat) SupportedObjectFormats = append(SupportedObjectFormats, Sha256ObjectFormat)
} else { } else {

View file

@ -7,65 +7,147 @@ import (
"bytes" "bytes"
"context" "context"
"fmt" "fmt"
"io"
"os" "os"
"strings"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/optional"
) )
// CheckAttributeOpts represents the possible options to CheckAttribute var LinguistAttributes = []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"}
type CheckAttributeOpts struct {
CachedOnly bool // GitAttribute exposes an attribute from the .gitattribute file
AllAttributes bool type GitAttribute string //nolint:revive
Attributes []string
Filenames []string // IsSpecified returns true if the gitattribute is set and not empty
IndexFile string func (ca GitAttribute) IsSpecified() bool {
WorkTree string return ca != "" && ca != "unspecified"
} }
// CheckAttribute return the Blame object of file // String returns the value of the attribute or "" if unspecified
func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) { func (ca GitAttribute) String() string {
env := []string{} if !ca.IsSpecified() {
return ""
if len(opts.IndexFile) > 0 {
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
} }
if len(opts.WorkTree) > 0 { return string(ca)
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
} }
if len(env) > 0 { // Prefix returns the value of the attribute before any question mark '?'
env = append(os.Environ(), env...) //
// sometimes used within gitlab-language: https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
func (ca GitAttribute) Prefix() string {
s := ca.String()
if i := strings.IndexByte(s, '?'); i >= 0 {
return s[:i]
}
return s
} }
stdOut := new(bytes.Buffer) // Bool returns true if "set"/"true", false if "unset"/"false", none otherwise
stdErr := new(bytes.Buffer) func (ca GitAttribute) Bool() optional.Option[bool] {
switch ca {
cmd := NewCommand(repo.Ctx, "check-attr", "-z") case "set", "true":
return optional.Some(true)
if opts.AllAttributes { case "unset", "false":
cmd.AddArguments("-a") return optional.Some(false)
} else {
for _, attribute := range opts.Attributes {
if attribute != "" {
cmd.AddDynamicArguments(attribute)
} }
return optional.None[bool]()
}
// GitAttributeFirst queries all the given attributes for filename and returns
// the value of the first one, in argument order, that is specified.
// An empty GitAttribute is returned when none of them is specified.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeFirst(treeish, filename string, attributes ...string) (GitAttribute, error) {
	found, err := repo.GitAttributes(treeish, filename, attributes...)
	if err != nil {
		return "", err
	}

	// Walk the attributes in the order requested by the caller, not in map order.
	for i := range attributes {
		candidate := found[attributes[i]]
		if !candidate.IsSpecified() {
			continue
		}
		return candidate, nil
	}
	return "", nil
}
func (repo *Repository) gitCheckAttrCommand(treeish string, attributes ...string) (*Command, *RunOpts, context.CancelFunc, error) {
if len(attributes) == 0 {
return nil, nil, nil, fmt.Errorf("no provided attributes to check-attr")
}
env := os.Environ()
var deleteTemporaryFile context.CancelFunc
// git < 2.40 cannot run check-attr on bare repo, but needs INDEX + WORK_TREE
hasIndex := treeish == ""
if !hasIndex && !SupportCheckAttrOnBare {
indexFilename, worktree, cancel, err := repo.ReadTreeToTemporaryIndex(treeish)
if err != nil {
return nil, nil, nil, err
}
deleteTemporaryFile = cancel
env = append(env, "GIT_INDEX_FILE="+indexFilename, "GIT_WORK_TREE="+worktree)
hasIndex = true
// clear treeish to read from provided index/work_tree
treeish = ""
}
ctx, cancel := context.WithCancel(repo.Ctx)
if deleteTemporaryFile != nil {
ctxCancel := cancel
cancel = func() {
ctxCancel()
deleteTemporaryFile()
} }
} }
if opts.CachedOnly { cmd := NewCommand(ctx, "check-attr", "-z")
if hasIndex {
cmd.AddArguments("--cached") cmd.AddArguments("--cached")
} }
cmd.AddDashesAndList(opts.Filenames...) if len(treeish) > 0 {
cmd.AddArguments("--source")
cmd.AddDynamicArguments(treeish)
}
cmd.AddDynamicArguments(attributes...)
if err := cmd.Run(&RunOpts{ // Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped.
// Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com
if InvertedGitFlushEnv {
env = append(env, "GIT_FLUSH=0")
} else {
env = append(env, "GIT_FLUSH=1")
}
return cmd, &RunOpts{
Env: env, Env: env,
Dir: repo.Path, Dir: repo.Path,
Stdout: stdOut, }, cancel, nil
Stderr: stdErr, }
}); err != nil {
// GitAttributes returns the value of each requested attribute for filename, keyed by attribute name.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributes(treeish, filename string, attributes ...string) (map[string]GitAttribute, error) {
cmd, runOpts, cancel, err := repo.gitCheckAttrCommand(treeish, attributes...)
if err != nil {
return nil, err
}
defer cancel()
stdOut := new(bytes.Buffer)
runOpts.Stdout = stdOut
stdErr := new(bytes.Buffer)
runOpts.Stderr = stdErr
cmd.AddDashesAndList(filename)
if err := cmd.Run(runOpts); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String()) return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String())
} }
@ -76,155 +158,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return nil, fmt.Errorf("wrong number of fields in return from check-attr") return nil, fmt.Errorf("wrong number of fields in return from check-attr")
} }
name2attribute2info := make(map[string]map[string]string) values := make(map[string]GitAttribute, len(attributes))
for ; len(fields) >= 3; fields = fields[3:] {
for i := 0; i < (len(fields) / 3); i++ { // filename := string(fields[0])
filename := string(fields[3*i]) attribute := string(fields[1])
attribute := string(fields[3*i+1]) value := string(fields[2])
info := string(fields[3*i+2]) values[attribute] = GitAttribute(value)
attribute2info := name2attribute2info[filename]
if attribute2info == nil {
attribute2info = make(map[string]string)
} }
attribute2info[attribute] = info return values, nil
name2attribute2info[filename] = attribute2info
}
return name2attribute2info, nil
}
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Attributes []string
Repo *Repository
IndexFile string
WorkTree string
stdinReader io.ReadCloser
stdinWriter *os.File
stdOut attributeWriter
cmd *Command
env []string
ctx context.Context
cancel context.CancelFunc
}
// Init initializes the CheckAttributeReader
func (c *CheckAttributeReader) Init(ctx context.Context) error {
if len(c.Attributes) == 0 {
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple)
lw.closed = make(chan struct{})
c.stdOut = lw
c.stdOut.Close()
return fmt.Errorf("no provided Attributes to check")
}
c.ctx, c.cancel = context.WithCancel(ctx)
c.cmd = NewCommand(c.ctx, "check-attr", "--stdin", "-z")
if len(c.IndexFile) > 0 {
c.cmd.AddArguments("--cached")
c.env = append(c.env, "GIT_INDEX_FILE="+c.IndexFile)
}
if len(c.WorkTree) > 0 {
c.env = append(c.env, "GIT_WORK_TREE="+c.WorkTree)
}
// Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped.
// Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com
if InvertedGitFlushEnv {
c.env = append(c.env, "GIT_FLUSH=0")
} else {
c.env = append(c.env, "GIT_FLUSH=1")
}
c.cmd.AddDynamicArguments(c.Attributes...)
var err error
c.stdinReader, c.stdinWriter, err = os.Pipe()
if err != nil {
c.cancel()
return err
}
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, 5)
lw.closed = make(chan struct{})
c.stdOut = lw
return nil
}
// Run run cmd
func (c *CheckAttributeReader) Run() error {
defer func() {
_ = c.stdinReader.Close()
_ = c.stdOut.Close()
}()
stdErr := new(bytes.Buffer)
err := c.cmd.Run(&RunOpts{
Env: c.env,
Dir: c.Repo.Path,
Stdin: c.stdinReader,
Stdout: c.stdOut,
Stderr: stdErr,
})
if err != nil && // If there is an error we need to return but:
c.ctx.Err() != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded)
err.Error() != "signal: killed" { // 2. We should not pass up errors due to the program being killed
return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
}
return nil
}
// CheckPath check attr for given path
func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err error) {
defer func() {
if err != nil && err != c.ctx.Err() {
log.Error("Unexpected error when checking path %s in %s. Error: %v", path, c.Repo.Path, err)
}
}()
select {
case <-c.ctx.Done():
return nil, c.ctx.Err()
default:
}
if _, err = c.stdinWriter.Write([]byte(path + "\x00")); err != nil {
defer c.Close()
return nil, err
}
rs = make(map[string]string)
for range c.Attributes {
select {
case attr, ok := <-c.stdOut.ReadAttribute():
if !ok {
return nil, c.ctx.Err()
}
rs[attr.Attribute] = attr.Value
case <-c.ctx.Done():
return nil, c.ctx.Err()
}
}
return rs, nil
}
// Close close pip after use
func (c *CheckAttributeReader) Close() error {
c.cancel()
err := c.stdinWriter.Close()
return err
}
type attributeWriter interface {
io.WriteCloser
ReadAttribute() <-chan attributeTriple
} }
type attributeTriple struct { type attributeTriple struct {
@ -275,10 +216,6 @@ func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) {
return len(p), nil return len(p), nil
} }
func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple {
return wr.attributes
}
func (wr *nulSeparatedAttributeWriter) Close() error { func (wr *nulSeparatedAttributeWriter) Close() error {
select { select {
case <-wr.closed: case <-wr.closed:
@ -290,49 +227,87 @@ func (wr *nulSeparatedAttributeWriter) Close() error {
return nil return nil
} }
// Create a check attribute reader for the current repository and provided commit ID // GitAttributeChecker creates an AttributeChecker for the given repository and provided commit ID.
func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeReader, context.CancelFunc) { //
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) // If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeChecker(treeish string, attributes ...string) (AttributeChecker, error) {
cmd, runOpts, cancel, err := repo.gitCheckAttrCommand(treeish, attributes...)
if err != nil { if err != nil {
return nil, func() {} return AttributeChecker{}, err
} }
checker := &CheckAttributeReader{ ac := AttributeChecker{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"}, attributeNumber: len(attributes),
Repo: repo, ctx: cmd.parentContext,
IndexFile: indexFilename, cancel: cancel, // will be cancelled on Close
WorkTree: worktree,
} }
ctx, cancel := context.WithCancel(repo.Ctx)
if err := checker.Init(ctx); err != nil { stdinReader, stdinWriter, err := os.Pipe()
log.Error("Unable to open checker for %s. Error: %v", commitID, err) if err != nil {
} else { ac.cancel()
return AttributeChecker{}, err
}
ac.stdinWriter = stdinWriter // will be closed on Close
lw := new(nulSeparatedAttributeWriter)
lw.attributes = make(chan attributeTriple, len(attributes))
lw.closed = make(chan struct{})
ac.attributesCh = lw.attributes
cmd.AddArguments("--stdin")
go func() { go func() {
err := checker.Run() defer stdinReader.Close()
if err != nil && err != ctx.Err() { defer lw.Close()
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
stdErr := new(bytes.Buffer)
runOpts.Stdin = stdinReader
runOpts.Stdout = lw
runOpts.Stderr = stdErr
err := cmd.Run(runOpts)
if err != nil && // If there is an error we need to return but:
cmd.parentContext.Err() != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded)
err.Error() != "signal: killed" { // 2. We should not pass up errors due to the program being killed
log.Error("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String())
} }
cancel()
}() }()
}
deferable := func() { return ac, nil
_ = checker.Close()
cancel()
deleteTemporaryFile()
} }
return checker, deferable type AttributeChecker struct {
ctx context.Context
cancel context.CancelFunc
stdinWriter *os.File
attributeNumber int
attributesCh <-chan attributeTriple
} }
// true if "set"/"true", false if "unset"/"false", none otherwise func (ac AttributeChecker) CheckPath(path string) (map[string]GitAttribute, error) {
func attributeToBool(attr map[string]string, name string) optional.Option[bool] { if err := ac.ctx.Err(); err != nil {
if value, has := attr[name]; has && value != "unspecified" { return nil, err
switch value { }
case "set", "true":
return optional.Some(true) if _, err := ac.stdinWriter.Write([]byte(path + "\x00")); err != nil {
case "unset", "false": return nil, err
return optional.Some(false) }
rs := make(map[string]GitAttribute)
for i := 0; i < ac.attributeNumber; i++ {
select {
case attr, ok := <-ac.attributesCh:
if !ok {
return nil, ac.ctx.Err()
}
rs[attr.Attribute] = GitAttribute(attr.Value)
case <-ac.ctx.Done():
return nil, ac.ctx.Err()
} }
} }
return optional.None[bool]() return rs, nil
}
func (ac AttributeChecker) Close() error {
ac.cancel()
return ac.stdinWriter.Close()
} }

View file

@ -4,10 +4,14 @@
package git package git
import ( import (
"path/filepath"
"testing" "testing"
"time" "time"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
@ -22,7 +26,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
assert.Len(t, testStr, n) assert.Len(t, testStr, n)
assert.NoError(t, err) assert.NoError(t, err)
select { select {
case attr := <-wr.ReadAttribute(): case attr := <-wr.attributes:
assert.Equal(t, ".gitignore\"\n", attr.Filename) assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, "linguist-vendored", attr.Attribute) assert.Equal(t, "linguist-vendored", attr.Attribute)
assert.Equal(t, "unspecified", attr.Value) assert.Equal(t, "unspecified", attr.Value)
@ -36,7 +40,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
select { select {
case attr := <-wr.ReadAttribute(): case attr := <-wr.attributes:
assert.Equal(t, ".gitignore\"\n", attr.Filename) assert.Equal(t, ".gitignore\"\n", attr.Filename)
assert.Equal(t, "linguist-vendored", attr.Attribute) assert.Equal(t, "linguist-vendored", attr.Attribute)
assert.Equal(t, "unspecified", attr.Value) assert.Equal(t, "unspecified", attr.Value)
@ -51,14 +55,14 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
select { select {
case <-wr.ReadAttribute(): case <-wr.attributes:
assert.FailNow(t, "There should not be an attribute ready to read") assert.FailNow(t, "There should not be an attribute ready to read")
case <-time.After(100 * time.Millisecond): case <-time.After(100 * time.Millisecond):
} }
_, err = wr.Write([]byte("attribute\x00")) _, err = wr.Write([]byte("attribute\x00"))
assert.NoError(t, err) assert.NoError(t, err)
select { select {
case <-wr.ReadAttribute(): case <-wr.attributes:
assert.FailNow(t, "There should not be an attribute ready to read") assert.FailNow(t, "There should not be an attribute ready to read")
case <-time.After(100 * time.Millisecond): case <-time.After(100 * time.Millisecond):
} }
@ -66,28 +70,28 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
_, err = wr.Write([]byte("value\x00")) _, err = wr.Write([]byte("value\x00"))
assert.NoError(t, err) assert.NoError(t, err)
attr := <-wr.ReadAttribute() attr := <-wr.attributes
assert.Equal(t, "incomplete-filename", attr.Filename) assert.Equal(t, "incomplete-filename", attr.Filename)
assert.Equal(t, "attribute", attr.Attribute) assert.Equal(t, "attribute", attr.Attribute)
assert.Equal(t, "value", attr.Value) assert.Equal(t, "value", attr.Value)
_, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00")) _, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00"))
assert.NoError(t, err) assert.NoError(t, err)
attr = <-wr.ReadAttribute() attr = <-wr.attributes
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, attributeTriple{ assert.EqualValues(t, attributeTriple{
Filename: "shouldbe.vendor", Filename: "shouldbe.vendor",
Attribute: "linguist-vendored", Attribute: "linguist-vendored",
Value: "set", Value: "set",
}, attr) }, attr)
attr = <-wr.ReadAttribute() attr = <-wr.attributes
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, attributeTriple{ assert.EqualValues(t, attributeTriple{
Filename: "shouldbe.vendor", Filename: "shouldbe.vendor",
Attribute: "linguist-generated", Attribute: "linguist-generated",
Value: "unspecified", Value: "unspecified",
}, attr) }, attr)
attr = <-wr.ReadAttribute() attr = <-wr.attributes
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, attributeTriple{ assert.EqualValues(t, attributeTriple{
Filename: "shouldbe.vendor", Filename: "shouldbe.vendor",
@ -95,3 +99,112 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) {
Value: "unspecified", Value: "unspecified",
}, attr) }, attr)
} }
// TestGitAttributeBareNonBare checks that reading attributes directly from a
// treeish (supported on bare repositories since git 2.40) gives the same result
// as the fallback that goes through a temporary index and work tree.
func TestGitAttributeBareNonBare(t *testing.T) {
	if !SupportCheckAttrOnBare {
		t.Skip("git check-attr supported on bare repo starting with git 2.40")
	}

	repoPath := filepath.Join(testReposDir, "language_stats_repo")
	gitRepo, err := openRepositoryWithDefaultContext(repoPath)
	require.NoError(t, err)
	defer gitRepo.Close()

	for _, commitID := range []string{
		"8fee858da5796dfb37704761701bb8e800ad9ef3",
		"341fca5b5ea3de596dc483e54c2db28633cd2f97",
	} {
		t.Run("GitAttributeChecker/"+commitID, func(t *testing.T) {
			bareChecker, err := gitRepo.GitAttributeChecker(commitID, LinguistAttributes...)
			// On error the checker is zero-valued: abort instead of calling
			// CheckPath/Close on a checker without context or cancel func.
			require.NoError(t, err)
			t.Cleanup(func() { bareChecker.Close() })

			bareStats, err := bareChecker.CheckPath("i-am-a-python.p")
			assert.NoError(t, err)

			// Force the pre-2.40 code path for the second checker; restored on return.
			defer test.MockVariableValue(&SupportCheckAttrOnBare, false)()
			cloneChecker, err := gitRepo.GitAttributeChecker(commitID, LinguistAttributes...)
			require.NoError(t, err)
			t.Cleanup(func() { cloneChecker.Close() })

			cloneStats, err := cloneChecker.CheckPath("i-am-a-python.p")
			assert.NoError(t, err)

			assert.EqualValues(t, cloneStats, bareStats)
		})

		t.Run("GitAttributes/"+commitID, func(t *testing.T) {
			bareStats, err := gitRepo.GitAttributes(commitID, "i-am-a-python.p", LinguistAttributes...)
			assert.NoError(t, err)

			// Force the pre-2.40 code path for the second call; restored on return.
			defer test.MockVariableValue(&SupportCheckAttrOnBare, false)()
			cloneStats, err := gitRepo.GitAttributes(commitID, "i-am-a-python.p", LinguistAttributes...)
			assert.NoError(t, err)

			assert.EqualValues(t, cloneStats, bareStats)
		})
	}
}
func TestGitAttributes(t *testing.T) {
repoPath := filepath.Join(testReposDir, "language_stats_repo")
gitRepo, err := openRepositoryWithDefaultContext(repoPath)
require.NoError(t, err)
defer gitRepo.Close()
attr, err := gitRepo.GitAttributes("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", LinguistAttributes...)
assert.NoError(t, err)
assert.EqualValues(t, map[string]GitAttribute{
"gitlab-language": "unspecified",
"linguist-detectable": "unspecified",
"linguist-documentation": "unspecified",
"linguist-generated": "unspecified",
"linguist-language": "Python",
"linguist-vendored": "unspecified",
}, attr)
attr, err = gitRepo.GitAttributes("341fca5b5ea3de596dc483e54c2db28633cd2f97", "i-am-a-python.p", LinguistAttributes...)
assert.NoError(t, err)
assert.EqualValues(t, map[string]GitAttribute{
"gitlab-language": "unspecified",
"linguist-detectable": "unspecified",
"linguist-documentation": "unspecified",
"linguist-generated": "unspecified",
"linguist-language": "Cobra",
"linguist-vendored": "unspecified",
}, attr)
}
func TestGitAttributeFirst(t *testing.T) {
repoPath := filepath.Join(testReposDir, "language_stats_repo")
gitRepo, err := openRepositoryWithDefaultContext(repoPath)
require.NoError(t, err)
defer gitRepo.Close()
t.Run("first is specified", func(t *testing.T) {
language, err := gitRepo.GitAttributeFirst("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", "linguist-language", "gitlab-language")
assert.NoError(t, err)
assert.Equal(t, "Python", language.String())
})
t.Run("second is specified", func(t *testing.T) {
language, err := gitRepo.GitAttributeFirst("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", "gitlab-language", "linguist-language")
assert.NoError(t, err)
assert.Equal(t, "Python", language.String())
})
t.Run("none is specified", func(t *testing.T) {
language, err := gitRepo.GitAttributeFirst("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", "linguist-detectable", "gitlab-language", "non-existing")
assert.NoError(t, err)
assert.Equal(t, "", language.String())
})
}
func TestGitAttributeStruct(t *testing.T) {
assert.Equal(t, "", GitAttribute("").String())
assert.Equal(t, "", GitAttribute("unspecified").String())
assert.Equal(t, "python", GitAttribute("python").String())
assert.Equal(t, "text?token=Error", GitAttribute("text?token=Error").String())
assert.Equal(t, "text", GitAttribute("text?token=Error").Prefix())
}

View file

@ -8,8 +8,8 @@ package git
import ( import (
"bytes" "bytes"
"cmp"
"io" "io"
"strings"
"code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
@ -61,8 +61,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err return nil, err
} }
checker, deferable := repo.CheckAttributeReader(commitID) checker, err := repo.GitAttributeChecker(commitID, LinguistAttributes...)
defer deferable() if err != nil {
return nil, err
}
defer checker.Close()
contentBuf := bytes.Buffer{} contentBuf := bytes.Buffer{}
var content []byte var content []byte
@ -102,14 +105,16 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
isDocumentation := optional.None[bool]() isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]() isDetectable := optional.None[bool]()
if checker != nil {
attrs, err := checker.CheckPath(f.Name()) attrs, err := checker.CheckPath(f.Name())
if err == nil { if err == nil {
isVendored = attributeToBool(attrs, "linguist-vendored") isVendored = attrs["linguist-vendored"].Bool()
isGenerated = attributeToBool(attrs, "linguist-generated") isGenerated = attrs["linguist-generated"].Bool()
isDocumentation = attributeToBool(attrs, "linguist-documentation") isDocumentation = attrs["linguist-documentation"].Bool()
isDetectable = attributeToBool(attrs, "linguist-detectable") isDetectable = attrs["linguist-detectable"].Bool()
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { if language := cmp.Or(
attrs["linguist-language"].String(),
attrs["gitlab-language"].Prefix(),
); language != "" {
// group languages, such as Pug -> HTML; SCSS -> CSS // group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language) group := enry.GetLanguageGroup(language)
if len(group) != 0 { if len(group) != 0 {
@ -119,24 +124,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
// this language will always be added to the size // this language will always be added to the size
sizes[language] += f.Size() sizes[language] += f.Size()
continue continue
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
// this language will always be added to the size
sizes[language] += f.Size()
continue
}
}
} }
} }

View file

@ -1,5 +1,5 @@
[core] [core]
repositoryformatversion = 0 repositoryformatversion = 0
filemode = true filemode = true
bare = false bare = true
logallrefupdates = true logallrefupdates = true

View file

@ -1 +1,2 @@
0000000000000000000000000000000000000000 8fee858da5796dfb37704761701bb8e800ad9ef3 Andrew Thornton <art27@cantab.net> 1632140318 +0100 commit (initial): Add some test files for GetLanguageStats 0000000000000000000000000000000000000000 8fee858da5796dfb37704761701bb8e800ad9ef3 Andrew Thornton <art27@cantab.net> 1632140318 +0100 commit (initial): Add some test files for GetLanguageStats
8fee858da5796dfb37704761701bb8e800ad9ef3 341fca5b5ea3de596dc483e54c2db28633cd2f97 oliverpool <git@olivier.pfad.fr> 1711278775 +0100 push

View file

@ -1 +1,2 @@
0000000000000000000000000000000000000000 8fee858da5796dfb37704761701bb8e800ad9ef3 Andrew Thornton <art27@cantab.net> 1632140318 +0100 commit (initial): Add some test files for GetLanguageStats 0000000000000000000000000000000000000000 8fee858da5796dfb37704761701bb8e800ad9ef3 Andrew Thornton <art27@cantab.net> 1632140318 +0100 commit (initial): Add some test files for GetLanguageStats
8fee858da5796dfb37704761701bb8e800ad9ef3 341fca5b5ea3de596dc483e54c2db28633cd2f97 oliverpool <git@olivier.pfad.fr> 1711278775 +0100 push

View file

@ -1 +1 @@
8fee858da5796dfb37704761701bb8e800ad9ef3 341fca5b5ea3de596dc483e54c2db28633cd2f97

View file

@ -145,30 +145,13 @@ func LFSLocks(ctx *context.Context) {
return return
} }
name2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{ ctx.Data["Lockables"], err = lockablesGitAttributes(gitRepo, lfsLocks)
Attributes: []string{"lockable"},
Filenames: filenames,
CachedOnly: true,
})
if err != nil { if err != nil {
log.Error("Unable to check attributes in %s (%v)", tmpBasePath, err) log.Error("Unable to get lockablesGitAttributes in %s (%v)", tmpBasePath, err)
ctx.ServerError("LFSLocks", err) ctx.ServerError("LFSLocks", err)
return return
} }
lockables := make([]bool, len(lfsLocks))
for i, lock := range lfsLocks {
attribute2info, has := name2attribute2info[lock.Path]
if !has {
continue
}
if attribute2info["lockable"] != "set" {
continue
}
lockables[i] = true
}
ctx.Data["Lockables"] = lockables
filelist, err := gitRepo.LsFiles(filenames...) filelist, err := gitRepo.LsFiles(filenames...)
if err != nil { if err != nil {
log.Error("Unable to lsfiles in %s (%v)", tmpBasePath, err) log.Error("Unable to lsfiles in %s (%v)", tmpBasePath, err)
@ -189,6 +172,24 @@ func LFSLocks(ctx *context.Context) {
ctx.HTML(http.StatusOK, tplSettingsLFSLocks) ctx.HTML(http.StatusOK, tplSettingsLFSLocks)
} }
// lockablesGitAttributes evaluates the "lockable" git attribute for the path of
// every lock, using a single attribute checker for the whole list.
//
// The returned slice has one entry per element of lfsLocks, in the same order.
// The first path that cannot be checked aborts the whole lookup with an error.
func lockablesGitAttributes(gitRepo *git.Repository, lfsLocks []*git_model.LFSLock) ([]bool, error) {
	// Empty treeish: attributes are read from the repository itself.
	checker, err := gitRepo.GitAttributeChecker("", "lockable")
	if err != nil {
		return nil, fmt.Errorf("could not GitAttributeChecker: %w", err)
	}
	defer checker.Close()

	lockables := make([]bool, 0, len(lfsLocks))
	for _, lock := range lfsLocks {
		attrs, checkErr := checker.CheckPath(lock.Path)
		if checkErr != nil {
			return nil, fmt.Errorf("could not CheckPath(%s): %w", lock.Path, checkErr)
		}
		// NOTE(review): presumably Value() is false when the attribute is unspecified — confirm in the optional package.
		isLockable := attrs["lockable"].Bool().Value()
		lockables = append(lockables, isLockable)
	}
	return lockables, nil
}
// LFSLockFile locks a file // LFSLockFile locks a file
func LFSLockFile(ctx *context.Context) { func LFSLockFile(ctx *context.Context) {
if !setting.LFS.StartServer { if !setting.LFS.StartServer {

View file

@ -643,17 +643,12 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
} }
if ctx.Repo.GitRepo != nil { if ctx.Repo.GitRepo != nil {
checker, deferable := ctx.Repo.GitRepo.CheckAttributeReader(ctx.Repo.CommitID) attrs, err := ctx.Repo.GitRepo.GitAttributes(ctx.Repo.CommitID, ctx.Repo.TreePath, "linguist-vendored", "linguist-generated")
if checker != nil { if err != nil {
defer deferable() log.Error("GitAttributes(%s, %s) failed: %v", ctx.Repo.CommitID, ctx.Repo.TreePath, err)
attrs, err := checker.CheckPath(ctx.Repo.TreePath) } else {
if err == nil { ctx.Data["IsVendored"] = attrs["linguist-vendored"].Bool().Value()
vendored, has := attrs["linguist-vendored"] ctx.Data["IsGenerated"] = attrs["linguist-generated"].Bool().Value()
ctx.Data["IsVendored"] = has && (vendored == "set" || vendored == "true")
generated, has := attrs["linguist-generated"]
ctx.Data["IsGenerated"] = has && (generated == "set" || generated == "true")
}
} }
} }

View file

@ -7,6 +7,7 @@ package gitdiff
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"cmp"
"context" "context"
"fmt" "fmt"
"html" "html"
@ -1172,38 +1173,32 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
} }
diff.Start = opts.SkipTo diff.Start = opts.SkipTo
checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID) checker, err := gitRepo.GitAttributeChecker(opts.AfterCommitID, git.LinguistAttributes...)
defer deferable() if err != nil {
return nil, fmt.Errorf("unable to GitAttributeChecker: %w", err)
}
defer checker.Close()
for _, diffFile := range diff.Files { for _, diffFile := range diff.Files {
gotVendor := false gotVendor := false
gotGenerated := false gotGenerated := false
if checker != nil {
attrs, err := checker.CheckPath(diffFile.Name) attrs, err := checker.CheckPath(diffFile.Name)
if err == nil { if err != nil {
if vendored, has := attrs["linguist-vendored"]; has { log.Error("checker.CheckPath(%s) failed: %v", diffFile.Name, err)
if vendored == "set" || vendored == "true" {
diffFile.IsVendored = true
gotVendor = true
} else { } else {
gotVendor = vendored == "false" vendored := attrs["linguist-vendored"].Bool()
} diffFile.IsVendored = vendored.Value()
} gotVendor = vendored.Has()
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" { generated := attrs["linguist-generated"].Bool()
diffFile.IsGenerated = true diffFile.IsGenerated = generated.Value()
gotGenerated = true gotGenerated = generated.Has()
} else {
gotGenerated = generated == "false" diffFile.Language = cmp.Or(
} attrs["linguist-language"].String(),
} attrs["gitlab-language"].Prefix(),
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { )
diffFile.Language = language
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
diffFile.Language = language
}
}
} }
if !gotVendor { if !gotVendor {

View file

@ -273,31 +273,6 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
// TryGetContentLanguage tries to get the (linguist) language of the file content // TryGetContentLanguage tries to get the (linguist) language of the file content
func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) (string, error) { func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) (string, error) {
indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(commitID) attribute, err := gitRepo.GitAttributeFirst(commitID, treePath, "linguist-language", "gitlab-language")
if err != nil { return attribute.Prefix(), err
return "", err
}
defer deleteTemporaryFile()
filename2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{
CachedOnly: true,
Attributes: []string{"linguist-language", "gitlab-language"},
Filenames: []string{treePath},
IndexFile: indexFilename,
WorkTree: worktree,
})
if err != nil {
return "", err
}
language := filename2attribute2info[treePath]["linguist-language"]
if language == "" || language == "unspecified" {
language = filename2attribute2info[treePath]["gitlab-language"]
}
if language == "unspecified" {
language = ""
}
return language, nil
} }

View file

@ -400,16 +400,12 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
var lfsMetaObject *git_model.LFSMetaObject var lfsMetaObject *git_model.LFSMetaObject
if setting.LFS.StartServer && hasOldBranch { if setting.LFS.StartServer && hasOldBranch {
// Check there is no way this can return multiple infos // Check there is no way this can return multiple infos
filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{ filterAttribute, err := t.gitRepo.GitAttributeFirst("", file.Options.treePath, "filter")
Attributes: []string{"filter"},
Filenames: []string{file.Options.treePath},
CachedOnly: true,
})
if err != nil { if err != nil {
return err return err
} }
if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" { if filterAttribute == "lfs" {
// OK so we are supposed to LFS this data! // OK so we are supposed to LFS this data!
pointer, err := lfs.GeneratePointer(treeObjectContentReader) pointer, err := lfs.GeneratePointer(treeObjectContentReader)
if err != nil { if err != nil {

View file

@ -105,24 +105,10 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
} }
} }
var filename2attribute2info map[string]map[string]string
if setting.LFS.StartServer {
filename2attribute2info, err = t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
Attributes: []string{"filter"},
Filenames: names,
CachedOnly: true,
})
if err != nil {
return err
}
}
// Copy uploaded files into repository. // Copy uploaded files into repository.
for i := range infos { if err := copyUploadedLFSFilesIntoRepository(infos, t, opts.TreePath); err != nil {
if err := copyUploadedLFSFileIntoRepository(&infos[i], filename2attribute2info, t, opts.TreePath); err != nil {
return err return err
} }
}
// Now write the tree // Now write the tree
treeHash, err := t.WriteTree() treeHash, err := t.WriteTree()
@ -169,7 +155,44 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
return repo_model.DeleteUploads(ctx, uploads...) return repo_model.DeleteUploads(ctx, uploads...)
} }
func copyUploadedLFSFileIntoRepository(info *uploadInfo, filename2attribute2info map[string]map[string]string, t *TemporaryUploadRepository, treePath string) error { func copyUploadedLFSFilesIntoRepository(infos []uploadInfo, t *TemporaryUploadRepository, treePath string) error {
var storeInLFSFunc func(string) (bool, error)
if setting.LFS.StartServer {
checker, err := t.gitRepo.GitAttributeChecker("", "filter")
if err != nil {
return err
}
defer checker.Close()
storeInLFSFunc = func(name string) (bool, error) {
attrs, err := checker.CheckPath(name)
if err != nil {
return false, fmt.Errorf("could not CheckPath(%s): %w", name, err)
}
return attrs["filter"] == "lfs", nil
}
}
// Copy uploaded files into repository.
for i, info := range infos {
storeInLFS := false
if storeInLFSFunc != nil {
var err error
storeInLFS, err = storeInLFSFunc(info.upload.Name)
if err != nil {
return err
}
}
if err := copyUploadedLFSFileIntoRepository(&infos[i], storeInLFS, t, treePath); err != nil {
return err
}
}
return nil
}
func copyUploadedLFSFileIntoRepository(info *uploadInfo, storeInLFS bool, t *TemporaryUploadRepository, treePath string) error {
file, err := os.Open(info.upload.LocalPath()) file, err := os.Open(info.upload.LocalPath())
if err != nil { if err != nil {
return err return err
@ -177,7 +200,7 @@ func copyUploadedLFSFileIntoRepository(info *uploadInfo, filename2attribute2info
defer file.Close() defer file.Close()
var objectHash string var objectHash string
if setting.LFS.StartServer && filename2attribute2info[info.upload.Name] != nil && filename2attribute2info[info.upload.Name]["filter"] == "lfs" { if storeInLFS {
// Handle LFS // Handle LFS
// FIXME: Inefficient! this should probably happen in models.Upload // FIXME: Inefficient! this should probably happen in models.Upload
pointer, err := lfs.GeneratePointer(file) pointer, err := lfs.GeneratePointer(file)