Merge pull request '[v7.0/forgejo] Refactor LFS GC functions' (#3072) from bp-v7.0/forgejo-7ffa7f5 into v7.0/forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3072
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
This commit is contained in:
Gusted 2024-04-06 09:34:01 +00:00
commit d2a7905d9d
5 changed files with 132 additions and 56 deletions

View file

@ -0,0 +1,7 @@
-
id: 1000
oid: 9d172e5c64b4f0024b9901ec6afe9ea052f3c9b6ff9f4b07956d8c48c86fca82
size: 25
repository_id: 1
created_unix: 1712309123

View file

@ -337,32 +337,29 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error { func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
batchSize := setting.Database.IterateBufferSize batchSize := setting.Database.IterateBufferSize
sess := db.GetEngine(ctx) sess := db.GetEngine(ctx)
id := int64(0) var start int
type RepositoryCount struct { type RepositoryCount struct {
RepositoryID int64 RepositoryID int64
Count int64 Count int64
} }
for { for {
counts := make([]*RepositoryCount, 0, batchSize) counts := make([]*RepositoryCount, 0, batchSize)
sess.Select("repository_id, COUNT(id) AS count"). if err := sess.Select("repository_id, COUNT(id) AS count").
Table("lfs_meta_object"). Table("lfs_meta_object").
Where("repository_id > ?", id).
GroupBy("repository_id"). GroupBy("repository_id").
OrderBy("repository_id ASC") OrderBy("repository_id ASC").Limit(batchSize, start).Find(&counts); err != nil {
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
return err return err
} }
if len(counts) == 0 { if len(counts) == 0 {
return nil return nil
} }
start += len(counts)
for _, count := range counts { for _, count := range counts {
if err := f(ctx, count.RepositoryID, count.Count); err != nil { if err := f(ctx, count.RepositoryID, count.Count); err != nil {
return err return err
} }
} }
id = counts[len(counts)-1].RepositoryID
} }
} }
@ -370,25 +367,17 @@ func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx cont
type IterateLFSMetaObjectsForRepoOptions struct { type IterateLFSMetaObjectsForRepoOptions struct {
OlderThan timeutil.TimeStamp OlderThan timeutil.TimeStamp
UpdatedLessRecentlyThan timeutil.TimeStamp UpdatedLessRecentlyThan timeutil.TimeStamp
OrderByUpdated bool
LoopFunctionAlwaysUpdates bool
} }
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo // IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error { func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
var start int
batchSize := setting.Database.IterateBufferSize batchSize := setting.Database.IterateBufferSize
engine := db.GetEngine(ctx) engine := db.GetEngine(ctx)
type CountLFSMetaObject struct {
Count int64
LFSMetaObject `xorm:"extends"`
}
id := int64(0) id := int64(0)
for { for {
beans := make([]*CountLFSMetaObject, 0, batchSize) beans := make([]*LFSMetaObject, 0, batchSize)
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`"). sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*").
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid"). Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
Where("`lfs_meta_object`.repository_id = ?", repoID) Where("`lfs_meta_object`.repository_id = ?", repoID)
if !opts.OlderThan.IsZero() { if !opts.OlderThan.IsZero() {
@ -397,25 +386,19 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
if !opts.UpdatedLessRecentlyThan.IsZero() { if !opts.UpdatedLessRecentlyThan.IsZero() {
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan) sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
} }
sess.GroupBy("`lfs_meta_object`.id") sess.GroupBy("`lfs_meta_object`.id").
if opts.OrderByUpdated { And("`lfs_meta_object`.id > ?", id).
sess.OrderBy("`lfs_meta_object`.updated_unix ASC") OrderBy("`lfs_meta_object`.id ASC")
} else {
sess.And("`lfs_meta_object`.id > ?", id) if err := sess.Limit(batchSize, 0).Find(&beans); err != nil {
sess.OrderBy("`lfs_meta_object`.id ASC")
}
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
return err return err
} }
if len(beans) == 0 { if len(beans) == 0 {
return nil return nil
} }
if !opts.LoopFunctionAlwaysUpdates {
start += len(beans)
}
for _, bean := range beans { for _, bean := range beans {
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil { if err := f(ctx, bean); err != nil {
return err return err
} }
} }

101
models/git/lfs_test.go Normal file
View file

@ -0,0 +1,101 @@
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"context"
"path/filepath"
"testing"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
)
// TestIterateRepositoryIDsWithLFSMetaObjects verifies that the iterator reports
// the expected (repository ID, LFS object count) pairs in ascending repository
// order, both when a single batch covers all rows and when every row needs its
// own batch.
func TestIterateRepositoryIDsWithLFSMetaObjects(t *testing.T) {
	// Layer the test-specific fixture directory on top of the default fixtures
	// and restore the original fixture setup when the test finishes.
	restoreFixtures := unittest.OverrideFixtures(
		unittest.FixturesOptions{
			Dir:  filepath.Join(setting.AppWorkPath, "models/fixtures/"),
			Base: setting.AppWorkPath,
			Dirs: []string{"models/git/TestIterateRepositoryIDsWithLFSMetaObjects/"},
		},
	)
	defer restoreFixtures()
	assert.NoError(t, unittest.PrepareTestDatabase())

	type repocount struct {
		repoid int64
		count  int64
	}
	want := []repocount{{1, 1}, {54, 4}}

	scenarios := []struct {
		name      string
		batchSize int
	}{
		{name: "Normal batch size", batchSize: 20},
		{name: "Low batch size", batchSize: 1},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			// Pin the iteration buffer size for this subtest only.
			resetBatchSize := test.MockVariableValue(&setting.Database.IterateBufferSize, scenario.batchSize)
			defer resetBatchSize()

			got := []repocount{}
			record := func(ctx context.Context, repoID, count int64) error {
				got = append(got, repocount{repoID, count})
				return nil
			}

			err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, record)
			assert.NoError(t, err)
			assert.EqualValues(t, want, got)
		})
	}
}
// TestIterateLFSMetaObjectsForRepo verifies that iterating the LFS meta objects
// of repository 54 visits IDs 1 through 4 in order, with a large batch size,
// with a batch size of one, and while each visited object is deleted from
// inside the callback.
func TestIterateLFSMetaObjectsForRepo(t *testing.T) {
	assert.NoError(t, unittest.PrepareTestDatabase())

	wantIDs := []int64{1, 2, 3, 4}

	// checkIteration walks the repository's LFS meta objects, records the
	// visited IDs and compares them with wantIDs. When deleteVisited is true,
	// every object is deleted as soon as it has been recorded.
	checkIteration := func(t *testing.T, deleteVisited bool) {
		gotIDs := []int64{}

		err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
			gotIDs = append(gotIDs, lo.ID)
			if deleteVisited {
				_, deleteErr := db.DeleteByID[LFSMetaObject](ctx, lo.ID)
				assert.NoError(t, deleteErr)
			}
			return nil
		}, &IterateLFSMetaObjectsForRepoOptions{})
		assert.NoError(t, err)
		assert.EqualValues(t, wantIDs, gotIDs)
	}

	t.Run("Normal batch size", func(t *testing.T) {
		resetBatchSize := test.MockVariableValue(&setting.Database.IterateBufferSize, 20)
		defer resetBatchSize()

		checkIteration(t, false)
	})

	t.Run("Low batch size", func(t *testing.T) {
		resetBatchSize := test.MockVariableValue(&setting.Database.IterateBufferSize, 1)
		defer resetBatchSize()

		checkIteration(t, false)

		// Nested so the batch size of one is still in effect: each batch is
		// fetched after the previous object has already been deleted.
		t.Run("Batch handles updates", func(t *testing.T) {
			checkIteration(t, true)
		})
	})
}

View file

@ -44,6 +44,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
OlderThan: time.Now().Add(-24 * time.Hour * 7), OlderThan: time.Now().Add(-24 * time.Hour * 7),
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC // We don't set the UpdatedLessRecentlyThan because we want to do a full GC
}); err != nil { }); err != nil {
logger.Error("Couldn't garabage collect LFS objects: %v", err)
return err return err
} }

View file

@ -5,7 +5,6 @@ package repository
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"time" "time"
@ -25,8 +24,6 @@ type GarbageCollectLFSMetaObjectsOptions struct {
AutoFix bool AutoFix bool
OlderThan time.Time OlderThan time.Time
UpdatedLessRecentlyThan time.Time UpdatedLessRecentlyThan time.Time
NumberToCheckPerRepo int64
ProportionToCheckPerRepo float64
} }
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories // GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
@ -49,9 +46,6 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet
return err return err
} }
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
opts.NumberToCheckPerRepo = newMinimum
}
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts) return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
}) })
} }
@ -78,13 +72,9 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
defer gitRepo.Close() defer gitRepo.Close()
store := lfs.NewContentStore() store := lfs.NewContentStore()
errStop := errors.New("STOPERR")
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName) objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error { err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject) error {
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
return errStop
}
total++ total++
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent())) pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
@ -125,14 +115,8 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
// unassociated LFS object is genuinely unassociated. // unassociated LFS object is genuinely unassociated.
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()), OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()), UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
OrderByUpdated: true,
LoopFunctionAlwaysUpdates: true,
}) })
if err != nil {
if err == errStop {
opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
return nil
} else if err != nil {
return err return err
} }
return nil return nil