From c772934ff623b3a76efbe306f597695330a71287 Mon Sep 17 00:00:00 2001
From: zeripath
Date: Tue, 15 Nov 2022 08:08:59 +0000
Subject: [PATCH] Adjust gitea doctor --run storages to check all storage types (#21785)

The doctor check `storages` currently only checks the attachment storage.
This PR adds some basic garbage collection functionality for the other
types of storage.

Signed-off-by: Andrew Thornton
Co-authored-by: Lunny Xiao
---
 models/git/lfs.go                 |   6 +-
 models/packages/package_blob.go   |   7 +
 models/repo/archiver.go           |  36 +++++
 models/repo/attachment.go         |   6 +-
 models/repo/avatar.go             |   7 +
 models/user/avatar.go             |   7 +
 modules/doctor/storage.go         | 232 ++++++++++++++++++++++++++----
 modules/git/repo_archive.go       |  12 ++
 modules/packages/content_store.go |  12 ++
 routers/web/repo/lfs.go           |   2 +-
 10 files changed, 296 insertions(+), 31 deletions(-)

diff --git a/models/git/lfs.go b/models/git/lfs.go
index 74721dabb1..87e07d7a5e 100644
--- a/models/git/lfs.go
+++ b/models/git/lfs.go
@@ -235,9 +235,9 @@ func LFSObjectAccessible(user *user_model.User, oid string) (bool, error) {
 	return count > 0, err
 }
 
-// LFSObjectIsAssociated checks if a provided Oid is associated
-func LFSObjectIsAssociated(oid string) (bool, error) {
-	return db.GetEngine(db.DefaultContext).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}})
+// ExistsLFSObject checks if a provided Oid exists within the DB
+func ExistsLFSObject(ctx context.Context, oid string) (bool, error) {
+	return db.GetEngine(ctx).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}})
 }
 
 // LFSAutoAssociate auto associates accessible LFSMetaObjects
diff --git a/models/packages/package_blob.go b/models/packages/package_blob.go
index 8c701d4285..fcb71a96ec 100644
--- a/models/packages/package_blob.go
+++ b/models/packages/package_blob.go
@@ -62,6 +62,13 @@ func GetBlobByID(ctx context.Context, blobID int64) (*PackageBlob, error) {
 	return pb, nil
 }
 
+// ExistPackageBlobWithSHA returns if a package blob exists with the provided sha
+func ExistPackageBlobWithSHA(ctx context.Context, blobSha256 string) (bool, error) {
+	return db.GetEngine(ctx).Exist(&PackageBlob{
+		HashSHA256: blobSha256,
+	})
+}
+
 // FindExpiredUnreferencedBlobs gets all blobs without associated files older than the specific duration
 func FindExpiredUnreferencedBlobs(ctx context.Context, olderThan time.Duration) ([]*PackageBlob, error) {
 	pbs := make([]*PackageBlob, 0, 10)
diff --git a/models/repo/archiver.go b/models/repo/archiver.go
index 003911943f..84358ce0dc 100644
--- a/models/repo/archiver.go
+++ b/models/repo/archiver.go
@@ -7,11 +7,14 @@ package repo
 import (
 	"context"
 	"fmt"
+	"strconv"
+	"strings"
 	"time"
 
 	"code.gitea.io/gitea/models/db"
 	"code.gitea.io/gitea/modules/git"
 	"code.gitea.io/gitea/modules/timeutil"
+	"code.gitea.io/gitea/modules/util"
 
 	"xorm.io/builder"
 )
@@ -44,6 +47,28 @@ func (archiver *RepoArchiver) RelativePath() string {
 	return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String())
 }
 
+// repoArchiverForRelativePath inverts (archiver *RepoArchiver) RelativePath(): given "<repoID>/<commitID[:2]>/<commitID>.<type>" it returns a shell RepoArchiver; the file name already holds the full commit ID, so the two-character shard directory is not prepended again
+func repoArchiverForRelativePath(relativePath string) (*RepoArchiver, error) {
+	parts := strings.SplitN(relativePath, "/", 3)
+	if len(parts) != 3 {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+	repoID, err := strconv.ParseInt(parts[0], 10, 64)
+	if err != nil {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+	nameExts := strings.SplitN(parts[2], ".", 2)
+	if len(nameExts) != 2 {
+		return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
+	}
+
+	return &RepoArchiver{
+		RepoID:   repoID,
+		CommitID: nameExts[0],
+		Type:     git.ToArchiveType(nameExts[1]),
+	}, nil
+}
+
 var delRepoArchiver = new(RepoArchiver)
 
 // DeleteRepoArchiver delete archiver
@@ -65,6 +90,17 @@ func GetRepoArchiver(ctx context.Context, repoID int64, tp git.ArchiveType, comm
 	return nil, nil
 }
 
+// ExistsRepoArchiverWithStoragePath checks if there is a RepoArchiver for a given storage path
+func ExistsRepoArchiverWithStoragePath(ctx context.Context, storagePath string) (bool, error) {
+	// We need to invert the path provided func (archiver *RepoArchiver) RelativePath() above
+	archiver, err := repoArchiverForRelativePath(storagePath)
+	if err != nil {
+		return false, err
+	}
+
+	return db.GetEngine(ctx).Exist(archiver)
+}
+
 // AddRepoArchiver adds an archiver
 func AddRepoArchiver(ctx context.Context, archiver *RepoArchiver) error {
 	_, err := db.GetEngine(ctx).Insert(archiver)
diff --git a/models/repo/attachment.go b/models/repo/attachment.go
index 180d7730ba..df7528df09 100644
--- a/models/repo/attachment.go
+++ b/models/repo/attachment.go
@@ -122,9 +122,9 @@ func GetAttachmentsByUUIDs(ctx context.Context, uuids []string) ([]*Attachment,
 	return attachments, db.GetEngine(ctx).In("uuid", uuids).Find(&attachments)
 }
 
-// ExistAttachmentsByUUID returns true if attachment is exist by given UUID
-func ExistAttachmentsByUUID(uuid string) (bool, error) {
-	return db.GetEngine(db.DefaultContext).Where("`uuid`=?", uuid).Exist(new(Attachment))
+// ExistAttachmentsByUUID returns true if attachment exists with the given UUID
+func ExistAttachmentsByUUID(ctx context.Context, uuid string) (bool, error) {
+	return db.GetEngine(ctx).Where("`uuid`=?", uuid).Exist(new(Attachment))
 }
 
 // GetAttachmentsByIssueID returns all attachments of an issue.
diff --git a/models/repo/avatar.go b/models/repo/avatar.go
index 1bc37598fe..84b9f5ac21 100644
--- a/models/repo/avatar.go
+++ b/models/repo/avatar.go
@@ -24,6 +24,13 @@ func (repo *Repository) CustomAvatarRelativePath() string {
 	return repo.Avatar
 }
 
+// ExistsWithAvatarAtStoragePath returns true if there is a repository with this Avatar
+func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) {
+	// See func (repo *Repository) CustomAvatarRelativePath()
+	// repo.Avatar is used directly as the storage path - therefore we can check for existence directly using the path
+	return db.GetEngine(ctx).Where("`avatar`=?", storagePath).Exist(new(Repository))
+}
+
 // RelAvatarLink returns a relative link to the repository's avatar.
 func (repo *Repository) RelAvatarLink() string {
 	return repo.relAvatarLink(db.DefaultContext)
diff --git a/models/user/avatar.go b/models/user/avatar.go
index 102206f3a2..f523766746 100644
--- a/models/user/avatar.go
+++ b/models/user/avatar.go
@@ -111,3 +111,10 @@ func (u *User) IsUploadAvatarChanged(data []byte) bool {
 	avatarID := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%d-%x", u.ID, md5.Sum(data)))))
 	return u.Avatar != avatarID
 }
+
+// ExistsWithAvatarAtStoragePath returns true if there is a user with this Avatar
+func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) {
+	// See func (u *User) CustomAvatarRelativePath()
+	// u.Avatar is used directly as the storage path - therefore we can check for existence directly using the path
+	return db.GetEngine(ctx).Where("`avatar`=?", storagePath).Exist(new(User))
+}
diff --git a/modules/doctor/storage.go b/modules/doctor/storage.go
index dafd989cf0..8ae9168ea6 100644
--- a/modules/doctor/storage.go
+++ b/modules/doctor/storage.go
@@ -6,71 +6,255 @@ package doctor
 
 import (
 	"context"
+	"errors"
+	"io/fs"
+	"strings"
 
-	repo_model "code.gitea.io/gitea/models/repo"
+	"code.gitea.io/gitea/models/git"
+	"code.gitea.io/gitea/models/packages"
+	"code.gitea.io/gitea/models/repo"
+	"code.gitea.io/gitea/models/user"
+	"code.gitea.io/gitea/modules/base"
 	"code.gitea.io/gitea/modules/log"
+	packages_module "code.gitea.io/gitea/modules/packages"
 	"code.gitea.io/gitea/modules/storage"
+	"code.gitea.io/gitea/modules/util"
 )
 
-func checkAttachmentStorageFiles(logger log.Logger, autofix bool) error {
-	var total, garbageNum int
-	var deletePaths []string
-	if err := storage.Attachments.IterateObjects(func(p string, obj storage.Object) error {
+type commonStorageCheckOptions struct {
+	storer     storage.ObjectStorage
+	isOrphaned func(path string, obj storage.Object, stat fs.FileInfo) (bool, error)
+	name       string
+}
+
+func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, opts *commonStorageCheckOptions) error {
+	totalCount, orphanedCount := 0, 0
+	totalSize, orphanedSize := int64(0), int64(0)
+
+	var pathsToDelete []string
+	if err := opts.storer.IterateObjects(func(p string, obj storage.Object) error {
 		defer obj.Close()
 
-		total++
+		totalCount++
 		stat, err := obj.Stat()
 		if err != nil {
 			return err
 		}
-		exist, err := repo_model.ExistAttachmentsByUUID(stat.Name())
+		totalSize += stat.Size()
+
+		orphaned, err := opts.isOrphaned(p, obj, stat)
 		if err != nil {
 			return err
 		}
-		if !exist {
-			garbageNum++
+		if orphaned {
+			orphanedCount++
+			orphanedSize += stat.Size()
 			if autofix {
-				deletePaths = append(deletePaths, p)
+				pathsToDelete = append(pathsToDelete, p)
 			}
 		}
 		return nil
 	}); err != nil {
-		logger.Error("storage.Attachments.IterateObjects failed: %v", err)
+		logger.Error("Error whilst iterating %s storage: %v", opts.name, err)
 		return err
 	}
 
-	if garbageNum > 0 {
+	if orphanedCount > 0 {
 		if autofix {
 			var deletedNum int
-			for _, p := range deletePaths {
-				if err := storage.Attachments.Delete(p); err != nil {
-					log.Error("Delete attachment %s failed: %v", p, err)
+			for _, p := range pathsToDelete {
+				if err := opts.storer.Delete(p); err != nil {
+					logger.Error("Error whilst deleting %s from %s storage: %v", p, opts.name, err)
 				} else {
 					deletedNum++
 				}
 			}
-			logger.Info("%d missed information attachment detected, %d deleted.", garbageNum, deletedNum)
+			logger.Info("Deleted %d/%d orphaned %s(s)", deletedNum, orphanedCount, opts.name)
 		} else {
-			logger.Warn("Checked %d attachment, %d missed information.", total, garbageNum)
+			logger.Warn("Found %d/%d (%s/%s) orphaned %s(s)", orphanedCount, totalCount, base.FileSize(orphanedSize), base.FileSize(totalSize), opts.name)
 		}
+	} else {
+		logger.Info("Found %d (%s) %s(s)", totalCount, base.FileSize(totalSize), opts.name)
 	}
 	return nil
 }
 
-func checkStorageFiles(ctx context.Context, logger log.Logger, autofix bool) error {
-	if err := storage.Init(); err != nil {
-		logger.Error("storage.Init failed: %v", err)
-		return err
+type checkStorageOptions struct {
+	All          bool
+	Attachments  bool
+	LFS          bool
+	Avatars      bool
+	RepoAvatars  bool
+	RepoArchives bool
+	Packages     bool
+}
+
+// checkStorage will return a doctor check function to check the requested storage types for "orphaned" stored object/files and optionally delete them
+func checkStorage(opts *checkStorageOptions) func(ctx context.Context, logger log.Logger, autofix bool) error {
+	return func(ctx context.Context, logger log.Logger, autofix bool) error {
+		if err := storage.Init(); err != nil {
+			logger.Error("storage.Init failed: %v", err)
+			return err
+		}
+
+		if opts.Attachments || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Attachments,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistAttachmentsByUUID(ctx, stat.Name())
+						return !exists, err
+					},
+					name: "attachment",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.LFS || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.LFS,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						// The oid of an LFS stored object is the name but with all the path.Separators removed
+						oid := strings.ReplaceAll(path, "/", "")
+						exists, err := git.ExistsLFSObject(ctx, oid)
+						return !exists, err
+					},
+					name: "LFS file",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.Avatars || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Avatars,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := user.ExistsWithAvatarAtStoragePath(ctx, path)
+						return !exists, err
+					},
+					name: "avatar",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.RepoAvatars || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.RepoAvatars,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistsWithAvatarAtStoragePath(ctx, path)
+						return !exists, err
+					},
+					name: "repo avatar",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.RepoArchives || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.RepoArchives,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						exists, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path)
+						if err == nil || errors.Is(err, util.ErrInvalidArgument) {
+							// invalid arguments mean that the object is not a valid repo archiver and it should be removed
+							return !exists, nil
+						}
+						return !exists, err
+					},
+					name: "repo archive",
+				}); err != nil {
+				return err
+			}
+		}
+
+		if opts.Packages || opts.All {
+			if err := commonCheckStorage(ctx, logger, autofix,
+				&commonStorageCheckOptions{
+					storer: storage.Packages,
+					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
+						key, err := packages_module.RelativePathToKey(path)
+						if err != nil {
+							// If there is an error here then the relative path does not match a valid package
+							// Therefore it is orphaned by default
+							return true, nil
+						}
+
+						exists, err := packages.ExistPackageBlobWithSHA(ctx, string(key))
+
+						return !exists, err
+					},
+					name: "package blob",
+				}); err != nil {
+				return err
+			}
+		}
+
+		return nil
 	}
-	return checkAttachmentStorageFiles(logger, autofix)
 }
 
 func init() {
 	Register(&Check{
-		Title:                      "Check if there is garbage storage files",
+		Title:                      "Check if there are orphaned storage files",
 		Name:                       "storages",
 		IsDefault:                  false,
-		Run:                        checkStorageFiles,
+		Run:                        checkStorage(&checkStorageOptions{All: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned attachments in storage",
+		Name:                       "storage-attachments",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{Attachments: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned lfs files in storage",
+		Name:                       "storage-lfs",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{LFS: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned avatars in storage",
+		Name:                       "storage-avatars",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{Avatars: true, RepoAvatars: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned archives in storage",
+		Name:                       "storage-archives",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{RepoArchives: true}),
+		AbortIfFailed:              false,
+		SkipDatabaseInitialization: false,
+		Priority:                   1,
+	})
+
+	Register(&Check{
+		Title:                      "Check if there are orphaned package blobs in storage",
+		Name:                       "storage-packages",
+		IsDefault:                  false,
+		Run:                        checkStorage(&checkStorageOptions{Packages: true}),
 		AbortIfFailed:              false,
 		SkipDatabaseInitialization: false,
 		Priority:                   1,
diff --git a/modules/git/repo_archive.go b/modules/git/repo_archive.go
index a0cbfba5d9..13be2004ca 100644
--- a/modules/git/repo_archive.go
+++ b/modules/git/repo_archive.go
@@ -38,6 +38,18 @@ func (a ArchiveType) String() string {
 	return "unknown"
 }
 
+func ToArchiveType(s string) ArchiveType {
+	switch s {
+	case "zip":
+		return ZIP
+	case "tar.gz":
+		return TARGZ
+	case "bundle":
+		return BUNDLE
+	}
+	return 0
+}
+
 // CreateArchive create archive content to the target path
 func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error {
 	if format.String() == "unknown" {
diff --git a/modules/packages/content_store.go b/modules/packages/content_store.go
index a3a5d1a666..be416ac269 100644
--- a/modules/packages/content_store.go
+++ b/modules/packages/content_store.go
@@ -7,8 +7,10 @@ package packages
 import (
 	"io"
 	"path"
+	"strings"
 
 	"code.gitea.io/gitea/modules/storage"
+	"code.gitea.io/gitea/modules/util"
 )
 
 // BlobHash256Key is the key to address a blob content
@@ -45,3 +47,13 @@ func (s *ContentStore) Delete(key BlobHash256Key) error {
 func KeyToRelativePath(key BlobHash256Key) string {
 	return path.Join(string(key)[0:2], string(key)[2:4], string(key))
 }
+
+// RelativePathToKey converts a relative path aa/bb/aabb000000... to the sha256 key aabb000000...
+func RelativePathToKey(relativePath string) (BlobHash256Key, error) {
+	parts := strings.SplitN(relativePath, "/", 3)
+	if len(parts) != 3 || len(parts[0]) != 2 || len(parts[1]) != 2 || len(parts[2]) < 4 || parts[0]+parts[1] != parts[2][0:4] {
+		return "", util.ErrInvalidArgument
+	}
+
+	return BlobHash256Key(parts[2]), nil
+}
diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go
index 67cb6837a5..9bf4307bfe 100644
--- a/routers/web/repo/lfs.go
+++ b/routers/web/repo/lfs.go
@@ -478,7 +478,7 @@ func LFSPointerFiles(ctx *context.Context) {
 						return err
 					}
 					if !result.Associatable {
-						associated, err := git_model.LFSObjectIsAssociated(pointerBlob.Oid)
+						associated, err := git_model.ExistsLFSObject(ctx, pointerBlob.Oid)
 						if err != nil {
 							return err
 						}