Add finalizers to ensure that repos are closed and blobreaders are closed (#19495) (#19496)

It may be prudent to add runtime finalizers to the git.Repository and
git.blobReader objects to absolutely ensure that these are both properly
cancelled, cleaned and closed out.

This commit is a backport of an extract from #19448

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2022-05-02 13:44:45 +01:00 committed by GitHub
parent 35a7db49b4
commit 88da50674f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 157 additions and 18 deletions

View file

@ -12,8 +12,11 @@ import (
"bytes" "bytes"
"io" "io"
"math" "math"
"runtime"
"sync"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
) )
// Blob represents a Git object. // Blob represents a Git object.
@ -54,11 +57,15 @@ func (b *Blob) DataAsync() (io.ReadCloser, error) {
return io.NopCloser(bytes.NewReader(bs)), err return io.NopCloser(bytes.NewReader(bs)), err
} }
return &blobReader{ br := &blobReader{
repo: b.repo,
rd: rd, rd: rd,
n: size, n: size,
cancel: cancel, cancel: cancel,
}, nil }
runtime.SetFinalizer(br, (*blobReader).finalizer)
return br, nil
} }
// Size returns the uncompressed size of the blob // Size returns the uncompressed size of the blob
@ -86,6 +93,10 @@ func (b *Blob) Size() int64 {
} }
type blobReader struct { type blobReader struct {
lock sync.Mutex
closed bool
repo *Repository
rd *bufio.Reader rd *bufio.Reader
n int64 n int64
cancel func() cancel func()
@ -104,27 +115,57 @@ func (b *blobReader) Read(p []byte) (n int, err error) {
} }
// Close implements io.Closer // Close implements io.Closer
func (b *blobReader) Close() error { func (b *blobReader) Close() (err error) {
b.lock.Lock()
defer b.lock.Unlock()
if b.closed {
return
}
return b.close()
}
func (b *blobReader) close() (err error) {
defer b.cancel() defer b.cancel()
b.closed = true
if b.n > 0 { if b.n > 0 {
var n int
for b.n > math.MaxInt32 { for b.n > math.MaxInt32 {
n, err := b.rd.Discard(math.MaxInt32) n, err = b.rd.Discard(math.MaxInt32)
b.n -= int64(n) b.n -= int64(n)
if err != nil { if err != nil {
return err return
} }
b.n -= math.MaxInt32 b.n -= math.MaxInt32
} }
n, err := b.rd.Discard(int(b.n)) n, err = b.rd.Discard(int(b.n))
b.n -= int64(n) b.n -= int64(n)
if err != nil { if err != nil {
return err return
} }
} }
if b.n == 0 { if b.n == 0 {
_, err := b.rd.Discard(1) _, err = b.rd.Discard(1)
b.n-- b.n--
return err return
} }
return nil return
}
func (b *blobReader) finalizer() error {
if b == nil {
return nil
}
b.lock.Lock()
defer b.lock.Unlock()
if b.closed {
return nil
}
pid := ""
if b.repo.Ctx != nil {
pid = " from PID: " + string(process.GetPID(b.repo.Ctx))
}
log.Error("Finalizer running on unclosed blobReader%s: %s%s", pid, b.repo.Path)
return b.close()
} }

View file

@ -12,8 +12,12 @@ import (
"context" "context"
"errors" "errors"
"path/filepath" "path/filepath"
"runtime"
"sync"
"code.gitea.io/gitea/modules/log"
gitealog "code.gitea.io/gitea/modules/log" gitealog "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"github.com/go-git/go-billy/v5/osfs" "github.com/go-git/go-billy/v5/osfs"
@ -28,6 +32,9 @@ type Repository struct {
tagCache *ObjectCache tagCache *ObjectCache
lock sync.Mutex
closed bool
gogitRepo *gogit.Repository gogitRepo *gogit.Repository
gogitStorage *filesystem.Storage gogitStorage *filesystem.Storage
gpgSettings *GPGSettings gpgSettings *GPGSettings
@ -63,23 +70,57 @@ func OpenRepositoryCtx(ctx context.Context, repoPath string) (*Repository, error
return nil, err return nil, err
} }
return &Repository{ repo := &Repository{
Path: repoPath, Path: repoPath,
gogitRepo: gogitRepo, gogitRepo: gogitRepo,
gogitStorage: storage, gogitStorage: storage,
tagCache: newObjectCache(), tagCache: newObjectCache(),
Ctx: ctx, Ctx: ctx,
}, nil }
runtime.SetFinalizer(repo, (*Repository).finalizer)
return repo, nil
} }
// Close this repository, in particular close the underlying gogitStorage if this is not nil // Close this repository, in particular close the underlying gogitStorage if this is not nil
func (repo *Repository) Close() { func (repo *Repository) Close() (err error) {
if repo == nil || repo.gogitStorage == nil { if repo == nil {
return return
} }
if err := repo.gogitStorage.Close(); err != nil { repo.lock.Lock()
defer repo.lock.Unlock()
return repo.close()
}
func (repo *Repository) close() (err error) {
repo.closed = true
if repo.gogitStorage == nil {
return
}
err = repo.gogitStorage.Close()
if err != nil {
gitealog.Error("Error closing storage: %v", err) gitealog.Error("Error closing storage: %v", err)
} }
return
}
func (repo *Repository) finalizer() error {
if repo == nil {
return nil
}
repo.lock.Lock()
defer repo.lock.Unlock()
if !repo.closed {
pid := ""
if repo.Ctx != nil {
pid = " from PID: " + string(process.GetPID(repo.Ctx))
}
log.Error("Finalizer running on unclosed repository%s: %s%s", pid, repo.Path)
}
// We still need to run the close fn as it may be possible to reopen the gogitrepo after close
return repo.close()
} }
// GoGitRepo gets the go-git repo representation // GoGitRepo gets the go-git repo representation

View file

@ -13,8 +13,11 @@ import (
"context" "context"
"errors" "errors"
"path/filepath" "path/filepath"
"runtime"
"sync"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
) )
// Repository represents a Git repository. // Repository represents a Git repository.
@ -25,6 +28,10 @@ type Repository struct {
gpgSettings *GPGSettings gpgSettings *GPGSettings
lock sync.Mutex
closed bool
batchCancel context.CancelFunc batchCancel context.CancelFunc
batchReader *bufio.Reader batchReader *bufio.Reader
batchWriter WriteCloserError batchWriter WriteCloserError
@ -64,29 +71,57 @@ func OpenRepositoryCtx(ctx context.Context, repoPath string) (*Repository, error
repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(ctx, repoPath) repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(ctx, repoPath)
repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(ctx, repo.Path) repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(ctx, repo.Path)
runtime.SetFinalizer(repo, (*Repository).finalizer)
return repo, nil return repo, nil
} }
// CatFileBatch obtains a CatFileBatch for this repository // CatFileBatch obtains a CatFileBatch for this repository
func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) { func (repo *Repository) CatFileBatch(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) {
if repo.batchCancel == nil || repo.batchReader.Buffered() > 0 { repo.lock.Lock()
defer repo.lock.Unlock()
if repo.closed || repo.batchReader.Buffered() > 0 {
log.Debug("Opening temporary cat file batch for: %s", repo.Path) log.Debug("Opening temporary cat file batch for: %s", repo.Path)
return CatFileBatch(ctx, repo.Path) return CatFileBatch(ctx, repo.Path)
} }
if repo.batchCancel == nil {
repo.batchWriter, repo.batchReader, repo.batchCancel = CatFileBatch(ctx, repo.Path)
}
return repo.batchWriter, repo.batchReader, func() {} return repo.batchWriter, repo.batchReader, func() {}
} }
// CatFileBatchCheck obtains a CatFileBatchCheck for this repository // CatFileBatchCheck obtains a CatFileBatchCheck for this repository
func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) { func (repo *Repository) CatFileBatchCheck(ctx context.Context) (WriteCloserError, *bufio.Reader, func()) {
if repo.checkCancel == nil || repo.checkReader.Buffered() > 0 { repo.lock.Lock()
defer repo.lock.Unlock()
if repo.closed || repo.checkReader.Buffered() > 0 {
log.Debug("Opening temporary cat file batch-check: %s", repo.Path) log.Debug("Opening temporary cat file batch-check: %s", repo.Path)
return CatFileBatchCheck(ctx, repo.Path) return CatFileBatchCheck(ctx, repo.Path)
} }
if repo.checkCancel == nil {
repo.checkWriter, repo.checkReader, repo.checkCancel = CatFileBatchCheck(ctx, repo.Path)
}
return repo.checkWriter, repo.checkReader, func() {} return repo.checkWriter, repo.checkReader, func() {}
} }
// Close this repository, in particular close the underlying gogitStorage if this is not nil // Close this repository, in particular close the underlying gogitStorage if this is not nil
func (repo *Repository) Close() { func (repo *Repository) Close() (err error) {
if repo == nil {
return
}
repo.lock.Lock()
defer repo.lock.Unlock()
return repo.close()
}
func (repo *Repository) close() (err error) {
if repo == nil { if repo == nil {
return return
} }
@ -102,4 +137,26 @@ func (repo *Repository) Close() {
repo.checkReader = nil repo.checkReader = nil
repo.checkWriter = nil repo.checkWriter = nil
} }
repo.closed = true
return
}
func (repo *Repository) finalizer() (err error) {
if repo == nil {
return
}
repo.lock.Lock()
defer repo.lock.Unlock()
if repo.closed {
return nil
}
if repo.batchCancel != nil || repo.checkCancel != nil {
pid := ""
if repo.Ctx != nil {
pid = " from PID: " + string(process.GetPID(repo.Ctx))
}
log.Error("Finalizer running on unclosed repository%s: %s%s", pid, repo.Path)
}
return repo.close()
} }