forgejo/services/mirror/mirror_push.go

279 lines
7.6 KiB
Go
Raw Permalink Normal View History

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package mirror
import (
"context"
"errors"
"fmt"
"io"
"regexp"
"strings"
"time"
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
var stripExitStatus = regexp.MustCompile(`exit status \d+ - `)
// AddPushMirrorRemote registers the push mirror remote.
func AddPushMirrorRemote(ctx context.Context, m *repo_model.PushMirror, addr string) error {
addRemoteAndConfig := func(addr, path string) error {
cmd := git.NewCommand(ctx, "remote", "add", "--mirror=push").AddDynamicArguments(m.RemoteName, addr)
if strings.Contains(addr, "://") && strings.Contains(addr, "@") {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, util.SanitizeCredentialURLs(addr), path))
} else {
cmd.SetDescription(fmt.Sprintf("remote add %s --mirror=push %s [repo_path: %s]", m.RemoteName, addr, path))
}
if _, _, err := cmd.RunStdString(&git.RunOpts{Dir: path}); err != nil {
return err
}
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
if _, _, err := git.NewCommand(ctx, "config", "--add").AddDynamicArguments("remote."+m.RemoteName+".push", "+refs/heads/*:refs/heads/*").RunStdString(&git.RunOpts{Dir: path}); err != nil {
return err
}
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 02:30:43 +00:00
if _, _, err := git.NewCommand(ctx, "config", "--add").AddDynamicArguments("remote."+m.RemoteName+".push", "+refs/tags/*:refs/tags/*").RunStdString(&git.RunOpts{Dir: path}); err != nil {
return err
}
return nil
}
if err := addRemoteAndConfig(addr, m.Repo.RepoPath()); err != nil {
return err
}
if m.Repo.HasWiki() {
wikiRemoteURL := repository.WikiRemoteURL(ctx, addr)
if len(wikiRemoteURL) > 0 {
if err := addRemoteAndConfig(wikiRemoteURL, m.Repo.WikiPath()); err != nil {
return err
}
}
}
return nil
}
// RemovePushMirrorRemote removes the push mirror remote.
func RemovePushMirrorRemote(ctx context.Context, m *repo_model.PushMirror) error {
cmd := git.NewCommand(ctx, "remote", "rm").AddDynamicArguments(m.RemoteName)
_ = m.GetRepository(ctx)
if _, _, err := cmd.RunStdString(&git.RunOpts{Dir: m.Repo.RepoPath()}); err != nil {
return err
}
if m.Repo.HasWiki() {
if _, _, err := cmd.RunStdString(&git.RunOpts{Dir: m.Repo.WikiPath()}); err != nil {
// The wiki remote may not exist
log.Warn("Wiki Remote[%d] could not be removed: %v", m.ID, err)
}
}
return nil
}
// SyncPushMirror starts the sync of the push mirror and schedules the next run.
func SyncPushMirror(ctx context.Context, mirrorID int64) bool {
log.Trace("SyncPushMirror [mirror: %d]", mirrorID)
defer func() {
err := recover()
if err == nil {
return
}
// There was a panic whilst syncPushMirror...
log.Error("PANIC whilst syncPushMirror[%d] Panic: %v\nStacktrace: %s", mirrorID, err, log.Stack(2))
}()
// TODO: Handle "!exist" better
m, exist, err := db.GetByID[repo_model.PushMirror](ctx, mirrorID)
if err != nil || !exist {
log.Error("GetPushMirrorByID [%d]: %v", mirrorID, err)
return false
}
_ = m.GetRepository(ctx)
m.LastError = ""
ctx, _, finished := process.GetManager().AddContext(ctx, fmt.Sprintf("Syncing PushMirror %s/%s to %s", m.Repo.OwnerName, m.Repo.Name, m.RemoteName))
defer finished()
log.Trace("SyncPushMirror [mirror: %d][repo: %-v]: Running Sync", m.ID, m.Repo)
err = runPushSync(ctx, m)
if err != nil {
log.Error("SyncPushMirror [mirror: %d][repo: %-v]: %v", m.ID, m.Repo, err)
m.LastError = stripExitStatus.ReplaceAllLiteralString(err.Error(), "")
}
m.LastUpdateUnix = timeutil.TimeStampNow()
if err := repo_model.UpdatePushMirror(ctx, m); err != nil {
log.Error("UpdatePushMirror [%d]: %v", m.ID, err)
return false
}
log.Trace("SyncPushMirror [mirror: %d][repo: %-v]: Finished", m.ID, m.Repo)
return err == nil
}
func runPushSync(ctx context.Context, m *repo_model.PushMirror) error {
timeout := time.Duration(setting.Git.Timeout.Mirror) * time.Second
performPush := func(repo *repo_model.Repository, isWiki bool) error {
path := repo.RepoPath()
if isWiki {
path = repo.WikiPath()
}
remoteURL, err := git.GetRemoteURL(ctx, path, m.RemoteName)
if err != nil {
log.Error("GetRemoteAddress(%s) Error %v", path, err)
return errors.New("Unexpected error")
}
if setting.LFS.StartServer {
log.Trace("SyncMirrors [repo: %-v]: syncing LFS objects...", m.Repo)
var gitRepo *git.Repository
if isWiki {
gitRepo, err = gitrepo.OpenWikiRepository(ctx, repo)
} else {
gitRepo, err = gitrepo.OpenRepository(ctx, repo)
}
if err != nil {
log.Error("OpenRepository: %v", err)
return errors.New("Unexpected error")
}
defer gitRepo.Close()
endpoint := lfs.DetermineEndpoint(remoteURL.String(), "")
lfsClient := lfs.NewClient(endpoint, nil)
if err := pushAllLFSObjects(ctx, gitRepo, lfsClient); err != nil {
return util.SanitizeErrorCredentialURLs(err)
}
}
log.Trace("Pushing %s mirror[%d] remote %s", path, m.ID, m.RemoteName)
if err := git.Push(ctx, path, git.PushOptions{
Remote: m.RemoteName,
Force: true,
Mirror: true,
Timeout: timeout,
}); err != nil {
log.Error("Error pushing %s mirror[%d] remote %s: %v", path, m.ID, m.RemoteName, err)
return util.SanitizeErrorCredentialURLs(err)
}
return nil
}
err := performPush(m.Repo, false)
if err != nil {
return err
}
if m.Repo.HasWiki() {
_, err := git.GetRemoteAddress(ctx, m.Repo.WikiPath(), m.RemoteName)
if err == nil {
err := performPush(m.Repo, true)
if err != nil {
return err
}
} else {
log.Trace("Skipping wiki: No remote configured")
}
}
return nil
}
func pushAllLFSObjects(ctx context.Context, gitRepo *git.Repository, lfsClient lfs.Client) error {
contentStore := lfs.NewContentStore()
pointerChan := make(chan lfs.PointerBlob)
errChan := make(chan error, 1)
go lfs.SearchPointerBlobs(ctx, gitRepo, pointerChan, errChan)
uploadObjects := func(pointers []lfs.Pointer) error {
err := lfsClient.Upload(ctx, pointers, func(p lfs.Pointer, objectError error) (io.ReadCloser, error) {
if objectError != nil {
return nil, objectError
}
content, err := contentStore.Get(p)
if err != nil {
log.Error("Error reading LFS object %v: %v", p, err)
}
return content, err
})
if err != nil {
select {
case <-ctx.Done():
return nil
default:
}
}
return err
}
var batch []lfs.Pointer
for pointerBlob := range pointerChan {
exists, err := contentStore.Exists(pointerBlob.Pointer)
if err != nil {
log.Error("Error checking if LFS object %v exists: %v", pointerBlob.Pointer, err)
return err
}
if !exists {
log.Trace("Skipping missing LFS object %v", pointerBlob.Pointer)
continue
}
batch = append(batch, pointerBlob.Pointer)
if len(batch) >= lfsClient.BatchSize() {
if err := uploadObjects(batch); err != nil {
return err
}
batch = nil
}
}
if len(batch) > 0 {
if err := uploadObjects(batch); err != nil {
return err
}
}
err, has := <-errChan
if has {
log.Error("Error enumerating LFS objects for repository: %v", err)
return err
}
return nil
}
func syncPushMirrorWithSyncOnCommit(ctx context.Context, repoID int64) {
pushMirrors, err := repo_model.GetPushMirrorsSyncedOnCommit(ctx, repoID)
if err != nil {
log.Error("repo_model.GetPushMirrorsSyncedOnCommit failed: %v", err)
return
}
for _, mirror := range pushMirrors {
AddPushMirrorToQueue(mirror.ID)
}
}