Improve checkIfPRContentChanged (#22611)
The code for checking if a commit has caused a change in a PR is extremely inefficient and affects the head repository instead of using a temporary repository. This PR therefore makes several significant improvements: * A temporary repo like that used in merging. * The diff code is then significant improved to use a three-way diff instead of comparing diffs (possibly binary) line-by-line - in memory... Ref #22578 Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
e9cd18b557
commit
78e6b21c1a
2 changed files with 59 additions and 51 deletions
|
@ -4,6 +4,7 @@
|
||||||
package util
|
package util
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -17,3 +18,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
|
||||||
}
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ErrNotEmpty is an error reported when there is a non-empty reader
|
||||||
|
var ErrNotEmpty = errors.New("not-empty")
|
||||||
|
|
||||||
|
// IsEmptyReader reads a reader and ensures it is empty
|
||||||
|
func IsEmptyReader(r io.Reader) (err error) {
|
||||||
|
var buf [1]byte
|
||||||
|
|
||||||
|
for {
|
||||||
|
n, err := r.Read(buf[:])
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if n > 0 {
|
||||||
|
return ErrNotEmpty
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -4,14 +4,12 @@
|
||||||
package pull
|
package pull
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"code.gitea.io/gitea/models"
|
"code.gitea.io/gitea/models"
|
||||||
"code.gitea.io/gitea/models/db"
|
"code.gitea.io/gitea/models/db"
|
||||||
|
@ -29,6 +27,7 @@ import (
|
||||||
repo_module "code.gitea.io/gitea/modules/repository"
|
repo_module "code.gitea.io/gitea/modules/repository"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"code.gitea.io/gitea/modules/sync"
|
"code.gitea.io/gitea/modules/sync"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
issue_service "code.gitea.io/gitea/services/issue"
|
issue_service "code.gitea.io/gitea/services/issue"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
|
||||||
// checkIfPRContentChanged checks if diff to target branch has changed by push
|
// checkIfPRContentChanged checks if diff to target branch has changed by push
|
||||||
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
|
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
|
||||||
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
|
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
|
||||||
if err = pr.LoadHeadRepo(ctx); err != nil {
|
tmpBasePath, err := createTemporaryRepo(ctx, pr)
|
||||||
return false, fmt.Errorf("LoadHeadRepo: %w", err)
|
if err != nil {
|
||||||
} else if pr.HeadRepo == nil {
|
log.Error("CreateTemporaryRepo: %v", err)
|
||||||
// corrupt data assumed changed
|
return false, err
|
||||||
return true, nil
|
|
||||||
}
|
}
|
||||||
|
defer func() {
|
||||||
|
if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
|
||||||
|
log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
if err = pr.LoadBaseRepo(ctx); err != nil {
|
tmpRepo, err := git.OpenRepository(ctx, tmpBasePath)
|
||||||
return false, fmt.Errorf("LoadBaseRepo: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath())
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("OpenRepository: %w", err)
|
return false, fmt.Errorf("OpenRepository: %w", err)
|
||||||
}
|
}
|
||||||
defer headGitRepo.Close()
|
defer tmpRepo.Close()
|
||||||
|
|
||||||
// Add a temporary remote.
|
// Find the merge-base
|
||||||
tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano())
|
_, base, err := tmpRepo.GetMergeBase("", "base", "tracking")
|
||||||
if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil {
|
|
||||||
return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err := headGitRepo.RemoveRemote(tmpRemote); err != nil {
|
|
||||||
log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
// To synchronize repo and get a base ref
|
|
||||||
_, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("GetMergeBase: %w", err)
|
return false, fmt.Errorf("GetMergeBase: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
diffBefore := &bytes.Buffer{}
|
cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base)
|
||||||
diffAfter := &bytes.Buffer{}
|
stdoutReader, stdoutWriter, err := os.Pipe()
|
||||||
if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil {
|
if err != nil {
|
||||||
// If old commit not found, assume changed.
|
return false, fmt.Errorf("unable to open pipe for to run diff: %w", err)
|
||||||
log.Debug("GetDiffFromMergeBase: %v", err)
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil {
|
|
||||||
// New commit should be found
|
|
||||||
return false, fmt.Errorf("GetDiffFromMergeBase: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
diffBeforeLines := bufio.NewScanner(diffBefore)
|
if err := cmd.Run(&git.RunOpts{
|
||||||
diffAfterLines := bufio.NewScanner(diffAfter)
|
Dir: tmpBasePath,
|
||||||
|
Stdout: stdoutWriter,
|
||||||
for diffBeforeLines.Scan() && diffAfterLines.Scan() {
|
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
|
||||||
if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") {
|
_ = stdoutWriter.Close()
|
||||||
// file hashes can change without the diff changing
|
defer func() {
|
||||||
continue
|
_ = stdoutReader.Close()
|
||||||
} else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") {
|
}()
|
||||||
// the location of the difference may change
|
return util.IsEmptyReader(stdoutReader)
|
||||||
continue
|
},
|
||||||
} else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) {
|
}); err != nil {
|
||||||
|
if err == util.ErrNotEmpty {
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if diffBeforeLines.Scan() || diffAfterLines.Scan() {
|
log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v",
|
||||||
// Diffs not of equal length
|
newCommitID, oldCommitID, base,
|
||||||
return true, nil
|
pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch,
|
||||||
|
err)
|
||||||
|
|
||||||
|
return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return false, nil
|
return false, nil
|
||||||
|
|
Loading…
Reference in a new issue