Fix one performance/correctness regression in #6478 found on the Rails repository. (#6686)

* Fix a flaw in the commit history lookup that caused unnecessary traversal
when the repository contains a lot of merge commits.

Also return the merge commit as the one that changed the path if the file or
directory was changed as part of the merge, e.g. through conflict
resolution.

Signed-off-by: Filip Navara <filip.navara@gmail.com>

* Perform history simplification. If a file is present on multiple parents
of a merge commit, follow only the first such parent.
This commit is contained in:
Filip Navara 2019-04-21 10:49:06 +02:00 committed by Lunny Xiao
parent 04ff3dd510
commit b83114f140

View file

@ -147,12 +147,6 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
break break
} }
current := cIn.(*commitAndPaths) current := cIn.(*commitAndPaths)
currentID := current.commit.ID()
if seen[currentID] {
continue
}
seen[currentID] = true
// Load the parent commits for the one we are currently examining // Load the parent commits for the one we are currently examining
numParents := current.commit.NumParents() numParents := current.commit.NumParents()
@ -166,8 +160,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
} }
// Examine the current commit and set of interesting paths // Examine the current commit and set of interesting paths
numOfParentsWithPath := make([]int, len(current.paths)) pathUnchanged := make([]bool, len(current.paths))
pathChanged := make([]bool, len(current.paths))
parentHashes := make([]map[string]plumbing.Hash, len(parents)) parentHashes := make([]map[string]plumbing.Hash, len(parents))
for j, parent := range parents { for j, parent := range parents {
parentHashes[j], err = getFileHashes(parent, treePath, current.paths) parentHashes[j], err = getFileHashes(parent, treePath, current.paths)
@ -176,42 +169,32 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
} }
for i, path := range current.paths { for i, path := range current.paths {
if parentHashes[j][path] != plumbing.ZeroHash { if parentHashes[j][path] == current.hashes[path] {
numOfParentsWithPath[i]++ pathUnchanged[i] = true
if parentHashes[j][path] != current.hashes[path] {
pathChanged[i] = true
}
} }
} }
} }
var remainingPaths []string var remainingPaths []string
for i, path := range current.paths { for i, path := range current.paths {
switch numOfParentsWithPath[i] { // The results could already contain some newer change for the same path,
case 0: // so don't override that and bail out on the file early.
// The path didn't exist in any parent, so it must have been created by
// this commit. The results could already contain some newer change from
// different path, so don't override that.
if result[path] == nil { if result[path] == nil {
result[path] = current.commit if pathUnchanged[i] {
} // The path existed with the same hash in at least one parent so it could
case 1: // not have been changed in this commit directly.
// The file is present on exactly one parent, so check if it was changed remainingPaths = append(remainingPaths, path)
// and save the revision if it did.
if pathChanged[i] {
if result[path] == nil {
result[path] = current.commit
}
} else { } else {
remainingPaths = append(remainingPaths, path) // There are few possible cases how can we get here:
// - The path didn't exist in any parent, so it must have been created by
// this commit.
// - The path did exist in the parent commit, but the hash of the file has
// changed.
// - We are looking at a merge commit and the hash of the file doesn't
// match any of the hashes being merged. This is more common for directories,
// but it can also happen if a file is changed through conflict resolution.
result[path] = current.commit
} }
default:
// The file is present on more than one of the parent paths, so this is
// a merge. We have to examine all the parent trees to find out where
// the change occurred. pathChanged[i] would tell us that the file was
// changed during the merge, but it wouldn't tell us the relevant commit
// that introduced it.
remainingPaths = append(remainingPaths, path)
} }
} }
@ -222,18 +205,30 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
if seen[parent.ID()] { if seen[parent.ID()] {
continue continue
} }
seen[parent.ID()] = true
// Combine remainingPath with paths available on the parent branch // Combine remainingPath with paths available on the parent branch
// and make union of them // and make union of them
var remainingPathsForParent []string var remainingPathsForParent []string
var newRemainingPaths []string
for _, path := range remainingPaths { for _, path := range remainingPaths {
if parentHashes[j][path] != plumbing.ZeroHash { if parentHashes[j][path] == current.hashes[path] {
remainingPathsForParent = append(remainingPathsForParent, path) remainingPathsForParent = append(remainingPathsForParent, path)
} else {
newRemainingPaths = append(newRemainingPaths, path)
} }
} }
if remainingPathsForParent != nil {
heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]}) heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]})
} }
if len(newRemainingPaths) == 0 {
break
} else {
remainingPaths = newRemainingPaths
}
}
} }
} }