Migrated Repository will show modifications when possible (#17191)

* Read patches to get history
This commit is contained in:
99rgosse 2021-12-23 09:32:29 +01:00 committed by GitHub
parent ba6efb105a
commit e0cf3d86c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 188 additions and 3 deletions

View file

@ -6,9 +6,14 @@
package git package git
import ( import (
"bufio"
"bytes" "bytes"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os"
"path/filepath"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
@ -188,6 +193,8 @@ func GetDiffShortStat(repoPath string, args ...string) (numFiles, totalAdditions
var shortStatFormat = regexp.MustCompile( var shortStatFormat = regexp.MustCompile(
`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`) `\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)
var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)
func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) { func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
if len(stdout) == 0 || stdout == "\n" { if len(stdout) == 0 || stdout == "\n" {
return 0, 0, 0, nil return 0, 0, 0, nil
@ -267,3 +274,57 @@ func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) err
} }
return err return err
} }
// ReadPullHead returns the commit ID stored on the first line of the loose ref
// file refs/pull/<prID>/head inside the repository directory.
//
// It returns the os.Open error when the file cannot be opened, the scanner
// error when the first line cannot be read, and a generic error when that line
// is not a 40-character hexadecimal commit ID.
func (repo *Repository) ReadPullHead(prID int64) (commitSHA string, err error) {
	headPath := fmt.Sprintf("refs/pull/%d/head", prID)
	fullHeadPath := filepath.Join(repo.Path, headPath)
	loadHead, err := os.Open(fullHeadPath)
	if err != nil {
		return "", err
	}
	defer loadHead.Close()

	// Only the first line of the head file is read.
	scanner := bufio.NewScanner(loadHead)
	if !scanner.Scan() {
		// Scan returns false both on a read error and on an empty file;
		// report a real read error instead of masking it as invalid content.
		if err = scanner.Err(); err != nil {
			return "", err
		}
		return "", errors.New("head file doesn't contain valid commit ID")
	}

	commitHead := scanner.Text()
	if len(commitHead) != 40 {
		return "", errors.New("head file doesn't contain valid commit ID")
	}
	// The value is handed on as a commit ID, so reject anything that is not
	// purely hexadecimal rather than accepting any 40-byte line.
	for i := 0; i < len(commitHead); i++ {
		c := commitHead[i]
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
		if !isHex {
			return "", errors.New("head file doesn't contain valid commit ID")
		}
	}
	return commitHead, nil
}
// ReadPatchCommit returns the commit ID named on the first line of the patch
// file pulls/<prID>.patch inside the repository directory. Migrated
// repositories download their patches to that "pulls" location.
//
// It returns the os.Open error when the file cannot be opened, the scanner
// error when the first line cannot be read, and a generic error when that line
// does not start with "From <40 hexadecimal characters> ".
func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
	patchFile := fmt.Sprintf("pulls/%d.patch", prID)
	loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
	if err != nil {
		return "", err
	}
	defer loadPatch.Close()

	// Read only the first line of the patch - usually it names the first commit made in the patch
	scanner := bufio.NewScanner(loadPatch)
	if !scanner.Scan() {
		// Scan returns false both on a read error and on an empty file;
		// report a real read error instead of masking it as invalid content.
		if err = scanner.Err(); err != nil {
			return "", err
		}
		return "", errors.New("patch file doesn't contain valid commit ID")
	}

	// The file may not be a real patch: sometimes the migration gets a 404 for the patch file
	commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
	if len(commitSHAGroups) < 2 {
		return "", errors.New("patch file doesn't contain valid commit ID")
	}

	// patchCommits captures any word after "From ", so make sure the capture
	// really is a full commit ID before handing it back to the caller.
	candidate := commitSHAGroups[1]
	if len(candidate) != 40 {
		return "", errors.New("patch file doesn't contain valid commit ID")
	}
	for i := 0; i < len(candidate); i++ {
		c := candidate[i]
		isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
		if !isHex {
			return "", errors.New("patch file doesn't contain valid commit ID")
		}
	}
	return candidate, nil
}
// WritePullHead stores commitSHA in the file refs/pull/<prID>/head inside the
// repository directory, creating the refs/pull/<prID> directory if it is
// missing. An existing head file is overwritten.
//
// It returns the error from os.MkdirAll or from writing the file.
func (repo *Repository) WritePullHead(prID int64, commitSHA string) error {
	headPath := fmt.Sprintf("refs/pull/%d", prID)
	fullHeadPath := filepath.Join(repo.Path, headPath)
	// Create missing directory just in case
	if err := os.MkdirAll(fullHeadPath, os.ModePerm); err != nil {
		return err
	}

	pullPath := filepath.Join(fullHeadPath, "head")
	// The head file is plain data: create it readable/writable, never
	// executable or world-writable.
	return ioutil.WriteFile(pullPath, []byte(commitSHA), 0644)
}

View file

@ -8,6 +8,7 @@ import (
"bytes" "bytes"
"io" "io"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
@ -18,11 +19,11 @@ import (
func TestGetFormatPatch(t *testing.T) { func TestGetFormatPatch(t *testing.T) {
bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
clonedPath, err := cloneRepo(bareRepo1Path, testReposDir, "repo1_TestGetFormatPatch") clonedPath, err := cloneRepo(bareRepo1Path, testReposDir, "repo1_TestGetFormatPatch")
assert.NoError(t, err)
defer util.RemoveAll(clonedPath) defer util.RemoveAll(clonedPath)
repo, err := OpenRepository(clonedPath)
assert.NoError(t, err) assert.NoError(t, err)
repo, err := OpenRepository(clonedPath)
defer repo.Close() defer repo.Close()
assert.NoError(t, err)
rd := &bytes.Buffer{} rd := &bytes.Buffer{}
err = repo.GetPatch("8d92fc95^", "8d92fc95", rd) err = repo.GetPatch("8d92fc95^", "8d92fc95", rd)
assert.NoError(t, err) assert.NoError(t, err)
@ -32,3 +33,49 @@ func TestGetFormatPatch(t *testing.T) {
assert.Regexp(t, "^From 8d92fc95", patch) assert.Regexp(t, "^From 8d92fc95", patch)
assert.Contains(t, patch, "Subject: [PATCH] Add file2.txt") assert.Contains(t, patch, "Subject: [PATCH] Add file2.txt")
} }
// TestReadPatch ensures ReadPatchCommit reports an error for a missing patch
// file and for an empty one, and returns the commit ID of a valid patch.
func TestReadPatch(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
	repo, err := OpenRepository(bareRepo1Path)
	// Check the error before deferring Close: the returned repository must
	// not be used when opening failed.
	if !assert.NoError(t, err) {
		return
	}
	defer repo.Close()

	// This patch doesn't exist
	noFile, err := repo.ReadPatchCommit(0)
	assert.Error(t, err)
	assert.Empty(t, noFile)

	// This patch is an empty one (sometimes it's a 404)
	noCommit, err := repo.ReadPatchCommit(1)
	assert.Error(t, err)
	assert.Empty(t, noCommit)

	// This patch is legit and should return a commit
	oldCommit, err := repo.ReadPatchCommit(2)
	assert.NoError(t, err)
	assert.Len(t, oldCommit, 40)
	// Equal reports both values on failure, unlike True(a == b).
	assert.Equal(t, "6e8e2a6f9efd71dbe6917816343ed8415ad696c3", oldCommit)
}
// TestReadWritePullHead ensures a commit ID written with WritePullHead can be
// read back with ReadPullHead, and that reading a missing head file fails.
func TestReadWritePullHead(t *testing.T) {
	bareRepo1Path := filepath.Join(testReposDir, "repo1_bare")
	repo, err := OpenRepository(bareRepo1Path)
	// Nothing below is meaningful without an opened repository.
	if !assert.NoError(t, err) {
		return
	}
	defer repo.Close()

	// Try to open non-existing Pull
	_, err = repo.ReadPullHead(0)
	assert.Error(t, err)

	// Write a fake sha1 with only 40 zeros
	newCommit := strings.Repeat("0", 40)
	err = repo.WritePullHead(1, newCommit)
	assert.NoError(t, err)

	headFile := filepath.Join(repo.Path, "refs/pull/1/head")
	// Remove file after the test
	defer util.Remove(headFile)
	assert.FileExists(t, headFile)

	// Read the file created
	headContents, err := repo.ReadPullHead(1)
	assert.NoError(t, err)
	assert.Len(t, headContents, 40)
	// Equal reports both values on failure, unlike True(a == b).
	assert.Equal(t, newCommit, headContents)
}

View file

@ -0,0 +1,39 @@
From 6e8e2a6f9efd71dbe6917816343ed8415ad696c3 Mon Sep 17 00:00:00 2001
From: 99rgosse <renaud@mycompany.com>
Date: Fri, 26 Mar 2021 12:44:22 +0000
Subject: [PATCH] Update gitea_import_actions.py
---
gitea_import_actions.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/gitea_import_actions.py b/gitea_import_actions.py
index f0d72cd..7b31963 100644
--- a/gitea_import_actions.py
+++ b/gitea_import_actions.py
@@ -3,14 +3,14 @@
# git log --pretty=format:'%H,%at,%s' --date=default > /tmp/commit.log
# to get the commits logfile for a repository
-import mysql.connector as mariadb
+import psycopg2
# set the following variables to fit your need...
USERID = 1
REPOID = 1
BRANCH = "master"
-mydb = mariadb.connect(
+mydb = psycopg2.connect(
host="localhost",
user="user",
passwd="password",
@@ -31,4 +31,4 @@ with open("/tmp/commit.log") as f:
mydb.commit()
-print("actions inserted.")
\ No newline at end of file
+print("actions inserted.")
--
GitLab

View file

@ -320,8 +320,46 @@ func PrepareMergedViewPullInfo(ctx *context.Context, issue *models.Issue) *git.C
setMergeTarget(ctx, pull) setMergeTarget(ctx, pull)
ctx.Data["HasMerged"] = true ctx.Data["HasMerged"] = true
var baseCommit string
// Some migrated PRs have no merge base SHA and would lose their history; try to recover one
if pull.MergeBase == "" {
var commitSHA, parentCommit string
// If there is a head or a patch file, and it is readable, grab info
commitSHA, err := ctx.Repo.GitRepo.ReadPullHead(pull.Index)
if err != nil {
// Head File does not exist, try the patch
commitSHA, err = ctx.Repo.GitRepo.ReadPatchCommit(pull.Index)
if err == nil {
// Recreate pull head in files for next time
if err := ctx.Repo.GitRepo.WritePullHead(pull.Index, commitSHA); err != nil {
log.Error("Could not write head file", err)
}
} else {
// There is no history available
log.Trace("No history file available for PR %d", pull.Index)
}
}
if commitSHA != "" {
// Get immediate parent of the first commit in the patch, grab history back
parentCommit, err = git.NewCommandContext(ctx, "rev-list", "-1", "--skip=1", commitSHA).RunInDir(ctx.Repo.GitRepo.Path)
if err == nil {
parentCommit = strings.TrimSpace(parentCommit)
}
// Special case on Git < 2.25 that doesn't fail on immediate empty history
if err != nil || parentCommit == "" {
log.Info("No known parent commit for PR %d, error: %v", pull.Index, err)
// bring at least partial history if it can work
parentCommit = commitSHA
}
}
baseCommit = parentCommit
} else {
// Keep an empty history or original commit
baseCommit = pull.MergeBase
}
compareInfo, err := ctx.Repo.GitRepo.GetCompareInfo(ctx.Repo.Repository.RepoPath(), compareInfo, err := ctx.Repo.GitRepo.GetCompareInfo(ctx.Repo.Repository.RepoPath(),
pull.MergeBase, pull.GetGitRefName(), true, false) baseCommit, pull.GetGitRefName(), true, false)
if err != nil { if err != nil {
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "unknown revision or path not in the working tree") { if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "unknown revision or path not in the working tree") {
ctx.Data["IsPullRequestBroken"] = true ctx.Data["IsPullRequestBroken"] = true