From e0cf3d86c44fde99b49f12c7a1386cbf433a0207 Mon Sep 17 00:00:00 2001 From: 99rgosse <61579380+99rgosse@users.noreply.github.com> Date: Thu, 23 Dec 2021 09:32:29 +0100 Subject: [PATCH] Migrated Repository will show modifications when possible (#17191) * Read patches to get history --- modules/git/repo_compare.go | 61 ++++++++++++++++++++++++ modules/git/repo_compare_test.go | 51 +++++++++++++++++++- modules/git/tests/repos/repo1_bare/pulls/1.patch | 0 modules/git/tests/repos/repo1_bare/pulls/2.patch | 39 +++++++++++++++ routers/web/repo/pull.go | 40 +++++++++++++++- 5 files changed, 188 insertions(+), 3 deletions(-) create mode 100644 modules/git/tests/repos/repo1_bare/pulls/1.patch create mode 100644 modules/git/tests/repos/repo1_bare/pulls/2.patch diff --git a/modules/git/repo_compare.go b/modules/git/repo_compare.go index 992a70733b..3690b74532 100644 --- a/modules/git/repo_compare.go +++ b/modules/git/repo_compare.go @@ -6,9 +6,14 @@ package git import ( + "bufio" "bytes" + "errors" "fmt" "io" + "io/ioutil" + "os" + "path/filepath" "regexp" "strconv" "strings" @@ -188,6 +193,8 @@ func GetDiffShortStat(repoPath string, args ...string) (numFiles, totalAdditions var shortStatFormat = regexp.MustCompile( `\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`) +var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`) + func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) { if len(stdout) == 0 || stdout == "\n" { return 0, 0, 0, nil @@ -267,3 +274,57 @@ func (repo *Repository) GetDiffFromMergeBase(base, head string, w io.Writer) err } return err } + +// ReadPullHead will fetch a pull ref if possible or return an error +func (repo *Repository) ReadPullHead(prID int64) (commitSHA string, err error) { + headPath := fmt.Sprintf("refs/pull/%d/head", prID) + fullHeadPath := filepath.Join(repo.Path, headPath) + loadHead, err := os.Open(fullHeadPath) + if err != nil { + return "", err + } + defer loadHead.Close() + // Read only the first line of the patch - usually it contains the first commit made in patch + scanner := bufio.NewScanner(loadHead) + scanner.Scan() + commitHead := scanner.Text() + if len(commitHead) != 40 { + return "", errors.New("head file doesn't contain valid commit ID") + } + return commitHead, nil +} + +// ReadPatchCommit will check if a diff patch exists and return stats +func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) { + // Migrated repositories download patches to "pulls" location + patchFile := fmt.Sprintf("pulls/%d.patch", prID) + loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile)) + if err != nil { + return "", err + } + defer loadPatch.Close() + // Read only the first line of the patch - usually it contains the first commit made in patch + scanner := bufio.NewScanner(loadPatch) + scanner.Scan() + // Parse the Patch stats, sometimes Migration returns a 404 for the patch file + commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text()) + if len(commitSHAGroups) != 0 { + commitSHA = commitSHAGroups[1] + } else { + return "", errors.New("patch file doesn't contain valid commit ID") + } + return commitSHA, nil +} + +// WritePullHead will populate a PR head retrieved from patch file +func (repo *Repository) WritePullHead(prID int64, commitSHA string) error { + headPath := fmt.Sprintf("refs/pull/%d", prID) + fullHeadPath := filepath.Join(repo.Path, headPath) + // Create missing directory just in case + if err := os.MkdirAll(fullHeadPath, os.ModePerm); err != nil { + return err + } + commitBytes := []byte(commitSHA) + pullPath := filepath.Join(fullHeadPath, "head") + return ioutil.WriteFile(pullPath, commitBytes, os.ModePerm) +} diff --git a/modules/git/repo_compare_test.go b/modules/git/repo_compare_test.go index ecd1aa4e6d..4790170d10 100644 --- a/modules/git/repo_compare_test.go +++ b/modules/git/repo_compare_test.go @@ -8,6 +8,7 @@ import ( "bytes" "io" "path/filepath" + "strings" "testing" "code.gitea.io/gitea/modules/util" @@ -18,11 +19,11 @@ import ( func TestGetFormatPatch(t *testing.T) { bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") clonedPath, err := cloneRepo(bareRepo1Path, testReposDir, "repo1_TestGetFormatPatch") - assert.NoError(t, err) defer util.RemoveAll(clonedPath) - repo, err := OpenRepository(clonedPath) assert.NoError(t, err) + repo, err := OpenRepository(clonedPath) defer repo.Close() + assert.NoError(t, err) rd := &bytes.Buffer{} err = repo.GetPatch("8d92fc95^", "8d92fc95", rd) assert.NoError(t, err) @@ -32,3 +33,49 @@ func TestGetFormatPatch(t *testing.T) { assert.Regexp(t, "^From 8d92fc95", patch) assert.Contains(t, patch, "Subject: [PATCH] Add file2.txt") } + +func TestReadPatch(t *testing.T) { + // Ensure we can read the patch files + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + repo, err := OpenRepository(bareRepo1Path) + defer repo.Close() + assert.NoError(t, err) + // This patch doesn't exist + noFile, err := repo.ReadPatchCommit(0) + assert.Error(t, err) + // This patch is an empty one (sometimes it's a 404) + noCommit, err := repo.ReadPatchCommit(1) + assert.Error(t, err) + // This patch is legit and should return a commit + oldCommit, err := repo.ReadPatchCommit(2) + assert.NoError(t, err) + + assert.Empty(t, noFile) + assert.Empty(t, noCommit) + assert.Len(t, oldCommit, 40) + assert.True(t, oldCommit == "6e8e2a6f9efd71dbe6917816343ed8415ad696c3") +} + +func TestReadWritePullHead(t *testing.T) { + // Ensure we can write SHA1 head corresponding to PR and open them + bareRepo1Path := filepath.Join(testReposDir, "repo1_bare") + repo, err := OpenRepository(bareRepo1Path) + assert.NoError(t, err) + defer repo.Close() + // Try to open non-existing Pull + _, err = repo.ReadPullHead(0) + assert.Error(t, err) + // Write a fake sha1 with only 40 zeros + newCommit := strings.Repeat("0", 40) + err = repo.WritePullHead(1, newCommit) + assert.NoError(t, err) + headFile := filepath.Join(repo.Path, "refs/pull/1/head") + // Remove file after the test + defer util.Remove(headFile) + assert.FileExists(t, headFile) + // Read the file created + headContents, err := repo.ReadPullHead(1) + assert.NoError(t, err) + assert.Len(t, string(headContents), 40) + assert.True(t, string(headContents) == newCommit) +} diff --git a/modules/git/tests/repos/repo1_bare/pulls/1.patch b/modules/git/tests/repos/repo1_bare/pulls/1.patch new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/git/tests/repos/repo1_bare/pulls/2.patch b/modules/git/tests/repos/repo1_bare/pulls/2.patch new file mode 100644 index 0000000000..caab605e9a --- /dev/null +++ b/modules/git/tests/repos/repo1_bare/pulls/2.patch @@ -0,0 +1,39 @@ +From 6e8e2a6f9efd71dbe6917816343ed8415ad696c3 Mon Sep 17 00:00:00 2001 +From: 99rgosse +Date: Fri, 26 Mar 2021 12:44:22 +0000 +Subject: [PATCH] Update gitea_import_actions.py + +--- + gitea_import_actions.py | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/gitea_import_actions.py b/gitea_import_actions.py +index f0d72cd..7b31963 100644 +--- a/gitea_import_actions.py ++++ b/gitea_import_actions.py +@@ -3,14 +3,14 @@ + # git log --pretty=format:'%H,%at,%s' --date=default > /tmp/commit.log + # to get the commits logfile for a repository + +-import mysql.connector as mariadb ++import psycopg2 + + # set the following variables to fit your need... + USERID = 1 + REPOID = 1 + BRANCH = "master" + +-mydb = mariadb.connect( ++mydb = psycopg2.connect( + host="localhost", + user="user", + passwd="password", +@@ -31,4 +31,4 @@ with open("/tmp/commit.log") as f: + + mydb.commit() + +-print("actions inserted.") +\ No newline at end of file ++print("actions inserted.") +-- +GitLab diff --git a/routers/web/repo/pull.go b/routers/web/repo/pull.go index bf80a7e5df..ba775d761b 100644 --- a/routers/web/repo/pull.go +++ b/routers/web/repo/pull.go @@ -320,8 +320,46 @@ func PrepareMergedViewPullInfo(ctx *context.Context, issue *models.Issue) *git.C setMergeTarget(ctx, pull) ctx.Data["HasMerged"] = true + var baseCommit string + // Some migrated PR won't have any Base SHA and lose history, try to get one + if pull.MergeBase == "" { + var commitSHA, parentCommit string + // If there is a head or a patch file, and it is readable, grab info + commitSHA, err := ctx.Repo.GitRepo.ReadPullHead(pull.Index) + if err != nil { + // Head File does not exist, try the patch + commitSHA, err = ctx.Repo.GitRepo.ReadPatchCommit(pull.Index) + if err == nil { + // Recreate pull head in files for next time + if err := ctx.Repo.GitRepo.WritePullHead(pull.Index, commitSHA); err != nil { + log.Error("Could not write head file", err) + } + } else { + // There is no history available + log.Trace("No history file available for PR %d", pull.Index) + } + } + if commitSHA != "" { + // Get immediate parent of the first commit in the patch, grab history back + parentCommit, err = git.NewCommandContext(ctx, "rev-list", "-1", "--skip=1", commitSHA).RunInDir(ctx.Repo.GitRepo.Path) + if err == nil { + parentCommit = strings.TrimSpace(parentCommit) + } + // Special case on Git < 2.25 that doesn't fail on immediate empty history + if err != nil || parentCommit == "" { + log.Info("No known parent commit for PR %d, error: %v", pull.Index, err) + // bring at least partial history if it can work + parentCommit = commitSHA + } + } + baseCommit = parentCommit + } else { + // Keep an empty history or original commit + baseCommit = pull.MergeBase + } + compareInfo, err := ctx.Repo.GitRepo.GetCompareInfo(ctx.Repo.Repository.RepoPath(), - pull.MergeBase, pull.GetGitRefName(), true, false) + baseCommit, pull.GetGitRefName(), true, false) if err != nil { if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "unknown revision or path not in the working tree") { ctx.Data["IsPullRequestBroken"] = true