Use batch insert on migrating repository to make the process faster (#7050)

* Use batch insert on migrating repository to make the process faster

* fix lint

* fix tests

* fix comments
This commit is contained in:
Lunny Xiao 2019-06-29 21:38:22 +08:00 committed by GitHub
parent e463bdaf8d
commit 462284e2f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 343 additions and 242 deletions

View file

@ -6,38 +6,58 @@ package models
import "github.com/go-xorm/xorm"
// InsertIssue insert one issue to database
func InsertIssue(issue *Issue, labelIDs []int64) error {
// InsertMilestones creates milestones of repository.
func InsertMilestones(ms ...*Milestone) (err error) {
if len(ms) == 0 {
return nil
}
sess := x.NewSession()
if err := sess.Begin(); err != nil {
defer sess.Close()
if err = sess.Begin(); err != nil {
return err
}
if err := insertIssue(sess, issue, labelIDs); err != nil {
// to return the id, so we should not use batch insert
for _, m := range ms {
if _, err = sess.NoAutoTime().Insert(m); err != nil {
return err
}
}
if _, err = sess.Exec("UPDATE `repository` SET num_milestones = num_milestones + ? WHERE id = ?", len(ms), ms[0].RepoID); err != nil {
return err
}
return sess.Commit()
}
func insertIssue(sess *xorm.Session, issue *Issue, labelIDs []int64) error {
if issue.MilestoneID > 0 {
sess.Incr("num_issues")
if issue.IsClosed {
sess.Incr("num_closed_issues")
// InsertIssues insert issues to database
func InsertIssues(issues ...*Issue) error {
sess := x.NewSession()
if err := sess.Begin(); err != nil {
return err
}
if _, err := sess.ID(issue.MilestoneID).NoAutoTime().Update(new(Milestone)); err != nil {
for _, issue := range issues {
if err := insertIssue(sess, issue); err != nil {
return err
}
}
return sess.Commit()
}
func insertIssue(sess *xorm.Session, issue *Issue) error {
if _, err := sess.NoAutoTime().Insert(issue); err != nil {
return err
}
var issueLabels = make([]IssueLabel, 0, len(labelIDs))
for _, labelID := range labelIDs {
var issueLabels = make([]IssueLabel, 0, len(issue.Labels))
var labelIDs = make([]int64, 0, len(issue.Labels))
for _, label := range issue.Labels {
issueLabels = append(issueLabels, IssueLabel{
IssueID: issue.ID,
LabelID: labelID,
LabelID: label.ID,
})
labelIDs = append(labelIDs, label.ID)
}
if _, err := sess.Insert(issueLabels); err != nil {
return err
@ -61,12 +81,20 @@ func insertIssue(sess *xorm.Session, issue *Issue, labelIDs []int64) error {
if issue.IsClosed {
sess.Incr("num_closed_issues")
}
if _, err := sess.In("id", labelIDs).Update(new(Label)); err != nil {
if _, err := sess.In("id", labelIDs).NoAutoTime().Update(new(Label)); err != nil {
return err
}
if issue.MilestoneID > 0 {
if _, err := sess.ID(issue.MilestoneID).SetExpr("completeness", "num_closed_issues * 100 / num_issues").Update(new(Milestone)); err != nil {
sess.Incr("num_issues")
if issue.IsClosed {
sess.Incr("num_closed_issues")
}
if _, err := sess.ID(issue.MilestoneID).
SetExpr("completeness", "num_closed_issues * 100 / num_issues").
NoAutoTime().
Update(new(Milestone)); err != nil {
return err
}
}
@ -74,64 +102,64 @@ func insertIssue(sess *xorm.Session, issue *Issue, labelIDs []int64) error {
return nil
}
// InsertComment inserted a comment
func InsertComment(comment *Comment) error {
// InsertIssueComments inserts many comments of issues.
func InsertIssueComments(comments []*Comment) error {
if len(comments) == 0 {
return nil
}
var issueIDs = make(map[int64]bool)
for _, comment := range comments {
issueIDs[comment.IssueID] = true
}
sess := x.NewSession()
defer sess.Close()
if err := sess.Begin(); err != nil {
return err
}
if _, err := sess.NoAutoTime().Insert(comment); err != nil {
if _, err := sess.NoAutoTime().Insert(comments); err != nil {
return err
}
if _, err := sess.ID(comment.IssueID).Incr("num_comments").Update(new(Issue)); err != nil {
for issueID := range issueIDs {
if _, err := sess.Exec("UPDATE issue set num_comments = (SELECT count(*) FROM comment WHERE issue_id = ?) WHERE id = ?", issueID, issueID); err != nil {
return err
}
}
return sess.Commit()
}
// InsertPullRequest inserted a pull request
func InsertPullRequest(pr *PullRequest, labelIDs []int64) error {
// InsertPullRequests inserted pull requests
func InsertPullRequests(prs ...*PullRequest) error {
sess := x.NewSession()
defer sess.Close()
if err := sess.Begin(); err != nil {
return err
}
if err := insertIssue(sess, pr.Issue, labelIDs); err != nil {
for _, pr := range prs {
if err := insertIssue(sess, pr.Issue); err != nil {
return err
}
pr.IssueID = pr.Issue.ID
if _, err := sess.NoAutoTime().Insert(pr); err != nil {
return err
}
}
return sess.Commit()
}
// MigrateRelease migrates release
func MigrateRelease(rel *Release) error {
// InsertReleases migrates release
func InsertReleases(rels ...*Release) error {
sess := x.NewSession()
if err := sess.Begin(); err != nil {
return err
}
var oriRel = Release{
RepoID: rel.RepoID,
TagName: rel.TagName,
}
exist, err := sess.Get(&oriRel)
if err != nil {
return err
}
if !exist {
for _, rel := range rels {
if _, err := sess.NoAutoTime().Insert(rel); err != nil {
return err
}
} else {
rel.ID = oriRel.ID
if _, err := sess.ID(rel.ID).Cols("target, title, note, is_tag, num_commits").Update(rel); err != nil {
return err
}
}
for i := 0; i < len(rel.Attachments); i++ {
rel.Attachments[i].ReleaseID = rel.ID
@ -140,6 +168,7 @@ func MigrateRelease(rel *Release) error {
if _, err := sess.NoAutoTime().Insert(rel.Attachments); err != nil {
return err
}
}
return sess.Commit()
}

View file

@ -9,6 +9,7 @@ import "time"
// Comment is a standard comment information
type Comment struct {
IssueIndex int64
PosterName string
PosterEmail string
Created time.Time

View file

@ -5,14 +5,14 @@
package base
// Uploader uploads all the informations
// Uploader uploads all the informations of one repository
type Uploader interface {
CreateRepo(repo *Repository, includeWiki bool) error
CreateMilestone(milestone *Milestone) error
CreateRelease(release *Release) error
CreateLabel(label *Label) error
CreateIssue(issue *Issue) error
CreateComment(issueNumber int64, comment *Comment) error
CreatePullRequest(pr *PullRequest) error
CreateMilestones(milestones ...*Milestone) error
CreateReleases(releases ...*Release) error
CreateLabels(labels ...*Label) error
CreateIssues(issues ...*Issue) error
CreateComments(comments ...*Comment) error
CreatePullRequests(prs ...*PullRequest) error
Rollback() error
}

View file

@ -76,8 +76,10 @@ func (g *GiteaLocalUploader) CreateRepo(repo *base.Repository, includeWiki bool)
return err
}
// CreateMilestone creates milestone
func (g *GiteaLocalUploader) CreateMilestone(milestone *base.Milestone) error {
// CreateMilestones creates milestones
func (g *GiteaLocalUploader) CreateMilestones(milestones ...*base.Milestone) error {
var mss = make([]*models.Milestone, 0, len(milestones))
for _, milestone := range milestones {
var deadline util.TimeStamp
if milestone.Deadline != nil {
deadline = util.TimeStamp(milestone.Deadline.Unix())
@ -95,33 +97,46 @@ func (g *GiteaLocalUploader) CreateMilestone(milestone *base.Milestone) error {
if ms.IsClosed && milestone.Closed != nil {
ms.ClosedDateUnix = util.TimeStamp(milestone.Closed.Unix())
}
err := models.NewMilestone(&ms)
mss = append(mss, &ms)
}
err := models.InsertMilestones(mss...)
if err != nil {
return err
}
for _, ms := range mss {
g.milestones.Store(ms.Name, ms.ID)
}
return nil
}
// CreateLabel creates label
func (g *GiteaLocalUploader) CreateLabel(label *base.Label) error {
var lb = models.Label{
// CreateLabels creates labels
func (g *GiteaLocalUploader) CreateLabels(labels ...*base.Label) error {
var lbs = make([]*models.Label, 0, len(labels))
for _, label := range labels {
lbs = append(lbs, &models.Label{
RepoID: g.repo.ID,
Name: label.Name,
Description: label.Description,
Color: fmt.Sprintf("#%s", label.Color),
})
}
err := models.NewLabel(&lb)
err := models.NewLabels(lbs...)
if err != nil {
return err
}
g.labels.Store(lb.Name, lb.ID)
for _, lb := range lbs {
g.labels.Store(lb.Name, lb)
}
return nil
}
// CreateRelease creates release
func (g *GiteaLocalUploader) CreateRelease(release *base.Release) error {
// CreateReleases creates releases
func (g *GiteaLocalUploader) CreateReleases(releases ...*base.Release) error {
var rels = make([]*models.Release, 0, len(releases))
for _, release := range releases {
var rel = models.Release{
RepoID: g.repo.ID,
PublisherID: g.doer.ID,
@ -181,28 +196,30 @@ func (g *GiteaLocalUploader) CreateRelease(release *base.Release) error {
rel.Attachments = append(rel.Attachments, &attach)
}
return models.MigrateRelease(&rel)
rels = append(rels, &rel)
}
return models.InsertReleases(rels...)
}
// CreateIssue creates issue
func (g *GiteaLocalUploader) CreateIssue(issue *base.Issue) error {
var labelIDs []int64
// CreateIssues creates issues
func (g *GiteaLocalUploader) CreateIssues(issues ...*base.Issue) error {
var iss = make([]*models.Issue, 0, len(issues))
for _, issue := range issues {
var labels []*models.Label
for _, label := range issue.Labels {
id, ok := g.labels.Load(label.Name)
if !ok {
return fmt.Errorf("Label %s missing when create issue", label.Name)
lb, ok := g.labels.Load(label.Name)
if ok {
labels = append(labels, lb.(*models.Label))
}
labelIDs = append(labelIDs, id.(int64))
}
var milestoneID int64
if issue.Milestone != "" {
milestone, ok := g.milestones.Load(issue.Milestone)
if !ok {
return fmt.Errorf("Milestone %s missing when create issue", issue.Milestone)
}
if ok {
milestoneID = milestone.(int64)
}
}
var is = models.Issue{
RepoID: g.repo.ID,
@ -214,100 +231,125 @@ func (g *GiteaLocalUploader) CreateIssue(issue *base.Issue) error {
IsClosed: issue.State == "closed",
IsLocked: issue.IsLocked,
MilestoneID: milestoneID,
Labels: labels,
CreatedUnix: util.TimeStamp(issue.Created.Unix()),
}
if issue.Closed != nil {
is.ClosedUnix = util.TimeStamp(issue.Closed.Unix())
}
// TODO: add reactions
iss = append(iss, &is)
}
err := models.InsertIssue(&is, labelIDs)
err := models.InsertIssues(iss...)
if err != nil {
return err
}
g.issues.Store(issue.Number, is.ID)
// TODO: add reactions
return err
for _, is := range iss {
g.issues.Store(is.Index, is.ID)
}
return nil
}
// CreateComment creates comment
func (g *GiteaLocalUploader) CreateComment(issueNumber int64, comment *base.Comment) error {
// CreateComments creates comments of issues
func (g *GiteaLocalUploader) CreateComments(comments ...*base.Comment) error {
var cms = make([]*models.Comment, 0, len(comments))
for _, comment := range comments {
var issueID int64
if issueIDStr, ok := g.issues.Load(issueNumber); !ok {
issue, err := models.GetIssueByIndex(g.repo.ID, issueNumber)
if issueIDStr, ok := g.issues.Load(comment.IssueIndex); !ok {
issue, err := models.GetIssueByIndex(g.repo.ID, comment.IssueIndex)
if err != nil {
return err
}
issueID = issue.ID
g.issues.Store(issueNumber, issueID)
g.issues.Store(comment.IssueIndex, issueID)
} else {
issueID = issueIDStr.(int64)
}
var cm = models.Comment{
cms = append(cms, &models.Comment{
IssueID: issueID,
Type: models.CommentTypeComment,
PosterID: g.doer.ID,
Content: comment.Content,
CreatedUnix: util.TimeStamp(comment.Created.Unix()),
}
err := models.InsertComment(&cm)
})
// TODO: Reactions
return err
}
// CreatePullRequest creates pull request
func (g *GiteaLocalUploader) CreatePullRequest(pr *base.PullRequest) error {
var labelIDs []int64
for _, label := range pr.Labels {
id, ok := g.labels.Load(label.Name)
if !ok {
return fmt.Errorf("Label %s missing when create issue", label.Name)
return models.InsertIssueComments(cms)
}
// CreatePullRequests creates pull requests
func (g *GiteaLocalUploader) CreatePullRequests(prs ...*base.PullRequest) error {
var gprs = make([]*models.PullRequest, 0, len(prs))
for _, pr := range prs {
gpr, err := g.newPullRequest(pr)
if err != nil {
return err
}
gprs = append(gprs, gpr)
}
if err := models.InsertPullRequests(gprs...); err != nil {
return err
}
for _, pr := range gprs {
g.issues.Store(pr.Issue.Index, pr.Issue.ID)
}
return nil
}
func (g *GiteaLocalUploader) newPullRequest(pr *base.PullRequest) (*models.PullRequest, error) {
var labels []*models.Label
for _, label := range pr.Labels {
lb, ok := g.labels.Load(label.Name)
if ok {
labels = append(labels, lb.(*models.Label))
}
labelIDs = append(labelIDs, id.(int64))
}
var milestoneID int64
if pr.Milestone != "" {
milestone, ok := g.milestones.Load(pr.Milestone)
if !ok {
return fmt.Errorf("Milestone %s missing when create issue", pr.Milestone)
}
if ok {
milestoneID = milestone.(int64)
}
}
// download patch file
resp, err := http.Get(pr.PatchURL)
if err != nil {
return err
return nil, err
}
defer resp.Body.Close()
pullDir := filepath.Join(g.repo.RepoPath(), "pulls")
if err = os.MkdirAll(pullDir, os.ModePerm); err != nil {
return err
return nil, err
}
f, err := os.Create(filepath.Join(pullDir, fmt.Sprintf("%d.patch", pr.Number)))
if err != nil {
return err
return nil, err
}
defer f.Close()
_, err = io.Copy(f, resp.Body)
if err != nil {
return err
return nil, err
}
// set head information
pullHead := filepath.Join(g.repo.RepoPath(), "refs", "pull", fmt.Sprintf("%d", pr.Number))
if err := os.MkdirAll(pullHead, os.ModePerm); err != nil {
return err
return nil, err
}
p, err := os.Create(filepath.Join(pullHead, "head"))
if err != nil {
return err
return nil, err
}
defer p.Close()
_, err = p.WriteString(pr.Head.SHA)
if err != nil {
return err
return nil, err
}
var head = "unknown repository"
@ -333,16 +375,16 @@ func (g *GiteaLocalUploader) CreatePullRequest(pr *base.PullRequest) error {
} else {
headBranch := filepath.Join(g.repo.RepoPath(), "refs", "heads", pr.Head.OwnerName, pr.Head.Ref)
if err := os.MkdirAll(filepath.Dir(headBranch), os.ModePerm); err != nil {
return err
return nil, err
}
b, err := os.Create(headBranch)
if err != nil {
return err
return nil, err
}
defer b.Close()
_, err = b.WriteString(pr.Head.SHA)
if err != nil {
return err
return nil, err
}
head = pr.Head.OwnerName + "/" + pr.Head.Ref
}
@ -373,6 +415,7 @@ func (g *GiteaLocalUploader) CreatePullRequest(pr *base.PullRequest) error {
IsPull: true,
IsClosed: pr.State == "closed",
IsLocked: pr.IsLocked,
Labels: labels,
CreatedUnix: util.TimeStamp(pr.Created.Unix()),
},
}
@ -389,7 +432,7 @@ func (g *GiteaLocalUploader) CreatePullRequest(pr *base.PullRequest) error {
// TODO: reactions
// TODO: assignees
return models.InsertPullRequest(&pullRequest, labelIDs)
return &pullRequest, nil
}
// Rollback when migrating failed, this will rollback all the changes.

View file

@ -358,6 +358,7 @@ func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, er
reactions = convertGithubReactions(comment.Reactions)
}
allComments = append(allComments, &base.Comment{
IssueIndex: issueNumber,
PosterName: *comment.User.Login,
PosterEmail: email,
Content: *comment.Body,

View file

@ -269,6 +269,7 @@ func TestGitHubDownloadRepo(t *testing.T) {
assert.EqualValues(t, 35, len(comments))
assert.EqualValues(t, []*base.Comment{
{
IssueIndex: 6,
PosterName: "bkcsoft",
Created: time.Date(2016, 11, 02, 18, 59, 48, 0, time.UTC),
Content: `I would prefer a solution that is in the backend, unless it's required to have it update without reloading. Unfortunately I can't seem to find anything that does that :unamused:
@ -286,6 +287,7 @@ Also this would _require_ caching, since it will fetch huge amounts of data from
},
},
{
IssueIndex: 6,
PosterName: "joubertredrat",
Created: time.Date(2016, 11, 02, 19, 16, 56, 0, time.UTC),
Content: `Yes, this plugin build on front-end, with backend I don't know too, but we can consider make component for this.
@ -303,6 +305,7 @@ In my case I use ajax to get data, but build on frontend anyway
},
},
{
IssueIndex: 6,
PosterName: "xinity",
Created: time.Date(2016, 11, 03, 13, 04, 56, 0, time.UTC),
Content: `following @bkcsoft retention strategy in cache is a must if we don't want gitea to waste ressources.

View file

@ -91,12 +91,10 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
return err
}
for _, milestone := range milestones {
if err := uploader.CreateMilestone(milestone); err != nil {
if err := uploader.CreateMilestones(milestones...); err != nil {
return err
}
}
}
if opts.Labels {
log.Trace("migrating labels")
@ -105,12 +103,10 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
return err
}
for _, label := range labels {
if err := uploader.CreateLabel(label); err != nil {
if err := uploader.CreateLabels(labels...); err != nil {
return err
}
}
}
if opts.Releases {
log.Trace("migrating releases")
@ -119,12 +115,10 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
return err
}
for _, release := range releases {
if err := uploader.CreateRelease(release); err != nil {
if err := uploader.CreateReleases(releases...); err != nil {
return err
}
}
}
if opts.Issues {
log.Trace("migrating issues and comments")
@ -137,8 +131,9 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
if !opts.IgnoreIssueAuthor {
issue.Content = fmt.Sprintf("Author: @%s \n\n%s", issue.PosterName, issue.Content)
}
}
if err := uploader.CreateIssue(issue); err != nil {
if err := uploader.CreateIssues(issues...); err != nil {
return err
}
@ -146,6 +141,8 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
continue
}
var allComments = make([]*base.Comment, 0, 100)
for _, issue := range issues {
comments, err := downloader.GetComments(issue.Number)
if err != nil {
return err
@ -154,9 +151,20 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
if !opts.IgnoreIssueAuthor {
comment.Content = fmt.Sprintf("Author: @%s \n\n%s", comment.PosterName, comment.Content)
}
if err := uploader.CreateComment(issue.Number, comment); err != nil {
}
allComments = append(allComments, comments...)
if len(allComments) >= 100 {
if err := uploader.CreateComments(allComments...); err != nil {
return err
}
allComments = make([]*base.Comment, 0, 100)
}
}
if len(allComments) > 0 {
if err := uploader.CreateComments(allComments...); err != nil {
return err
}
}
@ -178,13 +186,17 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
if !opts.IgnoreIssueAuthor {
pr.Content = fmt.Sprintf("Author: @%s \n\n%s", pr.PosterName, pr.Content)
}
if err := uploader.CreatePullRequest(pr); err != nil {
}
if err := uploader.CreatePullRequests(prs...); err != nil {
return err
}
if !opts.Comments {
continue
}
var allComments = make([]*base.Comment, 0, 100)
for _, pr := range prs {
comments, err := downloader.GetComments(pr.Number)
if err != nil {
return err
@ -193,11 +205,23 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
if !opts.IgnoreIssueAuthor {
comment.Content = fmt.Sprintf("Author: @%s \n\n%s", comment.PosterName, comment.Content)
}
if err := uploader.CreateComment(pr.Number, comment); err != nil {
}
allComments = append(allComments, comments...)
if len(allComments) >= 100 {
if err := uploader.CreateComments(allComments...); err != nil {
return err
}
allComments = make([]*base.Comment, 0, 100)
}
}
if len(allComments) > 0 {
if err := uploader.CreateComments(allComments...); err != nil {
return err
}
}
}
if len(prs) < 100 {
break
}