Make the github migration less rate limit waiting to get comment per page from repository but not per issue (#16070)

* Make the github migration less rate limit waiting to get comment per page from repository but not per issue

* Fix lint

* adjust Downloader interface

* Fix missed reviews

* Fix test

* Remove unused struct
This commit is contained in:
Lunny Xiao 2021-06-30 15:23:49 +08:00 committed by GitHub
parent e8c6cead0f
commit 0966349354
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 194 additions and 56 deletions

View file

@ -11,6 +11,13 @@ import (
"code.gitea.io/gitea/modules/structs"
)
// GetCommentOptions represents an options for get comment
type GetCommentOptions struct {
IssueNumber int64
Page int
PageSize int
}
// Downloader downloads the site repo informations
type Downloader interface {
SetContext(context.Context)
@ -20,7 +27,8 @@ type Downloader interface {
GetReleases() ([]*Release, error)
GetLabels() ([]*Label, error)
GetIssues(page, perPage int) ([]*Issue, bool, error)
GetComments(issueNumber int64) ([]*Comment, error)
GetComments(opts GetCommentOptions) ([]*Comment, bool, error)
SupportGetRepoComments() bool
GetPullRequests(page, perPage int) ([]*PullRequest, bool, error)
GetReviews(pullRequestNumber int64) ([]*Review, error)
FormatCloneURL(opts MigrateOptions, remoteAddr string) (string, error)

View file

@ -51,8 +51,8 @@ func (n NullDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) {
}
// GetComments returns comments according issueNumber
func (n NullDownloader) GetComments(issueNumber int64) ([]*Comment, error) {
return nil, &ErrNotSupported{Entity: "Comments"}
func (n NullDownloader) GetComments(GetCommentOptions) ([]*Comment, bool, error) {
return nil, false, &ErrNotSupported{Entity: "Comments"}
}
// GetPullRequests returns pull requests according page and perPage
@ -80,3 +80,8 @@ func (n NullDownloader) FormatCloneURL(opts MigrateOptions, remoteAddr string) (
}
return remoteAddr, nil
}
// SupportGetRepoComments return true if it supports get repo comments
func (n NullDownloader) SupportGetRepoComments() bool {
return false
}

View file

@ -150,18 +150,19 @@ func (d *RetryDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) {
}
// GetComments returns a repository's comments with retry
func (d *RetryDownloader) GetComments(issueNumber int64) ([]*Comment, error) {
func (d *RetryDownloader) GetComments(opts GetCommentOptions) ([]*Comment, bool, error) {
var (
comments []*Comment
isEnd bool
err error
)
err = d.retry(func() error {
comments, err = d.Downloader.GetComments(issueNumber)
comments, isEnd, err = d.Downloader.GetComments(opts)
return err
})
return comments, err
return comments, isEnd, err
}
// GetPullRequests returns a repository's pull requests with retry

View file

@ -435,37 +435,37 @@ func (g *GiteaDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, err
}
// GetComments returns comments according issueNumber
func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) {
func (g *GiteaDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var allComments = make([]*base.Comment, 0, g.maxPerPage)
// for i := 1; ; i++ {
// make sure gitea can shutdown gracefully
select {
case <-g.ctx.Done():
return nil, nil
return nil, false, nil
default:
}
comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, index, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{
comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, opts.IssueNumber, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{
// PageSize: g.maxPerPage,
// Page: i,
}})
if err != nil {
return nil, fmt.Errorf("error while listing comments for issue #%d. Error: %v", index, err)
return nil, false, fmt.Errorf("error while listing comments for issue #%d. Error: %v", opts.IssueNumber, err)
}
for _, comment := range comments {
reactions, err := g.getCommentReactions(comment.ID)
if err != nil {
log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err)
log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)
if err2 := models.CreateRepositoryNotice(
fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil {
fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil {
log.Error("create repository notice failed: ", err2)
}
}
allComments = append(allComments, &base.Comment{
IssueIndex: index,
IssueIndex: opts.IssueNumber,
PosterID: comment.Poster.ID,
PosterName: comment.Poster.UserName,
PosterEmail: comment.Poster.Email,
@ -481,7 +481,7 @@ func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) {
// break
// }
//}
return allComments, nil
return allComments, true, nil
}
// GetPullRequests returns pull requests according page and perPage

View file

@ -224,7 +224,9 @@ func TestGiteaDownloadRepo(t *testing.T) {
Closed: &closed2,
}, issues[1])
comments, err := downloader.GetComments(4)
comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 4,
})
assert.NoError(t, err)
assert.Len(t, comments, 2)
assert.EqualValues(t, 1598975370, comments[0].Created.Unix())

View file

@ -11,6 +11,7 @@ import (
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"
@ -450,8 +451,22 @@ func (g *GithubDownloaderV3) GetIssues(page, perPage int) ([]*base.Issue, bool,
return allIssues, len(issues) < perPage, nil
}
// SupportGetRepoComments return true if it supports get repo comments
func (g *GithubDownloaderV3) SupportGetRepoComments() bool {
return true
}
// GetComments returns comments according issueNumber
func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, error) {
func (g *GithubDownloaderV3) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
if opts.IssueNumber > 0 {
comments, err := g.getComments(opts.IssueNumber)
return comments, false, err
}
return g.GetAllComments(opts.Page, opts.PageSize)
}
func (g *GithubDownloaderV3) getComments(issueNumber int64) ([]*base.Comment, error) {
var (
allComments = make([]*base.Comment, 0, g.maxPerPage)
created = "created"
@ -519,6 +534,75 @@ func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, er
return allComments, nil
}
// GetAllComments returns repository comments according page and perPageSize
func (g *GithubDownloaderV3) GetAllComments(page, perPage int) ([]*base.Comment, bool, error) {
var (
allComments = make([]*base.Comment, 0, perPage)
created = "created"
asc = "asc"
)
opt := &github.IssueListCommentsOptions{
Sort: &created,
Direction: &asc,
ListOptions: github.ListOptions{
Page: page,
PerPage: perPage,
},
}
g.sleep()
comments, resp, err := g.client.Issues.ListComments(g.ctx, g.repoOwner, g.repoName, 0, opt)
if err != nil {
return nil, false, fmt.Errorf("error while listing repos: %v", err)
}
log.Trace("Request get comments %d/%d, but in fact get %d", perPage, page, len(comments))
g.rate = &resp.Rate
for _, comment := range comments {
var email string
if comment.User.Email != nil {
email = *comment.User.Email
}
// get reactions
var reactions []*base.Reaction
for i := 1; ; i++ {
g.sleep()
res, resp, err := g.client.Reactions.ListIssueCommentReactions(g.ctx, g.repoOwner, g.repoName, comment.GetID(), &github.ListOptions{
Page: i,
PerPage: g.maxPerPage,
})
if err != nil {
return nil, false, err
}
g.rate = &resp.Rate
if len(res) == 0 {
break
}
for _, reaction := range res {
reactions = append(reactions, &base.Reaction{
UserID: reaction.User.GetID(),
UserName: reaction.User.GetLogin(),
Content: reaction.GetContent(),
})
}
}
idx := strings.LastIndex(*comment.IssueURL, "/")
issueIndex, _ := strconv.ParseInt((*comment.IssueURL)[idx+1:], 10, 64)
allComments = append(allComments, &base.Comment{
IssueIndex: issueIndex,
PosterID: *comment.User.ID,
PosterName: *comment.User.Login,
PosterEmail: email,
Content: *comment.Body,
Created: *comment.CreatedAt,
Updated: *comment.UpdatedAt,
Reactions: reactions,
})
}
return allComments, len(allComments) < perPage, nil
}
// GetPullRequests returns pull requests according page and perPage
func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullRequest, bool, error) {
if perPage > g.maxPerPage {
@ -539,6 +623,7 @@ func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullReq
if err != nil {
return nil, false, fmt.Errorf("error while listing repos: %v", err)
}
log.Trace("Request get pull requests %d/%d, but in fact get %d", perPage, page, len(prs))
g.rate = &resp.Rate
for _, pr := range prs {
var body string

View file

@ -240,7 +240,9 @@ func TestGitHubDownloadRepo(t *testing.T) {
}, issues)
// downloader.GetComments()
comments, err := downloader.GetComments(2)
comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 2,
})
assert.NoError(t, err)
assert.Len(t, comments, 2)
assert.EqualValues(t, []*base.Comment{

View file

@ -430,7 +430,8 @@ func (g *GitlabDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, er
// GetComments returns comments according issueNumber
// TODO: figure out how to transfer comment reactions
func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) {
func (g *GitlabDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var issueNumber = opts.IssueNumber
var allComments = make([]*base.Comment, 0, g.maxPerPage)
var page = 1
@ -457,7 +458,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro
}
if err != nil {
return nil, fmt.Errorf("error while listing comments: %v %v", g.repoID, err)
return nil, false, fmt.Errorf("error while listing comments: %v %v", g.repoID, err)
}
for _, comment := range comments {
// Flatten comment threads
@ -490,7 +491,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro
}
page = resp.NextPage
}
return allComments, nil
return allComments, true, nil
}
// GetPullRequests returns pull requests according page and perPage

View file

@ -204,7 +204,9 @@ func TestGitlabDownloadRepo(t *testing.T) {
},
}, issues)
comments, err := downloader.GetComments(2)
comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 2,
})
assert.NoError(t, err)
assert.Len(t, comments, 4)
assert.EqualValues(t, []*base.Comment{

View file

@ -227,12 +227,13 @@ func (g *GogsDownloader) getIssues(page int, state string) ([]*base.Issue, bool,
}
// GetComments returns comments according issueNumber
func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) {
func (g *GogsDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var issueNumber = opts.IssueNumber
var allComments = make([]*base.Comment, 0, 100)
comments, err := g.client.ListIssueComments(g.repoOwner, g.repoName, issueNumber)
if err != nil {
return nil, fmt.Errorf("error while listing repos: %v", err)
return nil, false, fmt.Errorf("error while listing repos: %v", err)
}
for _, comment := range comments {
if len(comment.Body) == 0 || comment.Poster == nil {
@ -249,7 +250,7 @@ func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error)
})
}
return allComments, nil
return allComments, true, nil
}
// GetTopics return repository topics

View file

@ -103,7 +103,9 @@ func TestGogsDownloadRepo(t *testing.T) {
}, issues)
// downloader.GetComments()
comments, err := downloader.GetComments(1)
comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: 1,
})
assert.NoError(t, err)
assert.Len(t, comments, 1)
assert.EqualValues(t, []*base.Comment{

View file

@ -292,6 +292,8 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
reviewBatchSize = uploader.MaxBatchInsertSize("review")
)
supportAllComments := downloader.SupportGetRepoComments()
if opts.Issues {
log.Trace("migrating issues and comments")
messenger("repo.migrate.migrating_issues")
@ -311,11 +313,13 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
return err
}
if opts.Comments {
if opts.Comments && !supportAllComments {
var allComments = make([]*base.Comment, 0, commentBatchSize)
for _, issue := range issues {
log.Trace("migrating issue %d's comments", issue.Number)
comments, err := downloader.GetComments(issue.Number)
comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: issue.Number,
})
if err != nil {
if !base.IsErrNotSupported(err) {
return err
@ -366,30 +370,34 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
}
if opts.Comments {
// plain comments
var allComments = make([]*base.Comment, 0, commentBatchSize)
for _, pr := range prs {
log.Trace("migrating pull request %d's comments", pr.Number)
comments, err := downloader.GetComments(pr.Number)
if err != nil {
if !base.IsErrNotSupported(err) {
if !supportAllComments {
// plain comments
var allComments = make([]*base.Comment, 0, commentBatchSize)
for _, pr := range prs {
log.Trace("migrating pull request %d's comments", pr.Number)
comments, _, err := downloader.GetComments(base.GetCommentOptions{
IssueNumber: pr.Number,
})
if err != nil {
if !base.IsErrNotSupported(err) {
return err
}
log.Warn("migrating comments is not supported, ignored")
}
allComments = append(allComments, comments...)
if len(allComments) >= commentBatchSize {
if err = uploader.CreateComments(allComments[:commentBatchSize]...); err != nil {
return err
}
allComments = allComments[commentBatchSize:]
}
}
if len(allComments) > 0 {
if err = uploader.CreateComments(allComments...); err != nil {
return err
}
log.Warn("migrating comments is not supported, ignored")
}
allComments = append(allComments, comments...)
if len(allComments) >= commentBatchSize {
if err = uploader.CreateComments(allComments[:commentBatchSize]...); err != nil {
return err
}
allComments = allComments[commentBatchSize:]
}
}
if len(allComments) > 0 {
if err = uploader.CreateComments(allComments...); err != nil {
return err
}
}
@ -439,6 +447,27 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts
}
}
if opts.Comments && supportAllComments {
log.Trace("migrating comments")
for i := 1; ; i++ {
comments, isEnd, err := downloader.GetComments(base.GetCommentOptions{
Page: i,
PageSize: commentBatchSize,
})
if err != nil {
return err
}
if err := uploader.CreateComments(comments...); err != nil {
return err
}
if isEnd {
break
}
}
}
return uploader.Finish()
}

View file

@ -212,27 +212,27 @@ func (r *RepositoryRestorer) GetIssues(page, perPage int) ([]*base.Issue, bool,
}
// GetComments returns comments according issueNumber
func (r *RepositoryRestorer) GetComments(issueNumber int64) ([]*base.Comment, error) {
func (r *RepositoryRestorer) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) {
var comments = make([]*base.Comment, 0, 10)
p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", issueNumber))
p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", opts.IssueNumber))
_, err := os.Stat(p)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
return nil, false, nil
}
return nil, err
return nil, false, err
}
bs, err := ioutil.ReadFile(p)
if err != nil {
return nil, err
return nil, false, err
}
err = yaml.Unmarshal(bs, &comments)
if err != nil {
return nil, err
return nil, false, err
}
return comments, nil
return comments, false, nil
}
// GetPullRequests returns pull requests according page and perPage