From 95383b7a16205d749e9d696d69d0ae665e91501e Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Sat, 25 Jun 2022 19:06:01 +0200 Subject: [PATCH] Add sitemap support (#18407) --- custom/conf/app.example.ini | 3 + .../doc/advanced/config-cheat-sheet.en-us.md | 1 + modules/setting/setting.go | 2 + modules/sitemap/sitemap.go | 69 +++++++++++++++++ modules/sitemap/sitemap_test.go | 77 +++++++++++++++++++ routers/web/explore/repo.go | 26 ++++++- routers/web/explore/user.go | 25 +++++- routers/web/home.go | 53 +++++++++++++ routers/web/web.go | 3 + 9 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 modules/sitemap/sitemap.go create mode 100644 modules/sitemap/sitemap_test.go diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 80017a117..5016f29cf 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1097,6 +1097,9 @@ PATH = ;; Number of items that are displayed in home feed ;FEED_PAGING_NUM = 20 ;; +;; Number of items that are displayed in a single subsitemap +;SITEMAP_PAGING_NUM = 20 +;; ;; Number of maximum commits displayed in commit graph. ;GRAPH_MAX_COMMIT_NUM = 100 ;; diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 15628a7de..df659b654 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -174,6 +174,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a - `MEMBERS_PAGING_NUM`: **20**: Number of members that are shown in organization members. - `FEED_MAX_COMMIT_NUM`: **5**: Number of maximum commits shown in one activity feed. - `FEED_PAGING_NUM`: **20**: Number of items that are displayed in home feed. +- `SITEMAP_PAGING_NUM`: **20**: Number of items that are displayed in a single subsitemap. - `GRAPH_MAX_COMMIT_NUM`: **100**: Number of maximum commits shown in the commit graph. 
- `CODE_COMMENT_LINES`: **4**: Number of line of codes shown for a code comment. - `DEFAULT_THEME`: **auto**: \[auto, gitea, arc-green\]: Set the default theme for the Gitea install. diff --git a/modules/setting/setting.go b/modules/setting/setting.go index 88f306b3f..7be0842b5 100644 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -207,6 +207,7 @@ var ( // UI settings UI = struct { ExplorePagingNum int + SitemapPagingNum int IssuePagingNum int RepoSearchPagingNum int MembersPagingNum int @@ -260,6 +261,7 @@ var ( } `ini:"ui.meta"` }{ ExplorePagingNum: 20, + SitemapPagingNum: 20, IssuePagingNum: 10, RepoSearchPagingNum: 10, MembersPagingNum: 20, diff --git a/modules/sitemap/sitemap.go b/modules/sitemap/sitemap.go new file mode 100644 index 000000000..14953765a --- /dev/null +++ b/modules/sitemap/sitemap.go @@ -0,0 +1,69 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package sitemap + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "time" +) + +// sitemapFileLimit contains the maximum size of a sitemap file +const sitemapFileLimit = 50 * 1024 * 1024 + +// URL represents a single sitemap entry +type URL struct { + URL string `xml:"loc"` + LastMod *time.Time `xml:"lastmod,omitempty"` +} + +// Sitemap represents a sitemap +type Sitemap struct { + XMLName xml.Name + Namespace string `xml:"xmlns,attr"` + + URLs []URL `xml:"url"` +} + +// NewSitemap creates a sitemap +func NewSitemap() *Sitemap { + return &Sitemap{ + XMLName: xml.Name{Local: "urlset"}, + Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9", + } +} + +// NewSitemapIndex creates a sitemap index. 
+func NewSitemapIndex() *Sitemap { + return &Sitemap{ + XMLName: xml.Name{Local: "sitemapindex"}, + Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9", + } +} + +// Add adds a URL to the sitemap +func (s *Sitemap) Add(u URL) { + s.URLs = append(s.URLs, u) +} + +// WriteTo writes the sitemap to a response +func (s *Sitemap) WriteTo(w io.Writer) (int64, error) { + if len(s.URLs) > 50000 { + return 0, fmt.Errorf("The sitemap contains too many URLs: %d", len(s.URLs)) + } + buf := bytes.NewBufferString(xml.Header) + if err := xml.NewEncoder(buf).Encode(s); err != nil { + return 0, err + } + if err := buf.WriteByte('\n'); err != nil { + return 0, err + } + if buf.Len() > sitemapFileLimit { + return 0, fmt.Errorf("The sitemap is too big: %d", buf.Len()) + } + return buf.WriteTo(w) +} diff --git a/modules/sitemap/sitemap_test.go b/modules/sitemap/sitemap_test.go new file mode 100644 index 000000000..63007b847 --- /dev/null +++ b/modules/sitemap/sitemap_test.go @@ -0,0 +1,77 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package sitemap + +import ( + "bytes" + "encoding/xml" + "fmt" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestOk(t *testing.T) { + testReal := func(s *Sitemap, name string, urls []URL, expected string) { + for _, url := range urls { + s.Add(url) + } + buf := &bytes.Buffer{} + _, err := s.WriteTo(buf) + assert.NoError(t, nil, err) + assert.Equal(t, xml.Header+"<"+name+" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"+expected+"</"+name+">\n", buf.String()) + } + test := func(urls []URL, expected string) { + testReal(NewSitemap(), "urlset", urls, expected) + testReal(NewSitemapIndex(), "sitemapindex", urls, expected) + } + + ts := time.Unix(1651322008, 0).UTC() + + test( + []URL{}, + "", + ) + test( + []URL{ + {URL: "https://gitea.io/test1", LastMod: &ts}, + }, + "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>", + ) + test( + []URL{ + {URL: "https://gitea.io/test2", LastMod: nil}, + }, + "<url><loc>https://gitea.io/test2</loc></url>", + ) + test( + []URL{ + {URL: "https://gitea.io/test1", LastMod: &ts}, + {URL: "https://gitea.io/test2", LastMod: nil}, + }, + "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>"+ + "<url><loc>https://gitea.io/test2</loc></url>", + ) +} + +func TestTooManyURLs(t *testing.T) { + s := NewSitemap() + for i := 0; i < 50001; i++ { + s.Add(URL{URL: fmt.Sprintf("https://gitea.io/test%d", i)}) + } + buf := &bytes.Buffer{} + _, err := s.WriteTo(buf) + assert.EqualError(t, err, "The sitemap contains too many URLs: 50001") +} + +func TestSitemapTooBig(t *testing.T) { + s := NewSitemap() + s.Add(URL{URL: strings.Repeat("b", sitemapFileLimit)}) + buf := &bytes.Buffer{} + _, err := s.WriteTo(buf) + assert.EqualError(t, err, "The sitemap is too big: 52428931") +} diff --git a/routers/web/explore/repo.go b/routers/web/explore/repo.go index f64642bc9..b5485f583 100644 --- a/routers/web/explore/repo.go +++ b/routers/web/explore/repo.go @@ -11,7 +11,9 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/base" 
"code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/sitemap" ) const ( @@ -30,11 +32,21 @@ type RepoSearchOptions struct { // RenderRepoSearch render repositories search page func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) { - page := ctx.FormInt("page") + // Sitemap index for sitemap paths + page := int(ctx.ParamsInt64("idx")) + isSitemap := ctx.Params("idx") != "" + if page <= 1 { + page = ctx.FormInt("page") + } + if page <= 0 { page = 1 } + if isSitemap { + opts.PageSize = setting.UI.SitemapPagingNum + } + var ( repos []*repo_model.Repository count int64 @@ -100,6 +112,18 @@ func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) { ctx.ServerError("SearchRepository", err) return } + if isSitemap { + m := sitemap.NewSitemap() + for _, item := range repos { + m.Add(sitemap.URL{URL: item.HTMLURL(), LastMod: item.UpdatedUnix.AsTimePtr()}) + } + ctx.Resp.Header().Set("Content-Type", "text/xml") + if _, err := m.WriteTo(ctx.Resp); err != nil { + log.Error("Failed writing sitemap: %v", err) + } + return + } + ctx.Data["Keyword"] = keyword ctx.Data["Total"] = count ctx.Data["Repos"] = repos diff --git a/routers/web/explore/user.go b/routers/web/explore/user.go index ea0d7d5f9..ea3d83e8d 100644 --- a/routers/web/explore/user.go +++ b/routers/web/explore/user.go @@ -12,7 +12,9 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/sitemap" "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/util" ) @@ -33,11 +35,20 @@ func isKeywordValid(keyword string) bool { // RenderUserSearch render user search page func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions, tplName base.TplName) { - opts.Page = ctx.FormInt("page") + // Sitemap index 
for sitemap paths + opts.Page = int(ctx.ParamsInt64("idx")) + isSitemap := ctx.Params("idx") != "" + if opts.Page <= 1 { + opts.Page = ctx.FormInt("page") + } if opts.Page <= 1 { opts.Page = 1 } + if isSitemap { + opts.PageSize = setting.UI.SitemapPagingNum + } + var ( users []*user_model.User count int64 @@ -73,6 +84,18 @@ func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions, return } } + if isSitemap { + m := sitemap.NewSitemap() + for _, item := range users { + m.Add(sitemap.URL{URL: item.HTMLURL(), LastMod: item.UpdatedUnix.AsTimePtr()}) + } + ctx.Resp.Header().Set("Content-Type", "text/xml") + if _, err := m.WriteTo(ctx.Resp); err != nil { + log.Error("Failed writing sitemap: %v", err) + } + return + } + ctx.Data["Keyword"] = opts.Keyword ctx.Data["Total"] = count ctx.Data["Users"] = users diff --git a/routers/web/home.go b/routers/web/home.go index 9036814dd..0c74987ba 100644 --- a/routers/web/home.go +++ b/routers/web/home.go @@ -7,11 +7,18 @@ package web import ( "net/http" + "strconv" + "code.gitea.io/gitea/models/db" + repo_model "code.gitea.io/gitea/models/repo" + user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/sitemap" + "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/web/middleware" "code.gitea.io/gitea/routers/web/auth" "code.gitea.io/gitea/routers/web/user" @@ -59,6 +66,52 @@ func Home(ctx *context.Context) { ctx.HTML(http.StatusOK, tplHome) } +// HomeSitemap renders the main sitemap +func HomeSitemap(ctx *context.Context) { + m := sitemap.NewSitemapIndex() + if !setting.Service.Explore.DisableUsersPage { + _, cnt, err := user_model.SearchUsers(&user_model.SearchUserOptions{ + Type: user_model.UserTypeIndividual, + ListOptions: db.ListOptions{PageSize: 1}, + IsActive: util.OptionalBoolTrue, + 
Visible: []structs.VisibleType{structs.VisibleTypePublic}, + }) + if err != nil { + ctx.ServerError("SearchUsers", err) + return + } + count := int(cnt) + idx := 1 + for i := 0; i < count; i += setting.UI.SitemapPagingNum { + m.Add(sitemap.URL{URL: setting.AppURL + "explore/users/sitemap-" + strconv.Itoa(idx) + ".xml"}) + idx++ + } + } + + _, cnt, err := repo_model.SearchRepository(&repo_model.SearchRepoOptions{ + ListOptions: db.ListOptions{ + PageSize: 1, + }, + Actor: ctx.Doer, + AllPublic: true, + }) + if err != nil { + ctx.ServerError("SearchRepository", err) + return + } + count := int(cnt) + idx := 1 + for i := 0; i < count; i += setting.UI.SitemapPagingNum { + m.Add(sitemap.URL{URL: setting.AppURL + "explore/repos/sitemap-" + strconv.Itoa(idx) + ".xml"}) + idx++ + } + + ctx.Resp.Header().Set("Content-Type", "text/xml") + if _, err := m.WriteTo(ctx.Resp); err != nil { + log.Error("Failed writing sitemap: %v", err) + } +} + // NotFound render 404 page func NotFound(ctx *context.Context) { ctx.Data["Title"] = "Page Not Found" diff --git a/routers/web/web.go b/routers/web/web.go index 4896bdb1e..80469ef7c 100644 --- a/routers/web/web.go +++ b/routers/web/web.go @@ -294,6 +294,7 @@ func RegisterRoutes(m *web.Route) { // Routers. // for health check m.Get("/", Home) + m.Get("/sitemap.xml", ignExploreSignIn, HomeSitemap) m.Group("/.well-known", func() { m.Get("/openid-configuration", auth.OIDCWellKnown) m.Group("", func() { @@ -310,7 +311,9 @@ func RegisterRoutes(m *web.Route) { ctx.Redirect(setting.AppSubURL + "/explore/repos") }) m.Get("/repos", explore.Repos) + m.Get("/repos/sitemap-{idx}.xml", explore.Repos) m.Get("/users", explore.Users) + m.Get("/users/sitemap-{idx}.xml", explore.Users) m.Get("/organizations", explore.Organizations) m.Get("/code", explore.Code) m.Get("/topics/search", explore.TopicSearch)