From 22a39bb961daedccb08e47cb4cf83b2d4ef72108 Mon Sep 17 00:00:00 2001 From: yp05327 <576951401@qq.com> Date: Tue, 13 Jun 2023 18:02:25 +0900 Subject: [PATCH] Fix profile render when the README.md size is larger than 1024 bytes (#25131) Fixes https://github.com/go-gitea/gitea/issues/25094 `GetBlobContent` will only get the first 1024 bytes, if the README.md size is larger than 1024 bytes, We can not render the rest of them. After this fix, we should provide the limited size to read when call `GetBlobContent`. After: ![image](https://github.com/go-gitea/gitea/assets/18380374/22a42936-4cf8-40b4-a5c7-e384082beb0d) --------- Co-authored-by: wxiaoguang --- models/issues/pull.go | 2 +- modules/git/blob.go | 13 ++-- modules/util/io.go | 39 +++++++++++- modules/util/io_test.go | 66 ++++++++++++++++++++ routers/web/repo/view.go | 2 +- routers/web/user/profile.go | 2 +- services/repository/files/content.go | 2 +- tests/integration/api_packages_cargo_test.go | 2 +- 8 files changed, 116 insertions(+), 12 deletions(-) create mode 100644 modules/util/io_test.go diff --git a/models/issues/pull.go b/models/issues/pull.go index 23b938f95a..2acc2b4226 100644 --- a/models/issues/pull.go +++ b/models/issues/pull.go @@ -920,7 +920,7 @@ func PullRequestCodeOwnersReview(ctx context.Context, pull *Issue, pr *PullReque var data string for _, file := range files { if blob, err := commit.GetBlobByPath(file); err == nil { - data, err = blob.GetBlobContent() + data, err = blob.GetBlobContent(setting.UI.MaxDisplayFileSize) if err == nil { break } diff --git a/modules/git/blob.go b/modules/git/blob.go index 8864f54d1b..bcecb42e16 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -20,17 +20,18 @@ func (b *Blob) Name() string { return b.name } -// GetBlobContent Gets the content of the blob as raw text -func (b *Blob) GetBlobContent() (string, error) { +// GetBlobContent Gets the limited content of the blob as raw text +func (b *Blob) GetBlobContent(limit int64) (string, error) { + if limit <= 0 { + return "", nil + } dataRc, err := b.DataAsync() if err != nil { return "", err } defer dataRc.Close() - buf := make([]byte, 1024) - n, _ := util.ReadAtMost(dataRc, buf) - buf = buf[:n] - return string(buf), nil + buf, err := util.ReadWithLimit(dataRc, int(limit)) + return string(buf), err } // GetBlobLineCount gets line count of the blob diff --git a/modules/util/io.go b/modules/util/io.go index 69b1d63145..1559b019a0 100644 --- a/modules/util/io.go +++ b/modules/util/io.go @@ -4,13 +4,14 @@ package util import ( + "bytes" "errors" "io" ) // ReadAtMost reads at most len(buf) bytes from r into buf. // It returns the number of bytes copied. n is only less than len(buf) if r provides fewer bytes. -// If EOF occurs while reading, err will be nil. +// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil. func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { n, err = io.ReadFull(r, buf) if err == io.EOF || err == io.ErrUnexpectedEOF { @@ -19,6 +20,42 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { return n, err } +// ReadWithLimit reads at most "limit" bytes from r into buf. +// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil. +func ReadWithLimit(r io.Reader, n int) (buf []byte, err error) { + return readWithLimit(r, 1024, n) +} + +func readWithLimit(r io.Reader, batch, limit int) ([]byte, error) { + if limit <= batch { + buf := make([]byte, limit) + n, err := ReadAtMost(r, buf) + if err != nil { + return nil, err + } + return buf[:n], nil + } + res := bytes.NewBuffer(make([]byte, 0, batch)) + bufFix := make([]byte, batch) + eof := false + for res.Len() < limit && !eof { + bufTmp := bufFix + if res.Len()+batch > limit { + bufTmp = bufFix[:limit-res.Len()] + } + n, err := io.ReadFull(r, bufTmp) + if err == io.EOF || err == io.ErrUnexpectedEOF { + eof = true + } else if err != nil { + return nil, err + } + if _, err = res.Write(bufTmp[:n]); err != nil { + return nil, err + } + } + return res.Bytes(), nil +} + // ErrNotEmpty is an error reported when there is a non-empty reader var ErrNotEmpty = errors.New("not-empty") diff --git a/modules/util/io_test.go b/modules/util/io_test.go new file mode 100644 index 0000000000..275575463a --- /dev/null +++ b/modules/util/io_test.go @@ -0,0 +1,66 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package util + +import ( + "bytes" + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +type readerWithError struct { + buf *bytes.Buffer +} + +func (r *readerWithError) Read(p []byte) (n int, err error) { + if r.buf.Len() < 2 { + return 0, errors.New("test error") + } + return r.buf.Read(p) +} + +func TestReadWithLimit(t *testing.T) { + bs := []byte("0123456789abcdef") + + // normal test + buf, err := readWithLimit(bytes.NewBuffer(bs), 5, 2) + assert.NoError(t, err) + assert.Equal(t, []byte("01"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 5) + assert.NoError(t, err) + assert.Equal(t, []byte("01234"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 6) + assert.NoError(t, err) + assert.Equal(t, []byte("012345"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, len(bs)) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789abcdef"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 100) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789abcdef"), buf) + + // test with error + buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 10) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789"), buf) + + buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 100) + assert.ErrorContains(t, err, "test error") + assert.Empty(t, buf) + + // test public function + buf, err = ReadWithLimit(bytes.NewBuffer(bs), 2) + assert.NoError(t, err) + assert.Equal(t, []byte("01"), buf) + + buf, err = ReadWithLimit(bytes.NewBuffer(bs), 9999999) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789abcdef"), buf) +} diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 1d54f25884..cf719c49f0 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -363,7 +363,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ctx.Data["FileError"] = ctx.Locale.Tr("actions.runs.invalid_workflow_helper", workFlowErr.Error()) } } else if util.SliceContains([]string{"CODEOWNERS", "docs/CODEOWNERS", ".gitea/CODEOWNERS"}, ctx.Repo.TreePath) { - if data, err := blob.GetBlobContent(); err == nil { + if data, err := blob.GetBlobContent(setting.UI.MaxDisplayFileSize); err == nil { _, warnings := issue_model.GetCodeOwnersFromContent(ctx, data) if len(warnings) > 0 { ctx.Data["FileWarning"] = strings.Join(warnings, "\n") diff --git a/routers/web/user/profile.go b/routers/web/user/profile.go index 42ae37e3ba..6f9f84d60d 100644 --- a/routers/web/user/profile.go +++ b/routers/web/user/profile.go @@ -107,7 +107,7 @@ func Profile(ctx *context.Context) { } blob, err := commit.GetBlobByPath("README.md") if err == nil { - bytes, err := blob.GetBlobContent() + bytes, err := blob.GetBlobContent(setting.UI.MaxDisplayFileSize) if err != nil { ctx.ServerError("GetBlobContent", err) return diff --git a/services/repository/files/content.go b/services/repository/files/content.go index 6f6dc91d85..c701431d67 100644 --- a/services/repository/files/content.go +++ b/services/repository/files/content.go @@ -203,7 +203,7 @@ func GetContents(ctx context.Context, repo *repo_model.Repository, treePath, ref } else if entry.IsLink() { contentsResponse.Type = string(ContentTypeLink) // The target of a symlink file is the content of the file - targetFromContent, err := entry.Blob().GetBlobContent() + targetFromContent, err := entry.Blob().GetBlobContent(1024) if err != nil { return nil, err } diff --git a/tests/integration/api_packages_cargo_test.go b/tests/integration/api_packages_cargo_test.go index 608f192968..03d8e0c520 100644 --- a/tests/integration/api_packages_cargo_test.go +++ b/tests/integration/api_packages_cargo_test.go @@ -88,7 +88,7 @@ func testPackageCargo(t *testing.T, _ *neturl.URL) { blob, err := commit.GetBlobByPath(path) assert.NoError(t, err) - content, err := blob.GetBlobContent() + content, err := blob.GetBlobContent(1024) assert.NoError(t, err) return content