From f99d50fc9f8baf406f32a491b214f8a13617d086 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 24 Oct 2021 23:12:43 +0200 Subject: [PATCH] Read expected buffer size (#17409) * Read expected buffer size. * Changed name. --- modules/charset/charset.go | 5 +++-- modules/csv/csv.go | 5 +---- modules/git/blob.go | 3 ++- modules/repofiles/update.go | 5 +++-- modules/typesniffer/typesniffer.go | 6 ++++-- modules/util/io.go | 20 ++++++++++++++++++++ routers/common/repo.go | 5 +++-- routers/web/repo/editor.go | 4 ++-- routers/web/repo/lfs.go | 7 ++++--- routers/web/repo/view.go | 13 ++++++------- services/attachment/attachment.go | 7 +++---- 11 files changed, 51 insertions(+), 29 deletions(-) create mode 100644 modules/util/io.go diff --git a/modules/charset/charset.go b/modules/charset/charset.go index 47906e263..ae5cf5aa1 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" "github.com/gogs/chardet" "golang.org/x/net/html/charset" @@ -25,9 +26,9 @@ var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible func ToUTF8WithFallbackReader(rd io.Reader) io.Reader { var buf = make([]byte, 2048) - n, err := rd.Read(buf) + n, err := util.ReadAtMost(rd, buf) if err != nil { - return rd + return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd) } charsetLabel, err := DetectEncoding(buf[:n]) diff --git a/modules/csv/csv.go b/modules/csv/csv.go index 30698830a..2e2611aa8 100644 --- a/modules/csv/csv.go +++ b/modules/csv/csv.go @@ -29,11 +29,8 @@ func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader { // CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader. 
func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) { var data = make([]byte, 1e4) - size, err := rd.Read(data) + size, err := util.ReadAtMost(rd, data) if err != nil { - if err == io.EOF { - return CreateReader(bytes.NewReader([]byte{}), rune(',')), nil - } return nil, err } diff --git a/modules/git/blob.go b/modules/git/blob.go index c7919f3c4..9567affd0 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -11,6 +11,7 @@ import ( "io" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) // This file contains common functions between the gogit and !gogit variants for git Blobs @@ -28,7 +29,7 @@ func (b *Blob) GetBlobContent() (string, error) { } defer dataRc.Close() buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] return string(buf), nil } diff --git a/modules/repofiles/update.go b/modules/repofiles/update.go index dc2893cb1..d25accff1 100644 --- a/modules/repofiles/update.go +++ b/modules/repofiles/update.go @@ -19,6 +19,7 @@ import ( repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" stdcharset "golang.org/x/net/html/charset" "golang.org/x/text/transform" @@ -61,7 +62,7 @@ func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string } defer reader.Close() buf := make([]byte, 1024) - n, err := reader.Read(buf) + n, err := util.ReadAtMost(reader, buf) if err != nil { // return default return "UTF-8", false @@ -84,7 +85,7 @@ func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string } defer dataRc.Close() buf = make([]byte, 1024) - n, err = dataRc.Read(buf) + n, err = util.ReadAtMost(dataRc, buf) if err != nil { // return default return "UTF-8", false diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index d257b8179..9e29b3557 100644 --- 
a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -10,6 +10,8 @@ import ( "net/http" "regexp" "strings" + + "code.gitea.io/gitea/modules/util" ) // Use at most this many bytes to determine Content Type. @@ -86,8 +88,8 @@ func DetectContentType(data []byte) SniffedType { // DetectContentTypeFromReader guesses the content type contained in the reader. func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) { buf := make([]byte, sniffLen) - n, err := r.Read(buf) - if err != nil && err != io.EOF { + n, err := util.ReadAtMost(r, buf) + if err != nil { return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err) } buf = buf[:n] diff --git a/modules/util/io.go b/modules/util/io.go new file mode 100644 index 000000000..b467c0ac8 --- /dev/null +++ b/modules/util/io.go @@ -0,0 +1,20 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package util + +import ( + "io" +) + +// ReadAtMost reads at most len(buf) bytes from r into buf. +// It returns the number of bytes copied. n is only less than len(buf) if r provides fewer bytes. +// If EOF occurs while reading, err will be nil. 
+func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { + n, err = io.ReadFull(r, buf) + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = nil + } + return +} diff --git a/routers/common/repo.go b/routers/common/repo.go index af9170164..b0e14b63f 100644 --- a/routers/common/repo.go +++ b/routers/common/repo.go @@ -18,6 +18,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) // ServeBlob download a git.Blob @@ -42,8 +43,8 @@ func ServeBlob(ctx *context.Context, blob *git.Blob) error { // ServeData download file from io.Reader func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error { buf := make([]byte, 1024) - n, err := reader.Read(buf) - if err != nil && err != io.EOF { + n, err := util.ReadAtMost(reader, buf) + if err != nil { return err } if n >= 0 { diff --git a/routers/web/repo/editor.go b/routers/web/repo/editor.go index 1d18bfe9a..f27258349 100644 --- a/routers/web/repo/editor.go +++ b/routers/web/repo/editor.go @@ -118,7 +118,7 @@ func editFile(ctx *context.Context, isNewFile bool) { ctx.Data["FileName"] = blob.Name() buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] // Only some file types are editable online as text. 
@@ -751,7 +751,7 @@ func UploadFileToServer(ctx *context.Context) { defer file.Close() buf := make([]byte, 1024) - n, _ := file.Read(buf) + n, _ := util.ReadAtMost(file, buf) if n > 0 { buf = buf[:n] } diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 271c63855..5e24cfa3c 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -25,6 +25,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) const ( @@ -271,7 +272,7 @@ func LFSFileGet(ctx *context.Context) { } defer dataRc.Close() buf := make([]byte, 1024) - n, err := dataRc.Read(buf) + n, err := util.ReadAtMost(dataRc, buf) if err != nil { ctx.ServerError("Data", err) return @@ -296,10 +297,10 @@ func LFSFileGet(ctx *context.Context) { break } - buf := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc)) + rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc)) // Building code view blocks with line number on server side. 
- fileContent, _ := io.ReadAll(buf) + fileContent, _ := io.ReadAll(rd) var output bytes.Buffer lines := strings.Split(string(fileContent), "\n") diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 0777a10e7..90be631c7 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -33,6 +33,7 @@ import ( "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/typesniffer" + "code.gitea.io/gitea/modules/util" ) const ( @@ -250,7 +251,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { defer dataRc.Close() buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] st := typesniffer.DetectContentType(buf) @@ -285,7 +286,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { defer dataRc.Close() buf = make([]byte, 1024) - n, err = dataRc.Read(buf) + n, err = util.ReadAtMost(dataRc, buf) if err != nil { ctx.ServerError("Data", err) return @@ -377,7 +378,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ctx.Data["RawFileLink"] = rawLink + "/" + ctx.Repo.TreePath buf := make([]byte, 1024) - n, _ := dataRc.Read(buf) + n, _ := util.ReadAtMost(dataRc, buf) buf = buf[:n] st := typesniffer.DetectContentType(buf) @@ -409,10 +410,8 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st defer dataRc.Close() buf = make([]byte, 1024) - n, err = dataRc.Read(buf) - // Error EOF don't mean there is an error, it just means we read to - // the end - if err != nil && err != io.EOF { + n, err = util.ReadAtMost(dataRc, buf) + if err != nil { ctx.ServerError("Data", err) return } diff --git a/services/attachment/attachment.go b/services/attachment/attachment.go index 7500a8ac3..f747ccec3 100644 --- a/services/attachment/attachment.go +++ b/services/attachment/attachment.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/storage" 
"code.gitea.io/gitea/modules/upload" + "code.gitea.io/gitea/modules/util" "github.com/google/uuid" ) @@ -41,10 +42,8 @@ func NewAttachment(attach *models.Attachment, file io.Reader) (*models.Attachmen // UploadAttachment upload new attachment into storage and update database func UploadAttachment(file io.Reader, actorID, repoID, releaseID int64, fileName string, allowedTypes string) (*models.Attachment, error) { buf := make([]byte, 1024) - n, _ := file.Read(buf) - if n > 0 { - buf = buf[:n] - } + n, _ := util.ReadAtMost(file, buf) + buf = buf[:n] if err := upload.Verify(buf, fileName, allowedTypes); err != nil { return nil, err