Fix CSV render error (#17406) (#17431)

Backport #17406.

Closes #17378 

Both errors from #17378 were caused by #15175.

Problem 1 (error with added file):
`ToUTF8WithFallbackReader` creates a `MultiReader` from a 2048-byte buffer and the remaining reader. `CreateReaderAndGuessDelimiter` tries to read 10000 bytes from this reader but only gets 2048, because that is all the first reader in the `MultiReader` holds. The `if size < 1e4` check then wrongly concludes that the input has reached EOF and returns a reader over only those 2048 bytes, dropping the rest of the input.

Problem 2 (error with changed file):
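The blob reader was closed with `defer` inside the helper that creates the CSV reader. That was fine while the old version read the whole file into memory. With the streaming version, the reader must stay open after the helper returns, so the close now has to be deferred by the caller instead.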

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
KN4CK3R 2021-10-25 19:31:15 +02:00 committed by GitHub
parent 5159055278
commit 1fbdf96c34
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 22 deletions

View file

@ -28,6 +28,7 @@ func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader {
} }
// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader. // CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader.
// Reads at most 10k bytes.
func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) { func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
var data = make([]byte, 1e4) var data = make([]byte, 1e4)
size, err := util.ReadAtMost(rd, data) size, err := util.ReadAtMost(rd, data)
@ -35,25 +36,16 @@ func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
return nil, err return nil, err
} }
delimiter := guessDelimiter(data[:size]) return CreateReader(
io.MultiReader(bytes.NewReader(data[:size]), rd),
var newInput io.Reader guessDelimiter(data[:size]),
if size < 1e4 { ), nil
newInput = bytes.NewReader(data[:size])
} else {
newInput = io.MultiReader(bytes.NewReader(data), rd)
}
return CreateReader(newInput, delimiter), nil
} }
// guessDelimiter scores the input CSV data against delimiters, and returns the best match. // guessDelimiter scores the input CSV data against delimiters, and returns the best match.
// Reads at most 10k bytes & 10 lines.
func guessDelimiter(data []byte) rune { func guessDelimiter(data []byte) rune {
maxLines := 10 maxLines := 10
maxBytes := util.Min(len(data), 1e4) text := quoteRegexp.ReplaceAllLiteralString(string(data), "")
text := string(data[:maxBytes])
text = quoteRegexp.ReplaceAllLiteralString(text, "")
lines := strings.SplitN(text, "\n", maxLines+1) lines := strings.SplitN(text, "\n", maxLines+1)
lines = lines[:util.Min(maxLines, len(lines))] lines = lines[:util.Min(maxLines, len(lines))]

View file

@ -10,6 +10,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"html" "html"
"io"
"net/http" "net/http"
"path" "path"
"path/filepath" "path/filepath"
@ -104,30 +105,36 @@ func setCsvCompareContext(ctx *context.Context) {
errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large")) errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large"))
csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) { csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, io.Closer, error) {
blob, err := c.GetBlobByPath(diffFile.Name) blob, err := c.GetBlobByPath(diffFile.Name)
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() { if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() {
return nil, errTooLarge return nil, nil, errTooLarge
} }
reader, err := blob.DataAsync() reader, err := blob.DataAsync()
if err != nil { if err != nil {
return nil, err return nil, nil, err
} }
defer reader.Close()
return csv_module.CreateReaderAndGuessDelimiter(charset.ToUTF8WithFallbackReader(reader)) csvReader, err := csv_module.CreateReaderAndGuessDelimiter(charset.ToUTF8WithFallbackReader(reader))
return csvReader, reader, err
} }
baseReader, err := csvReaderFromCommit(baseCommit) baseReader, baseBlobCloser, err := csvReaderFromCommit(baseCommit)
if baseBlobCloser != nil {
defer baseBlobCloser.Close()
}
if err == errTooLarge { if err == errTooLarge {
return CsvDiffResult{nil, err.Error()} return CsvDiffResult{nil, err.Error()}
} }
headReader, err := csvReaderFromCommit(headCommit) headReader, headBlobCloser, err := csvReaderFromCommit(headCommit)
if headBlobCloser != nil {
defer headBlobCloser.Close()
}
if err == errTooLarge { if err == errTooLarge {
return CsvDiffResult{nil, err.Error()} return CsvDiffResult{nil, err.Error()}
} }