diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini
index fe4fec7e9..1e0cd5f28 100644
--- a/custom/conf/app.example.ini
+++ b/custom/conf/app.example.ini
@@ -248,6 +248,10 @@ EVENT_SOURCE_UPDATE_TIME = 10s
; Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images.
ENABLE_RENDER = true
+[ui.csv]
+; Maximum allowed file size in bytes to render CSV files as a table (set to 0 for no limit).
+MAX_FILE_SIZE = 524288
+
[markdown]
; Render soft line breaks as hard line breaks, which means a single newline character between
; paragraphs will cause a line break and adding trailing whitespace to paragraphs is not
diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index e32112f02..255bfb2b9 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -198,6 +198,10 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `ENABLE_RENDER`: **true**: Whether to render SVG files as images. If SVG rendering is disabled, SVG files are displayed as text and cannot be embedded in markdown files as images.
+### UI - CSV Files (`ui.csv`)
+
+- `MAX_FILE_SIZE`: **524288** (512 KiB): Maximum allowed file size in bytes to render CSV files as a table (set to 0 for no limit).
+
## Markdown (`markdown`)
- `ENABLE_HARD_LINE_BREAK_IN_COMMENTS`: **true**: Render soft line breaks as hard line breaks in comments, which
diff --git a/modules/csv/csv.go b/modules/csv/csv.go
new file mode 100644
index 000000000..1aa78fdee
--- /dev/null
+++ b/modules/csv/csv.go
@@ -0,0 +1,93 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package csv
+
+import (
+ "bytes"
+ "encoding/csv"
+ "errors"
+ "regexp"
+ "strings"
+
+ "code.gitea.io/gitea/modules/translation"
+ "code.gitea.io/gitea/modules/util"
+)
+
+var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
+
+// CreateReader creates a csv.Reader with the given delimiter.
+func CreateReader(rawBytes []byte, delimiter rune) *csv.Reader {
+ rd := csv.NewReader(bytes.NewReader(rawBytes))
+ rd.Comma = delimiter
+ rd.TrimLeadingSpace = true
+ return rd
+}
+
+// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader.
+func CreateReaderAndGuessDelimiter(rawBytes []byte) *csv.Reader {
+ delimiter := guessDelimiter(rawBytes)
+ return CreateReader(rawBytes, delimiter)
+}
+
+// guessDelimiter scores the input CSV data against delimiters, and returns the best match.
+// Reads at most 10k bytes & 10 lines.
+func guessDelimiter(data []byte) rune {
+ maxLines := 10
+ maxBytes := util.Min(len(data), 1e4)
+ text := string(data[:maxBytes])
+ text = quoteRegexp.ReplaceAllLiteralString(text, "")
+ lines := strings.SplitN(text, "\n", maxLines+1)
+ lines = lines[:util.Min(maxLines, len(lines))]
+
+ delimiters := []rune{',', ';', '\t', '|', '@'}
+ bestDelim := delimiters[0]
+ bestScore := 0.0
+ for _, delim := range delimiters {
+ score := scoreDelimiter(lines, delim)
+ if score > bestScore {
+ bestScore = score
+ bestDelim = delim
+ }
+ }
+
+ return bestDelim
+}
+
+// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV.
+func scoreDelimiter(lines []string, delim rune) float64 {
+ countTotal := 0
+ countLineMax := 0
+ linesNotEqual := 0
+
+ for _, line := range lines {
+ if len(line) == 0 {
+ continue
+ }
+
+ countLine := strings.Count(line, string(delim))
+ countTotal += countLine
+ if countLine != countLineMax {
+ if countLineMax != 0 {
+ linesNotEqual++
+ }
+ countLineMax = util.Max(countLine, countLineMax)
+ }
+ }
+
+ return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines)))
+}
+
+// FormatError converts csv errors into readable messages.
+func FormatError(err error, locale translation.Locale) (string, error) {
+ var perr *csv.ParseError
+ if errors.As(err, &perr) {
+ if perr.Err == csv.ErrFieldCount {
+ return locale.Tr("repo.error.csv.invalid_field_count", perr.Line), nil
+ }
+ return locale.Tr("repo.error.csv.unexpected", perr.Line, perr.Column), nil
+ }
+
+ return "", err
+}
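
Usage sketch for the new modules/csv helpers (illustrative only, not part of the patch; the sample input and the `main` package are assumptions for this example):

```go
package main

import (
	"fmt"
	"io"

	csv_module "code.gitea.io/gitea/modules/csv"
)

func main() {
	raw := []byte("id;name\n1;alice\n2;bob")

	// The field delimiter (';' here) is guessed from the content.
	rd := csv_module.CreateReaderAndGuessDelimiter(raw)
	for {
		fields, err := rd.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// In Gitea itself, csv_module.FormatError turns csv errors into localized messages.
			fmt.Println("parse error:", err)
			break
		}
		fmt.Println(fields) // [id name], then [1 alice], then [2 bob]
	}
}
```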
diff --git a/modules/csv/csv_test.go b/modules/csv/csv_test.go
new file mode 100644
index 000000000..3a7584e21
--- /dev/null
+++ b/modules/csv/csv_test.go
@@ -0,0 +1,40 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package csv
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestCreateReader(t *testing.T) {
+ rd := CreateReader([]byte{}, ',')
+ assert.Equal(t, ',', rd.Comma)
+}
+
+func TestCreateReaderAndGuessDelimiter(t *testing.T) {
+ input := "a;b;c\n1;2;3\n4;5;6"
+
+ rd := CreateReaderAndGuessDelimiter([]byte(input))
+ assert.Equal(t, ';', rd.Comma)
+}
+
+func TestGuessDelimiter(t *testing.T) {
+ var kases = map[string]rune{
+ "a": ',',
+ "1,2": ',',
+ "1;2": ';',
+ "1\t2": '\t',
+ "1|2": '|',
+ "1,2,3;4,5,6;7,8,9\na;b;c": ';',
+ "\"1,2,3,4\";\"a\nb\"\nc;d": ';',
+ "
": ',',
+ }
+
+ for k, v := range kases {
+ assert.EqualValues(t, guessDelimiter([]byte(k)), v)
+ }
+}
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go
index 1e3acc9b4..430e759eb 100644
--- a/modules/markup/csv/csv.go
+++ b/modules/markup/csv/csv.go
@@ -6,24 +6,20 @@ package markup
import (
"bytes"
- "encoding/csv"
"html"
"io"
- "regexp"
- "strings"
+ "strconv"
+ "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/markup"
- "code.gitea.io/gitea/modules/util"
+ "code.gitea.io/gitea/modules/setting"
)
-var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
-
func init() {
markup.RegisterParser(Parser{})
-
}
-// Parser implements markup.Parser for orgmode
+// Parser implements markup.Parser for CSV files
type Parser struct {
}
@@ -38,11 +34,35 @@ func (Parser) Extensions() []string {
}
// Render implements markup.Parser
-func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
- rd := csv.NewReader(bytes.NewReader(rawBytes))
- rd.Comma = p.bestDelimiter(rawBytes)
+func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
var tmpBlock bytes.Buffer
- tmpBlock.WriteString(`<table class="table">`)
+
+ if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) {
+ tmpBlock.WriteString("")
+ tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
+ tmpBlock.WriteString("
")
+ return tmpBlock.Bytes()
+ }
+
+ rd := csv.CreateReaderAndGuessDelimiter(rawBytes)
+
+ writeField := func(element, class, field string) {
+ tmpBlock.WriteString("<")
+ tmpBlock.WriteString(element)
+ if len(class) > 0 {
+ tmpBlock.WriteString(" class=\"")
+ tmpBlock.WriteString(class)
+ tmpBlock.WriteString("\"")
+ }
+ tmpBlock.WriteString(">")
+ tmpBlock.WriteString(html.EscapeString(field))
+ tmpBlock.WriteString("")
+ tmpBlock.WriteString(element)
+ tmpBlock.WriteString(">")
+ }
+
+ tmpBlock.WriteString(`<table class="data-table">`)
+ row := 1
for {
fields, err := rd.Read()
if err == io.EOF {
@@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin
continue
}
tmpBlock.WriteString("")
+ element := "td"
+ if row == 1 {
+ element = "th"
+ }
+ writeField(element, "line-num", strconv.Itoa(row))
for _, field := range fields {
- tmpBlock.WriteString("")
- tmpBlock.WriteString(html.EscapeString(field))
- tmpBlock.WriteString(" | ")
+ writeField(element, "", field)
}
tmpBlock.WriteString("
")
+
+ row++
}
tmpBlock.WriteString("
")
return tmpBlock.Bytes()
}
-
-// bestDelimiter scores the input CSV data against delimiters, and returns the best match.
-// Reads at most 10k bytes & 10 lines.
-func (p Parser) bestDelimiter(data []byte) rune {
- maxLines := 10
- maxBytes := util.Min(len(data), 1e4)
- text := string(data[:maxBytes])
- text = quoteRegexp.ReplaceAllLiteralString(text, "")
- lines := strings.SplitN(text, "\n", maxLines+1)
- lines = lines[:util.Min(maxLines, len(lines))]
-
- delimiters := []rune{',', ';', '\t', '|'}
- bestDelim := delimiters[0]
- bestScore := 0.0
- for _, delim := range delimiters {
- score := p.scoreDelimiter(lines, delim)
- if score > bestScore {
- bestScore = score
- bestDelim = delim
- }
- }
-
- return bestDelim
-}
-
-// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV
-func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) {
- countTotal := 0
- countLineMax := 0
- linesNotEqual := 0
-
- for _, line := range lines {
- if len(line) == 0 {
- continue
- }
-
- countLine := strings.Count(line, string(delim))
- countTotal += countLine
- if countLine != countLineMax {
- if countLineMax != 0 {
- linesNotEqual++
- }
- countLineMax = util.Max(countLine, countLineMax)
- }
- }
-
- return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines)))
-}
diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go
index 4d4e0871e..5438ebdf5 100644
--- a/modules/markup/csv/csv_test.go
+++ b/modules/markup/csv/csv_test.go
@@ -13,14 +13,10 @@ import (
func TestRenderCSV(t *testing.T) {
var parser Parser
var kases = map[string]string{
- "a": "",
- "1,2": "",
- "1;2": "",
- "1\t2": "",
- "1|2": "",
- "1,2,3;4,5,6;7,8,9\na;b;c": "",
- "\"1,2,3,4\";\"a\nb\"\nc;d": "",
- "
": "",
+ "a": "",
+ "1,2": "",
+ "1;2\n3;4": "",
+ "
": "",
}
for k, v := range kases {
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index 9214a75fb..19feaa3cc 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -69,6 +69,10 @@ func ReplaceSanitizer() {
// Allow icons, emojis, and chroma syntax on span
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span")
+ // Allow data tables
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table")
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td")
+
// Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt",
diff --git a/modules/setting/setting.go b/modules/setting/setting.go
index 6a9868713..280987ed6 100644
--- a/modules/setting/setting.go
+++ b/modules/setting/setting.go
@@ -213,6 +213,10 @@ var (
Enabled bool `ini:"ENABLE_RENDER"`
} `ini:"ui.svg"`
+ CSV struct {
+ MaxFileSize int64
+ } `ini:"ui.csv"`
+
Admin struct {
UserPagingNum int
RepoPagingNum int
@@ -258,6 +262,11 @@ var (
}{
Enabled: true,
},
+ CSV: struct {
+ MaxFileSize int64
+ }{
+ MaxFileSize: 524288,
+ },
Admin: struct {
UserPagingNum int
RepoPagingNum int
diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini
index 3a8783930..3be209ffe 100644
--- a/options/locale/locale_en-US.ini
+++ b/options/locale/locale_en-US.ini
@@ -1860,6 +1860,7 @@ diff.whitespace_ignore_at_eol = Ignore changes in whitespace at EOL
diff.stats_desc = %d changed files with %d additions and %d deletions
diff.stats_desc_file = %d changes: %d additions and %d deletions
diff.bin = BIN
+diff.bin_not_shown = Binary file not shown.
diff.view_file = View File
diff.file_before = Before
diff.file_after = After
@@ -1960,6 +1961,10 @@ topic.done = Done
topic.count_prompt = You can not select more than 25 topics
topic.format_prompt = Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
+error.csv.too_large = Can't render this file because it is too large.
+error.csv.unexpected = Can't render this file because it contains an unexpected character at line %d and column %d.
+error.csv.invalid_field_count = Can't render this file because it has an incorrect number of fields at line %d.
+
[org]
org_name_holder = Organization Name
org_full_name_holder = Organization Full Name
diff --git a/routers/repo/commit.go b/routers/repo/commit.go
index c3ee6b5ac..c06d09261 100644
--- a/routers/repo/commit.go
+++ b/routers/repo/commit.go
@@ -336,9 +336,8 @@ func Diff(ctx *context.Context) {
return
}
}
- setImageCompareContext(ctx, parentCommit, commit)
headTarget := path.Join(userName, repoName)
- setPathsCompareContext(ctx, parentCommit, commit, headTarget)
+ setCompareContext(ctx, parentCommit, commit, headTarget)
ctx.Data["Title"] = commit.Summary() + " ยท " + base.ShortSha(commitID)
ctx.Data["Commit"] = commit
verification := models.ParseCommitWithSignature(commit)
diff --git a/routers/repo/compare.go b/routers/repo/compare.go
index 38c3005cf..0b7bdf764 100644
--- a/routers/repo/compare.go
+++ b/routers/repo/compare.go
@@ -6,14 +6,20 @@ package repo
import (
"bufio"
+ "encoding/csv"
+ "errors"
"fmt"
"html"
+ "io/ioutil"
"path"
+ "path/filepath"
"strings"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
+ csv_module "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
@@ -26,6 +32,16 @@ const (
tplBlobExcerpt base.TplName = "repo/diff/blob_excerpt"
)
+// setCompareContext sets context data.
+func setCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) {
+ ctx.Data["BaseCommit"] = base
+ ctx.Data["HeadCommit"] = head
+
+ setPathsCompareContext(ctx, base, head, headTarget)
+ setImageCompareContext(ctx, base, head)
+ setCsvCompareContext(ctx)
+}
+
// setPathsCompareContext sets context data for source and raw paths
func setPathsCompareContext(ctx *context.Context, base *git.Commit, head *git.Commit, headTarget string) {
sourcePath := setting.AppSubURL + "/%s/src/commit/%s"
@@ -65,6 +81,73 @@ func setImageCompareContext(ctx *context.Context, base *git.Commit, head *git.Co
}
}
+// setCsvCompareContext sets context data that is required by the CSV compare template
+func setCsvCompareContext(ctx *context.Context) {
+ ctx.Data["IsCsvFile"] = func(diffFile *gitdiff.DiffFile) bool {
+ extension := strings.ToLower(filepath.Ext(diffFile.Name))
+ return extension == ".csv" || extension == ".tsv"
+ }
+
+ type CsvDiffResult struct {
+ Sections []*gitdiff.TableDiffSection
+ Error string
+ }
+
+ ctx.Data["CreateCsvDiff"] = func(diffFile *gitdiff.DiffFile, baseCommit *git.Commit, headCommit *git.Commit) CsvDiffResult {
+ if diffFile == nil || baseCommit == nil || headCommit == nil {
+ return CsvDiffResult{nil, ""}
+ }
+
+ errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large"))
+
+ csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) {
+ blob, err := c.GetBlobByPath(diffFile.Name)
+ if err != nil {
+ return nil, err
+ }
+
+ if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() {
+ return nil, errTooLarge
+ }
+
+ reader, err := blob.DataAsync()
+ if err != nil {
+ return nil, err
+ }
+ defer reader.Close()
+
+ b, err := ioutil.ReadAll(reader)
+ if err != nil {
+ return nil, err
+ }
+
+ b = charset.ToUTF8WithFallback(b)
+
+ return csv_module.CreateReaderAndGuessDelimiter(b), nil
+ }
+
+ baseReader, err := csvReaderFromCommit(baseCommit)
+ if err == errTooLarge {
+ return CsvDiffResult{nil, err.Error()}
+ }
+ headReader, err := csvReaderFromCommit(headCommit)
+ if err == errTooLarge {
+ return CsvDiffResult{nil, err.Error()}
+ }
+
+ sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader)
+ if err != nil {
+ errMessage, err := csv_module.FormatError(err, ctx.Locale)
+ if err != nil {
+ log.Error("RenderCsvDiff failed: %v", err)
+ return CsvDiffResult{nil, ""}
+ }
+ return CsvDiffResult{nil, errMessage}
+ }
+ return CsvDiffResult{sections, ""}
+ }
+}
+
// ParseCompareInfo parse compare info between two commit for preparing comparing references
func ParseCompareInfo(ctx *context.Context) (*models.User, *models.Repository, *git.Repository, *git.CompareInfo, string, string) {
baseRepo := ctx.Repo.Repository
@@ -490,9 +573,8 @@ func PrepareCompareDiff(
ctx.Data["Username"] = headUser.Name
ctx.Data["Reponame"] = headRepo.Name
- setImageCompareContext(ctx, baseCommit, headCommit)
headTarget := path.Join(headUser.Name, repo.Name)
- setPathsCompareContext(ctx, baseCommit, headCommit, headTarget)
+ setCompareContext(ctx, baseCommit, headCommit, headTarget)
return false
}
diff --git a/routers/repo/pull.go b/routers/repo/pull.go
index 2ed47605f..cc6841da4 100644
--- a/routers/repo/pull.go
+++ b/routers/repo/pull.go
@@ -591,7 +591,6 @@ func ViewPullFiles(ctx *context.Context) {
gitRepo *git.Repository
)
- var headTarget string
var prInfo *git.CompareInfo
if pull.HasMerged {
prInfo = PrepareMergedViewPullInfo(ctx, issue)
@@ -618,7 +617,6 @@ func ViewPullFiles(ctx *context.Context) {
startCommitID = prInfo.MergeBase
endCommitID = headCommitID
- headTarget = path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name)
ctx.Data["Username"] = ctx.Repo.Owner.Name
ctx.Data["Reponame"] = ctx.Repo.Repository.Name
ctx.Data["AfterCommitID"] = endCommitID
@@ -672,8 +670,8 @@ func ViewPullFiles(ctx *context.Context) {
}
}
- setImageCompareContext(ctx, baseCommit, commit)
- setPathsCompareContext(ctx, baseCommit, commit, headTarget)
+ headTarget := path.Join(ctx.Repo.Owner.Name, ctx.Repo.Repository.Name)
+ setCompareContext(ctx, baseCommit, commit, headTarget)
ctx.Data["RequireHighlightJS"] = true
ctx.Data["RequireSimpleMDE"] = true
diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go
new file mode 100644
index 000000000..f4310d877
--- /dev/null
+++ b/services/gitdiff/csv.go
@@ -0,0 +1,379 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gitdiff
+
+import (
+ "encoding/csv"
+ "errors"
+ "io"
+
+ "code.gitea.io/gitea/modules/util"
+)
+
+const unmappedColumn = -1
+const maxRowsToInspect int = 10
+const minRatioToMatch float32 = 0.8
+
+// TableDiffCellType represents the type of a TableDiffCell.
+type TableDiffCellType uint8
+
+// TableDiffCellType possible values.
+const (
+ TableDiffCellEqual TableDiffCellType = iota + 1
+ TableDiffCellChanged
+ TableDiffCellAdd
+ TableDiffCellDel
+)
+
+// TableDiffCell represents a cell of a TableDiffRow
+type TableDiffCell struct {
+ LeftCell string
+ RightCell string
+ Type TableDiffCellType
+}
+
+// TableDiffRow represents a row of a TableDiffSection.
+type TableDiffRow struct {
+ RowIdx int
+ Cells []*TableDiffCell
+}
+
+// TableDiffSection represents a section of a DiffFile.
+type TableDiffSection struct {
+ Rows []*TableDiffRow
+}
+
+// csvReader wraps a csv.Reader which buffers the first rows.
+type csvReader struct {
+ reader *csv.Reader
+ buffer [][]string
+ line int
+ eof bool
+}
+
+// createCsvReader creates a csvReader and fills the buffer
+func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
+ csv := &csvReader{reader: reader}
+ csv.buffer = make([][]string, bufferRowCount)
+ for i := 0; i < bufferRowCount && !csv.eof; i++ {
+ row, err := csv.readNextRow()
+ if err != nil {
+ return nil, err
+ }
+ csv.buffer[i] = row
+ }
+ csv.line = bufferRowCount
+ return csv, nil
+}
+
+// GetRow gets a row from the buffer if present, or advances the reader to the requested row. At the end of the file, nil is returned.
+func (csv *csvReader) GetRow(row int) ([]string, error) {
+ if row < len(csv.buffer) {
+ return csv.buffer[row], nil
+ }
+ if csv.eof {
+ return nil, nil
+ }
+ for {
+ fields, err := csv.readNextRow()
+ if err != nil {
+ return nil, err
+ }
+ if csv.eof {
+ return nil, nil
+ }
+ csv.line++
+ if csv.line-1 == row {
+ return fields, nil
+ }
+ }
+}
+
+func (csv *csvReader) readNextRow() ([]string, error) {
+ if csv.eof {
+ return nil, nil
+ }
+ row, err := csv.reader.Read()
+ if err != nil {
+ if err != io.EOF {
+ return nil, err
+ }
+ csv.eof = true
+ }
+ return row, nil
+}
+
+// CreateCsvDiff creates a tabular diff based on two CSV readers.
+func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
+ if baseReader != nil && headReader != nil {
+ return createCsvDiff(diffFile, baseReader, headReader)
+ }
+
+ if baseReader != nil {
+ return createCsvDiffSingle(baseReader, TableDiffCellDel)
+ }
+ return createCsvDiffSingle(headReader, TableDiffCellAdd)
+}
+
+// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
+func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
+ var rows []*TableDiffRow
+ i := 1
+ for {
+ row, err := reader.Read()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ return nil, err
+ }
+ cells := make([]*TableDiffCell, len(row))
+ for j := 0; j < len(row); j++ {
+ cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
+ }
+ rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
+ i++
+ }
+
+ return []*TableDiffSection{{Rows: rows}}, nil
+}
+
+func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
+ a, err := createCsvReader(baseReader, maxRowsToInspect)
+ if err != nil {
+ return nil, err
+ }
+
+ b, err := createCsvReader(headReader, maxRowsToInspect)
+ if err != nil {
+ return nil, err
+ }
+
+ a2b, b2a := getColumnMapping(a, b)
+
+ columns := len(a2b) + countUnmappedColumns(b2a)
+ if len(a2b) < len(b2a) {
+ columns = len(b2a) + countUnmappedColumns(a2b)
+ }
+
+ createDiffRow := func(aline int, bline int) (*TableDiffRow, error) {
+ cells := make([]*TableDiffCell, columns)
+
+ if aline == 0 || bline == 0 {
+ var (
+ row []string
+ celltype TableDiffCellType
+ err error
+ )
+ if bline == 0 {
+ row, err = a.GetRow(aline - 1)
+ celltype = TableDiffCellDel
+ } else {
+ row, err = b.GetRow(bline - 1)
+ celltype = TableDiffCellAdd
+ }
+ if err != nil {
+ return nil, err
+ }
+ if row == nil {
+ return nil, nil
+ }
+ for i := 0; i < len(row); i++ {
+ cells[i] = &TableDiffCell{LeftCell: row[i], Type: celltype}
+ }
+ return &TableDiffRow{RowIdx: bline, Cells: cells}, nil
+ }
+
+ arow, err := a.GetRow(aline - 1)
+ if err != nil {
+ return nil, err
+ }
+ brow, err := b.GetRow(bline - 1)
+ if err != nil {
+ return nil, err
+ }
+ if len(arow) == 0 && len(brow) == 0 {
+ return nil, nil
+ }
+
+ for i := 0; i < len(a2b); i++ {
+ acell, _ := getCell(arow, i)
+ if a2b[i] == unmappedColumn {
+ cells[i] = &TableDiffCell{LeftCell: acell, Type: TableDiffCellDel}
+ } else {
+ bcell, _ := getCell(brow, a2b[i])
+
+ celltype := TableDiffCellChanged
+ if acell == bcell {
+ celltype = TableDiffCellEqual
+ }
+
+ cells[i] = &TableDiffCell{LeftCell: acell, RightCell: bcell, Type: celltype}
+ }
+ }
+ for i := 0; i < len(b2a); i++ {
+ if b2a[i] == unmappedColumn {
+ bcell, _ := getCell(brow, i)
+ cells[i] = &TableDiffCell{LeftCell: bcell, Type: TableDiffCellAdd}
+ }
+ }
+
+ return &TableDiffRow{RowIdx: bline, Cells: cells}, nil
+ }
+
+ var sections []*TableDiffSection
+
+ for i, section := range diffFile.Sections {
+ var rows []*TableDiffRow
+ lines := tryMergeLines(section.Lines)
+ for j, line := range lines {
+ if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
+ diffRow, err := createDiffRow(1, 1)
+ if err != nil {
+ return nil, err
+ }
+ if diffRow != nil {
+ rows = append(rows, diffRow)
+ }
+ }
+ diffRow, err := createDiffRow(line[0], line[1])
+ if err != nil {
+ return nil, err
+ }
+ if diffRow != nil {
+ rows = append(rows, diffRow)
+ }
+ }
+
+ if len(rows) > 0 {
+ sections = append(sections, &TableDiffSection{Rows: rows})
+ }
+ }
+
+ return sections, nil
+}
+
+// getColumnMapping creates a mapping of columns between a and b
+func getColumnMapping(a *csvReader, b *csvReader) ([]int, []int) {
+ arow, _ := a.GetRow(0)
+ brow, _ := b.GetRow(0)
+
+ a2b := []int{}
+ b2a := []int{}
+
+ if arow != nil {
+ a2b = make([]int, len(arow))
+ }
+ if brow != nil {
+ b2a = make([]int, len(brow))
+ }
+
+ for i := 0; i < len(b2a); i++ {
+ b2a[i] = unmappedColumn
+ }
+
+ bcol := 0
+ for i := 0; i < len(a2b); i++ {
+ a2b[i] = unmappedColumn
+
+ acell, ea := getCell(arow, i)
+ if ea == nil {
+ for j := bcol; j < len(b2a); j++ {
+ bcell, eb := getCell(brow, j)
+ if eb == nil && acell == bcell {
+ a2b[i] = j
+ b2a[j] = i
+ bcol = j + 1
+ break
+ }
+ }
+ }
+ }
+
+ tryMapColumnsByContent(a, a2b, b, b2a)
+ tryMapColumnsByContent(b, b2a, a, a2b)
+
+ return a2b, b2a
+}
+
+// tryMapColumnsByContent tries to map missing columns by the content of the first lines.
+func tryMapColumnsByContent(a *csvReader, a2b []int, b *csvReader, b2a []int) {
+ start := 0
+ for i := 0; i < len(a2b); i++ {
+ if a2b[i] == unmappedColumn {
+ if b2a[start] == unmappedColumn {
+ rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(a.buffer), len(b.buffer))-1))
+ same := 0
+ for j := 1; j <= rows; j++ {
+ acell, ea := getCell(a.buffer[j], i)
+ bcell, eb := getCell(b.buffer[j], start+1)
+ if ea == nil && eb == nil && acell == bcell {
+ same++
+ }
+ }
+ if (float32(same) / float32(rows)) > minRatioToMatch {
+ a2b[i] = start + 1
+ b2a[start+1] = i
+ }
+ }
+ }
+ start = a2b[i]
+ }
+}
+
+// getCell returns the cell at the given column, or an error if the column does not exist.
+func getCell(row []string, column int) (string, error) {
+ if column < len(row) {
+ return row[column], nil
+ }
+ return "", errors.New("Undefined column")
+}
+
+// countUnmappedColumns returns the count of unmapped columns.
+func countUnmappedColumns(mapping []int) int {
+ count := 0
+ for i := 0; i < len(mapping); i++ {
+ if mapping[i] == unmappedColumn {
+ count++
+ }
+ }
+ return count
+}
+
+// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
+func tryMergeLines(lines []*DiffLine) [][2]int {
+ ids := make([][2]int, len(lines))
+
+ i := 0
+ for _, line := range lines {
+ if line.Type != DiffLineSection {
+ ids[i][0] = line.LeftIdx
+ ids[i][1] = line.RightIdx
+ i++
+ }
+ }
+
+ ids = ids[:i]
+
+ result := make([][2]int, len(ids))
+
+ j := 0
+ for i = 0; i < len(ids); i++ {
+ if ids[i][0] == 0 {
+ if j > 0 && result[j-1][1] == 0 {
+ temp := j
+ for temp > 0 && result[temp-1][1] == 0 {
+ temp--
+ }
+ result[temp][1] = ids[i][1]
+ continue
+ }
+ }
+ result[j] = ids[i]
+ j++
+ }
+
+ return result[:j]
+}
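
For orientation, a hedged sketch of how the pieces above fit together (illustrative only, not part of the patch; `diffFile` would normally come from gitdiff.ParsePatch, and the `example` package name and sample CSV content are assumptions):

```go
package example

import (
	"fmt"

	csv_module "code.gitea.io/gitea/modules/csv"
	"code.gitea.io/gitea/services/gitdiff"
)

// printCsvDiff builds a tabular diff of two small CSV revisions and prints the cell types.
func printCsvDiff(diffFile *gitdiff.DiffFile) error {
	baseReader := csv_module.CreateReaderAndGuessDelimiter([]byte("col1,col2\na,a"))
	headReader := csv_module.CreateReaderAndGuessDelimiter([]byte("col1,col2\na,b"))

	sections, err := gitdiff.CreateCsvDiff(diffFile, baseReader, headReader)
	if err != nil {
		return err
	}
	for _, section := range sections {
		for _, row := range section.Rows {
			for _, cell := range row.Cells {
				// Type is one of TableDiffCellEqual, TableDiffCellChanged, TableDiffCellAdd, TableDiffCellDel.
				fmt.Printf("%q -> %q (%d)  ", cell.LeftCell, cell.RightCell, cell.Type)
			}
			fmt.Println()
		}
	}
	return nil
}
```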
diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go
new file mode 100644
index 000000000..17edea582
--- /dev/null
+++ b/services/gitdiff/csv_test.go
@@ -0,0 +1,119 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gitdiff
+
+import (
+ "encoding/csv"
+ "strings"
+ "testing"
+
+ csv_module "code.gitea.io/gitea/modules/csv"
+ "code.gitea.io/gitea/modules/setting"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestCSVDiff(t *testing.T) {
+ var cases = []struct {
+ diff string
+ base string
+ head string
+ cells [][2]TableDiffCellType
+ }{
+ // case 0
+ {
+ diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -0,0 +1,2 @@
++col1,col2
++a,a`,
+ base: "",
+ head: "col1,col2\na,a",
+ cells: [][2]TableDiffCellType{{TableDiffCellAdd, TableDiffCellAdd}, {TableDiffCellAdd, TableDiffCellAdd}},
+ },
+ // case 1
+ {
+ diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,2 +1,3 @@
+ col1,col2
+-a,a
++a,a
++b,b`,
+ base: "col1,col2\na,a",
+ head: "col1,col2\na,a\nb,b",
+ cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellAdd, TableDiffCellAdd}},
+ },
+ // case 2
+ {
+ diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,3 +1,2 @@
+ col1,col2
+-a,a
+ b,b`,
+ base: "col1,col2\na,a\nb,b",
+ head: "col1,col2\nb,b",
+ cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellDel, TableDiffCellDel}, {TableDiffCellEqual, TableDiffCellEqual}},
+ },
+ // case 3
+ {
+ diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,2 +1,2 @@
+ col1,col2
+-b,b
++b,c`,
+ base: "col1,col2\nb,b",
+ head: "col1,col2\nb,c",
+ cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellChanged}},
+ },
+ // case 4
+ {
+ diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,2 +0,0 @@
+-col1,col2
+-b,c`,
+ base: "col1,col2\nb,c",
+ head: "",
+ cells: [][2]TableDiffCellType{{TableDiffCellDel, TableDiffCellDel}, {TableDiffCellDel, TableDiffCellDel}},
+ },
+ }
+
+ for n, c := range cases {
+ diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff))
+ if err != nil {
+ t.Errorf("ParsePatch failed: %s", err)
+ }
+
+ var baseReader *csv.Reader
+ if len(c.base) > 0 {
+ baseReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.base))
+ }
+ var headReader *csv.Reader
+ if len(c.head) > 0 {
+ headReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.head))
+ }
+
+ result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader)
+ assert.NoError(t, err)
+ assert.Equal(t, 1, len(result), "case %d: should be one section", n)
+
+ section := result[0]
+ assert.Equal(t, len(c.cells), len(section.Rows), "case %d: should be %d rows", n, len(c.cells))
+
+ for i, row := range section.Rows {
+ assert.Equal(t, 2, len(row.Cells), "case %d: row %d should have two cells", n, i)
+ for j, cell := range row.Cells {
+ assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j)
+ }
+ }
+ }
+}
diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl
index f93dac9b4..368fc2c44 100644
--- a/templates/repo/diff/box.tmpl
+++ b/templates/repo/diff/box.tmpl
@@ -79,6 +79,8 @@
{{else}}
{{$isImage = (call $.IsImageFileInHead $file.Name)}}
{{end}}
+ {{$isCsv := (call $.IsCsvFile $file)}}
+ {{$showFileViewToggle := or $isImage $isCsv}}
{{svg "octicon-chevron-down" 18}}
@@ -92,6 +94,12 @@
{{if $file.IsRenamed}}{{$file.OldName}} → {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}}