Detect encoding changes while parsing diff (#16330)
* Detect encoding changes while parsing diff
This commit is contained in:
parent
2614309a58
commit
4ce32c9e93
1 changed file with 31 additions and 19 deletions
|
@@ -32,6 +32,7 @@ import (
|
||||||
|
|
||||||
"github.com/sergi/go-diff/diffmatchpatch"
|
"github.com/sergi/go-diff/diffmatchpatch"
|
||||||
stdcharset "golang.org/x/net/html/charset"
|
stdcharset "golang.org/x/net/html/charset"
|
||||||
|
"golang.org/x/text/encoding"
|
||||||
"golang.org/x/text/transform"
|
"golang.org/x/text/transform"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -883,40 +884,51 @@ parsingLoop:
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: There are numerous issues with this:
|
// TODO: There are numerous issues with this:
|
||||||
// - we might want to consider detecting encoding while parsing but...
|
// - we might want to consider detecting encoding while parsing but...
|
||||||
// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
|
// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
|
||||||
// - and this doesn't really account for changes in encoding
|
var diffLineTypeBuffers = make(map[DiffLineType]*bytes.Buffer, 3)
|
||||||
var buf bytes.Buffer
|
var diffLineTypeDecoders = make(map[DiffLineType]*encoding.Decoder, 3)
|
||||||
|
diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer)
|
||||||
|
diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer)
|
||||||
|
diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer)
|
||||||
for _, f := range diff.Files {
|
for _, f := range diff.Files {
|
||||||
buf.Reset()
|
for _, buffer := range diffLineTypeBuffers {
|
||||||
|
buffer.Reset()
|
||||||
|
}
|
||||||
for _, sec := range f.Sections {
|
for _, sec := range f.Sections {
|
||||||
for _, l := range sec.Lines {
|
for _, l := range sec.Lines {
|
||||||
if l.Type == DiffLineSection {
|
if l.Type == DiffLineSection {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
buf.WriteString(l.Content[1:])
|
diffLineTypeBuffers[l.Type].WriteString(l.Content[1:])
|
||||||
buf.WriteString("\n")
|
diffLineTypeBuffers[l.Type].WriteString("\n")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
charsetLabel, err := charset.DetectEncoding(buf.Bytes())
|
for lineType, buffer := range diffLineTypeBuffers {
|
||||||
|
diffLineTypeDecoders[lineType] = nil
|
||||||
|
if buffer.Len() == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
|
||||||
if charsetLabel != "UTF-8" && err == nil {
|
if charsetLabel != "UTF-8" && err == nil {
|
||||||
encoding, _ := stdcharset.Lookup(charsetLabel)
|
encoding, _ := stdcharset.Lookup(charsetLabel)
|
||||||
if encoding != nil {
|
if encoding != nil {
|
||||||
d := encoding.NewDecoder()
|
diffLineTypeDecoders[lineType] = encoding.NewDecoder()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
for _, sec := range f.Sections {
|
for _, sec := range f.Sections {
|
||||||
for _, l := range sec.Lines {
|
for _, l := range sec.Lines {
|
||||||
if l.Type == DiffLineSection {
|
decoder := diffLineTypeDecoders[l.Type]
|
||||||
continue
|
if decoder != nil {
|
||||||
}
|
if c, _, err := transform.String(decoder, l.Content[1:]); err == nil {
|
||||||
if c, _, err := transform.String(d, l.Content[1:]); err == nil {
|
|
||||||
l.Content = l.Content[0:1] + c
|
l.Content = l.Content[0:1] + c
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
diff.NumFiles = len(diff.Files)
|
diff.NumFiles = len(diff.Files)
|
||||||
return diff, nil
|
return diff, nil
|
||||||
|
|
Loading…
Reference in a new issue