#2282 fast detection of utf-8
This commit is contained in:
parent
240fe07287
commit
44637f03cc
4 changed files with 10 additions and 5 deletions
2
gogs.go
2
gogs.go
|
@ -17,7 +17,7 @@ import (
|
||||||
"github.com/gogits/gogs/modules/setting"
|
"github.com/gogits/gogs/modules/setting"
|
||||||
)
|
)
|
||||||
|
|
||||||
const APP_VER = "0.8.13.1225"
|
const APP_VER = "0.8.13.1227"
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
runtime.GOMAXPROCS(runtime.NumCPU())
|
runtime.GOMAXPROCS(runtime.NumCPU())
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/Unknwon/com"
|
"github.com/Unknwon/com"
|
||||||
"github.com/Unknwon/i18n"
|
"github.com/Unknwon/i18n"
|
||||||
|
@ -53,6 +54,11 @@ func ShortSha(sha1 string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func DetectEncoding(content []byte) string {
|
func DetectEncoding(content []byte) string {
|
||||||
|
if utf8.Valid(content[:1024]) {
|
||||||
|
log.Debug("Detected encoding: utf-8 (fast)")
|
||||||
|
return "utf-8"
|
||||||
|
}
|
||||||
|
|
||||||
_, name, certain := charset.DetermineEncoding(content, "")
|
_, name, certain := charset.DetermineEncoding(content, "")
|
||||||
if name != "utf-8" && len(setting.Repository.AnsiCharset) > 0 {
|
if name != "utf-8" && len(setting.Repository.AnsiCharset) > 0 {
|
||||||
log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
|
log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
|
||||||
|
|
|
@ -12,7 +12,6 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"golang.org/x/net/html/charset"
|
"golang.org/x/net/html/charset"
|
||||||
"golang.org/x/text/transform"
|
"golang.org/x/text/transform"
|
||||||
|
@ -131,11 +130,11 @@ func Sha1(str string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func ToUtf8WithErr(content []byte) (error, string) {
|
func ToUtf8WithErr(content []byte) (error, string) {
|
||||||
if utf8.Valid(content[:1024]) {
|
charsetLabel := base.DetectEncoding(content)
|
||||||
|
if charsetLabel == "utf-8" {
|
||||||
return nil, string(content)
|
return nil, string(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
charsetLabel := base.DetectEncoding(content)
|
|
||||||
encoding, _ := charset.Lookup(charsetLabel)
|
encoding, _ := charset.Lookup(charsetLabel)
|
||||||
if encoding == nil {
|
if encoding == nil {
|
||||||
return fmt.Errorf("Unknown encoding: %s", charsetLabel), string(content)
|
return fmt.Errorf("Unknown encoding: %s", charsetLabel), string(content)
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
0.8.13.1225
|
0.8.13.1227
|
Reference in a new issue