From 87141b908d4a03ce27af3ce042dc417da925b84f Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sun, 23 Jan 2022 20:19:49 +0800 Subject: [PATCH] Fix mime-type detection for HTTP server (#18370) Bypass the unstable behavior of Golang's mime.TypeByExtension --- modules/public/mime_types.go | 41 +++++++++++++++++++ modules/public/public.go | 11 +++++ .../public/{dynamic.go => serve_dynamic.go} | 0 modules/public/{static.go => serve_static.go} | 29 ++++--------- 4 files changed, 61 insertions(+), 20 deletions(-) create mode 100644 modules/public/mime_types.go rename modules/public/{dynamic.go => serve_dynamic.go} (100%) rename modules/public/{static.go => serve_static.go} (68%) diff --git a/modules/public/mime_types.go b/modules/public/mime_types.go new file mode 100644 index 000000000..f8c92e824 --- /dev/null +++ b/modules/public/mime_types.go @@ -0,0 +1,41 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package public + +import "strings" + +// wellKnownMimeTypesLower comes from Golang's builtin mime package: `builtinTypesLower`, see the comment of detectWellKnownMimeType +var wellKnownMimeTypesLower = map[string]string{ + ".avif": "image/avif", + ".css": "text/css; charset=utf-8", + ".gif": "image/gif", + ".htm": "text/html; charset=utf-8", + ".html": "text/html; charset=utf-8", + ".jpeg": "image/jpeg", + ".jpg": "image/jpeg", + ".js": "text/javascript; charset=utf-8", + ".json": "application/json", + ".mjs": "text/javascript; charset=utf-8", + ".pdf": "application/pdf", + ".png": "image/png", + ".svg": "image/svg+xml", + ".wasm": "application/wasm", + ".webp": "image/webp", + ".xml": "text/xml; charset=utf-8", + + // well, there are some types missing from the builtin list + ".txt": "text/plain; charset=utf-8", +} + +// detectWellKnownMimeType will return the mime-type for a well-known file ext name +// The purpose of this function is to bypass the unstable behavior of Golang's mime.TypeByExtension +// mime.TypeByExtension would use OS's mime-type config to overwrite the well-known types (see its documentation). +// If the user's OS has incorrect mime-type config, Gitea would be unable to respond with a correct Content-Type to browsers. +// For example, if Gitea returns `text/plain` for a `.js` file, the browser couldn't run the JS due to security reasons. +// detectWellKnownMimeType makes the Content-Type for well-known files stable. +func detectWellKnownMimeType(ext string) string { + ext = strings.ToLower(ext) + return wellKnownMimeTypesLower[ext] +} diff --git a/modules/public/public.go b/modules/public/public.go index 91ecf42a3..7804e945e 100644 --- a/modules/public/public.go +++ b/modules/public/public.go @@ -92,6 +92,15 @@ func parseAcceptEncoding(val string) map[string]bool { return types } +// setWellKnownContentType will set the Content-Type if the file is a well-known type. 
+// See the comments of detectWellKnownMimeType +func setWellKnownContentType(w http.ResponseWriter, file string) { + mimeType := detectWellKnownMimeType(filepath.Ext(file)) + if mimeType != "" { + w.Header().Set("Content-Type", mimeType) + } +} + func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.FileSystem, file string) bool { // use clean to keep the file is a valid path with no . or .. f, err := fs.Open(path.Clean(file)) @@ -122,6 +131,8 @@ func (opts *Options) handle(w http.ResponseWriter, req *http.Request, fs http.Fi return true } + setWellKnownContentType(w, file) + serveContent(w, req, fi, fi.ModTime(), f) return true } diff --git a/modules/public/dynamic.go b/modules/public/serve_dynamic.go similarity index 100% rename from modules/public/dynamic.go rename to modules/public/serve_dynamic.go diff --git a/modules/public/static.go b/modules/public/serve_static.go similarity index 68% rename from modules/public/static.go rename to modules/public/serve_static.go index d373c712e..8e82175e3 100644 --- a/modules/public/static.go +++ b/modules/public/serve_static.go @@ -9,15 +9,12 @@ package public import ( "bytes" - "compress/gzip" "io" - "mime" "net/http" "os" "path/filepath" "time" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/timeutil" ) @@ -66,24 +63,16 @@ func serveContent(w http.ResponseWriter, req *http.Request, fi os.FileInfo, modt encodings := parseAcceptEncoding(req.Header.Get("Accept-Encoding")) if encodings["gzip"] { if cf, ok := fi.(*vfsgen۰CompressedFileInfo); ok { - rd := bytes.NewReader(cf.GzipBytes()) - w.Header().Set("Content-Encoding", "gzip") - ctype := mime.TypeByExtension(filepath.Ext(fi.Name())) - if ctype == "" { - // read a chunk to decide between utf-8 text and binary - var buf [512]byte - grd, _ := gzip.NewReader(rd) - n, _ := io.ReadFull(grd, buf[:]) - ctype = http.DetectContentType(buf[:n]) - _, err := rd.Seek(0, io.SeekStart) // rewind to output whole file - if err != nil { - 
log.Error("rd.Seek error: %v", err) - http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) - return - } + rdGzip := bytes.NewReader(cf.GzipBytes()) + // all static files are managed by Gitea, so we can make sure every file has the correct ext name + // then we can get the correct Content-Type, we do not need to do http.DetectContentType on the decompressed data + mimeType := detectWellKnownMimeType(filepath.Ext(fi.Name())) + if mimeType == "" { + mimeType = "application/octet-stream" } - w.Header().Set("Content-Type", ctype) - http.ServeContent(w, req, fi.Name(), modtime, rd) + w.Header().Set("Content-Type", mimeType) + w.Header().Set("Content-Encoding", "gzip") + http.ServeContent(w, req, fi.Name(), modtime, rdGzip) return } }