Pooled and buffered gzip implementation (#5722)

* Pooled and buffered gzip implementation

* Add test for gzip

* Add integration test

* Ensure lfs check within transaction

The previous code made it possible for a race condition to occur whereby a LFSMetaObject could be checked into the database twice. We should check if the LFSMetaObject is within the database and insert it if not in one transaction.

* Try to avoid primary key problem in postgres

The integration tests are being affected by
https://github.com/go-testfixtures/testfixtures/issues/39 if we set the
primary key high enough, keep a count of this and remove at the end of
each test we shouldn't be affected by this.
This commit is contained in:
zeripath 2019-01-23 08:56:51 +00:00 committed by Lauris BH
parent 075649572d
commit 7d434376f1
6 changed files with 598 additions and 10 deletions

View file

@ -0,0 +1,129 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package integrations
import (
"archive/zip"
"bytes"
"crypto/sha256"
"encoding/hex"
"io"
"io/ioutil"
"net/http"
"testing"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/gzip"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
gzipp "github.com/klauspost/compress/gzip"
)
func GenerateLFSOid(content io.Reader) (string, error) {
h := sha256.New()
if _, err := io.Copy(h, content); err != nil {
return "", err
}
sum := h.Sum(nil)
return hex.EncodeToString(sum), nil
}
var lfsID = int64(20000)
func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string {
oid, err := GenerateLFSOid(bytes.NewReader(*content))
assert.NoError(t, err)
var lfsMetaObject *models.LFSMetaObject
if setting.UsePostgreSQL {
lfsMetaObject = &models.LFSMetaObject{ID: lfsID, Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID}
} else {
lfsMetaObject = &models.LFSMetaObject{Oid: oid, Size: int64(len(*content)), RepositoryID: repositoryID}
}
lfsID = lfsID + 1
lfsMetaObject, err = models.NewLFSMetaObject(lfsMetaObject)
assert.NoError(t, err)
contentStore := &lfs.ContentStore{BasePath: setting.LFS.ContentPath}
if !contentStore.Exists(lfsMetaObject) {
err := contentStore.Put(lfsMetaObject, bytes.NewReader(*content))
assert.NoError(t, err)
}
return oid
}
func doLfs(t *testing.T, content *[]byte, expectGzip bool) {
prepareTestEnv(t)
repo, err := models.GetRepositoryByOwnerAndName("user2", "repo1")
assert.NoError(t, err)
oid := storeObjectInRepo(t, repo.ID, content)
defer repo.RemoveLFSMetaObjectByOid(oid)
session := loginUser(t, "user2")
// Request OID
req := NewRequest(t, "GET", "/user2/repo1.git/info/lfs/objects/"+oid+"/test")
req.Header.Set("Accept-Encoding", "gzip")
resp := session.MakeRequest(t, req, http.StatusOK)
contentEncoding := resp.Header().Get("Content-Encoding")
if !expectGzip || !setting.EnableGzip {
assert.NotContains(t, contentEncoding, "gzip")
result := resp.Body.Bytes()
assert.Equal(t, *content, result)
} else {
assert.Contains(t, contentEncoding, "gzip")
gzippReader, err := gzipp.NewReader(resp.Body)
assert.NoError(t, err)
result, err := ioutil.ReadAll(gzippReader)
assert.NoError(t, err)
assert.Equal(t, *content, result)
}
}
func TestGetLFSSmall(t *testing.T) {
content := []byte("A very small file\n")
doLfs(t, &content, false)
}
func TestGetLFSLarge(t *testing.T) {
content := make([]byte, gzip.MinSize*10)
for i := range content {
content[i] = byte(i % 256)
}
doLfs(t, &content, true)
}
func TestGetLFSGzip(t *testing.T) {
b := make([]byte, gzip.MinSize*10)
for i := range b {
b[i] = byte(i % 256)
}
outputBuffer := bytes.NewBuffer([]byte{})
gzippWriter := gzipp.NewWriter(outputBuffer)
gzippWriter.Write(b)
gzippWriter.Close()
content := outputBuffer.Bytes()
doLfs(t, &content, false)
}
func TestGetLFSZip(t *testing.T) {
b := make([]byte, gzip.MinSize*10)
for i := range b {
b[i] = byte(i % 256)
}
outputBuffer := bytes.NewBuffer([]byte{})
zipWriter := zip.NewWriter(outputBuffer)
fileWriter, err := zipWriter.Create("default")
assert.NoError(t, err)
fileWriter.Write(b)
zipWriter.Close()
content := outputBuffer.Bytes()
doLfs(t, &content, false)
}

View file

@ -30,6 +30,7 @@ LFS_CONTENT_PATH = data/lfs-sqlite
OFFLINE_MODE = false
LFS_JWT_SECRET = Tv_MjmZuHqpIY6GFl12ebgkRAMt4RlWt0v4EHKSXO0w
APP_DATA_PATH = integrations/gitea-integration-sqlite/data
ENABLE_GZIP = true
[mailer]
ENABLED = false

View file

@ -44,20 +44,20 @@ const (
func NewLFSMetaObject(m *LFSMetaObject) (*LFSMetaObject, error) {
var err error
has, err := x.Get(m)
sess := x.NewSession()
defer sess.Close()
if err = sess.Begin(); err != nil {
return nil, err
}
has, err := sess.Get(m)
if err != nil {
return nil, err
}
if has {
m.Existing = true
return m, nil
}
sess := x.NewSession()
defer sess.Close()
if err = sess.Begin(); err != nil {
return nil, err
return m, sess.Commit()
}
if _, err = sess.Insert(m); err != nil {

327
modules/gzip/gzip.go Normal file
View file

@ -0,0 +1,327 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package gzip
import (
"bufio"
"fmt"
"io"
"net"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
"github.com/klauspost/compress/gzip"
"gopkg.in/macaron.v1"
)
const (
acceptEncodingHeader = "Accept-Encoding"
contentEncodingHeader = "Content-Encoding"
contentLengthHeader = "Content-Length"
contentTypeHeader = "Content-Type"
rangeHeader = "Range"
varyHeader = "Vary"
)
const (
// MinSize is the minimum size of content we will compress
MinSize = 1400
)
// noopClosers are io.Writers with a shim to prevent early closure
type noopCloser struct {
io.Writer
}
func (noopCloser) Close() error { return nil }
// WriterPool is a gzip writer pool to reduce workload on creation of
// gzip writers
type WriterPool struct {
pool sync.Pool
compressionLevel int
}
// NewWriterPool creates a new pool
func NewWriterPool(compressionLevel int) *WriterPool {
return &WriterPool{pool: sync.Pool{
// New will return nil, we'll manage the creation of new
// writers in the middleware
New: func() interface{} { return nil },
},
compressionLevel: compressionLevel}
}
// Get a writer from the pool - or create one if not available
func (wp *WriterPool) Get(rw macaron.ResponseWriter) *gzip.Writer {
ret := wp.pool.Get()
if ret == nil {
ret, _ = gzip.NewWriterLevel(rw, wp.compressionLevel)
} else {
ret.(*gzip.Writer).Reset(rw)
}
return ret.(*gzip.Writer)
}
// Put returns a writer to the pool
func (wp *WriterPool) Put(w *gzip.Writer) {
wp.pool.Put(w)
}
var writerPool WriterPool
var regex regexp.Regexp
// Options represents the configuration for the gzip middleware
type Options struct {
CompressionLevel int
}
func validateCompressionLevel(level int) bool {
return level == gzip.DefaultCompression ||
level == gzip.ConstantCompression ||
(level >= gzip.BestSpeed && level <= gzip.BestCompression)
}
func validate(options []Options) Options {
// Default to level 4 compression (Best results seem to be between 4 and 6)
opt := Options{CompressionLevel: 4}
if len(options) > 0 {
opt = options[0]
}
if !validateCompressionLevel(opt.CompressionLevel) {
opt.CompressionLevel = 4
}
return opt
}
// Middleware creates a macaron.Handler to proxy the response
func Middleware(options ...Options) macaron.Handler {
opt := validate(options)
writerPool = *NewWriterPool(opt.CompressionLevel)
regex := regexp.MustCompile(`bytes=(\d+)\-.*`)
return func(ctx *macaron.Context) {
// If the client won't accept gzip or x-gzip don't compress
if !strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "gzip") &&
!strings.Contains(ctx.Req.Header.Get(acceptEncodingHeader), "x-gzip") {
return
}
// If the client is asking for a specific range of bytes - don't compress
if rangeHdr := ctx.Req.Header.Get(rangeHeader); rangeHdr != "" {
match := regex.FindStringSubmatch(rangeHdr)
if match != nil && len(match) > 1 {
return
}
}
// OK we should proxy the response writer
// We are still not necessarily going to compress...
proxyWriter := &ProxyResponseWriter{
ResponseWriter: ctx.Resp,
}
defer proxyWriter.Close()
ctx.Resp = proxyWriter
ctx.MapTo(proxyWriter, (*http.ResponseWriter)(nil))
// Check if render middleware has been registered,
// if yes, we need to modify ResponseWriter for it as well.
if _, ok := ctx.Render.(*macaron.DummyRender); !ok {
ctx.Render.SetResponseWriter(proxyWriter)
}
ctx.Next()
}
}
// ProxyResponseWriter is a wrapped macaron ResponseWriter that may compress its contents
type ProxyResponseWriter struct {
writer io.WriteCloser
macaron.ResponseWriter
stopped bool
code int
buf []byte
}
// Write appends data to the proxied gzip writer.
func (proxy *ProxyResponseWriter) Write(b []byte) (int, error) {
// if writer is initialized, use the writer
if proxy.writer != nil {
return proxy.writer.Write(b)
}
proxy.buf = append(proxy.buf, b...)
var (
contentLength, _ = strconv.Atoi(proxy.Header().Get(contentLengthHeader))
contentType = proxy.Header().Get(contentTypeHeader)
contentEncoding = proxy.Header().Get(contentEncodingHeader)
)
// OK if an encoding hasn't been chosen, and content length > 1400
// and content type isn't a compressed type
if contentEncoding == "" &&
(contentLength == 0 || contentLength >= MinSize) &&
(contentType == "" || !compressedContentType(contentType)) {
// If current buffer is less than the min size and a Content-Length isn't set, then wait
if len(proxy.buf) < MinSize && contentLength == 0 {
return len(b), nil
}
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
if contentLength >= MinSize || len(proxy.buf) >= MinSize {
// if we don't know the content type, infer it
if contentType == "" {
contentType = http.DetectContentType(proxy.buf)
proxy.Header().Set(contentTypeHeader, contentType)
}
// If the Content-Type is not compressed - Compress!
if !compressedContentType(contentType) {
if err := proxy.startGzip(); err != nil {
return 0, err
}
return len(b), nil
}
}
}
// If we got here, we should not GZIP this response.
if err := proxy.startPlain(); err != nil {
return 0, err
}
return len(b), nil
}
func (proxy *ProxyResponseWriter) startGzip() error {
// Set the content-encoding and vary headers.
proxy.Header().Set(contentEncodingHeader, "gzip")
proxy.Header().Set(varyHeader, acceptEncodingHeader)
// if the Content-Length is already set, then calls to Write on gzip
// will fail to set the Content-Length header since its already set
// See: https://github.com/golang/go/issues/14975.
proxy.Header().Del(contentLengthHeader)
// Write the header to gzip response.
if proxy.code != 0 {
proxy.ResponseWriter.WriteHeader(proxy.code)
// Ensure that no other WriteHeader's happen
proxy.code = 0
}
// Initialize and flush the buffer into the gzip response if there are any bytes.
// If there aren't any, we shouldn't initialize it yet because on Close it will
// write the gzip header even if nothing was ever written.
if len(proxy.buf) > 0 {
// Initialize the GZIP response.
proxy.writer = writerPool.Get(proxy.ResponseWriter)
return proxy.writeBuf()
}
return nil
}
func (proxy *ProxyResponseWriter) startPlain() error {
if proxy.code != 0 {
proxy.ResponseWriter.WriteHeader(proxy.code)
proxy.code = 0
}
proxy.stopped = true
proxy.writer = noopCloser{proxy.ResponseWriter}
return proxy.writeBuf()
}
func (proxy *ProxyResponseWriter) writeBuf() error {
if proxy.buf == nil {
return nil
}
n, err := proxy.writer.Write(proxy.buf)
// This should never happen (per io.Writer docs), but if the write didn't
// accept the entire buffer but returned no specific error, we have no clue
// what's going on, so abort just to be safe.
if err == nil && n < len(proxy.buf) {
err = io.ErrShortWrite
}
proxy.buf = nil
return err
}
// WriteHeader will ensure that we have setup the writer before we write the header
func (proxy *ProxyResponseWriter) WriteHeader(code int) {
if proxy.code == 0 {
proxy.code = code
}
}
// Close the writer
func (proxy *ProxyResponseWriter) Close() error {
if proxy.stopped {
return nil
}
if proxy.writer == nil {
err := proxy.startPlain()
if err != nil {
err = fmt.Errorf("GzipMiddleware: write to regular responseWriter at close gets error: %q", err.Error())
}
}
err := proxy.writer.Close()
if poolWriter, ok := proxy.writer.(*gzip.Writer); ok {
writerPool.Put(poolWriter)
}
proxy.writer = nil
proxy.stopped = true
return err
}
// Flush the writer
func (proxy *ProxyResponseWriter) Flush() {
if proxy.writer == nil {
return
}
if gw, ok := proxy.writer.(*gzip.Writer); ok {
gw.Flush()
}
proxy.ResponseWriter.Flush()
}
// Hijack implements http.Hijacker. If the underlying ResponseWriter is a
// Hijacker, its Hijack method is returned. Otherwise an error is returned.
func (proxy *ProxyResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
hijacker, ok := proxy.ResponseWriter.(http.Hijacker)
if !ok {
return nil, nil, fmt.Errorf("the ResponseWriter doesn't support the Hijacker interface")
}
return hijacker.Hijack()
}
// verify Hijacker interface implementation
var _ http.Hijacker = &ProxyResponseWriter{}
func compressedContentType(contentType string) bool {
switch contentType {
case "application/zip":
return true
case "application/x-gzip":
return true
case "application/gzip":
return true
default:
return false
}
}

131
modules/gzip/gzip_test.go Normal file
View file

@ -0,0 +1,131 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package gzip
import (
"archive/zip"
"bytes"
"io/ioutil"
"net/http"
"net/http/httptest"
"testing"
gzipp "github.com/klauspost/compress/gzip"
"github.com/stretchr/testify/assert"
macaron "gopkg.in/macaron.v1"
)
func setup(sampleResponse []byte) (*macaron.Macaron, *[]byte) {
m := macaron.New()
m.Use(Middleware())
m.Get("/", func() *[]byte { return &sampleResponse })
return m, &sampleResponse
}
func reqNoAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte) {
// Request without accept gzip: Should not gzip
resp := httptest.NewRecorder()
req, err := http.NewRequest("GET", "/", nil)
assert.NoError(t, err)
m.ServeHTTP(resp, req)
_, ok := resp.HeaderMap[contentEncodingHeader]
assert.False(t, ok)
contentEncoding := resp.Header().Get(contentEncodingHeader)
assert.NotContains(t, contentEncoding, "gzip")
result := resp.Body.Bytes()
assert.Equal(t, *sampleResponse, result)
}
func reqAcceptGzip(t *testing.T, m *macaron.Macaron, sampleResponse *[]byte, expectGzip bool) {
// Request without accept gzip: Should not gzip
resp := httptest.NewRecorder()
req, err := http.NewRequest("GET", "/", nil)
assert.NoError(t, err)
req.Header.Set(acceptEncodingHeader, "gzip")
m.ServeHTTP(resp, req)
_, ok := resp.HeaderMap[contentEncodingHeader]
assert.Equal(t, ok, expectGzip)
contentEncoding := resp.Header().Get(contentEncodingHeader)
if expectGzip {
assert.Contains(t, contentEncoding, "gzip")
gzippReader, err := gzipp.NewReader(resp.Body)
assert.NoError(t, err)
result, err := ioutil.ReadAll(gzippReader)
assert.NoError(t, err)
assert.Equal(t, *sampleResponse, result)
} else {
assert.NotContains(t, contentEncoding, "gzip")
result := resp.Body.Bytes()
assert.Equal(t, *sampleResponse, result)
}
}
func TestMiddlewareSmall(t *testing.T) {
m, sampleResponse := setup([]byte("Small response"))
reqNoAcceptGzip(t, m, sampleResponse)
reqAcceptGzip(t, m, sampleResponse, false)
}
func TestMiddlewareLarge(t *testing.T) {
b := make([]byte, MinSize+1)
for i := range b {
b[i] = byte(i % 256)
}
m, sampleResponse := setup(b)
reqNoAcceptGzip(t, m, sampleResponse)
// This should be gzipped as we accept gzip
reqAcceptGzip(t, m, sampleResponse, true)
}
func TestMiddlewareGzip(t *testing.T) {
b := make([]byte, MinSize*10)
for i := range b {
b[i] = byte(i % 256)
}
outputBuffer := bytes.NewBuffer([]byte{})
gzippWriter := gzipp.NewWriter(outputBuffer)
gzippWriter.Write(b)
gzippWriter.Flush()
gzippWriter.Close()
output := outputBuffer.Bytes()
m, sampleResponse := setup(output)
reqNoAcceptGzip(t, m, sampleResponse)
// This should not be gzipped even though we accept gzip
reqAcceptGzip(t, m, sampleResponse, false)
}
func TestMiddlewareZip(t *testing.T) {
b := make([]byte, MinSize*10)
for i := range b {
b[i] = byte(i % 256)
}
outputBuffer := bytes.NewBuffer([]byte{})
zipWriter := zip.NewWriter(outputBuffer)
fileWriter, err := zipWriter.Create("default")
assert.NoError(t, err)
fileWriter.Write(b)
//fileWriter.Close()
zipWriter.Close()
output := outputBuffer.Bytes()
m, sampleResponse := setup(output)
reqNoAcceptGzip(t, m, sampleResponse)
// This should not be gzipped even though we accept gzip
reqAcceptGzip(t, m, sampleResponse, false)
}

View file

@ -14,6 +14,7 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/gzip"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/metrics"
@ -36,7 +37,6 @@ import (
"github.com/go-macaron/cache"
"github.com/go-macaron/captcha"
"github.com/go-macaron/csrf"
"github.com/go-macaron/gzip"
"github.com/go-macaron/i18n"
"github.com/go-macaron/session"
"github.com/go-macaron/toolbox"
@ -54,7 +54,7 @@ func NewMacaron() *macaron.Macaron {
}
m.Use(macaron.Recovery())
if setting.EnableGzip {
m.Use(gzip.Gziper())
m.Use(gzip.Middleware())
}
if setting.Protocol == setting.FCGI {
m.SetURLPrefix(setting.AppSubURL)