From df9612bb535c0f2b4540641ce2fba2c3d65de284 Mon Sep 17 00:00:00 2001 From: qwerty287 <80460567+qwerty287@users.noreply.github.com> Date: Sat, 4 Jun 2022 15:17:53 +0200 Subject: [PATCH] Add API to serve blob or LFS file content (#19689) * Add LFS API * Update routers/api/v1/repo/file.go Co-authored-by: Gusted * Apply suggestions * Apply suggestions * Update routers/api/v1/repo/file.go Co-authored-by: Gusted * Report errors * ADd test * Use own repo for test * Use different repo name * Improve handling * Slight restructures 1. Avoid reading the blob data multiple times 2. Ensure that caching is only checked when about to serve the blob/lfs 3. Avoid nesting by returning early 4. Make log message a bit more clear 5. Ensure that the dataRc is closed by defer when passed to ServeData Signed-off-by: Andrew Thornton Co-authored-by: Gusted Co-authored-by: Andrew Thornton Co-authored-by: Lunny Xiao --- integrations/api_repo_file_get_test.go | 56 ++++++++++ routers/api/v1/api.go | 1 + routers/api/v1/repo/file.go | 142 +++++++++++++++++++++++++ templates/swagger/v1_json.tmpl | 46 ++++++++ 4 files changed, 245 insertions(+) create mode 100644 integrations/api_repo_file_get_test.go diff --git a/integrations/api_repo_file_get_test.go b/integrations/api_repo_file_get_test.go new file mode 100644 index 000000000..8d1c4c4bc --- /dev/null +++ b/integrations/api_repo_file_get_test.go @@ -0,0 +1,56 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package integrations + +import ( + "net/http" + "net/url" + "os" + "testing" + + api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" + + "github.com/stretchr/testify/assert" +) + +func TestAPIGetRawFileOrLFS(t *testing.T) { + defer prepareTestEnv(t)() + + // Test with raw file + req := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/README.md") + resp := MakeRequest(t, req, http.StatusOK) + assert.Equal(t, "# repo1\n\nDescription for repo1", resp.Body.String()) + + // Test with LFS + onGiteaRun(t, func(t *testing.T, u *url.URL) { + httpContext := NewAPITestContext(t, "user2", "repo-lfs-test") + doAPICreateRepository(httpContext, false, func(t *testing.T, repository api.Repository) { + u.Path = httpContext.GitPath() + dstPath, err := os.MkdirTemp("", httpContext.Reponame) + assert.NoError(t, err) + defer util.RemoveAll(dstPath) + + u.Path = httpContext.GitPath() + u.User = url.UserPassword("user2", userPassword) + + t.Run("Clone", doGitClone(dstPath, u)) + + dstPath2, err := os.MkdirTemp("", httpContext.Reponame) + assert.NoError(t, err) + defer util.RemoveAll(dstPath2) + + t.Run("Partial Clone", doPartialGitClone(dstPath2, u)) + + lfs, _ := lfsCommitAndPushTest(t, dstPath) + + reqLFS := NewRequest(t, "GET", "/api/v1/repos/user2/repo1/media/"+lfs) + respLFS := MakeRequestNilResponseRecorder(t, reqLFS, http.StatusOK) + assert.Equal(t, littleSize, respLFS.Length) + + doAPIDeleteRepository(httpContext) + }) + }) +} diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 62c4a8934..1492ef07a 100644 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -826,6 +826,7 @@ func Routes() *web.Route { Delete(reqAdmin(), repo.DeleteTeam) }, reqToken()) m.Get("/raw/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFile) + m.Get("/media/*", context.ReferencesGitRepo(), context.RepoRefForAPI, reqRepoReader(unit.TypeCode), repo.GetRawFileOrLFS) m.Get("/archive/*", reqRepoReader(unit.TypeCode), repo.GetArchive) m.Combo("/forks").Get(repo.ListForks). Post(reqToken(), reqRepoReader(unit.TypeCode), bind(api.CreateForkOption{}), repo.CreateFork) diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index 1fdf70c13..ab337e66e 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -6,8 +6,10 @@ package repo import ( + "bytes" "encoding/base64" "fmt" + "io" "net/http" "path" "time" @@ -18,7 +20,11 @@ import ( "code.gitea.io/gitea/modules/cache" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/httpcache" + "code.gitea.io/gitea/modules/lfs" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/web" "code.gitea.io/gitea/routers/common" @@ -75,6 +81,142 @@ func GetRawFile(ctx *context.APIContext) { } } +// GetRawFileOrLFS get a file by repo's path, redirecting to LFS if necessary. +func GetRawFileOrLFS(ctx *context.APIContext) { + // swagger:operation GET /repos/{owner}/{repo}/media/{filepath} repository repoGetRawFileOrLFS + // --- + // summary: Get a file or it's LFS object from a repository + // parameters: + // - name: owner + // in: path + // description: owner of the repo + // type: string + // required: true + // - name: repo + // in: path + // description: name of the repo + // type: string + // required: true + // - name: filepath + // in: path + // description: filepath of the file to get + // type: string + // required: true + // - name: ref + // in: query + // description: "The name of the commit/branch/tag. Default the repository’s default branch (usually master)" + // type: string + // required: false + // responses: + // 200: + // description: Returns raw file content. + // "404": + // "$ref": "#/responses/notFound" + + if ctx.Repo.Repository.IsEmpty { + ctx.NotFound() + return + } + + blob, lastModified := getBlobForEntry(ctx) + if ctx.Written() { + return + } + + // LFS Pointer files are at most 1024 bytes - so any blob greater than 1024 bytes cannot be an LFS file + if blob.Size() > 1024 { + // First handle caching for the blob + if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) { + return + } + + // OK not cached - serve! + if err := common.ServeBlob(ctx.Context, blob, lastModified); err != nil { + ctx.ServerError("ServeBlob", err) + } + return + } + + // OK, now the blob is known to have at most 1024 bytes we can simply read this in in one go (This saves reading it twice) + dataRc, err := blob.DataAsync() + if err != nil { + ctx.ServerError("DataAsync", err) + return + } + + buf, err := io.ReadAll(dataRc) + if err != nil { + _ = dataRc.Close() + ctx.ServerError("DataAsync", err) + return + } + + if err := dataRc.Close(); err != nil { + log.Error("Error whilst closing blob %s reader in %-v. Error: %v", blob.ID, ctx.Context.Repo.Repository, err) + } + + // Check if the blob represents a pointer + pointer, _ := lfs.ReadPointer(bytes.NewReader(buf)) + + // if its not a pointer just serve the data directly + if !pointer.IsValid() { + // First handle caching for the blob + if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) { + return + } + + // OK not cached - serve! + if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil { + ctx.ServerError("ServeBlob", err) + } + return + } + + // Now check if there is a meta object for this pointer + meta, err := models.GetLFSMetaObjectByOid(ctx.Repo.Repository.ID, pointer.Oid) + + // If there isn't one just serve the data directly + if err == models.ErrLFSObjectNotExist { + // Handle caching for the blob SHA (not the LFS object OID) + if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) { + return + } + + if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil { + ctx.ServerError("ServeBlob", err) + } + return + } else if err != nil { + ctx.ServerError("GetLFSMetaObjectByOid", err) + return + } + + // Handle caching for the LFS object OID + if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) { + return + } + + if setting.LFS.ServeDirect { + // If we have a signed url (S3, object storage), redirect to this directly. + u, err := storage.LFS.URL(pointer.RelativePath(), blob.Name()) + if u != nil && err == nil { + ctx.Redirect(u.String()) + return + } + } + + lfsDataRc, err := lfs.ReadMetaObject(meta.Pointer) + if err != nil { + ctx.ServerError("ReadMetaObject", err) + return + } + defer lfsDataRc.Close() + + if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, meta.Size, lfsDataRc); err != nil { + ctx.ServerError("ServeData", err) + } +} + func getBlobForEntry(ctx *context.APIContext) (blob *git.Blob, lastModified time.Time) { entry, err := ctx.Repo.Commit.GetTreeEntryByPath(ctx.Repo.TreePath) if err != nil { diff --git a/templates/swagger/v1_json.tmpl b/templates/swagger/v1_json.tmpl index d63cde60e..c23bcb2e9 100644 --- a/templates/swagger/v1_json.tmpl +++ b/templates/swagger/v1_json.tmpl @@ -7150,6 +7150,52 @@ } } }, + "/repos/{owner}/{repo}/media/{filepath}": { + "get": { + "tags": [ + "repository" + ], + "summary": "Get a file or it's LFS object from a repository", + "operationId": "repoGetRawFileOrLFS", + "parameters": [ + { + "type": "string", + "description": "owner of the repo", + "name": "owner", + "in": "path", + "required": true + }, + { + "type": "string", + "description": "name of the repo", + "name": "repo", + "in": "path", + "required": true + }, + { + "type": "string", + "description": "filepath of the file to get", + "name": "filepath", + "in": "path", + "required": true + }, + { + "type": "string", + "description": "The name of the commit/branch/tag. Default the repository’s default branch (usually master)", + "name": "ref", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Returns raw file content." + }, + "404": { + "$ref": "#/responses/notFound" + } + } + } + }, "/repos/{owner}/{repo}/milestones": { "get": { "produces": [