From 583367de9ae9396a5b1565edf3c3690dceb5978a Mon Sep 17 00:00:00 2001 From: Ciprian Dorin Craciun Date: Tue, 20 Nov 2018 14:21:22 +0200 Subject: [PATCH] [archiver/server] Add support for file meta-data independent storage and files listing --- sources/cmd/archiver.go | 257 ++++++++++++++++++++++--------- sources/cmd/server.go | 36 +++-- sources/lib/common/namespaces.go | 5 + 3 files changed, 217 insertions(+), 81 deletions(-) diff --git a/sources/cmd/archiver.go b/sources/cmd/archiver.go index 195f610..bbb3657 100644 --- a/sources/cmd/archiver.go +++ b/sources/cmd/archiver.go @@ -29,12 +29,17 @@ import . "github.com/volution/kawipiko/lib/archiver" type context struct { cdbWriter *cdb.Writer - storedData map[string]bool - storedFiles map[[2]uint64]string + storedFilePaths []string + storedFolderPaths []string + storedDataMeta map[string]bool + storedDataContent map[string]bool + storedDataContentMeta map[string]map[string]string + storedFiles map[[2]uint64][2]string compress string includeIndex bool includeEtag bool - includeMetadata bool + includeFileListing bool + includeFolderListing bool debug bool } @@ -62,49 +67,68 @@ func archiveFile (_context *context, _pathResolved string, _pathInArchive string } } - _fingerprint, _wasStored := _context.storedFiles[_fileId] - var _data []byte + var _wasStored bool + var _fingerprintContent string + var _fingerprintMeta string + if _fingerprints, _wasStored_0 := _context.storedFiles[_fileId]; _wasStored_0 { + _fingerprintContent = _fingerprints[0] + _fingerprintMeta = _fingerprints[1] + _wasStored = true + } + + var _dataContent []byte var _dataMeta map[string]string + var _dataMetaRaw []byte if ! 
_wasStored { - if _data_0, _error := ioutil.ReadAll (_file); _error == nil { - _data = _data_0 + if _dataContent_0, _error := ioutil.ReadAll (_file); _error == nil { + _dataContent = _dataContent_0 } else { return _error } - if _fingerprint_0, _data_0, _dataMeta_0, _error := prepareData (_context, _pathResolved, _pathInArchive, _name, _data, ""); _error != nil { + if _fingerprintContent_0, _dataContent_0, _dataMeta_0, _error := prepareDataContent (_context, _pathResolved, _pathInArchive, _name, _dataContent, ""); _error != nil { return _error } else { - _fingerprint = _fingerprint_0 - _data = _data_0 + _fingerprintContent = _fingerprintContent_0 + _dataContent = _dataContent_0 _dataMeta = _dataMeta_0 } + if _fingerprintMeta_0, _dataMetaRaw_0, _error := prepareDataMeta (_context, _dataMeta); _error != nil { + return _error + } else { + _fingerprintMeta = _fingerprintMeta_0 + _dataMetaRaw = _dataMetaRaw_0 + } } for _, _suffix := range StripSuffixes { if strings.HasSuffix (_pathInArchive, _suffix) { _pathInArchive := _pathInArchive [: len (_pathInArchive) - len (_suffix)] - if _error := archiveReference (_context, NamespaceFilesContent, _pathInArchive, _fingerprint); _error != nil { + if _error := archiveReference (_context, NamespaceFilesContent, _pathInArchive, _fingerprintContent, _fingerprintMeta); _error != nil { return _error } break } } - if _error := archiveReference (_context, NamespaceFilesContent, _pathInArchive, _fingerprint); _error != nil { + if _error := archiveReference (_context, NamespaceFilesContent, _pathInArchive, _fingerprintContent, _fingerprintMeta); _error != nil { return _error } - - if (_data != nil) && (_dataMeta != nil) { - if _error := archiveData (_context, _fingerprint, _data, _dataMeta); _error != nil { + if _dataMetaRaw != nil { + if _error := archiveDataMeta (_context, _fingerprintMeta, _dataMetaRaw); _error != nil { + return _error + } + } + if _dataContent != nil { + if _error := archiveDataContent (_context, _fingerprintContent, 
_dataContent); _error != nil { return _error } } if ! _wasStored { - _context.storedFiles[_fileId] = _fingerprint + _context.storedFiles[_fileId] = [2]string { _fingerprintContent, _fingerprintMeta } } return nil @@ -127,7 +151,7 @@ func archiveFolder (_context *context, _pathResolved string, _pathInArchive stri } _entries := make ([]Entry, 0, len (_names)) - if _context.includeMetadata { + if _context.includeFolderListing { for _, _name := range _names { _entry := Entry { Name : _name, @@ -176,7 +200,7 @@ func archiveFolder (_context *context, _pathResolved string, _pathInArchive stri } } - if ! _context.includeMetadata { + if ! _context.includeFolderListing { return nil } @@ -192,7 +216,7 @@ func archiveFolder (_context *context, _pathResolved string, _pathInArchive stri return _error } - if _, _error := archiveReferenceAndData (_context, NamespaceFoldersContent, _pathResolved, _pathInArchive, "", _data, MimeTypeJson); _error != nil { + if _, _, _error := archiveReferenceAndData (_context, NamespaceFoldersContent, _pathResolved, _pathInArchive, "", _data, MimeTypeJson); _error != nil { return _error } @@ -202,67 +226,86 @@ func archiveFolder (_context *context, _pathResolved string, _pathInArchive stri -func archiveReferenceAndData (_context *context, _namespace string, _pathResolved string, _pathInArchive string, _name string, _data []byte, _dataType string) (string, error) { +func archiveReferenceAndData (_context *context, _namespace string, _pathResolved string, _pathInArchive string, _name string, _dataContent []byte, _dataType string) (string, string, error) { - var _fingerprint string + var _fingerprintContent string + var _fingerprintMeta string var _dataMeta map[string]string - if _fingerprint_0, _data_0, _dataMeta_0, _error := prepareData (_context, _pathResolved, _pathInArchive, _name, _data, _dataType); _error != nil { - return "", _error + var _dataMetaRaw []byte + + if _fingerprintContent_0, _dataContent_0, _dataMeta_0, _error := prepareDataContent 
(_context, _pathResolved, _pathInArchive, _name, _dataContent, _dataType); _error != nil { + return "", "", _error } else { - _fingerprint = _fingerprint_0 - _data = _data_0 + _fingerprintContent = _fingerprintContent_0 + _dataContent = _dataContent_0 _dataMeta = _dataMeta_0 } - - if _error := archiveReference (_context, _namespace, _pathInArchive, _fingerprint); _error != nil { - return "", _error + if _fingerprintMeta_0, _dataMetaRaw_0, _error := prepareDataMeta (_context, _dataMeta); _error != nil { + return "", "", _error + } else { + _fingerprintMeta = _fingerprintMeta_0 + _dataMetaRaw = _dataMetaRaw_0 } - if (_data != nil) && (_dataMeta != nil) { - if _error := archiveData (_context, _fingerprint, _data, _dataMeta); _error != nil { - return "", _error + if _error := archiveReference (_context, _namespace, _pathInArchive, _fingerprintContent, _fingerprintMeta); _error != nil { + return "", "", _error + } + if _dataMetaRaw != nil { + if _error := archiveDataMeta (_context, _fingerprintMeta, _dataMetaRaw); _error != nil { + return "", "", _error + } + } + if _dataContent != nil { + if _error := archiveDataContent (_context, _fingerprintContent, _dataContent); _error != nil { + return "", "", _error } } - return _fingerprint, nil + return _fingerprintContent, _fingerprintMeta, nil } -func archiveData (_context *context, _fingerprint string, _data []byte, _dataMeta map[string]string) (error) { + +func archiveDataContent (_context *context, _fingerprintContent string, _dataContent []byte) (error) { - if _wasStored, _ := _context.storedData[_fingerprint]; _wasStored { - return fmt.Errorf ("[256cde78] data already stored: `%s`!", _fingerprint) - } - - var _dataMetaRaw []byte - if _dataMetaRaw_0, _error := MetadataEncode (_dataMeta); _error == nil { - _dataMetaRaw = _dataMetaRaw_0 - } else { - return _error + if _wasStored, _ := _context.storedDataContent[_fingerprintContent]; _wasStored { + return fmt.Errorf ("[256cde78] data content already stored: `%s`!", 
_fingerprintContent) } { - _key := fmt.Sprintf ("%s:%s", NamespaceDataMetadata, _fingerprint) + _key := fmt.Sprintf ("%s:%s", NamespaceDataContent, _fingerprintContent) if _context.debug { - log.Printf ("[ ] blob-meta ++ `%s`\n", _key) + log.Printf ("[ ] data-content ++ `%s`\n", _key) } - if _error := _context.cdbWriter.Put ([]byte (_key), _dataMetaRaw); _error != nil { + if _error := _context.cdbWriter.Put ([]byte (_key), _dataContent); _error != nil { return _error } } + _context.storedDataContent[_fingerprintContent] = true + + return nil +} + + +func archiveDataMeta (_context *context, _fingerprintMeta string, _dataMeta []byte) (error) { + + if _wasStored, _ := _context.storedDataMeta[_fingerprintMeta]; _wasStored { + return fmt.Errorf ("[2918c4e2] data meta already stored: `%s`!", _fingerprintMeta) + } + { - _key := fmt.Sprintf ("%s:%s", NamespaceDataContent, _fingerprint) + _key := fmt.Sprintf ("%s:%s", NamespaceDataMetadata, _fingerprintMeta) if _context.debug { - log.Printf ("[ ] blob-data ++ `%s`\n", _key) + log.Printf ("[ ] data-meta ++ `%s`\n", _key) } - if _error := _context.cdbWriter.Put ([]byte (_key), _data); _error != nil { + if _error := _context.cdbWriter.Put ([]byte (_key), _dataMeta); _error != nil { return _error } } - _context.storedData[_fingerprint] = true + _context.storedDataMeta[_fingerprintMeta] = true return nil } @@ -270,13 +313,24 @@ func archiveData (_context *context, _fingerprint string, _data []byte, _dataMet -func archiveReference (_context *context, _namespace string, _pathInArchive string, _fingerprint string) (error) { +func archiveReference (_context *context, _namespace string, _pathInArchive string, _fingerprintContent string, _fingerprintMeta string) (error) { + + switch _namespace { + case NamespaceFilesContent : + _context.storedFilePaths = append (_context.storedFilePaths, _pathInArchive) + case NamespaceFoldersContent : + _context.storedFolderPaths = append (_context.storedFolderPaths, _pathInArchive) + default : + 
return fmt.Errorf ("[051a102a]") + } _key := fmt.Sprintf ("%s:%s", _namespace, _pathInArchive) if _context.debug { - log.Printf ("[ ] reference ++ `%s` :: `%s` -> `%s`\n", _namespace, _pathInArchive, _fingerprint) + log.Printf ("[ ] reference ++ `%s` :: `%s` -> `%s` ~ `%s`\n", _namespace, _pathInArchive, _fingerprintContent[:16], _fingerprintMeta[:16]) } - if _error := _context.cdbWriter.Put ([]byte (_key), []byte (_fingerprint)); _error != nil { + + _fingerprints := fmt.Sprintf ("%s:%s", _fingerprintContent, _fingerprintMeta) + if _error := _context.cdbWriter.Put ([]byte (_key), []byte (_fingerprints)); _error != nil { return _error } @@ -286,13 +340,14 @@ func archiveReference (_context *context, _namespace string, _pathInArchive stri -func prepareData (_context *context, _pathResolved string, _pathInArchive string, _name string, _data []byte, _dataType string) (string, []byte, map[string]string, error) { +func prepareDataContent (_context *context, _pathResolved string, _pathInArchive string, _name string, _dataContent []byte, _dataType string) (string, []byte, map[string]string, error) { - _fingerprintRaw := sha256.Sum256 (_data) - _fingerprint := hex.EncodeToString (_fingerprintRaw[:]) + _fingerprintContentRaw := sha256.Sum256 (_dataContent) + _fingerprintContent := hex.EncodeToString (_fingerprintContentRaw[:]) - if _wasStored, _ := _context.storedData[_fingerprint]; _wasStored { - return _fingerprint, nil, nil, nil + if _wasStored, _ := _context.storedDataContent[_fingerprintContent]; _wasStored { + _dataMeta := _context.storedDataContentMeta[_fingerprintContent] + return _fingerprintContent, nil, _dataMeta, nil } if (_dataType == "") && (_name != "") { @@ -303,19 +358,19 @@ func prepareData (_context *context, _pathResolved string, _pathInArchive string _dataType, _ = MimeTypesByExtension[_extension] } if _dataType == "" { - _dataType = http.DetectContentType (_data) + _dataType = http.DetectContentType (_dataContent) } if _dataType == "" { _dataType = 
MimeTypeRaw } _dataEncoding := "identity" - _dataUncompressedSize := len (_data) + _dataUncompressedSize := len (_dataContent) _dataSize := _dataUncompressedSize if _dataSize > 512 { - if _data_0, _dataEncoding_0, _error := Compress (_data, _context.compress); _error == nil { + if _dataContent_0, _dataEncoding_0, _error := Compress (_dataContent, _context.compress); _error == nil { if _dataEncoding_0 != "identity" { - _dataCompressedSize := len (_data_0) + _dataCompressedSize := len (_dataContent_0) _dataCompressedDelta := _dataUncompressedSize - _dataCompressedSize _dataCompressedRatio := (_dataCompressedDelta * 100) / _dataUncompressedSize _accepted := false @@ -329,7 +384,7 @@ func prepareData (_context *context, _pathResolved string, _pathInArchive string _accepted = _accepted || ((_dataUncompressedSize > (1 * 1024)) && (_dataCompressedRatio >= 40)) _accepted = _accepted || (_dataCompressedRatio >= 90) if _accepted { - _data = _data_0 + _dataContent = _dataContent_0 _dataEncoding = _dataEncoding_0 _dataSize = _dataCompressedSize } @@ -357,10 +412,32 @@ func prepareData (_context *context, _pathResolved string, _pathInArchive string _dataMeta["Content-Type"] = _dataType _dataMeta["Content-Encoding"] = _dataEncoding if _context.includeEtag { - _dataMeta["ETag"] = _fingerprint + _dataMeta["ETag"] = _fingerprintContent } - return _fingerprint, _data, _dataMeta, nil + _context.storedDataContentMeta[_fingerprintContent] = _dataMeta + + return _fingerprintContent, _dataContent, _dataMeta, nil +} + + +func prepareDataMeta (_context *context, _dataMeta map[string]string) (string, []byte, error) { + + var _dataMetaRaw []byte + if _dataMetaRaw_0, _error := MetadataEncode (_dataMeta); _error == nil { + _dataMetaRaw = _dataMetaRaw_0 + } else { + return "", nil, _error + } + + _fingerprintMetaRaw := sha256.Sum256 (_dataMetaRaw) + _fingerprintMeta := hex.EncodeToString (_fingerprintMetaRaw[:]) + + if _wasStored, _ := _context.storedDataMeta[_fingerprintMeta]; _wasStored { + 
return _fingerprintMeta, nil, nil + } + + return _fingerprintMeta, _dataMetaRaw, nil } @@ -552,7 +629,8 @@ func main_0 () (error) { var _compress string var _includeIndex bool var _includeEtag bool - var _includeMetadata bool + var _includeFileListing bool + var _includeFolderListing bool var _debug bool { @@ -587,9 +665,12 @@ func main_0 () (error) { --archive --compress + --exclude-index --exclude-etag - --include-metadata + + --exclude-file-listing + --include-folder-listing --debug @@ -601,7 +682,8 @@ func main_0 () (error) { _compress_0 := _flags.String ("compress", "", "") _excludeIndex_0 := _flags.Bool ("exclude-index", false, "") _excludeEtag_0 := _flags.Bool ("exclude-etag", false, "") - _includeMetadata_0 := _flags.Bool ("include-metadata", false, "") + _excludeFileListing_0 := _flags.Bool ("exclude-file-listing", false, "") /* opt-out flag, name must match the usage text: the files index is written unless this is given (the value is negated when copied into _includeFileListing) */ + _includeFolderListing_0 := _flags.Bool ("include-folder-listing", false, "") /* opt-in flag, name must match the usage text: the folders index is written only when this is given */ _debug_0 := _flags.Bool ("debug", false, "") FlagsParse (_flags, 0, 0) @@ -611,7 +693,8 @@ func main_0 () (error) { _compress = *_compress_0 _includeIndex = ! *_excludeIndex_0 _includeEtag = ! *_excludeEtag_0 - _includeMetadata = *_includeMetadata_0 + _includeFileListing = ! 
*_excludeFileListing_0 + _includeFolderListing = *_includeFolderListing_0 _debug = *_debug_0 if _sourcesFolder == "" { @@ -630,14 +713,23 @@ func main_0 () (error) { AbortError (_error, "[85234ba0] failed creating archive (while opening)!") } + if _error := _cdbWriter.Put ([]byte (NamespaceSchemaVersion), []byte (CurrentSchemaVersion)); _error != nil { + AbortError (_error, "[43228812] failed writing archive!") + } + _context := & context { cdbWriter : _cdbWriter, - storedData : make (map[string]bool, 16 * 1024), - storedFiles : make (map[[2]uint64]string, 16 * 1024), + storedFilePaths : make ([]string, 0, 16 * 1024), + storedFolderPaths : make ([]string, 0, 16 * 1024), + storedDataMeta : make (map[string]bool, 16 * 1024), + storedDataContent : make (map[string]bool, 16 * 1024), + storedDataContentMeta : make (map[string]map[string]string, 16 * 1024), + storedFiles : make (map[[2]uint64][2]string, 16 * 1024), compress : _compress, includeIndex : _includeIndex, includeEtag : _includeEtag, - includeMetadata : _includeMetadata, + includeFileListing : _includeFileListing, + includeFolderListing : _includeFolderListing, debug : _debug, } @@ -645,11 +737,32 @@ func main_0 () (error) { AbortError (_error, "[b6a19ef4] failed walking folder!") } + if _includeFileListing { + _buffer := make ([]byte, 0, 1024 * 1024) + for _, _path := range _context.storedFilePaths { + _buffer = append (_buffer, _path ...) + _buffer = append (_buffer, '\n') + } + if _error := _cdbWriter.Put ([]byte (NamespaceFilesIndex), _buffer); _error != nil { + AbortError (_error, "[1dbdde05] failed writing archive!") + } + } + + if _includeFolderListing { + _buffer := make ([]byte, 0, 1024 * 1024) + for _, _path := range _context.storedFolderPaths { + _buffer = append (_buffer, _path ...) 
+ _buffer = append (_buffer, '\n') + } + if _error := _cdbWriter.Put ([]byte (NamespaceFoldersIndex), _buffer); _error != nil { + AbortError (_error, "[e2dd2de0] failed writing archive!") + } + } + if _error := _cdbWriter.Close (); _error != nil { AbortError (_error, "[bbfb8478] failed creating archive (while closing)!") } - return nil } diff --git a/sources/cmd/server.go b/sources/cmd/server.go index 573c99a..0444c25 100644 --- a/sources/cmd/server.go +++ b/sources/cmd/server.go @@ -111,9 +111,9 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { _responseHeaders.SetCanonical ([]byte ("X-content-type-Options"), []byte ("nosniff")) _responseHeaders.SetCanonical ([]byte ("X-XSS-Protection"), []byte ("1; mode=block")) - var _fingerprint []byte + var _fingerprints []byte - if _fingerprint == nil { + if _fingerprints == nil { _loop_1 : for _, _namespaceAndPathSuffix := range [][2]string { {NamespaceFilesContent, ""}, {NamespaceFilesContent, "/"}, @@ -142,7 +142,7 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { if _value, _error := _server.cdbReader.GetWithCdbHash (_key); _error == nil { if _value != nil { - _fingerprint = _value + _fingerprints = _value if ((_namespace == NamespaceFoldersContent) || _pathSuffixHasSlash) && (!_pathIsRoot && !_pathHasSlash) { _path = append (_path, '/') _server.ServeRedirect (_context, http.StatusTemporaryRedirect, _path, true) @@ -157,14 +157,14 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { } } - if _fingerprint == nil { + if _fingerprints == nil { if bytes.Equal ([]byte ("/favicon.ico"), _path) { _server.ServeStatic (_context, http.StatusOK, FaviconData, FaviconContentType, FaviconContentEncoding, true) return } } - if _fingerprint == nil { + if _fingerprints == nil { _loop_2 : for _pathLimit := bytes.LastIndexByte (_path, '/'); _pathLimit >= 0; @@ -178,7 +178,7 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { if _value, _error := 
_server.cdbReader.GetWithCdbHash (_key); _error == nil { if _value != nil { - _fingerprint = _value + _fingerprints = _value break _loop_2 } } else { @@ -188,12 +188,20 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { } } - if _fingerprint == nil { + if _fingerprints == nil { log.Printf ("[ww] [7416f61d] not found `%s`!\n", _requestHeaders.RequestURI ()) _server.ServeError (_context, http.StatusNotFound, nil, true) return } + if len (_fingerprints) != 129 { + log.Printf ("[ee] [7ee6c981] invalid data fingerprints for `%s`!\n", _requestHeaders.RequestURI ()) + _server.ServeError (_context, http.StatusInternalServerError, nil, false) + return + } + _fingerprintContent := _fingerprints[:64] + _fingerprintMeta := _fingerprints[65:] + _responseHeaders.SetCanonical ([]byte ("Cache-Control"), []byte ("public, immutable, max-age=3600")) var _data []byte @@ -201,7 +209,7 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { _key := _keyBuffer[:0] _key = append (_key, NamespaceDataContent ...) _key = append (_key, ':') - _key = append (_key, _fingerprint ...) + _key = append (_key, _fingerprintContent ...) if _value, _error := _server.cdbReader.GetWithCdbHash (_key); _error == nil { if _value != nil { _data = _value @@ -221,7 +229,7 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { _key := _keyBuffer[:0] _key = append (_key, NamespaceDataMetadata ...) _key = append (_key, ':') - _key = append (_key, _fingerprint ...) + _key = append (_key, _fingerprintMeta ...) 
if _value, _error := _server.cdbReader.GetWithCdbHash (_key); _error == nil { if _value != nil { _handleHeader := func (_name []byte, _value []byte) { @@ -699,6 +707,16 @@ func main_0 () (error) { } } + if _schemaVersion, _error := _cdbReader.Get ([]byte (NamespaceSchemaVersion)); _error == nil { + if _schemaVersion == nil { + AbortError (nil, "[09316866] missing archive schema version!") + } else if string (_schemaVersion) != CurrentSchemaVersion { + AbortError (nil, "[e6482cf7] invalid archive schema version!") + } + } else { + AbortError (_error, "[87cae197] failed opening archive!") + } + _server := & server { httpServer : nil, diff --git a/sources/lib/common/namespaces.go b/sources/lib/common/namespaces.go index 28245cd..5fb734c 100644 --- a/sources/lib/common/namespaces.go +++ b/sources/lib/common/namespaces.go @@ -5,8 +5,13 @@ package common +const NamespaceSchemaVersion = "schema:version" +const CurrentSchemaVersion = "2018-11-20-a--27c59009ca1e08a247b0de12293c9275" + const NamespaceFilesContent = "files:content" +const NamespaceFilesIndex = "files:index" const NamespaceFoldersContent = "folders:entries" +const NamespaceFoldersIndex = "folders:index" const NamespaceDataContent = "data:content" const NamespaceDataMetadata = "data:metadata"