From d986d424c967cb0b3a9a06907e6c7ba7da291b79 Mon Sep 17 00:00:00 2001 From: Ciprian Dorin Craciun Date: Wed, 15 Dec 2021 14:24:48 +0200 Subject: [PATCH] [archive] Reduce archive size (reduce namespace length, replace fingerprints with incremental handles). --- sources/cmd/archiver/archiver.go | 57 +++++++++++++++++++++++++++----- sources/cmd/server/server.go | 15 ++++++--- sources/lib/common/namespaces.go | 16 ++++----- 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/sources/cmd/archiver/archiver.go b/sources/cmd/archiver/archiver.go index 8d2408e..4cfe6e9 100644 --- a/sources/cmd/archiver/archiver.go +++ b/sources/cmd/archiver/archiver.go @@ -39,6 +39,7 @@ type context struct { storedDataContent map[string]bool storedDataContentMeta map[string]map[string]string storedFiles map[[2]uint64][2]string + storedKeys map[string]string archivedReferences uint compress string compressCache *bbolt.DB @@ -291,7 +292,12 @@ func archiveDataContent (_context *context, _fingerprintContent string, _dataCon } { - _key := fmt.Sprintf ("%s:%s", NamespaceDataContent, _fingerprintContent) + var _key string + if _key_0, _error := prepareKey (_context, NamespaceDataContent, _fingerprintContent); _error == nil { + _key = fmt.Sprintf ("%s:%s", NamespaceDataContent, _key_0) + } else { + return _error + } if _context.debug { log.Printf ("[ ] data-content ++ `%s`\n", _key) } @@ -316,7 +322,12 @@ func archiveDataMeta (_context *context, _fingerprintMeta string, _dataMeta []by } { - _key := fmt.Sprintf ("%s:%s", NamespaceDataMetadata, _fingerprintMeta) + var _key string + if _key_0, _error := prepareKey (_context, NamespaceDataMetadata, _fingerprintMeta); _error == nil { + _key = fmt.Sprintf ("%s:%s", NamespaceDataMetadata, _key_0) + } else { + return _error + } if _context.debug { log.Printf ("[ ] data-meta ++ `%s`\n", _key) } @@ -348,19 +359,31 @@ func archiveReference (_context *context, _namespace string, _pathInArchive stri } _context.archivedReferences += 1 + _key := 
fmt.Sprintf ("%s:%s", _namespace, _pathInArchive) + + var _keyMeta, _keyContent string + if _key_0, _error := prepareKey (_context, NamespaceDataMetadata, _fingerprintMeta); _error == nil { + _keyMeta = _key_0 + } else { + return _error + } + if _key_0, _error := prepareKey (_context, NamespaceDataContent, _fingerprintContent); _error == nil { + _keyContent = _key_0 + } else { + return _error + } + _references := fmt.Sprintf ("%s:%s", _keyMeta, _keyContent) + if _context.debug { - log.Printf ("[ ] reference ++ `%s` :: `%s` -> `%s` ~ `%s`\n", _namespace, _pathInArchive, _fingerprintContent[:16], _fingerprintMeta[:16]) + log.Printf ("[ ] reference ++ `%s` :: `%s` -> `%s` ~ `%s`\n", _namespace, _pathInArchive, _keyMeta, _keyContent) } - _key := fmt.Sprintf ("%s:%s", _namespace, _pathInArchive) - _fingerprints := fmt.Sprintf ("%s:%s", _fingerprintContent, _fingerprintMeta) - - if _error := _context.cdbWriter.Put ([]byte (_key), []byte (_fingerprints)); _error != nil { + if _error := _context.cdbWriter.Put ([]byte (_key), []byte (_references)); _error != nil { return _error } _context.cdbWriteCount += 1 _context.cdbWriteKeySize += len (_key) - _context.cdbWriteDataSize += len (_fingerprints) + _context.cdbWriteDataSize += len (_references) if _context.progress { if _context.archivedReferences <= 1 { @@ -558,6 +581,28 @@ func prepareDataMeta (_context *context, _dataMeta map[string]string) (string, [ +// prepareKey returns the short handle that stands in for `_fingerprint` within `_namespace`. +// A pair already present in `_context.storedKeys` keeps its handle; a new pair is recorded under the +// hexadecimal form of `len (_context.storedKeys) + 1`; an error is returned once that index reaches `1 << 32`. +func prepareKey (_context *context, _namespace string, _fingerprint string) (string, error) { + _qualified := fmt.Sprintf ("%s:%s", _namespace, _fingerprint) + if _key, _found := _context.storedKeys[_qualified]; _found { + return _key, nil + } + _keyIndex := len (_context.storedKeys) + 1 + // Compare as `uint64`: the untyped constant `1 << 32` is not representable as `int` + // where `int` is 32 bits wide, and the plain comparison would not compile there. + if uint64 (_keyIndex) >= (1 << 32) { + return "", fmt.Errorf ("[aba09b4d] maximum stored keys reached!") + } + _key := fmt.Sprintf ("%x", _keyIndex) + _context.storedKeys[_qualified] = _key + return _key, nil +} + + + + func walkPath (_context *context, _pathResolved string, _pathInArchive string, 
_name string, _recursed map[string]uint, _recurse bool) (os.FileInfo, error) { if _recursed == nil { @@ -865,6 +905,7 @@ func main_0 () (error) { storedDataContent : make (map[string]bool, 16 * 1024), storedDataContentMeta : make (map[string]map[string]string, 16 * 1024), storedFiles : make (map[[2]uint64][2]string, 16 * 1024), + storedKeys : make (map[string]string, 16 * 1024), compress : _compress, compressCache : _compressCacheDb, includeIndex : _includeIndex, diff --git a/sources/cmd/server/server.go b/sources/cmd/server/server.go index 4417c3b..400bf36 100644 --- a/sources/cmd/server/server.go +++ b/sources/cmd/server/server.go @@ -218,13 +218,14 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () { return } - if len (_fingerprints) != 129 { + _fingerprintsSplit := bytes.IndexByte (_fingerprints, ':') + if _fingerprintsSplit < 0 { log.Printf ("[ee] [7ee6c981] invalid data fingerprints for `%s`!\n", _requestHeaders.RequestURI ()) _server.ServeError (_context, http.StatusInternalServerError, nil, false) return } - _fingerprintContent := _fingerprints[0:64] - _fingerprintMeta := _fingerprints[65:129] + _fingerprintMeta := _fingerprints[:_fingerprintsSplit] + _fingerprintContent := _fingerprints[_fingerprintsSplit + 1:] var _data []byte if _server.cachedDataContent != nil { @@ -990,8 +991,12 @@ func main_0 () (error) { if _fingerprints_0, _error := _cdbReader.GetWithCdbHash (_key); _error == nil { if _fingerprints_0 != nil { _fingerprints = _fingerprints_0 - _fingerprintContent = _fingerprints[0:64] - _fingerprintMeta = _fingerprints[65:129] + _fingerprintsSplit := bytes.IndexByte (_fingerprints, ':') + if _fingerprintsSplit < 0 { + AbortError (nil, "[aa6e678f] failed indexing archive!") + } + _fingerprintMeta = _fingerprints[:_fingerprintsSplit] + _fingerprintContent = _fingerprints[_fingerprintsSplit + 1:] } else { AbortError (_error, "[460b3cf1] failed indexing archive!") } diff --git a/sources/lib/common/namespaces.go 
b/sources/lib/common/namespaces.go index 5fb734c..2e1065d 100644 --- a/sources/lib/common/namespaces.go +++ b/sources/lib/common/namespaces.go @@ -5,14 +5,14 @@ package common -const NamespaceSchemaVersion = "schema:version" -const CurrentSchemaVersion = "2018-11-20-a--27c59009ca1e08a247b0de12293c9275" +const NamespaceSchemaVersion = "s" +const CurrentSchemaVersion = "2021a" -const NamespaceFilesContent = "files:content" -const NamespaceFilesIndex = "files:index" -const NamespaceFoldersContent = "folders:entries" -const NamespaceFoldersIndex = "folders:index" +const NamespaceFilesContent = "f" +const NamespaceFilesIndex = "fi" +const NamespaceFoldersContent = "l" +const NamespaceFoldersIndex = "li" -const NamespaceDataContent = "data:content" -const NamespaceDataMetadata = "data:metadata" +const NamespaceDataContent = "c" +const NamespaceDataMetadata = "m"