[archive] Reduce archive size (reduce namespace length, replace fingerprints with incremental handles).

This commit is contained in:
Ciprian Dorin Craciun 2021-12-15 14:24:48 +02:00
parent 740e3c8c95
commit d986d424c9
3 changed files with 67 additions and 21 deletions

View file

@ -39,6 +39,7 @@ type context struct {
storedDataContent map[string]bool
storedDataContentMeta map[string]map[string]string
storedFiles map[[2]uint64][2]string
storedKeys map[string]string
archivedReferences uint
compress string
compressCache *bbolt.DB
@ -291,7 +292,12 @@ func archiveDataContent (_context *context, _fingerprintContent string, _dataCon
}
{
_key := fmt.Sprintf ("%s:%s", NamespaceDataContent, _fingerprintContent)
var _key string
if _key_0, _error := prepareKey (_context, NamespaceDataContent, _fingerprintContent); _error == nil {
_key = fmt.Sprintf ("%s:%s", NamespaceDataContent, _key_0)
} else {
return _error
}
if _context.debug {
log.Printf ("[ ] data-content ++ `%s`\n", _key)
}
@ -316,7 +322,12 @@ func archiveDataMeta (_context *context, _fingerprintMeta string, _dataMeta []by
}
{
_key := fmt.Sprintf ("%s:%s", NamespaceDataMetadata, _fingerprintMeta)
var _key string
if _key_0, _error := prepareKey (_context, NamespaceDataMetadata, _fingerprintMeta); _error == nil {
_key = fmt.Sprintf ("%s:%s", NamespaceDataMetadata, _key_0)
} else {
return _error
}
if _context.debug {
log.Printf ("[ ] data-meta ++ `%s`\n", _key)
}
@ -348,19 +359,31 @@ func archiveReference (_context *context, _namespace string, _pathInArchive stri
}
_context.archivedReferences += 1
_key := fmt.Sprintf ("%s:%s", _namespace, _pathInArchive)
var _keyMeta, _keyContent string
if _key_0, _error := prepareKey (_context, NamespaceDataMetadata, _fingerprintMeta); _error == nil {
_keyMeta = _key_0
} else {
return _error
}
if _key_0, _error := prepareKey (_context, NamespaceDataContent, _fingerprintContent); _error == nil {
_keyContent = _key_0
} else {
return _error
}
_references := fmt.Sprintf ("%s:%s", _keyMeta, _keyContent)
if _context.debug {
log.Printf ("[ ] reference ++ `%s` :: `%s` -> `%s` ~ `%s`\n", _namespace, _pathInArchive, _fingerprintContent[:16], _fingerprintMeta[:16])
log.Printf ("[ ] reference ++ `%s` :: `%s` -> `%s` ~ `%s`\n", _namespace, _pathInArchive, _keyMeta, _keyContent)
}
_key := fmt.Sprintf ("%s:%s", _namespace, _pathInArchive)
_fingerprints := fmt.Sprintf ("%s:%s", _fingerprintContent, _fingerprintMeta)
if _error := _context.cdbWriter.Put ([]byte (_key), []byte (_fingerprints)); _error != nil {
if _error := _context.cdbWriter.Put ([]byte (_key), []byte (_references)); _error != nil {
return _error
}
_context.cdbWriteCount += 1
_context.cdbWriteKeySize += len (_key)
_context.cdbWriteDataSize += len (_fingerprints)
_context.cdbWriteDataSize += len (_references)
if _context.progress {
if _context.archivedReferences <= 1 {
@ -558,6 +581,23 @@ func prepareDataMeta (_context *context, _dataMeta map[string]string) (string, [
// prepareKey returns the short handle that stands in for `_fingerprint` within `_namespace`,
// allocating a fresh handle the first time a given (namespace, fingerprint) pair is seen.
//
// Handles are remembered in `_context.storedKeys` under the qualified name
// `<namespace>:<fingerprint>`, so repeated calls with the same pair yield the same handle.
// A new handle is the lowercase hexadecimal form of a 1-based counter derived from the
// number of entries already in that map; the counter is therefore shared by all namespaces.
//
// An error is returned (and nothing is stored) once the counter would reach 2^32.
func prepareKey (_context *context, _namespace string, _fingerprint string) (string, error) {
	_qualified := fmt.Sprintf ("%s:%s", _namespace, _fingerprint)
	if _key, _found := _context.storedKeys[_qualified]; _found {
		return _key, nil
	}
	// NOTE:  The counter is widened to `uint64` before the limit check;  compared as a plain `int`,
	//        the untyped constant `1 << 32` is not representable on 32-bit targets and the build fails there.
	_keyIndex := uint64 (len (_context.storedKeys)) + 1
	if _keyIndex >= (1 << 32) {
		return "", fmt.Errorf ("[aba09b4d] maximum stored keys reached!")
	}
	_key := fmt.Sprintf ("%x", _keyIndex)
	_context.storedKeys[_qualified] = _key
	return _key, nil
}
func walkPath (_context *context, _pathResolved string, _pathInArchive string, _name string, _recursed map[string]uint, _recurse bool) (os.FileInfo, error) {
if _recursed == nil {
@ -865,6 +905,7 @@ func main_0 () (error) {
storedDataContent : make (map[string]bool, 16 * 1024),
storedDataContentMeta : make (map[string]map[string]string, 16 * 1024),
storedFiles : make (map[[2]uint64][2]string, 16 * 1024),
storedKeys : make (map[string]string, 16 * 1024),
compress : _compress,
compressCache : _compressCacheDb,
includeIndex : _includeIndex,

View file

@ -218,13 +218,14 @@ func (_server *server) Serve (_context *fasthttp.RequestCtx) () {
return
}
if len (_fingerprints) != 129 {
_fingerprintsSplit := bytes.IndexByte (_fingerprints, ':')
if _fingerprintsSplit < 0 {
log.Printf ("[ee] [7ee6c981] invalid data fingerprints for `%s`!\n", _requestHeaders.RequestURI ())
_server.ServeError (_context, http.StatusInternalServerError, nil, false)
return
}
_fingerprintContent := _fingerprints[0:64]
_fingerprintMeta := _fingerprints[65:129]
_fingerprintMeta := _fingerprints[:_fingerprintsSplit]
_fingerprintContent := _fingerprints[_fingerprintsSplit + 1:]
var _data []byte
if _server.cachedDataContent != nil {
@ -990,8 +991,12 @@ func main_0 () (error) {
if _fingerprints_0, _error := _cdbReader.GetWithCdbHash (_key); _error == nil {
if _fingerprints_0 != nil {
_fingerprints = _fingerprints_0
_fingerprintContent = _fingerprints[0:64]
_fingerprintMeta = _fingerprints[65:129]
_fingerprintsSplit := bytes.IndexByte (_fingerprints, ':')
if _fingerprintsSplit < 0 {
AbortError (nil, "[aa6e678f] failed indexing archive!")
}
_fingerprintMeta = _fingerprints[:_fingerprintsSplit]
_fingerprintContent = _fingerprints[_fingerprintsSplit + 1:]
} else {
AbortError (_error, "[460b3cf1] failed indexing archive!")
}

View file

@ -5,14 +5,14 @@ package common
const NamespaceSchemaVersion = "schema:version"
const CurrentSchemaVersion = "2018-11-20-a--27c59009ca1e08a247b0de12293c9275"
const NamespaceSchemaVersion = "s"
const CurrentSchemaVersion = "2021a"
const NamespaceFilesContent = "files:content"
const NamespaceFilesIndex = "files:index"
const NamespaceFoldersContent = "folders:entries"
const NamespaceFoldersIndex = "folders:index"
const NamespaceFilesContent = "f"
const NamespaceFilesIndex = "fi"
const NamespaceFoldersContent = "l"
const NamespaceFoldersIndex = "li"
const NamespaceDataContent = "data:content"
const NamespaceDataMetadata = "data:metadata"
const NamespaceDataContent = "c"
const NamespaceDataMetadata = "m"