Upgrade blevesearch to v0.8.1 (#9177)

For #1441

a91b427b59
This commit is contained in:
Mura Li 2019-11-27 17:23:33 +08:00 committed by Lauris BH
parent b50dee5a61
commit 9591185c8f
180 changed files with 43400 additions and 41105 deletions

21
go.mod
View file

@ -16,14 +16,14 @@ require (
gitea.com/macaron/session v0.0.0-20190821211443-122c47c5f705 gitea.com/macaron/session v0.0.0-20190821211443-122c47c5f705
gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7 gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7
github.com/PuerkitoBio/goquery v1.5.0 github.com/PuerkitoBio/goquery v1.5.0
github.com/RoaringBitmap/roaring v0.4.7 // indirect github.com/RoaringBitmap/roaring v0.4.21 // indirect
github.com/bgentry/speakeasy v0.1.0 // indirect github.com/bgentry/speakeasy v0.1.0 // indirect
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 github.com/blevesearch/bleve v0.8.1
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f // indirect github.com/blevesearch/go-porterstemmer v1.0.2 // indirect
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc // indirect github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 // indirect github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 // indirect
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe // indirect github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd // indirect
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect
github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect
@ -31,14 +31,13 @@ require (
github.com/dgrijalva/jwt-go v3.2.0+incompatible github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/editorconfig/editorconfig-core-go/v2 v2.1.1 github.com/editorconfig/editorconfig-core-go/v2 v2.1.1
github.com/emirpasic/gods v1.12.0 github.com/emirpasic/gods v1.12.0
github.com/etcd-io/bbolt v1.3.2 // indirect github.com/etcd-io/bbolt v1.3.3 // indirect
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
github.com/gliderlabs/ssh v0.2.2 github.com/gliderlabs/ssh v0.2.2
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd // indirect github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e // indirect
github.com/go-openapi/jsonreference v0.19.3 // indirect github.com/go-openapi/jsonreference v0.19.3 // indirect
github.com/go-openapi/runtime v0.19.5 // indirect github.com/go-openapi/runtime v0.19.5 // indirect
github.com/go-redis/redis v6.15.2+incompatible github.com/go-redis/redis v6.15.2+incompatible
@ -68,12 +67,10 @@ require (
github.com/mattn/go-sqlite3 v1.11.0 github.com/mattn/go-sqlite3 v1.11.0
github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75 github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae // indirect
github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc
github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5 github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5
github.com/niklasfasching/go-org v0.1.8 github.com/niklasfasching/go-org v0.1.8
github.com/oliamb/cutter v0.2.2 github.com/oliamb/cutter v0.2.2
github.com/philhofer/fwd v1.0.0 // indirect
github.com/pkg/errors v0.8.1 github.com/pkg/errors v0.8.1
github.com/pquerna/otp v0.0.0-20160912161815-54653902c20e github.com/pquerna/otp v0.0.0-20160912161815-54653902c20e
github.com/prometheus/client_golang v1.1.0 github.com/prometheus/client_golang v1.1.0
@ -90,19 +87,17 @@ require (
github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect
github.com/stretchr/testify v1.4.0 github.com/stretchr/testify v1.4.0
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 // indirect github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 // indirect
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 // indirect
github.com/tstranex/u2f v1.0.0 github.com/tstranex/u2f v1.0.0
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1
github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e
github.com/unknwon/i18n v0.0.0-20190805065654-5c6446a380b6 github.com/unknwon/i18n v0.0.0-20190805065654-5c6446a380b6
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141
github.com/urfave/cli v1.20.0 github.com/urfave/cli v1.20.0
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 // indirect
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53
golang.org/x/crypto v0.0.0-20191117063200-497ca9f6d64f golang.org/x/crypto v0.0.0-20191117063200-497ca9f6d64f
golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2
golang.org/x/text v0.3.2 golang.org/x/text v0.3.2
golang.org/x/tools v0.0.0-20190910221609-7f5965fd7709 // indirect golang.org/x/tools v0.0.0-20190910221609-7f5965fd7709 // indirect
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect

55
go.sum
View file

@ -46,8 +46,8 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/RoaringBitmap/roaring v0.4.7 h1:eGUudvFzvF7Kxh7JjYvXfI1f7l22/2duFby7r5+d4oc= github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8=
github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w= github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68= github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68=
@ -72,14 +72,14 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY= github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 h1:vinCy/rcjbtxWnMiw11CbMKcuyNi+y4L4MbZUpk7m4M= github.com/blevesearch/bleve v0.8.1 h1:20zBREtGe8dvBxCC+717SaxKcUVQOWk3/Fm75vabKpU=
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw= github.com/blevesearch/bleve v0.8.1/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw=
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA= github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA=
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ= github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ=
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f h1:J9ZVHbB2X6JNxbKw/f3Y4E9Xq+Ro+zPiivzgmi3RTvg= github.com/blevesearch/go-porterstemmer v1.0.2 h1:qe7n69gBd1OLY5sHKnxQHIbzn0LNJA4hpAf+5XDxV2I=
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA= github.com/blevesearch/go-porterstemmer v1.0.2/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA=
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc h1:7OfDAkuAGx71ruzOIFqCkHqGIsVZU0C7PMw5u1bIrwU= github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f h1:kqbi9lqXLLs+zfWlgo1PIiRQ86n33K1JKotjj4rSYOg=
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8= github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8=
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 h1:NGpwhs9FOwddM6TptNrq2ycby4s24TcppSe5uG4DA/Q= github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 h1:NGpwhs9FOwddM6TptNrq2ycby4s24TcppSe5uG4DA/Q=
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA= github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA=
@ -92,6 +92,7 @@ github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkE
github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
@ -102,10 +103,11 @@ github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d h1:XMf4E1U+b
github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d/go.mod h1:srVSlQLB8iXBVXHgnqemxUXqN6FCvClgCMPCsjBDR7c= github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d/go.mod h1:srVSlQLB8iXBVXHgnqemxUXqN6FCvClgCMPCsjBDR7c=
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b h1:bZ9rKU2/V8sY+NulSfxDOnXTWcs1rySqdF1sVepihvo= github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b h1:bZ9rKU2/V8sY+NulSfxDOnXTWcs1rySqdF1sVepihvo=
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs= github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs=
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe h1:2o6Y7KMjJNsuMTF8f2H2eTKRhqH7+bQbjr+D+LnhE5M= github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd h1:zeuJhcG3f8eePshH3KxkNE+Xtl53pVln9MOUPMyr/1w=
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe/go.mod h1:prYTC8EgTu3gwbqJihkud9zRXISvyulAplQ6exdCo1g= github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd/go.mod h1:xbc8Ff/oG7h2ejd7AlwOpfd+6QZntc92ygpAOfGwcKY=
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8= github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8=
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc= github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/cupcake/rdb v0.0.0-20161107195141-43ba34106c76/go.mod h1:vYwsqCOLxGiisLwp9rITslkFNpZD5rz43tf41QFkTWY= github.com/cupcake/rdb v0.0.0-20161107195141-43ba34106c76/go.mod h1:vYwsqCOLxGiisLwp9rITslkFNpZD5rz43tf41QFkTWY=
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8= github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8=
@ -135,8 +137,8 @@ github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
github.com/etcd-io/bbolt v1.3.2 h1:RLRQ0TKLX7DlBRXAJHvbmXL17Q3KNnTBtZ9B6Qo+/Y0= github.com/etcd-io/bbolt v1.3.3 h1:gSJmxrs37LgTqR/oyJBWok6k6SvXEUerFTbltIhXkBM=
github.com/etcd-io/bbolt v1.3.2/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a h1:M1bRpaZAn4GSsqu3hdK2R8H0AH9O6vqCTCbm2oAFGfE= github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a h1:M1bRpaZAn4GSsqu3hdK2R8H0AH9O6vqCTCbm2oAFGfE=
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a/go.mod h1:MkKY/CB98aVE4VxO63X5vTQKUgcn+3XP15LMASe3lYs= github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a/go.mod h1:MkKY/CB98aVE4VxO63X5vTQKUgcn+3XP15LMASe3lYs=
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ= github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ=
@ -154,10 +156,11 @@ github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0=
github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q=
github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q=
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd h1:r04MMPyLHj/QwZuMJ5+7tJcBr1AQjpiAK/rZWRrQT7o= github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI=
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e h1:SiEs4J3BKVIeaWrH3tKaz3QLZhJ68iJ/A4xrzIoE5+Y= github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
@ -279,6 +282,8 @@ github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORR
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg= github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg=
@ -304,6 +309,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/issue9/assert v1.3.2 h1:IaTa37u4m1fUuTH9K9ldO5IONKVDXjLiUO1T9vj0OF0= github.com/issue9/assert v1.3.2 h1:IaTa37u4m1fUuTH9K9ldO5IONKVDXjLiUO1T9vj0OF0=
github.com/issue9/assert v1.3.2/go.mod h1:9Ger+iz8X7r1zMYYwEhh++2wMGWcNN2oVI+zIQXxcio= github.com/issue9/assert v1.3.2/go.mod h1:9Ger+iz8X7r1zMYYwEhh++2wMGWcNN2oVI+zIQXxcio=
github.com/issue9/identicon v0.0.0-20160320065130-d36b54562f4c h1:A/PDn117UYld5mlxe58EpMguqpkeTMw5/FCo0ZPS/Ko= github.com/issue9/identicon v0.0.0-20160320065130-d36b54562f4c h1:A/PDn117UYld5mlxe58EpMguqpkeTMw5/FCo0ZPS/Ko=
@ -467,6 +473,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qq
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
@ -504,11 +512,13 @@ github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk= github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo= github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU= github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU=
github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4= github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4=
@ -529,8 +539,8 @@ github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 h1:HOxvxvnntLiPn1
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8= github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 h1:ZVvr38DYEyOPyelySqvF0I9I++85NnUMsWkroBDS4fs= github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ= github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM= github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
@ -538,6 +548,7 @@ github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ=
github.com/tstranex/u2f v1.0.0/go.mod h1:eahSLaqAS0zsIEv80+vXT7WanXs7MQQDg3j3wGBSayo= github.com/tstranex/u2f v1.0.0/go.mod h1:eahSLaqAS0zsIEv80+vXT7WanXs7MQQDg3j3wGBSayo=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 h1:SpoCl3+Pta5/ubQyF+Fmx65obtpfkyzeaOIneCE3MTw= github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 h1:SpoCl3+Pta5/ubQyF+Fmx65obtpfkyzeaOIneCE3MTw=
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1/go.mod h1:QaSeRctcea9fK6piJpAMCCPKxzJ01+xFcr2k1m3WRPU= github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1/go.mod h1:QaSeRctcea9fK6piJpAMCCPKxzJ01+xFcr2k1m3WRPU=
@ -549,8 +560,8 @@ github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 h1:Z79lyIznnziKA
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141/go.mod h1:TBwoao3Q4Eb/cp+dHbXDfRTrZSsj/k7kLr2j1oWRWC0= github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141/go.mod h1:TBwoao3Q4Eb/cp+dHbXDfRTrZSsj/k7kLr2j1oWRWC0=
github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw= github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 h1:E8u341JM/N8LCnPXBV6ZFD1RKo/j+qHl1XOqSV+GstA= github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70= github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4= github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
@ -574,6 +585,7 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180820150726-614d502a4dac/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180820150726-614d502a4dac/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@ -642,6 +654,7 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -660,6 +673,8 @@ golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY= golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 h1:/J2nHFg1MTqaRLFO7M+J78ASNsJoz3r0cvHBPQ77fsE=
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=

20
vendor/github.com/RoaringBitmap/roaring/.drone.yml generated vendored Normal file
View file

@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build

View file

@ -8,10 +8,12 @@ install:
notifications: notifications:
email: false email: false
go: go:
- 1.7.x - "1.7.x"
- 1.8.x - "1.8.x"
- 1.9.x - "1.9.x"
- 1.10.x - "1.10.x"
- "1.11.x"
- "1.12.x"
- tip - tip
# whitelist # whitelist
@ -21,10 +23,14 @@ branches:
script: script:
- goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test - goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test
- go test -race -run TestConcurrent* - go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build - GOARCH=arm64 go build
- GOARCH=386 go build - GOARCH=386 go build
- GOARCH=386 go test - GOARCH=386 go test
- GOARCH=arm go build - GOARCH=arm go build
- GOARCH=arm64 go build
matrix: matrix:
allow_failures: allow_failures:
- go: tip - go: tip

View file

@ -7,4 +7,5 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly), Tyson Maly (@tvmaly),
Will Glynn (@willglynn), Will Glynn (@willglynn),
Brent Pedersen (@brentp) Brent Pedersen (@brentp)
Maciej Biłas (@maciej) Maciej Biłas (@maciej),
Joe Nall (@joenall)

View file

@ -9,4 +9,8 @@ Will Glynn (@willglynn),
Brent Pedersen (@brentp), Brent Pedersen (@brentp),
Jason E. Aten (@glycerine), Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139), Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd) Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard),
Alexander Petrov (@alldroll)

View file

@ -200,3 +200,36 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
================================================================================
Portions of runcontainer.go are from the Go standard library, which is licensed
under:
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,4 +1,4 @@
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets .PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
@ -63,7 +63,7 @@ qa: fmtcheck test vet lint
# Get the dependencies # Get the dependencies
deps: deps:
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey GOPATH=$(GOPATH) go get github.com/stretchr/testify
GOPATH=$(GOPATH) go get github.com/willf/bitset GOPATH=$(GOPATH) go get github.com/willf/bitset
GOPATH=$(GOPATH) go get github.com/golang/lint/golint GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/mschoch/smat GOPATH=$(GOPATH) go get github.com/mschoch/smat
@ -97,18 +97,8 @@ nuke:
rm -rf ./target rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./... GOPATH=$(GOPATH) go clean -i ./...
rle:
cp rle.go rle16.go
perl -pi -e 's/32/16/g' rle16.go
cp rle_test.go rle16_test.go
perl -pi -e 's/32/16/g' rle16_test.go
backrle: ser:
cp rle16.go rle.go
perl -pi -e 's/16/32/g' rle.go
perl -pi -e 's/2032/2016/g' rle.go
ser: rle
go generate go generate
cover: cover:

View file

@ -1,4 +1,5 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring) roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
============= =============
This is a go version of the Roaring bitmap data structure. This is a go version of the Roaring bitmap data structure.
@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. [Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[lucene]: https://lucene.apache.org/ [lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/ [solr]: https://lucene.apache.org/solr/
[elasticsearch]: https://www.elastic.co/products/elasticsearch [elasticsearch]: https://www.elastic.co/products/elasticsearch
[druid]: http://druid.io/ [druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/ [spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com [opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent [cloudtorrent]: https://github.com/jpillora/cloud-torrent
@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r
Dependencies are fetched automatically by giving the `-t` flag to `go get`. Dependencies are fetched automatically by giving the `-t` flag to `go get`.
they include they include
- github.com/smartystreets/goconvey/convey
- github.com/willf/bitset - github.com/willf/bitset
- github.com/mschoch/smat - github.com/mschoch/smat
- github.com/glycerine/go-unsnap-stream - github.com/glycerine/go-unsnap-stream
@ -133,6 +133,7 @@ func main() {
if rb1.Equals(newrb) { if rb1.Equals(newrb) {
fmt.Println("I wrote the content to a byte stream and read it back.") fmt.Println("I wrote the content to a byte stream and read it back.")
} }
// you can iterate over bitmaps using Iterator(), ReverseIterator() or ManyIterator()
} }
``` ```
@ -206,7 +207,7 @@ You can use roaring with gore:
- go get -u github.com/motemen/gore - go get -u github.com/motemen/gore
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``. - Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github/RoaringBitmap/roaring - go get github.com/RoaringBitmap/roaring
```go ```go
$ gore $ gore

View file

@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
} }
} }
func (ac *arrayContainer) getShortIterator() shortIterable { func (ac *arrayContainer) getShortIterator() shortPeekable {
return &shortIterator{ac.content, 0} return &shortIterator{ac.content, 0}
} }
func (ac *arrayContainer) getReverseIterator() shortIterable {
return &reverseIterator{ac.content, len(ac.content) - 1}
}
func (ac *arrayContainer) getManyIterator() manyIterable { func (ac *arrayContainer) getManyIterator() manyIterable {
return &manyIterator{ac.content, 0} return &shortIterator{ac.content, 0}
} }
func (ac *arrayContainer) minimum() uint16 { func (ac *arrayContainer) minimum() uint16 {
@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,endx) // flip the values in the range [firstOfRange,endx)
func (ac *arrayContainer) not(firstOfRange, endx int) container { func (ac *arrayContainer) not(firstOfRange, endx int) container {
if firstOfRange >= endx { if firstOfRange >= endx {
//p("arrayContainer.not(): exiting early with ac.clone()")
return ac.clone() return ac.clone()
} }
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1] return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange] // flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container { func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
//p("arrayContainer.notClose(): exiting early with ac.clone()")
return ac.clone() return ac.clone()
} }
// determine the span of array indices to be affected^M // determine the span of array indices to be affected^M
startIndex := binarySearch(ac.content, uint16(firstOfRange)) startIndex := binarySearch(ac.content, uint16(firstOfRange))
//p("startIndex=%v", startIndex)
if startIndex < 0 { if startIndex < 0 {
startIndex = -startIndex - 1 startIndex = -startIndex - 1
} }
lastIndex := binarySearch(ac.content, uint16(lastOfRange)) lastIndex := binarySearch(ac.content, uint16(lastOfRange))
//p("lastIndex=%v", lastIndex)
if lastIndex < 0 { if lastIndex < 0 {
lastIndex = -lastIndex - 2 lastIndex = -lastIndex - 2
} }
@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
newValuesInRange := spanToBeFlipped - currentValuesInRange newValuesInRange := spanToBeFlipped - currentValuesInRange
cardinalityChange := newValuesInRange - currentValuesInRange cardinalityChange := newValuesInRange - currentValuesInRange
newCardinality := len(ac.content) + cardinalityChange newCardinality := len(ac.content) + cardinalityChange
//p("new card is %v", newCardinality)
if newCardinality > arrayDefaultMaxSize { if newCardinality > arrayDefaultMaxSize {
//p("new card over arrayDefaultMaxSize, so returning bitmap")
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1) return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
} }
answer := newArrayContainer() answer := newArrayContainer()
@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
} }
func (ac *arrayContainer) and(a container) container { func (ac *arrayContainer) and(a container) container {
//p("ac.and() called")
switch x := a.(type) { switch x := a.(type) {
case *arrayContainer: case *arrayContainer:
return ac.andArray(x) return ac.andArray(x)
@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container {
return ac.iandBitmap(x) return ac.iandBitmap(x)
case *runContainer16: case *runContainer16:
if x.isFull() { if x.isFull() {
return ac.clone() return ac
} }
return x.andArray(ac) return x.andArray(ac)
} }
@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange] // flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container { func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
//p("ac.inotClose() starting")
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange] if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
return ac return ac
} }
@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
if cardinalityChange > 0 { if cardinalityChange > 0 {
if newCardinality > len(ac.content) { if newCardinality > len(ac.content) {
if newCardinality > arrayDefaultMaxSize { if newCardinality > arrayDefaultMaxSize {
//p("ac.inotClose() converting to bitmap and doing inot there")
bcRet := ac.toBitmapContainer() bcRet := ac.toBitmapContainer()
bcRet.inot(firstOfRange, lastOfRange+1) bcRet.inot(firstOfRange, lastOfRange+1)
*ac = *bcRet.toArrayContainer() *ac = *bcRet.toArrayContainer()
@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
} }
} }
ac.content = ac.content[:newCardinality] ac.content = ac.content[:newCardinality]
//p("bottom of ac.inotClose(): returning ac")
return ac return ac
} }
@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container {
func (ac *arrayContainer) containerType() contype { func (ac *arrayContainer) containerType() contype {
return arrayContype return arrayContype
} }
// addOffset adds x to every value stored in ac and distributes the results
// over two new array containers. Sums for which highbits reports a non-zero
// value go to the second ("high") container; all other sums go to the first
// ("low") one. In both cases only lowbits of the sum is stored. ac itself is
// not modified.
func (ac *arrayContainer) addOffset(x uint16) []container {
	var lowPart, highPart arrayContainer
	offset := uint32(x)
	for idx := range ac.content {
		// Widen to 32 bits so the addition cannot wrap around.
		shifted := uint32(ac.content[idx]) + offset
		target := &lowPart
		if highbits(shifted) > 0 {
			target = &highPart
		}
		target.content = append(target.content, lowbits(shifted))
	}
	return []container{&lowPart, &highPart}
}

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp" import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) { func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) { func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 1 // map header, size 1
// write "content" // write "content"
@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) { func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 1 // map header, size 1
@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *arrayContainer) Msgsize() (s int) { func (z *arrayContainer) Msgsize() (s int) {
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size)) s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
return return

View file

@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0 return bcsi.i >= 0
} }
// peekNext returns the iterator's current position, truncated to uint16,
// without changing the iterator's state.
func (bcsi *bitmapContainerShortIterator) peekNext() uint16 {
	position := bcsi.i
	return uint16(position)
}
// advanceIfNeeded moves the iterator forward so that its current position is
// the result of NextSetBit(minval) on the underlying container. It does
// nothing when the iterator has no further value or when the current value is
// already at least minval.
func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) {
	if !bcsi.hasNext() {
		return
	}
	if bcsi.peekNext() >= minval {
		return
	}
	bcsi.i = bcsi.ptr.NextSetBit(int(minval))
}
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator { func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
return &bitmapContainerShortIterator{a, a.NextSetBit(0)} return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
} }
func (bc *bitmapContainer) getShortIterator() shortIterable { func (bc *bitmapContainer) getShortIterator() shortPeekable {
return newBitmapContainerShortIterator(bc) return newBitmapContainerShortIterator(bc)
} }
// reverseBitmapContainerShortIterator steps backwards through the set bits of
// a bitmapContainer; each call to next() moves to the previous set bit via
// PrevSetBit.
type reverseBitmapContainerShortIterator struct {
// ptr is the container being iterated.
ptr *bitmapContainer
// i is the bit position that next() will return; a negative value means
// the iterator is exhausted (see hasNext).
i int
}
// next returns the current position as a uint16 and then moves the iterator
// to the closest set bit strictly below it, as reported by PrevSetBit.
// It panics when the iterator is already exhausted (position -1).
func (bcsi *reverseBitmapContainerShortIterator) next() uint16 {
	current := bcsi.i
	if current == -1 {
		panic("reverseBitmapContainerShortIterator.next() going beyond what is available")
	}
	bcsi.i = bcsi.ptr.PrevSetBit(current - 1)
	return uint16(current)
}
// hasNext reports whether the iterator still points at a value, i.e. whether
// its position is non-negative.
func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool {
	exhausted := bcsi.i < 0
	return !exhausted
}
// newReverseBitmapContainerShortIterator builds a reverse iterator over a.
// The iterator starts at a.maximum(); when a's cardinality field is zero it
// starts exhausted (position -1) and maximum() is not consulted.
func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator {
	start := -1
	if a.cardinality != 0 {
		start = int(a.maximum())
	}
	return &reverseBitmapContainerShortIterator{ptr: a, i: start}
}
// getReverseIterator returns a new reverseBitmapContainerShortIterator over
// bc, exposed through the shortIterable interface.
func (bc *bitmapContainer) getReverseIterator() shortIterable {
	var it shortIterable = newReverseBitmapContainerShortIterator(bc)
	return it
}
type bitmapContainerManyIterator struct { type bitmapContainerManyIterator struct {
ptr *bitmapContainer ptr *bitmapContainer
base int base int
@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
for n < len(buf) { for n < len(buf) {
if bitset == 0 { if bitset == 0 {
base += 1 base++
if base >= len(bcmi.ptr.bitmap) { if base >= len(bcmi.ptr.bitmap) {
bcmi.base = base bcmi.base = base
bcmi.bitset = bitset bcmi.bitset = bitset
@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int {
func bitmapEquals(a, b []uint64) bool { func bitmapEquals(a, b []uint64) bool {
if len(a) != len(b) { if len(a) != len(b) {
//p("bitmaps differ on length. len(a)=%v; len(b)=%v", len(a), len(b))
return false return false
} }
for i, v := range a { for i, v := range a {
if v != b[i] { if v != b[i] {
//p("bitmaps differ on element i=%v", i)
return false return false
} }
} }
//p("bitmapEquals returning true")
return true return true
} }
@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui
func (bc *bitmapContainer) equals(o container) bool { func (bc *bitmapContainer) equals(o container) bool {
srb, ok := o.(*bitmapContainer) srb, ok := o.(*bitmapContainer)
if ok { if ok {
//p("bitmapContainers.equals: both are bitmapContainers")
if srb.cardinality != bc.cardinality { if srb.cardinality != bc.cardinality {
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality)
return false return false
} }
return bitmapEquals(bc.bitmap, srb.bitmap) return bitmapEquals(bc.bitmap, srb.bitmap)
@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container {
// iremove returns true if i was found. // iremove returns true if i was found.
func (bc *bitmapContainer) iremove(i uint16) bool { func (bc *bitmapContainer) iremove(i uint16) bool {
/* branchless code
w := bc.bitmap[i>>6]
mask := uint64(1) << (i % 64)
neww := w &^ mask
bc.cardinality -= int((w ^ neww) >> (i % 64))
bc.bitmap[i>>6] = neww */
if bc.contains(i) { if bc.contains(i) {
bc.cardinality-- bc.cardinality--
bc.bitmap[i/64] &^= (uint64(1) << (i % 64)) bc.bitmap[i/64] &^= (uint64(1) << (i % 64))
@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container
// flip all values in range [firstOfRange,endx) // flip all values in range [firstOfRange,endx)
func (bc *bitmapContainer) inot(firstOfRange, endx int) container { func (bc *bitmapContainer) inot(firstOfRange, endx int) container {
p("bc.inot() called with [%v, %v)", firstOfRange, endx)
if endx-firstOfRange == maxCapacity { if endx-firstOfRange == maxCapacity {
//p("endx-firstOfRange == maxCapacity")
flipBitmapRange(bc.bitmap, firstOfRange, endx) flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.cardinality = maxCapacity - bc.cardinality bc.cardinality = maxCapacity - bc.cardinality
//p("bc.cardinality is now %v", bc.cardinality)
} else if endx-firstOfRange > maxCapacity/2 { } else if endx-firstOfRange > maxCapacity/2 {
//p("endx-firstOfRange > maxCapacity/2")
flipBitmapRange(bc.bitmap, firstOfRange, endx) flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.computeCardinality() bc.computeCardinality()
} else { } else {
@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container {
func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container { func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container {
answer := bc answer := bc
c := value2.getCardinality() c := value2.getCardinality()
for k := 0; k < c; k++ { for k := 0; k+3 < c; k += 4 {
content := (*[4]uint16)(unsafe.Pointer(&value2.content[k]))
vc0 := content[0]
i0 := uint(vc0) >> 6
answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64))
vc1 := content[1]
i1 := uint(vc1) >> 6
answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64))
vc2 := content[2]
i2 := uint(vc2) >> 6
answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64))
vc3 := content[3]
i3 := uint(vc3) >> 6
answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64))
}
for k := c &^ 3; k < c; k++ {
vc := value2.content[k] vc := value2.content[k]
i := uint(vc) >> 6 i := uint(vc) >> 6
answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64)) answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64))
} }
answer.cardinality = invalidCardinality answer.cardinality = invalidCardinality
return answer return answer
} }
@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container {
} }
func (bc *bitmapContainer) iandNot(a container) container { func (bc *bitmapContainer) iandNot(a container) container {
//p("bitmapContainer.iandNot() starting")
switch x := a.(type) { switch x := a.(type) {
case *arrayContainer: case *arrayContainer:
return bc.iandNotArray(x) return bc.iandNotArray(x)
@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container {
return ac return ac
} }
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer { func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container {
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap)) newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap))
for k := 0; k < len(bc.bitmap); k++ { for k := 0; k < len(bc.bitmap); k++ {
bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k] bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k]
} }
bc.cardinality = newCardinality bc.cardinality = newCardinality
if bc.getCardinality() <= arrayDefaultMaxSize {
return bc.toArrayContainer()
}
return bc return bc
} }
@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int {
return -1 return -1
} }
// PrevSetBit returns the position of the highest set bit that is less than or
// equal to i, or -1 when there is none. It also returns -1 when i is negative
// or when i lies beyond the last word of bc.bitmap.
func (bc *bitmapContainer) PrevSetBit(i int) int {
	if i < 0 {
		return -1
	}
	word := i / 64
	if word >= len(bc.bitmap) {
		return -1
	}

	// Shift the bits above position i out of the top of the word so that bit i
	// becomes the most significant one; leading zeros then count the distance
	// down from i to the nearest set bit.
	if masked := bc.bitmap[word] << uint(63-i%64); masked != 0 {
		return i - countLeadingZeros(masked)
	}

	// Nothing at or below i in its own word: scan the lower words downwards.
	for word--; word >= 0; word-- {
		if cur := bc.bitmap[word]; cur != 0 {
			return word*64 + 63 - countLeadingZeros(cur)
		}
	}
	return -1
}
// reference the java implementation // reference the java implementation
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892 // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892
// //
@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
func (bc *bitmapContainer) containerType() contype { func (bc *bitmapContainer) containerType() contype {
return bitmapContype return bitmapContype
} }
// addOffset returns the contents of bc shifted upward by x positions, split
// over two freshly allocated bitmap containers: the first ("low") receives the
// bits whose shifted position still fits in this container's 1024-word range,
// the second ("high") receives the bits that overflow past it, stored relative
// to the start of the next range. bc itself is not modified, and both results
// have their cardinality recomputed before being returned.
//
// The code indexes bc.bitmap up to word 1023, so it assumes bc.bitmap holds
// 1024 words.
func (bc *bitmapContainer) addOffset(x uint16) []container {
	low := newBitmapContainer()
	high := newBitmapContainer()

	// Split the offset into a whole-word shift and a shift within a word.
	b := uint32(x) >> 6
	i := uint32(x) % 64
	// end is the first source word whose shifted position falls into high.
	end := uint32(1024) - b

	if i == 0 {
		// Word-aligned offset: the words move as-is.
		copy(low.bitmap[b:], bc.bitmap[:end])
		copy(high.bitmap[:b], bc.bitmap[end:])
	} else {
		// Each destination word combines the low part of source word k with
		// the bits carried out of the top of source word k-1. Both parts must
		// always be merged: using the carry only when the shifted word is zero
		// drops every bit that crosses a word boundary into a non-empty word.
		carry := 64 - i
		low.bitmap[b] = bc.bitmap[0] << i
		for k := uint32(1); k < end; k++ {
			low.bitmap[b+k] = bc.bitmap[k]<<i | bc.bitmap[k-1]>>carry
		}
		for k := end; k < 1024; k++ {
			high.bitmap[k-end] = bc.bitmap[k]<<i | bc.bitmap[k-1]>>carry
		}
		// The top bits of the last source word spill into one further word.
		high.bitmap[b] = bc.bitmap[1023] >> carry
	}

	low.computeCardinality()
	high.computeCardinality()
	return []container{low, high}
}

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp" import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) { func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) { func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2 // map header, size 2
// write "cardinality" // write "cardinality"
@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) { func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 2 // map header, size 2
@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainer) Msgsize() (s int) { func (z *bitmapContainer) Msgsize() (s int) {
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size)) s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size))
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) { func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) { func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2 // map header, size 2
// write "ptr" // write "ptr"
@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) { func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 2 // map header, size 2
@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainerShortIterator) Msgsize() (s int) { func (z *bitmapContainerShortIterator) Msgsize() (s int) {
s = 1 + 4 s = 1 + 4
if z.ptr == nil { if z.ptr == nil {

161
vendor/github.com/RoaringBitmap/roaring/byte_input.go generated vendored Normal file
View file

@ -0,0 +1,161 @@
package roaring
import (
"encoding/binary"
"io"
)
// byteInput is a sequential source of bytes. This file provides two
// implementations: byteBuffer (backed by a []byte) and byteInputAdapter
// (backed by an io.Reader).
type byteInput interface {
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
next(n int) ([]byte, error)
// readUInt32 reads a uint32 in little-endian byte order.
readUInt32() (uint32, error)
// readUInt16 reads a uint16 in little-endian byte order.
readUInt16() (uint16, error)
// getReadBytes returns the number of bytes consumed so far.
getReadBytes() int64
// skipBytes skips exactly n bytes
skipBytes(n int) error
}
// newByteInputFromReader wraps reader in a byteInputAdapter whose read
// counter starts at zero.
func newByteInputFromReader(reader io.Reader) byteInput {
	adapter := new(byteInputAdapter)
	adapter.r = reader
	return adapter
}
// newByteInput wraps buf in a byteBuffer positioned at offset zero.
// The slice is stored as-is, not copied.
func newByteInput(buf []byte) byteInput {
	in := new(byteBuffer)
	in.buf = buf
	return in
}
// byteInputError is a constant-friendly error type for byteInput misuse.
type byteInputError string

// Error implements the error interface.
func (e byteInputError) Error() string { return string(e) }

// errNegativeByteCount is returned when a caller asks to read or skip a
// negative number of bytes.
const errNegativeByteCount = byteInputError("roaring: negative byte count")

// byteBuffer is a byteInput backed by an in-memory slice. buf is the data
// and off is the index of the next unread byte.
type byteBuffer struct {
	buf []byte
	off int
}

// next returns a slice containing the next n bytes and advances past them.
// The returned slice aliases the underlying buffer; it is not a copy.
// If fewer than n bytes remain, io.ErrUnexpectedEOF is returned.
// A negative n yields errNegativeByteCount and leaves the offset untouched
// (previously it caused a slice-bounds panic).
func (b *byteBuffer) next(n int) ([]byte, error) {
	if n < 0 {
		return nil, errNegativeByteCount
	}
	m := len(b.buf) - b.off
	if n > m {
		return nil, io.ErrUnexpectedEOF
	}

	data := b.buf[b.off : b.off+n]
	b.off += n

	return data, nil
}

// readUInt32 reads uint32 with LittleEndian order
func (b *byteBuffer) readUInt32() (uint32, error) {
	if len(b.buf)-b.off < 4 {
		return 0, io.ErrUnexpectedEOF
	}

	v := binary.LittleEndian.Uint32(b.buf[b.off:])
	b.off += 4

	return v, nil
}

// readUInt16 reads uint16 with LittleEndian order
func (b *byteBuffer) readUInt16() (uint16, error) {
	if len(b.buf)-b.off < 2 {
		return 0, io.ErrUnexpectedEOF
	}

	v := binary.LittleEndian.Uint16(b.buf[b.off:])
	b.off += 2

	return v, nil
}

// getReadBytes returns the number of bytes consumed so far (the offset).
func (b *byteBuffer) getReadBytes() int64 {
	return int64(b.off)
}

// skipBytes skips exactly n bytes. It returns io.ErrUnexpectedEOF when
// fewer than n bytes remain and errNegativeByteCount for a negative n
// (previously a negative n silently moved the offset backwards).
func (b *byteBuffer) skipBytes(n int) error {
	if n < 0 {
		return errNegativeByteCount
	}
	m := len(b.buf) - b.off
	if n > m {
		return io.ErrUnexpectedEOF
	}

	b.off += n

	return nil
}

// reset points the buffer at a new byte slice and rewinds the offset.
func (b *byteBuffer) reset(buf []byte) {
	b.buf = buf
	b.off = 0
}

// byteInputAdapter is a byteInput backed by an io.Reader. readBytes counts
// the bytes consumed from r since construction or the last reset.
type byteInputAdapter struct {
	r         io.Reader
	readBytes int
}

// next reads exactly n bytes from the underlying reader into a freshly
// allocated slice. Bytes read before a failure are still added to the
// counter. A negative n yields errNegativeByteCount instead of the
// makeslice panic the allocation would otherwise trigger.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
	if n < 0 {
		return nil, errNegativeByteCount
	}
	buf := make([]byte, n)
	// io.ReadFull is io.ReadAtLeast with min == len(buf).
	m, err := io.ReadFull(b.r, buf)
	b.readBytes += m

	if err != nil {
		return nil, err
	}

	return buf, nil
}

// readUInt32 reads uint32 with LittleEndian order
func (b *byteInputAdapter) readUInt32() (uint32, error) {
	buf, err := b.next(4)
	if err != nil {
		return 0, err
	}

	return binary.LittleEndian.Uint32(buf), nil
}

// readUInt16 reads uint16 with LittleEndian order
func (b *byteInputAdapter) readUInt16() (uint16, error) {
	buf, err := b.next(2)
	if err != nil {
		return 0, err
	}

	return binary.LittleEndian.Uint16(buf), nil
}

// getReadBytes returns the number of bytes consumed so far.
func (b *byteInputAdapter) getReadBytes() int64 {
	return int64(b.readBytes)
}

// skipBytes skips exactly n bytes by reading and discarding them.
func (b *byteInputAdapter) skipBytes(n int) error {
	_, err := b.next(n)

	return err
}

// reset points the adapter at a new stream and zeroes the byte counter.
func (b *byteInputAdapter) reset(stream io.Reader) {
	b.r = stream
	b.readBytes = 0
}

11
vendor/github.com/RoaringBitmap/roaring/clz.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
// countLeadingZeros returns the number of leading zero bits in x by
// delegating to math/bits.LeadingZeros64 (available from Go 1.9).
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}

36
vendor/github.com/RoaringBitmap/roaring/clz_compat.go generated vendored Normal file
View file

@ -0,0 +1,36 @@
// +build !go1.9
package roaring
// countLeadingZeros returns the number of consecutive most significant zero
// bits of i; the result is 64 when i is zero. It is the fallback used when
// math/bits is unavailable.
func countLeadingZeros(i uint64) int {
	if i == 0 {
		return 64
	}
	zeros := 0
	// Binary search: test the top 32, 16, 8, 4, 2, 1 bits in turn. Whenever
	// they are all zero, count them and shift them out.
	for width := uint(32); width > 0; width >>= 1 {
		if i>>(64-width) == 0 {
			zeros += int(width)
			i <<= width
		}
	}
	return zeros
}

16
vendor/github.com/RoaringBitmap/roaring/go.mod generated vendored Normal file
View file

@ -0,0 +1,16 @@
module github.com/RoaringBitmap/roaring
go 1.12
require (
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
github.com/golang/snappy v0.0.1 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae
github.com/philhofer/fwd v1.0.0 // indirect
github.com/stretchr/testify v1.4.0
github.com/tinylib/msgp v1.1.0
github.com/willf/bitset v1.1.10
)

30
vendor/github.com/RoaringBitmap/roaring/go.sum generated vendored Normal file
View file

@ -0,0 +1,30 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -4,12 +4,7 @@ type manyIterable interface {
nextMany(hs uint32, buf []uint32) int nextMany(hs uint32, buf []uint32) int
} }
type manyIterator struct { func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
slice []uint16
loc int
}
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
n := 0 n := 0
l := si.loc l := si.loc
s := si.slice s := si.slice

View file

@ -143,8 +143,8 @@ func toBitmapContainer(c container) container {
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) { func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
expectedKeys := -1 expectedKeys := -1
appendedKeys := 0 appendedKeys := 0
keys := make([]uint16, 0) var keys []uint16
containers := make([]container, 0) var containers []container
for appendedKeys != expectedKeys { for appendedKeys != expectedKeys {
select { select {
case item := <-resultChan: case item := <-resultChan:
@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
// (if it is set to 0, a default number of workers is chosen) // (if it is set to 0, a default number of workers is chosen)
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap { func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
var lKey uint16 = MaxUint16 var lKey uint16 = MaxUint16
var hKey uint16 = 0 var hKey uint16
bitmapsFiltered := bitmaps[:0] bitmapsFiltered := bitmaps[:0]
for _, b := range bitmaps { for _, b := range bitmaps {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,163 +0,0 @@
package roaring
import (
"fmt"
)
// common to rle32.go and rle16.go
// rleVerbose gates the diagnostic output of p(). According to the original
// note, the testing package sets it from testing.Verbose().
var rleVerbose bool

// p is a debugging shorthand: when rleVerbose is set it prints the formatted
// message via fmt.Printf, wrapped in a leading and a trailing newline.
// When rleVerbose is false it does nothing.
func p(format string, args ...interface{}) {
	if !rleVerbose {
		return
	}
	fmt.Printf("\n"+format+"\n", args...)
}
const (
	// MaxUint32 is the largest uint32 value (2^32 - 1).
	MaxUint32 = 1<<32 - 1

	// MaxUint16 is the largest 16 bit unsigned int (2^16 - 1).
	// This is the largest value an interval16 can store.
	MaxUint16 = 1<<16 - 1
)
// searchOptions allows us to accelerate runContainer32.search with
// prior knowledge of (mostly lower) bounds. This is used by Union
// and Intersect.
type searchOptions struct {
// startIndex is the index at which the search begins (instead of 0).
startIndex int64
// endxIndex is the exclusive upper bound used instead of len(rc.iv).
// endxIndex == 0 means ignore the bound and use
// endxIndex == n == len(rc.iv), which is also
// naturally the default for search()
// when opt = nil.
endxIndex int64
}
// And finds the intersection of rc and b: a new Bitmap holding every value
// that lies in one of rc's runs and is also contained in b.
func (rc *runContainer32) And(b *Bitmap) *Bitmap {
	out := NewBitmap()
	for _, p := range rc.iv {
		// Count in uint64: a uint32 counter wraps to 0 after MaxUint32, so
		// `i <= p.last` could never become false for a run ending there.
		for i, last := uint64(p.start), uint64(p.last); i <= last; i++ {
			v := uint32(i)
			if b.Contains(v) {
				out.Add(v)
			}
		}
	}
	return out
}
// Xor returns the exclusive-or of rc and b. It starts from a clone of b and
// toggles every value covered by rc's runs: values already present are
// removed, the others are added.
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap {
	out := b.Clone()
	for _, p := range rc.iv {
		// Count in uint64 so that a run ending at MaxUint32 terminates and
		// the exclusive end i+1 passed to RemoveRange cannot wrap to 0.
		for i, last := uint64(p.start), uint64(p.last); i <= last; i++ {
			v := uint32(i)
			if out.Contains(v) {
				out.RemoveRange(i, i+1)
			} else {
				out.Add(v)
			}
		}
	}
	return out
}
// Or returns the union of rc and b: a clone of b to which every value
// covered by rc's runs has been added.
func (rc *runContainer32) Or(b *Bitmap) *Bitmap {
	out := b.Clone()
	for _, p := range rc.iv {
		// Count in uint64: a uint32 counter would wrap after MaxUint32 and
		// loop forever for a run ending there.
		for i, last := uint64(p.start), uint64(p.last); i <= last; i++ {
			out.Add(uint32(i))
		}
	}
	return out
}
// trial is used in the randomized testing of runContainers.
// The field notes below are carried over from the original author; the
// tests that consume them are not visible in this file.
type trial struct {
n int
percentFill float64
ntrial int
// percentDelete: the original notes read "only in the union test" and
// "only subtract test" — NOTE(review): confirm which tests read this field.
percentDelete float64
// only in 067 randomized operations
// we do this + 1 passes
numRandomOpsPass int
// allow sampling range control
// only recent tests respect this.
srang *interval16
}
// And finds the intersection of rc and b: a new Bitmap holding every value
// that lies in one of rc's runs and is also contained in b.
func (rc *runContainer16) And(b *Bitmap) *Bitmap {
	out := NewBitmap()
	for _, p := range rc.iv {
		// Count in int: a uint16 counter wraps to 0 after 65535, so the
		// loop would never end for a run whose last value is MaxUint16.
		for i, last := int(p.start), int(p.last()); i <= last; i++ {
			v := uint32(i)
			if b.Contains(v) {
				out.Add(v)
			}
		}
	}
	return out
}
// Xor returns the exclusive-or of rc and b. It starts from a clone of b and
// toggles every value covered by rc's runs: values already present are
// removed, the others are added.
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
	out := b.Clone()
	for _, p := range rc.iv {
		// Count in int: a uint16 counter wraps to 0 after 65535, so the
		// loop would never end for a run whose last value is MaxUint16.
		for i, last := int(p.start), int(p.last()); i <= last; i++ {
			w := uint32(i)
			if out.Contains(w) {
				out.RemoveRange(uint64(w), uint64(w+1))
			} else {
				out.Add(w)
			}
		}
	}
	return out
}
// Or returns the union of rc and b: a clone of b to which every value
// covered by rc's runs has been added.
func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
	out := b.Clone()
	for _, p := range rc.iv {
		// Count in int: a uint16 counter wraps to 0 after 65535, so the
		// loop would never end for a run whose last value is MaxUint16.
		for i, last := int(p.start), int(p.last()); i <= last; i++ {
			out.Add(uint32(i))
		}
	}
	return out
}
//func (rc *runContainer32) and(container) container {
// panic("TODO. not yet implemented")
//}
// serializedSizeInBytes returns the number of bytes this runContainer16
// occupies in the Roaring serialization format, as specified at
// https://github.com/RoaringBitmap/RoaringFormatSpec/
func (rc *runContainer16) serializedSizeInBytes() int {
	const (
		runCountBytes = 2 // the number of runs, stored in one uint16
		bytesPerRun   = 4 // each run needs two more uint16
	)
	return runCountBytes + bytesPerRun*len(rc.iv)
}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer32: 4 bytes plus 8 bytes per run.
func (rc *runContainer32) serializedSizeInBytes() int {
	const (
		headerBytes = 4 // presumably a uint32 run count — mirrors the 16-bit variant, confirm
		bytesPerRun = 8
	)
	return headerBytes + bytesPerRun*len(rc.iv)
}

View file

@ -1,695 +0,0 @@
package roaring
///////////////////////////////////////////////////
//
// container interface methods for runContainer16
//
///////////////////////////////////////////////////
import (
"fmt"
)
// compile time verify we meet interface requirements
var _ container = &runContainer16{}
// clone returns a new container built from rc's intervals via
// newRunContainer16CopyIv.
func (rc *runContainer16) clone() container {
return newRunContainer16CopyIv(rc.iv)
}
// minimum returns the start of the first run. It indexes rc.iv[0] without a
// length check, so it panics on an empty container.
func (rc *runContainer16) minimum() uint16 {
return rc.iv[0].start // assume not empty
}
// maximum returns the last value of the final run. It indexes the last
// element of rc.iv without a length check, so it panics on an empty container.
func (rc *runContainer16) maximum() uint16 {
return rc.iv[len(rc.iv)-1].last() // assume not empty
}
// isFull reports whether rc consists of exactly one run that spans
// 0 through MaxUint16.
func (rc *runContainer16) isFull() bool {
	if len(rc.iv) != 1 {
		return false
	}
	only := rc.iv[0]
	return only.start == 0 && only.last() == MaxUint16
}
// and returns the intersection of rc and a. When rc is full the result is
// simply a clone of a; otherwise the work is dispatched on a's concrete
// type. It panics for an unknown container type.
func (rc *runContainer16) and(a container) container {
	if rc.isFull() {
		return a.clone()
	}
	switch other := a.(type) {
	case *bitmapContainer:
		return rc.andBitmapContainer(other)
	case *arrayContainer:
		return rc.andArray(other)
	case *runContainer16:
		return rc.intersect(other)
	default:
		panic("unsupported container type")
	}
}
// andCardinality returns the size of the intersection of rc and a,
// dispatching on a's concrete type. It panics for an unknown container type.
func (rc *runContainer16) andCardinality(a container) int {
	switch other := a.(type) {
	case *bitmapContainer:
		return rc.andBitmapContainerCardinality(other)
	case *arrayContainer:
		return rc.andArrayCardinality(other)
	case *runContainer16:
		return int(rc.intersectCardinality(other))
	default:
		panic("unsupported container type")
	}
}
// andBitmapContainer finds the intersection of rc and bc by first expanding
// rc into a bitmap container and then intersecting the two bitmaps.
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
	return newBitmapContainerFromRun(rc).andBitmap(bc)
}
// andArrayCardinality counts how many values of ac fall inside one of rc's
// runs. Both sequences are walked once, in order.
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
	total := ac.getCardinality()
	if total == 0 {
		return 0 // won't happen in actual code
	}
	matched := 0
	idx := 0
	for _, run := range rc.iv {
		// Skip array values that precede this run.
		for idx < total && ac.content[idx] < run.start {
			idx++
		}
		// Count array values that fall inside this run.
		for idx < total && ac.content[idx] <= run.last() {
			matched++
			idx++
		}
		if idx == total {
			break
		}
	}
	return matched
}
// iand intersects rc with a, dispatching on a's concrete type. When rc is
// full it returns a clone of a. For an arrayContainer argument the result
// comes from andArray rather than from an in-place update. It panics for an
// unknown container type.
func (rc *runContainer16) iand(a container) container {
	if rc.isFull() {
		return a.clone()
	}
	switch other := a.(type) {
	case *bitmapContainer:
		return rc.iandBitmapContainer(other)
	case *arrayContainer:
		return rc.andArray(other)
	case *runContainer16:
		return rc.inplaceIntersect(other)
	default:
		panic("unsupported container type")
	}
}
// inplaceIntersect overwrites rc with the intersection of rc and rc2 and
// returns rc. The intersection is computed into a new container first.
// TODO: optimize by doing less allocation, possibly?
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
	*rc = *rc.intersect(rc2)
	return rc
}
// iandBitmapContainer overwrites rc with the intersection of rc and bc,
// converted back to a run container, and returns rc.
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
	*rc = *newRunContainer16FromContainer(rc.andBitmapContainer(bc))
	return rc
}
// andArray returns a new arrayContainer holding the values of ac that fall
// inside one of rc's runs. An rc without runs yields an empty array container.
func (rc *runContainer16) andArray(ac *arrayContainer) container {
if len(rc.iv) == 0 {
return newArrayContainer()
}
acCardinality := ac.getCardinality()
c := newArrayContainerCapacity(acCardinality)
// rlePos indexes the current run, arrayPos the current array element.
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
iv := rc.iv[rlePos]
arrayVal := ac.content[arrayPos]
// Skip runs that end before the current array value; stop once the runs
// are exhausted.
for iv.last() < arrayVal {
rlePos++
if rlePos == len(rc.iv) {
return c
}
iv = rc.iv[rlePos]
}
if iv.start > arrayVal {
// The value lies in the gap before this run. Presumably advanceUntil
// returns the index of the first element >= iv.start — confirm against
// its definition.
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
} else {
// iv.start <= arrayVal <= iv.last(): the value is inside the run.
c.content = append(c.content, arrayVal)
arrayPos++
}
}
return c
}
// andNot returns rc minus a, dispatching on a's concrete type.
// It panics for an unknown container type.
func (rc *runContainer16) andNot(a container) container {
	switch other := a.(type) {
	case *runContainer16:
		return rc.andNotRunContainer16(other)
	case *bitmapContainer:
		return rc.andNotBitmap(other)
	case *arrayContainer:
		return rc.andNotArray(other)
	default:
		panic("unsupported container type")
	}
}
// fillLeastSignificant16bits writes every value covered by rc's runs into x,
// starting at index i, each OR-ed with mask.
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
	pos := i
	for _, run := range rc.iv {
		length := run.runlen()
		base := int64(run.start)
		for off := int64(0); off < length; off++ {
			x[pos] = uint32(base+off) | mask
			pos++
		}
	}
}
// getShortIterator returns the iterator produced by newRunIterator16 as a
// shortIterable.
func (rc *runContainer16) getShortIterator() shortIterable {
return rc.newRunIterator16()
}
// getManyIterator returns the iterator produced by newManyRunIterator16 as
// a manyIterable.
func (rc *runContainer16) getManyIterator() manyIterable {
return rc.newManyRunIterator16()
}
// iaddRange adds the values in the range [firstOfRange, endx) to rc in
// place and returns rc. endx is still able to express 2^16 because it is an
// int, not a uint16. It panics when firstOfRange >= endx.
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
	if firstOfRange >= endx {
		panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
	}
	span := interval16{
		start:  uint16(firstOfRange),
		length: uint16(endx - 1 - firstOfRange),
	}
	merged := rc.union(newRunContainer16TakeOwnership([]interval16{span}))
	*rc = *merged
	return rc
}
// iremoveRange removes the values in the range [firstOfRange, endx) from rc
// in place and returns rc. An empty or inverted range panics; it is not
// treated as a no-op.
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
	if firstOfRange >= endx {
		msg := fmt.Sprintf("request to iremove empty set [%v, %v),"+
			" nothing to do.", firstOfRange, endx)
		panic(msg)
	}
	rc.isubtract(newInterval16Range(uint16(firstOfRange), uint16(endx-1)))
	return rc
}
// not returns a container with the values in the range [firstOfRange, endx)
// flipped. It validates the range (panicking when firstOfRange >= endx) and
// delegates to Not, so rc itself is left unchanged.
func (rc *runContainer16) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
}
return rc.Not(firstOfRange, endx)
}
// Not flips the values in the range [firstOfRange,endx).
// This is not inplace. Only the returned value has the flipped bits.
// It panics when firstOfRange >= endx.
//
// Currently implemented as (!A intersect B) union (A minus B),
// where A is rc, and B is the supplied [firstOfRange, endx) interval.
//
// TODO(time optimization): convert this to a single pass
// algorithm by copying AndNotRunContainer16() and modifying it.
// Current routine is correct but
// makes 2 more passes through the arrays than should be
// strictly necessary. Measure both ways though--this may not matter.
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
	// The original repeated this condition in a second branch returning
	// rc.Clone(); that branch was unreachable after the panic and is gone.
	if firstOfRange >= endx {
		panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
	}

	a := rc
	// algo:
	// (!A intersect B) union (A minus B)

	nota := a.invert()

	bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
	b := newRunContainer16TakeOwnership(bs)

	notAintersectB := nota.intersect(b)

	aMinusB := a.AndNotRunContainer16(b)

	return notAintersectB.union(aMinusB)
}
// equals is now logical equals; it does not require the
// same underlying container type.
//
// When o is also a *runContainer16 the interval slices are compared element
// by element. Otherwise the cardinalities are compared first and then both
// containers are iterated in lock-step.
func (rc *runContainer16) equals(o container) bool {
	// The original retried this exact type assertion a second time when it
	// failed; the retry could never succeed and has been dropped.
	if srb, ok := o.(*runContainer16); ok {
		// Check if the containers are the same object.
		if rc == srb {
			return true
		}

		if len(srb.iv) != len(rc.iv) {
			return false
		}

		for i, v := range rc.iv {
			if v != srb.iv[i] {
				return false
			}
		}
		return true
	}

	// use generic comparison
	if o.getCardinality() != rc.getCardinality() {
		return false
	}
	rit := rc.getShortIterator()
	bit := o.getShortIterator()

	// Cardinalities match, so both iterators yield the same number of values.
	for rit.hasNext() {
		if bit.next() != rit.next() {
			return false
		}
	}
	return true
}
// iaddReturnMinimized adds x to rc via Add and returns rc itself; no
// conversion to another container type is attempted here.
func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
rc.Add(x)
return rc
}
// iadd adds x to rc and returns the result of Add as wasNew.
func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
return rc.Add(x)
}
// iremoveReturnMinimized removes x from rc via removeKey and returns rc
// itself; no conversion to another container type is attempted here.
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
rc.removeKey(x)
return rc
}
// iremove removes x from rc and returns the result of removeKey.
func (rc *runContainer16) iremove(x uint16) bool {
return rc.removeKey(x)
}
// or returns the union of rc and a. A full rc short-circuits to a clone of
// rc; otherwise the work is dispatched on a's concrete type. It panics for
// an unknown container type.
func (rc *runContainer16) or(a container) container {
	if rc.isFull() {
		return rc.clone()
	}
	switch other := a.(type) {
	case *bitmapContainer:
		return rc.orBitmapContainer(other)
	case *arrayContainer:
		return rc.orArray(other)
	case *runContainer16:
		return rc.union(other)
	default:
		panic("unsupported container type")
	}
}
// orCardinality returns the size of the union of rc and a, dispatching on
// a's concrete type. It panics for an unknown container type.
func (rc *runContainer16) orCardinality(a container) int {
	switch other := a.(type) {
	case *bitmapContainer:
		return rc.orBitmapContainerCardinality(other)
	case *arrayContainer:
		return rc.orArrayCardinality(other)
	case *runContainer16:
		return int(rc.unionCardinality(other))
	default:
		panic("unsupported container type")
	}
}
// orBitmapContainer finds the union of rc and bc: rc is expanded into a
// fresh bitmap container, into which bc is then OR-ed.
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
	return newBitmapContainerFromRun(rc).iorBitmap(bc)
}
// andBitmapContainerCardinality returns the size of the intersection of rc
// and bc by summing bc's cardinality over each of rc's runs.
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
	total := 0
	for _, run := range rc.iv {
		// getCardinalityInRange takes an exclusive end, hence last()+1.
		total += bc.getCardinalityInRange(uint(run.start), uint(run.last())+1)
	}
	return total
}
// orBitmapContainerCardinality returns the size of the union of rc and bc,
// computed by inclusion-exclusion: |rc| + |bc| - |rc AND bc|.
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
}
// orArray finds the union of rc and ac by converting both operands to
// bitmap containers and OR-ing them.
func (rc *runContainer16) orArray(ac *arrayContainer) container {
	return newBitmapContainerFromRun(rc).orBitmap(ac.toBitmapContainer())
}
// orArrayCardinality returns the size of the union of rc and ac, computed
// by inclusion-exclusion: |ac| + |rc| - |rc AND ac|.
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
}
// ior unions a into rc. A full rc is returned unchanged; otherwise the work
// is dispatched on a's concrete type. It panics for an unknown container
// type.
func (rc *runContainer16) ior(a container) container {
	if rc.isFull() {
		return rc
	}
	switch other := a.(type) {
	case *bitmapContainer:
		return rc.iorBitmapContainer(other)
	case *arrayContainer:
		return rc.iorArray(other)
	case *runContainer16:
		return rc.inplaceUnion(other)
	default:
		panic("unsupported container type")
	}
}
// inplaceUnion adds every value covered by rc2's runs to rc, one value at a
// time, and returns rc.
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
	p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv))
	for _, run := range rc2.iv {
		// int64 counter: a uint16 would wrap for a run ending at MaxUint16.
		for v, end := int64(run.start), int64(run.last()); v <= end; v++ {
			rc.Add(uint16(v))
		}
	}
	return rc
}
// iorBitmapContainer adds every value of bc to rc, one at a time, and
// returns rc.
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
	for values := bc.getShortIterator(); values.hasNext(); {
		rc.Add(values.next())
	}
	return rc
}
// iorArray adds every value of ac to rc, one at a time, and returns rc.
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
	for values := ac.getShortIterator(); values.hasNext(); {
		rc.Add(values.next())
	}
	return rc
}
// lazyIOR is meant to be the lazy in-place union described below, but it is
// not lazy yet: it simply forwards to ior.
//
// The idea is described in this note from @lemire on
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
//
// Description of lazyOR and lazyIOR from @lemire:
//
// Lazy functions are optional and can be simply
// wrapper around non-lazy functions.
//
// The idea of "laziness" is as follows. It is
// inspired by the concept of lazy evaluation
// you might be familiar with (functional programming
// and all that). So a roaring bitmap is
// such that all its containers are, in some
// sense, chosen to use as little memory as
// possible. This is nice. Also, all bitsets
// are "cardinality aware" so that you can do
// fast rank/select queries, or query the
// cardinality of the whole bitmap... very fast,
// without latency.
//
// However, imagine that you are aggregating 100
// bitmaps together. So you OR the first two, then OR
// that with the third one and so forth. Clearly,
// intermediate bitmaps don't need to be as
// compressed as possible, right? They can be
// in a "dirty state". You only need the end
// result to be in a nice state... which you
// can achieve by calling repairAfterLazy at the end.
//
// The Java/C code does something special for
// the in-place lazy OR runs. The idea is that
// instead of taking two run containers and
// generating a new one, we actually try to
// do the computation in-place through a
// technique invented by @gssiyankai (pinging him!).
// What you do is you check whether the host
// run container has lots of extra capacity.
// If it does, you move its data at the end of
// the backing array, and then you write
// the answer at the beginning. What this
// trick does is minimize memory allocations.
//
func (rc *runContainer16) lazyIOR(a container) container {
// not lazy at the moment
// TODO: make it lazy
return rc.ior(a)
// The block below is the intended per-type dispatch; it is disabled
// until the lazy variants exist.
/*
switch c := a.(type) {
case *arrayContainer:
return rc.lazyIorArray(c)
case *bitmapContainer:
return rc.lazyIorBitmap(c)
case *runContainer16:
return rc.lazyIorRun16(c)
}
panic("unsupported container type")
*/
}
// lazyOR is meant to be the lazy, non-in-place counterpart of lazyIOR, but
// it is not lazy yet: it simply forwards to or.
func (rc *runContainer16) lazyOR(a container) container {
// not lazy at the moment
// TODO: make it lazy
return rc.or(a)
// The block below is the intended per-type dispatch; it is disabled
// until the lazy variants exist.
/*
switch c := a.(type) {
case *arrayContainer:
return rc.lazyOrArray(c)
case *bitmapContainer:
return rc.lazyOrBitmap(c)
case *runContainer16:
return rc.lazyOrRunContainer16(c)
}
panic("unsupported container type")
*/
}
// intersects reports whether rc and a share at least one value. It builds
// the full intersection and checks that it is non-empty.
// TODO: optimize by doing inplace/less allocation, possibly?
func (rc *runContainer16) intersects(a container) bool {
	return rc.and(a).getCardinality() > 0
}
// xor returns the symmetric difference of rc and a, dispatching on a's
// concrete type. It panics for an unknown container type.
func (rc *runContainer16) xor(a container) container {
	switch other := a.(type) {
	case *runContainer16:
		return rc.xorRunContainer16(other)
	case *bitmapContainer:
		return rc.xorBitmap(other)
	case *arrayContainer:
		return rc.xorArray(other)
	default:
		panic("unsupported container type")
	}
}
// iandNot removes the values of a from rc, dispatching on a's concrete
// type. It panics for an unknown container type.
func (rc *runContainer16) iandNot(a container) container {
	switch other := a.(type) {
	case *runContainer16:
		return rc.iandNotRunContainer16(other)
	case *bitmapContainer:
		return rc.iandNotBitmap(other)
	case *arrayContainer:
		return rc.iandNotArray(other)
	default:
		panic("unsupported container type")
	}
}
// inot flips the values in the range [firstOfRange,endx) and returns the
// result. Despite the in-place name, the receiver's contents are not
// modified: the returned container is the fresh one produced by Not.
// It panics when firstOfRange >= endx.
// TODO: minimize copies, do it all inplace; not() makes a copy.
func (rc *runContainer16) inot(firstOfRange, endx int) container {
	if firstOfRange >= endx {
		panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
	}
	return rc.Not(firstOfRange, endx)
}
// getCardinality returns rc.cardinality() converted to int.
func (rc *runContainer16) getCardinality() int {
return int(rc.cardinality())
}
// rank returns the number of values in rc that are <= x, based on the run
// index reported by search.
// NOTE(review): assumes search returns the index of the run containing x
// (found == true) or of the last run before x (found == false, -1 if none)
// — confirm against search's definition.
func (rc *runContainer16) rank(x uint16) int {
	idx, found, _ := rc.search(int64(x), nil)
	if idx < 0 {
		return 0
	}
	if !found && idx == int64(len(rc.iv))-1 {
		return rc.getCardinality()
	}
	// Every run strictly before idx counts in full.
	var total int64
	for i := int64(0); i < idx; i++ {
		total += rc.iv[i].runlen()
	}
	if found {
		// Only the part of run idx up to and including x counts.
		total += int64(x-rc.iv[idx].start) + 1
	} else {
		total += rc.iv[idx].runlen()
	}
	return int(total)
}
// selectInt forwards to selectInt16.
func (rc *runContainer16) selectInt(x uint16) int {
return rc.selectInt16(x)
}
// andNotRunContainer16 forwards to the exported AndNotRunContainer16 and
// returns its result as a container.
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
return rc.AndNotRunContainer16(b)
}
// andNotArray returns rc minus ac, computed on bitmap conversions of both
// operands.
func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
	return rc.toBitmapContainer().andNotBitmap(ac.toBitmapContainer())
}
// andNotBitmap returns rc minus bc, computed on a bitmap conversion of rc.
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
	return rc.toBitmapContainer().andNotBitmap(bc)
}
// toBitmapContainer builds a new bitmapContainer containing every value
// covered by rc's runs and recomputes its cardinality.
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
	p("run16 toBitmap starting; rc has %v ranges", len(rc.iv))
	bc := newBitmapContainer()
	for _, run := range rc.iv {
		// iaddRange takes an exclusive end, hence last()+1.
		bc.iaddRange(int(run.start), int(run.last())+1)
	}
	bc.computeCardinality()
	return bc
}
// iandNotRunContainer16 removes x2's values from rc in place and returns
// rc. The subtraction is done on bitmap conversions of both operands and
// the result is converted back to runs.
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
	self := rc.toBitmapContainer()
	self.iandNotBitmapSurely(x2.toBitmapContainer())
	*rc = *newRunContainer16FromBitmapContainer(self)
	return rc
}
// iandNotArray removes ac's values from rc in place and returns rc. The
// subtraction is done on bitmap conversions of both operands and the result
// is converted back to runs.
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
	self := rc.toBitmapContainer()
	self.iandNotBitmapSurely(ac.toBitmapContainer())
	*rc = *newRunContainer16FromBitmapContainer(self)
	return rc
}
// iandNotBitmap removes bc's values from rc in place and returns rc. The
// subtraction is done on a bitmap conversion of rc and the result is
// converted back to runs.
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
	self := rc.toBitmapContainer()
	self.iandNotBitmapSurely(bc)
	*rc = *newRunContainer16FromBitmapContainer(self)
	return rc
}
// xorRunContainer16 returns the symmetric difference of rc and x2, computed
// on bitmap conversions of both operands.
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
	return rc.toBitmapContainer().xorBitmap(x2.toBitmapContainer())
}
// xorArray returns the symmetric difference of rc and ac, computed on
// bitmap conversions of both operands.
func (rc *runContainer16) xorArray(ac *arrayContainer) container {
	return rc.toBitmapContainer().xorBitmap(ac.toBitmapContainer())
}
// xorBitmap returns the symmetric difference of rc and bc, computed on a
// bitmap conversion of rc.
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
	return rc.toBitmapContainer().xorBitmap(bc)
}
// toEfficientContainer converts rc to a bitmap or array container *if
// needed*. rc itself is returned when its size is no larger than both
// alternatives; otherwise an array container is used when the cardinality
// is at most arrayDefaultMaxSize, and a bitmap container in all other cases.
func (rc *runContainer16) toEfficientContainer() container {
	asRun := rc.getSizeInBytes()
	asBitmap := bitmapContainerSizeInBytes()
	n := int(rc.cardinality())
	asArray := arrayContainerSizeInBytes(n)

	switch {
	case asRun <= minOfInt(asBitmap, asArray):
		return rc
	case n <= arrayDefaultMaxSize:
		return rc.toArrayContainer()
	default:
		return newBitmapContainerFromRun(rc)
	}
}
// toArrayContainer builds a new arrayContainer containing every value
// covered by rc's runs.
func (rc *runContainer16) toArrayContainer() *arrayContainer {
	ac := newArrayContainer()
	for _, run := range rc.iv {
		// iaddRange takes an exclusive end, hence last()+1.
		ac.iaddRange(int(run.start), int(run.last())+1)
	}
	return ac
}
// newRunContainer16FromContainer builds a run container from c.
// A *runContainer16 is cloned; an *arrayContainer or *bitmapContainer is
// converted with the matching constructor. Any other dynamic type panics.
func newRunContainer16FromContainer(c container) *runContainer16 {
	switch src := c.(type) {
	case *bitmapContainer:
		return newRunContainer16FromBitmapContainer(src)
	case *arrayContainer:
		return newRunContainer16FromArray(src)
	case *runContainer16:
		return src.Clone()
	default:
		panic("unsupported container type")
	}
}

View file

@ -6,12 +6,12 @@
package roaring package roaring
import ( import (
"bufio"
"bytes" "bytes"
"encoding/base64" "encoding/base64"
"fmt" "fmt"
"io" "io"
"strconv" "strconv"
"sync"
) )
// Bitmap represents a compressed bitmap where you can add integers. // Bitmap represents a compressed bitmap where you can add integers.
@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
return rb.highlowcontainer.toBytes() return rb.highlowcontainer.toBytes()
} }
// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized // Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
// version of this bitmap to stream. The format is not // version of this bitmap to stream. The format is not
// compatible with the WriteTo() format, and is // compatible with the WriteTo() format, and is
// experimental: it may produce smaller on disk // experimental: it may produce smaller on disk
@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
// The format is compatible with other RoaringBitmap // The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here: // implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec // https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) { func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
return rb.highlowcontainer.readFrom(stream) stream := byteInputAdapterPool.Get().(*byteInputAdapter)
stream.reset(reader)
p, err = rb.highlowcontainer.readFrom(stream)
byteInputAdapterPool.Put(stream)
return
} }
// FromBuffer creates a bitmap from its serialized version stored in buffer // FromBuffer creates a bitmap from its serialized version stored in buffer
@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
// You should *not* change the copy-on-write status of the resulting // You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite). // bitmaps (SetCopyOnWrite).
// //
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) { // If buf becomes unavailable, then a bitmap created with
return rb.highlowcontainer.fromBuffer(buf) // FromBuffer would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
stream := byteBufferPool.Get().(*byteBuffer)
stream.reset(buf)
p, err = rb.highlowcontainer.readFrom(stream)
byteBufferPool.Put(stream)
return
} }
// Pools of reusable input wrappers handed to roaringArray.readFrom.
// Each pool lazily allocates a zero-valued wrapper; callers reset the
// wrapper with their own input after Get and hand it back with Put.
var (
// byteBufferPool supplies the *byteBuffer used by Bitmap.FromBuffer.
byteBufferPool = sync.Pool{
New: func() interface{} {
return &byteBuffer{}
},
}
// byteInputAdapterPool supplies the *byteInputAdapter used by Bitmap.ReadFrom.
byteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &byteInputAdapter{}
},
}
)
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (rb *Bitmap) RunOptimize() { func (rb *Bitmap) RunOptimize() {
rb.highlowcontainer.runOptimize() rb.highlowcontainer.runOptimize()
@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool {
return rb.highlowcontainer.hasRunCompression() return rb.highlowcontainer.hasRunCompression()
} }
// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized // Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
// version of this bitmap from stream. The format is // version of this bitmap from stream. The format is
// expected is that written by the WriteToMsgpack() // expected is that written by the WriteToMsgpack()
// call; see additional notes there. // call; see additional notes there.
@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
} }
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
// (same as ToBytes)
func (rb *Bitmap) MarshalBinary() ([]byte, error) { func (rb *Bitmap) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer return rb.ToBytes()
writer := bufio.NewWriter(&buf)
_, err := rb.WriteTo(writer)
if err != nil {
return nil, err
}
err = writer.Flush()
if err != nil {
return nil, err
}
return buf.Bytes(), nil
} }
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
func (rb *Bitmap) UnmarshalBinary(data []byte) error { func (rb *Bitmap) UnmarshalBinary(data []byte) error {
var buf bytes.Buffer r := bytes.NewReader(data)
_, err := buf.Write(data) _, err := rb.ReadFrom(r)
if err != nil {
return err
}
reader := bufio.NewReader(&buf)
_, err = rb.ReadFrom(reader)
return err return err
} }
@ -215,10 +233,20 @@ type IntIterable interface {
Next() uint32 Next() uint32
} }
// IntPeekable allows you to look at the next value without advancing and
// advance as long as the next value is smaller than minval.
// It embeds IntIterable, so every IntPeekable is also usable as a plain
// IntIterable.
type IntPeekable interface {
IntIterable
// PeekNext peeks the next value without advancing the iterator
PeekNext() uint32
// AdvanceIfNeeded advances as long as the next value is smaller than minval
AdvanceIfNeeded(minval uint32)
}
type intIterator struct { type intIterator struct {
pos int pos int
hs uint32 hs uint32
iter shortIterable iter shortPeekable
highlowcontainer *roaringArray highlowcontainer *roaringArray
} }
@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 {
return x return x
} }
// PeekNext reports the value the next call to Next would yield, leaving the
// iterator where it is. The low 16 bits come from the current container
// iterator and are combined with the high bits held in ii.hs.
func (ii *intIterator) PeekNext() uint32 {
	low := uint32(ii.iter.peekNext() & maxLowBit)
	return low | ii.hs
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval.
//
// Containers whose key (high 16 bits) is below minval's key are stepped over
// one at a time. If the iterator then sits on the container with exactly
// minval's key, the container iterator is advanced using the low 16 bits of
// minval; should that exhaust the container, the iterator moves on to the
// following one.
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
	wantKey := minval >> 16

	// Skip whole containers that lie entirely below minval.
	for ii.HasNext() {
		if ii.hs>>16 >= wantKey {
			break
		}
		ii.pos++
		ii.init()
	}

	// Nothing left, or already past minval's container: done.
	if !ii.HasNext() || ii.hs>>16 != wantKey {
		return
	}

	ii.iter.advanceIfNeeded(lowbits(minval))
	if ii.iter.hasNext() {
		return
	}
	ii.pos++
	ii.init()
}
func newIntIterator(a *Bitmap) *intIterator { func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator) p := new(intIterator)
p.pos = 0 p.pos = 0
@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator {
return p return p
} }
// intReverseIterator walks the containers of a roaringArray from the last
// index down to index 0 (see newIntReverseIterator and Next).
type intReverseIterator struct {
// pos is the index of the current container; a negative value means the
// iteration is finished.
pos int
// hs is the key of the current container shifted left by 16 bits.
hs uint32
// iter is the reverse iterator of the current container, or nil once
// pos is negative.
iter shortIterable
// highlowcontainer is the container array being iterated.
highlowcontainer *roaringArray
}
// HasNext reports whether more integers remain, i.e. whether the current
// container index has not dropped below zero.
func (ii *intReverseIterator) HasNext() bool {
	return ii.pos > -1
}
// init positions the iterator on the container at index ii.pos: it fetches
// that container's reverse iterator and caches its key shifted into the high
// 16 bits. When ii.pos is negative only the container iterator is cleared.
func (ii *intReverseIterator) init() {
	if ii.pos < 0 {
		ii.iter = nil
		return
	}
	hlc := ii.highlowcontainer
	ii.iter = hlc.getContainerAtIndex(ii.pos).getReverseIterator()
	ii.hs = uint32(hlc.getKeyAtIndex(ii.pos)) << 16
}
// Next returns the next integer: the value produced by the current container
// iterator combined with the cached high bits. When that container iterator
// is exhausted, the iterator steps to the previous container index.
func (ii *intReverseIterator) Next() uint32 {
	low := uint32(ii.iter.next())
	value := low | ii.hs
	if ii.iter.hasNext() {
		return value
	}
	ii.pos--
	ii.init()
	return value
}
// newIntReverseIterator returns an iterator over a's container array that
// starts at the last container index (size-1) and is already initialised.
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
	it := &intReverseIterator{
		pos:              a.highlowcontainer.size() - 1,
		highlowcontainer: &a.highlowcontainer,
	}
	it.init()
	return it
}
// ManyIntIterable allows you to iterate over the values in a Bitmap // ManyIntIterable allows you to iterate over the values in a Bitmap
type ManyIntIterable interface { type ManyIntIterable interface {
// pass in a buffer to fill up with values, returns how many values were returned // pass in a buffer to fill up with values, returns how many values were returned
@ -325,12 +416,20 @@ func (rb *Bitmap) String() string {
return buffer.String() return buffer.String()
} }
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
func (rb *Bitmap) Iterator() IntIterable { // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
return newIntIterator(rb) return newIntIterator(rb)
} }
// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ReverseIterator() IntIterable {
return newIntReverseIterator(rb)
}
// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ManyIterator() ManyIntIterable { func (rb *Bitmap) ManyIterator() ManyIntIterable {
return newManyIntIterator(rb) return newManyIntIterator(rb)
} }
@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool {
return false return false
} }
// AddOffset adds the value 'offset' to each and every value in a bitmap,
// generating a new bitmap in the process. The input bitmap x is not modified.
//
// Values for which value+offset does not fit in 32 bits are dropped from the
// result. The container key is therefore computed in 32-bit arithmetic: with
// 16-bit arithmetic the key would silently wrap around and leave the result
// with out-of-order or duplicated container keys.
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
	// Largest valid container key (a key is the high 16 bits of a value).
	const maxKey = 1<<16 - 1

	containerOffset := uint32(highbits(offset))
	inOffset := lowbits(offset)

	if inOffset == 0 {
		// Containers move as a whole; only their keys change.
		answer = x.Clone()
		hlc := &answer.highlowcontainer
		for pos := 0; pos < hlc.size(); pos++ {
			key := uint32(hlc.getKeyAtIndex(pos)) + containerOffset
			if key > maxKey {
				// Keys are stored in ascending order (index lookups rely on
				// it), so this container and all later ones overflow: cut
				// them off and release the references.
				for k := pos; k < len(hlc.containers); k++ {
					hlc.containers[k] = nil
				}
				hlc.keys = hlc.keys[:pos]
				hlc.containers = hlc.containers[:pos]
				hlc.needCopyOnWrite = hlc.needCopyOnWrite[:pos]
				break
			}
			hlc.keys[pos] = uint16(key)
		}
		return answer
	}

	// Each source container is split by addOffset into a part that stays
	// under the shifted key and a part that spills into the following key.
	answer = New()
	hlc := &answer.highlowcontainer
	for pos := 0; pos < x.highlowcontainer.size(); pos++ {
		key := uint32(x.highlowcontainer.getKeyAtIndex(pos)) + containerOffset
		if key > maxKey {
			// Ascending keys: everything from here on is out of range.
			break
		}
		c := x.highlowcontainer.getContainerAtIndex(pos)
		offsetted := c.addOffset(inOffset)
		if offsetted[0].getCardinality() > 0 {
			curSize := hlc.size()
			if curSize > 0 && uint32(hlc.getKeyAtIndex(curSize-1)) == key {
				// The previous container's spill-over already created this
				// key; merge into it instead of appending a duplicate key.
				prev := hlc.getContainerAtIndex(curSize - 1)
				orresult := prev.ior(offsetted[0])
				hlc.setContainerAtIndex(curSize-1, orresult)
			} else {
				hlc.appendContainer(uint16(key), offsetted[0], false)
			}
		}
		// The spill-over part belongs to key+1, which must itself be valid.
		if key < maxKey && offsetted[1].getCardinality() > 0 {
			hlc.appendContainer(uint16(key+1), offsetted[1], false)
		}
	}
	return answer
}
// Add the integer x to the bitmap // Add the integer x to the bitmap
func (rb *Bitmap) Add(x uint32) { func (rb *Bitmap) Add(x uint32) {
hb := highbits(x) hb := highbits(x)
@ -794,11 +933,6 @@ main:
} }
} }
/*func (rb *Bitmap) Or(x2 *Bitmap) {
results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage
rb.highlowcontainer = results.highlowcontainer
}*/
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap // AndNot computes the difference between two bitmaps and stores the result in the current bitmap
func (rb *Bitmap) AndNot(x2 *Bitmap) { func (rb *Bitmap) AndNot(x2 *Bitmap) {
pos1 := 0 pos1 := 0
@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
return return
} }
hbStart := highbits(uint32(rangeStart)) hbStart := uint32(highbits(uint32(rangeStart)))
lbStart := lowbits(uint32(rangeStart)) lbStart := uint32(lowbits(uint32(rangeStart)))
hbLast := highbits(uint32(rangeEnd - 1)) hbLast := uint32(highbits(uint32(rangeEnd - 1)))
lbLast := lowbits(uint32(rangeEnd - 1)) lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
var max uint32 = maxLowBit var max uint32 = maxLowBit
for hb := hbStart; hb <= hbLast; hb++ { for hb := hbStart; hb <= hbLast; hb++ {
@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
containerLast = uint32(lbLast) containerLast = uint32(lbLast)
} }
i := rb.highlowcontainer.getIndex(hb) i := rb.highlowcontainer.getIndex(uint16(hb))
if i >= 0 { if i >= 0 {
c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1) c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1)
@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
} }
} else { // *think* the range of ones must never be } else { // *think* the range of ones must never be
// empty. // empty.
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
} }
} }
} }
@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) {
lbLast := uint32(lowbits(uint32(rangeEnd - 1))) lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
var max uint32 = maxLowBit var max uint32 = maxLowBit
for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ { for hb := hbStart; hb <= hbLast; hb++ {
containerStart := uint32(0) containerStart := uint32(0)
if hb == uint16(hbStart) { if hb == hbStart {
containerStart = lbStart containerStart = lbStart
} }
containerLast := max containerLast := max
if hb == uint16(hbLast) { if hb == hbLast {
containerLast = lbLast containerLast = lbLast
} }
i := rb.highlowcontainer.getIndex(hb) i := rb.highlowcontainer.getIndex(uint16(hb))
if i >= 0 { if i >= 0 {
c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1) c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1)
rb.highlowcontainer.setContainerAtIndex(i, c) rb.highlowcontainer.setContainerAtIndex(i, c)
} else { // *think* the range of ones must never be } else { // *think* the range of ones must never be
// empty. // empty.
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast))) rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
} }
} }
} }
@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
} }
answer := NewBitmap() answer := NewBitmap()
hbStart := highbits(uint32(rangeStart)) hbStart := uint32(highbits(uint32(rangeStart)))
lbStart := lowbits(uint32(rangeStart)) lbStart := uint32(lowbits(uint32(rangeStart)))
hbLast := highbits(uint32(rangeEnd - 1)) hbLast := uint32(highbits(uint32(rangeEnd - 1)))
lbLast := lowbits(uint32(rangeEnd - 1)) lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
// copy the containers before the active area // copy the containers before the active area
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart) answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart))
var max uint32 = maxLowBit var max uint32 = maxLowBit
for hb := hbStart; hb <= hbLast; hb++ { for hb := hbStart; hb <= hbLast; hb++ {
@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
containerLast = uint32(lbLast) containerLast = uint32(lbLast)
} }
i := bm.highlowcontainer.getIndex(hb) i := bm.highlowcontainer.getIndex(uint16(hb))
j := answer.highlowcontainer.getIndex(hb) j := answer.highlowcontainer.getIndex(uint16(hb))
if i >= 0 { if i >= 0 {
c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1) c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1)
if c.getCardinality() > 0 { if c.getCardinality() > 0 {
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c) answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c)
} }
} else { // *think* the range of ones must never be } else { // *think* the range of ones must never be
// empty. // empty.
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb),
rangeOfOnes(int(containerStart), int(containerLast))) rangeOfOnes(int(containerStart), int(containerLast)))
} }
} }
// copy the containers after the active area. // copy the containers after the active area.
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast) answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast))
return answer return answer
} }
@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) {
return rb.highlowcontainer.copyOnWrite return rb.highlowcontainer.copyOnWrite
} }
// CloneCopyOnWriteContainers clones all containers which have
// needCopyOnWrite set to true.
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to, after
// calling FromBuffer.
// More generally this function is useful if you call FromBuffer
// to construct a bitmap with a backing array buf
// and then later discard the buf array. Note that you should call
// CloneCopyOnWriteContainers on all bitmaps that were derived
// from the 'FromBuffer' bitmap since they may have dependencies
// on the buf array as well.
func (rb *Bitmap) CloneCopyOnWriteContainers() {
rb.highlowcontainer.cloneCopyOnWriteContainers()
}
// FlipInt calls Flip after casting the parameters (convenience method) // FlipInt calls Flip after casting the parameters (convenience method)
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap { func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
return Flip(bm, uint64(rangeStart), uint64(rangeEnd)) return Flip(bm, uint64(rangeStart), uint64(rangeEnd))

View file

@ -4,16 +4,16 @@ import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"io"
"io/ioutil"
snappy "github.com/glycerine/go-unsnap-stream" snappy "github.com/glycerine/go-unsnap-stream"
"github.com/tinylib/msgp/msgp" "github.com/tinylib/msgp/msgp"
"io"
) )
//go:generate msgp -unexported //go:generate msgp -unexported
type container interface { type container interface {
addOffset(uint16) []container
clone() container clone() container
and(container) container and(container) container
andCardinality(container) int andCardinality(container) int
@ -37,7 +37,8 @@ type container interface {
not(start, final int) container // range is [firstOfRange,lastOfRange) not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx) inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container xor(r container) container
getShortIterator() shortIterable getShortIterator() shortPeekable
getReverseIterator() shortIterable
getManyIterator() manyIterable getManyIterator() manyIterable
contains(i uint16) bool contains(i uint16) bool
maximum() uint16 maximum() uint16
@ -61,7 +62,6 @@ type container interface {
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange) iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int serializedSizeInBytes() int
readFrom(io.Reader) (int, error)
writeTo(io.Writer) (int, error) writeTo(io.Writer) (int, error)
numberOfRuns() int numberOfRuns() int
@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray {
return &sa return &sa
} }
// cloneCopyOnWriteContainers replaces every container whose needCopyOnWrite
// flag is set with a clone of itself and clears the flag.
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
	for i := range ra.needCopyOnWrite {
		if !ra.needCopyOnWrite[i] {
			continue
		}
		ra.containers[i] = ra.containers[i].clone()
		ra.needCopyOnWrite[i] = false
	}
}
// unused function: // unused function:
//func (ra *roaringArray) containsKey(x uint16) bool { //func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0) // return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 {
// //
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
// //
func (ra *roaringArray) toBytes() ([]byte, error) { func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
stream := &bytes.Buffer{}
hasRun := ra.hasRunCompression() hasRun := ra.hasRunCompression()
isRunSizeInBytes := 0 isRunSizeInBytes := 0
cookieSize := 8 cookieSize := 8
@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
} }
} }
_, err := stream.Write(buf[:nw]) written, err := w.Write(buf[:nw])
if err != nil { if err != nil {
return nil, err return n, err
} }
for i, c := range ra.containers { n += int64(written)
_ = i
_, err := c.writeTo(stream) for _, c := range ra.containers {
written, err := c.writeTo(w)
if err != nil { if err != nil {
return nil, err return n, err
} }
n += int64(written)
} }
return stream.Bytes(), nil return n, nil
} }
// //
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec // spec: https://github.com/RoaringBitmap/RoaringFormatSpec
// //
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) { func (ra *roaringArray) toBytes() ([]byte, error) {
by, err := ra.toBytes() var buf bytes.Buffer
if err != nil { _, err := ra.writeTo(&buf)
return 0, err return buf.Bytes(), err
}
n, err := out.Write(by)
if err == nil && n < len(by) {
err = io.ErrShortWrite
}
return int64(n), err
} }
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) { func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
pos := 0 cookie, err := stream.readUInt32()
if len(buf) < 8 {
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf)) if err != nil {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
} }
cookie := binary.LittleEndian.Uint32(buf) var size uint32
pos += 4
var size uint32 // number of containers
haveRunContainers := false
var isRunBitmap []byte var isRunBitmap []byte
// cookie header
if cookie&0x0000FFFF == serialCookie { if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true size = uint32(uint16(cookie>>16) + 1)
size = uint32(uint16(cookie>>16) + 1) // number of containers
// create is-run-container bitmap // create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8 isRunBitmapSize := (int(size) + 7) / 8
if pos+isRunBitmapSize > len(buf) { isRunBitmap, err = stream.next(isRunBitmapSize)
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
}
isRunBitmap = buf[pos : pos+isRunBitmapSize] if err != nil {
pos += isRunBitmapSize return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
} else if cookie == serialCookieNoRunContainer { } else if cookie == serialCookieNoRunContainer {
size = binary.LittleEndian.Uint32(buf[pos:]) size, err = stream.readUInt32()
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
// keycard - is {key, cardinality} tuple slice
if pos+2*2*int(size) > len(buf) {
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
}
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
pos += 2 * 2 * int(size)
if !haveRunContainers || size >= noOffsetThreshold { if err != nil {
pos += 4 * int(size) return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.next(2 * 2 * int(size))
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.skipBytes(int(size) * 4); err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
} }
// Allocate slices upfront as number of containers is known // Allocate slices upfront as number of containers is known
@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} else { } else {
ra.containers = make([]container, size) ra.containers = make([]container, size)
} }
if cap(ra.keys) >= int(size) { if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size] ra.keys = ra.keys[:size]
} else { } else {
ra.keys = make([]uint16, size) ra.keys = make([]uint16, size)
} }
if cap(ra.needCopyOnWrite) >= int(size) { if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size] ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else { } else {
@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} }
for i := uint32(0); i < size; i++ { for i := uint32(0); i < size; i++ {
key := uint16(keycard[2*i]) key := keycard[2*i]
card := int(keycard[2*i+1]) + 1 card := int(keycard[2*i+1]) + 1
ra.keys[i] = key ra.keys[i] = key
ra.needCopyOnWrite[i] = true ra.needCopyOnWrite[i] = true
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 { if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container // run container
nr := binary.LittleEndian.Uint16(buf[pos:]) nr, err := stream.readUInt16()
pos += 2
if pos+int(nr)*4 > len(buf) { if err != nil {
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4) return 0, fmt.Errorf("failed to read runtime container size: %s", err)
} }
buf, err := stream.next(int(nr) * 4)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{ nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]), iv: byteSliceAsInterval16Slice(buf),
card: int64(card), card: int64(card),
} }
pos += int(nr) * 4
ra.containers[i] = &nb ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize { } else if card > arrayDefaultMaxSize {
// bitmap container // bitmap container
buf, err := stream.next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{ nb := bitmapContainer{
cardinality: card, cardinality: card,
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]), bitmap: byteSliceAsUint64Slice(buf),
} }
pos += arrayDefaultMaxSize * 2
ra.containers[i] = &nb ra.containers[i] = &nb
} else { } else {
// array container // array container
nb := arrayContainer{ buf, err := stream.next(card * 2)
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
} }
pos += card * 2
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb ra.containers[i] = &nb
} }
} }
return int64(pos), nil return stream.getReadBytes(), nil
}
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
pos := 0
var cookie uint32
err := binary.Read(stream, binary.LittleEndian, &cookie)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
pos += 4
var size uint32
haveRunContainers := false
var isRun *bitmapContainer
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1)
bytesToRead := (int(size) + 7) / 8
numwords := (bytesToRead + 7) / 8
by := make([]byte, bytesToRead, numwords*8)
nr, err := io.ReadFull(stream, by)
if err != nil {
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
}
pos += bytesToRead
by = by[:cap(by)]
isRun = newBitmapContainer()
for i := 0; i < numwords; i++ {
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
by = by[8:]
}
} else if cookie == serialCookieNoRunContainer {
err = binary.Read(stream, binary.LittleEndian, &size)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
}
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
keycard := make([]uint16, 2*size, 2*size)
err = binary.Read(stream, binary.LittleEndian, keycard)
if err != nil {
return 0, err
}
pos += 2 * 2 * int(size)
// offset header
if !haveRunContainers || size >= noOffsetThreshold {
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
pos += 4 * int(size)
}
for i := uint32(0); i < size; i++ {
key := int(keycard[2*i])
card := int(keycard[2*i+1]) + 1
if haveRunContainers && isRun.contains(uint16(i)) {
nb := newRunContainer16()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(uint16(key), nb, false)
} else if card > arrayDefaultMaxSize {
nb := newBitmapContainer()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
nb.cardinality = card
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
} else {
nb := newArrayContainerSize(card)
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
}
}
return int64(pos), nil
} }
func (ra *roaringArray) hasRunCompression() bool { func (ra *roaringArray) hasRunCompression() bool {

View file

@ -8,7 +8,7 @@ import (
"github.com/tinylib/msgp/msgp" "github.com/tinylib/msgp/msgp"
) )
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) { func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) { func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2 // map header, size 2
// write "t" // write "t"
@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) { func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 2 // map header, size 2
@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *containerSerz) Msgsize() (s int) { func (z *containerSerz) Msgsize() (s int) {
s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize() s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize()
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) { func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
{ {
var zajw uint8 var zajw uint8
@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z contype) EncodeMsg(en *msgp.Writer) (err error) { func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteUint8(uint8(z)) err = en.WriteUint8(uint8(z))
if err != nil { if err != nil {
@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z contype) MarshalMsg(b []byte) (o []byte, err error) { func (z contype) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
o = msgp.AppendUint8(o, uint8(z)) o = msgp.AppendUint8(o, uint8(z))
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
{ {
var zwht uint8 var zwht uint8
@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z contype) Msgsize() (s int) { func (z contype) Msgsize() (s int) {
s = msgp.Uint8Size s = msgp.Uint8Size
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) { func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) { func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4 // map header, size 4
// write "keys" // write "keys"
@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) { func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 4 // map header, size 4
@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *roaringArray) Msgsize() (s int) { func (z *roaringArray) Msgsize() (s int) {
s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize
for zxhx := range z.conserz { for zxhx := range z.conserz {

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp" import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) { func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) { func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 5 // map header, size 5
// write "runstart" // write "runstart"
@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) { func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 5 // map header, size 5
@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *addHelper16) Msgsize() (s int) { func (z *addHelper16) Msgsize() (s int) {
s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3 s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3
if z.rc == nil { if z.rc == nil {
@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) {
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) { func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z interval16) EncodeMsg(en *msgp.Writer) (err error) { func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2 // map header, size 2
// write "start" // write "start"
@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z interval16) MarshalMsg(b []byte) (o []byte, err error) { func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 2 // map header, size 2
@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z interval16) Msgsize() (s int) { func (z interval16) Msgsize() (s int) {
s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) { func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) { func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2 // map header, size 2
// write "iv" // write "iv"
@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) { func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 2 // map header, size 2
@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *runContainer16) Msgsize() (s int) { func (z *runContainer16) Msgsize() (s int) {
s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) { func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte var field []byte
_ = field _ = field
@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
if err != nil { if err != nil {
return return
} }
case "curSeq":
z.curSeq, err = dc.ReadInt64()
if err != nil {
return
}
default: default:
err = dc.Skip() err = dc.Skip()
if err != nil { if err != nil {
@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) { func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4 // map header, size 3
// write "rc" // write "rc"
err = en.Append(0x84, 0xa2, 0x72, 0x63) err = en.Append(0x83, 0xa2, 0x72, 0x63)
if err != nil { if err != nil {
return err return err
} }
@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
if err != nil { if err != nil {
return return
} }
// write "curSeq"
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
if err != nil {
return err
}
err = en.WriteInt64(z.curSeq)
if err != nil {
return
}
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) { func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
// map header, size 4 // map header, size 3
// string "rc" // string "rc"
o = append(o, 0x84, 0xa2, 0x72, 0x63) o = append(o, 0x83, 0xa2, 0x72, 0x63)
if z.rc == nil { if z.rc == nil {
o = msgp.AppendNil(o) o = msgp.AppendNil(o)
} else { } else {
@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
// string "curPosInIndex" // string "curPosInIndex"
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78) o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78)
o = msgp.AppendUint16(o, z.curPosInIndex) o = msgp.AppendUint16(o, z.curPosInIndex)
// string "curSeq"
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
o = msgp.AppendInt64(o, z.curSeq)
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte var field []byte
_ = field _ = field
@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
if err != nil { if err != nil {
return return
} }
case "curSeq":
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts)
if err != nil {
return
}
default: default:
bts, err = msgp.Skip(bts) bts, err = msgp.Skip(bts)
if err != nil { if err != nil {
@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *runIterator16) Msgsize() (s int) { func (z *runIterator16) Msgsize() (s int) {
s = 1 + 3 s = 1 + 3
if z.rc == nil { if z.rc == nil {
@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) {
} else { } else {
s += z.rc.Msgsize() s += z.rc.Msgsize()
} }
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size
return return
} }
// DecodeMsg implements msgp.Decodable // Deprecated: DecodeMsg implements msgp.Decodable
func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) { func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
var zjpj uint32 var zjpj uint32
zjpj, err = dc.ReadArrayHeader() zjpj, err = dc.ReadArrayHeader()
@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
return return
} }
// EncodeMsg implements msgp.Encodable // Deprecated: EncodeMsg implements msgp.Encodable
func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) { func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteArrayHeader(uint32(len(z))) err = en.WriteArrayHeader(uint32(len(z)))
if err != nil { if err != nil {
@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
return return
} }
// MarshalMsg implements msgp.Marshaler // Deprecated: MarshalMsg implements msgp.Marshaler
func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) { func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize()) o = msgp.Require(b, z.Msgsize())
o = msgp.AppendArrayHeader(o, uint32(len(z))) o = msgp.AppendArrayHeader(o, uint32(len(z)))
@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
return return
} }
// UnmarshalMsg implements msgp.Unmarshaler // Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) { func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
var zgmo uint32 var zgmo uint32
zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts) zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts)
@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
return return
} }
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message // Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z uint16Slice) Msgsize() (s int) { func (z uint16Slice) Msgsize() (s int) {
s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size)) s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size))
return return

View file

@ -2,8 +2,6 @@ package roaring
import ( import (
"encoding/binary" "encoding/binary"
"errors"
"fmt"
"io" "io"
"github.com/tinylib/msgp/msgp" "github.com/tinylib/msgp/msgp"
@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
return stream.Write(buf) return stream.Write(buf)
} }
// writeToMsgpack marshals b with MarshalMsg and writes the resulting bytes
// to stream. It returns the byte count reported by stream.Write, or 0 and
// the marshalling error when encoding fails.
func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) {
	payload, marshalErr := b.MarshalMsg(nil)
	if marshalErr == nil {
		return stream.Write(payload)
	}
	return 0, marshalErr
}
func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) { func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
bts, err := b.MarshalMsg(nil) bts, err := b.MarshalMsg(nil)
if err != nil { if err != nil {
@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
return stream.Write(bts) return stream.Write(bts)
} }
// readFromMsgpack decodes a msgpack-encoded container from stream into b
// via msgp.Decode. The returned byte count is always 0; only the error
// carries information.
func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) {
	return 0, msgp.Decode(stream, b)
}
func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) { func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b) err := msgp.Decode(stream, b)
return 0, err return 0, err
} }
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
// readFrom replaces the contents of b with the runs decoded from stream.
//
// The bytes consumed are a little-endian uint16 run count followed by that
// many (start, length) pairs, each a little-endian uint16. Runs must be in
// strictly increasing order: a run whose start is not greater than the
// previous run's last() value is rejected with an error.
//
// The returned int is always 0; the number of bytes read is not reported.
// A short read surfaces as the error returned by the underlying reader
// helpers (binary.Read / io.ReadFull).
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
	b.iv = b.iv[:0]
	b.card = 0

	var numRuns uint16
	if err := binary.Read(stream, binary.LittleEndian, &numRuns); err != nil {
		return 0, err
	}
	nr := int(numRuns)

	// Each run occupies 4 bytes (start + length). Reading the exact size up
	// front means a truncated stream is reported by io.ReadFull, so no
	// per-element length check is needed while decoding.
	raw := make([]byte, 4*nr)
	if _, err := io.ReadFull(stream, raw); err != nil {
		return 0, err
	}

	for i := 0; i < nr; i++ {
		start := binary.LittleEndian.Uint16(raw[4*i:])
		length := binary.LittleEndian.Uint16(raw[4*i+2:])
		if i > 0 && b.iv[i-1].last() >= start {
			return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, start)
		}
		b.iv = append(b.iv, interval16{start: start, length: length})
		// A run of the given length covers length+1 values.
		b.card += int64(length) + 1
	}
	return 0, nil
}

View file

@ -4,6 +4,7 @@ package roaring
import ( import (
"encoding/binary" "encoding/binary"
"errors"
"io" "io"
) )
@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
} }
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) { func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if b.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
// Write set // Write set
buf := make([]byte, 8*len(b.bitmap)) buf := make([]byte, 8*len(b.bitmap))
for i, v := range b.bitmap { for i, v := range b.bitmap {
@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
return by return by
} }
// uint16SliceAsByteSlice encodes slice into a newly allocated byte slice,
// storing every value as two bytes in little-endian order. The result does
// not alias the input and is always 2*len(slice) bytes long.
func uint16SliceAsByteSlice(slice []uint16) []byte {
	out := make([]byte, 2*len(slice))
	for idx := range slice {
		off := 2 * idx
		binary.LittleEndian.PutUint16(out[off:], slice[idx])
	}
	return out
}
func byteSliceAsUint16Slice(slice []byte) []uint16 { func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 { if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2") panic("Slice size should be divisible by 2")

View file

@ -3,8 +3,10 @@
package roaring package roaring
import ( import (
"errors"
"io" "io"
"reflect" "reflect"
"runtime"
"unsafe" "unsafe"
) )
@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
} }
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) { func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if bc.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
buf := uint64SliceAsByteSlice(bc.bitmap) buf := uint64SliceAsByteSlice(bc.bitmap)
return stream.Write(buf) return stream.Write(buf)
} }
// readFrom reads an arrayContainer from stream.
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
// *before* you call readFrom. We can't guess the size in the stream
// by this point.
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return io.ReadFull(stream, buf)
}
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
buf := uint64SliceAsByteSlice(bc.bitmap)
n, err := io.ReadFull(stream, buf)
bc.computeCardinality()
return n, err
}
func uint64SliceAsByteSlice(slice []uint64) []byte { func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
header.Len *= 8 header.Len *= 8
header.Cap *= 8 header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it // return it
return *(*[]byte)(unsafe.Pointer(&header)) return result
} }
func uint16SliceAsByteSlice(slice []uint16) []byte { func uint16SliceAsByteSlice(slice []uint16) []byte {
@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
header.Len *= 2 header.Len *= 2
header.Cap *= 2 header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it // return it
return *(*[]byte)(unsafe.Pointer(&header)) return result
} }
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte { func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
// Deserialization code follows // Deserialization code follows
func byteSliceAsUint16Slice(slice []byte) []uint16 { ////
// These methods (byteSliceAsUint16Slice, ...) do not make copies;
// they are pointer-based (unsafe). The caller is responsible for
// ensuring that the input slice is not garbage collected, deleted,
// or modified while the returned slice is held.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 { if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2") panic("Slice size should be divisible by 2")
} }
// reference: https://go101.org/article/unsafe.html
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length // transfer the data from the given slice to a new variable (our result)
header.Len /= 2 rHeader.Data = bHeader.Data
header.Cap /= 2 rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// return it // instantiate result and use KeepAlive so data isn't unmapped.
return *(*[]uint16)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
} }
func byteSliceAsUint64Slice(slice []byte) []uint64 { func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 { if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8") panic("Slice size should be divisible by 8")
} }
// reference: https://go101.org/article/unsafe.html
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length // transfer the data from the given slice to a new variable (our result)
header.Len /= 8 rHeader.Data = bHeader.Data
header.Cap /= 8 rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// return it // instantiate result and use KeepAlive so data isn't unmapped.
return *(*[]uint64)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
} }
func byteSliceAsInterval16Slice(slice []byte) []interval16 { func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 { if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4") panic("Slice size should be divisible by 4")
} }
// reference: https://go101.org/article/unsafe.html
// make a new slice header // make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice)) bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length // transfer the data from the given slice to a new variable (our result)
header.Len /= 4 rHeader.Data = bHeader.Data
header.Cap /= 4 rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// return it // instantiate result and use KeepAlive so data isn't unmapped.
return *(*[]interval16)(unsafe.Pointer(&header)) runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
} }

View file

@ -5,6 +5,12 @@ type shortIterable interface {
next() uint16 next() uint16
} }
// shortPeekable extends shortIterable with the ability to look at the
// upcoming value and to move forward toward a minimum value.
type shortPeekable interface {
shortIterable
// peekNext returns a uint16 without taking arguments; in the
// shortIterator implementation it yields the upcoming element and leaves
// the position unchanged.
peekNext() uint16
// advanceIfNeeded takes a minimum value; in the shortIterator
// implementation it repositions the iterator only when the upcoming
// element is smaller than minval.
advanceIfNeeded(minval uint16)
}
type shortIterator struct { type shortIterator struct {
slice []uint16 slice []uint16
loc int loc int
@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 {
si.loc++ si.loc++
return a return a
} }
// peekNext returns the element at the current position without advancing
// the iterator. It indexes si.slice directly, so it panics when si.loc is
// out of range; advanceIfNeeded checks hasNext() before calling it.
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
// advanceIfNeeded repositions the iterator when the upcoming element is
// smaller than minval. It does nothing when the iterator is exhausted or
// the upcoming element is already >= minval.
//
// The new position is whatever advanceUntil returns for the range
// [si.loc, len(si.slice)) — presumably the first index holding a value
// >= minval; confirm against advanceUntil.
func (si *shortIterator) advanceIfNeeded(minval uint16) {
	if !si.hasNext() {
		return
	}
	if si.peekNext() >= minval {
		return
	}
	si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
// reverseIterator walks a uint16 slice from higher indexes toward index 0.
// loc is the index of the element the next call to next() will return; the
// iterator is exhausted once loc drops below zero.
type reverseIterator struct {
	slice []uint16
	loc   int
}

// hasNext reports whether loc still refers to a non-negative index.
func (si *reverseIterator) hasNext() bool {
	return !(si.loc < 0)
}

// next returns the element at loc and then moves loc one step toward the
// front of the slice. It panics if loc is outside the slice.
func (si *reverseIterator) next() uint16 {
	idx := si.loc
	val := si.slice[idx]
	si.loc = idx - 1
	return val
}

View file

@ -14,6 +14,17 @@ const (
serialCookie = 12347 // runs, arrays, and bitmaps serialCookie = 12347 // runs, arrays, and bitmaps
noOffsetThreshold = 4 noOffsetThreshold = 4
// MaxUint32 is the largest uint32 value.
MaxUint32 = 4294967295
// MaxRange is one more than the maximum allowed bitmap bit index. For use as an
// upper bound for ranges.
MaxRange uint64 = MaxUint32 + 1
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
MaxUint16 = 65535
// Compute wordSizeInBytes, the size of a word in bytes. // Compute wordSizeInBytes, the size of a word in bytes.
_m = ^uint64(0) _m = ^uint64(0)
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1 _logS = _m>>8&1 + _m>>16&1 + _m>>32&1
@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) {
endword := (end - 1) / 64 endword := (end - 1) / 64
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64)) bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
for i := firstword; i < endword; i++ { for i := firstword; i < endword; i++ {
//p("flipBitmapRange on i=%v", i)
bitmap[i] = ^bitmap[i] bitmap[i] = ^bitmap[i]
} }
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64) bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 {
} }
return b return b
} }
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}

View file

@ -3,9 +3,9 @@ sudo: false
language: go language: go
go: go:
- "1.9.x"
- "1.10.x" - "1.10.x"
- "1.11.x" - "1.11.x"
- "1.12.x"
script: script:
- go get golang.org/x/tools/cmd/cover - go get golang.org/x/tools/cmd/cover
@ -15,7 +15,12 @@ script:
- gvt restore - gvt restore
- go test -race -v $(go list ./... | grep -v vendor/) - go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/) - go vet $(go list ./... | grep -v vendor/)
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/) - go test ./test -v -indexType scorch
- if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
else
errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
fi
- docs/project-code-coverage.sh - docs/project-code-coverage.sh
- docs/build_children.sh - docs/build_children.sh

View file

@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
return fieldLength, tokenFreqs return fieldLength, tokenFreqs
} }
// Analyzer returns the analyzer stored in the field's analyzer member.
// The pointer is returned as-is and may be nil if none was set.
func (t *TextField) Analyzer() *analysis.Analyzer {
return t.analyzer
}
func (t *TextField) Value() []byte { func (t *TextField) Value() []byte {
return t.value return t.value
} }

View file

@ -37,6 +37,12 @@ var geoTolerance = 1E-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
// Point represents a geo point as a longitude/latitude pair.
// NOTE(review): values are presumably in degrees, matching the 360.0 and
// 180.0 spans used by lonScale and latScale — confirm.
type Point struct {
Lon float64 // longitude component
Lat float64 // latitude component
}
// MortonHash computes the morton hash value for the provided geo point // MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat. // This point is ordered as lon, lat.
func MortonHash(lon, lat float64) uint64 { func MortonHash(lon, lat float64) uint64 {
@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error {
} }
return nil return nil
} }
// BoundingRectangleForPolygon computes the axis-aligned bounding rectangle
// of the given polygon vertices.
//
// Every vertex is validated with checkLongitude and checkLatitude, in
// order; the first validation error aborts the computation and is returned
// with all four coordinates set to 0.
//
// On success the results are, in order: the minimum longitude, the maximum
// latitude, the maximum longitude and the minimum latitude.
//
// NOTE: polygon[0] is read unconditionally, so calling this with an empty
// slice panics. Callers must pass at least one point.
func BoundingRectangleForPolygon(polygon []Point) (
	float64, float64, float64, float64, error) {
	first := polygon[0]
	minX, maxX := first.Lon, first.Lon
	minY, maxY := first.Lat, first.Lat

	for _, pt := range polygon {
		if err := checkLongitude(pt.Lon); err != nil {
			return 0, 0, 0, 0, err
		}
		if err := checkLatitude(pt.Lat); err != nil {
			return 0, 0, 0, 0, err
		}

		minX = math.Min(minX, pt.Lon)
		maxX = math.Max(maxX, pt.Lon)
		minY = math.Min(minY, pt.Lat)
		maxY = math.Max(maxY, pt.Lat)
	}

	return minX, maxY, maxX, minY, nil
}

View file

@ -1,32 +1,21 @@
// The code here was obtained from: // Copyright (c) 2019 Couchbase, Inc.
// https://github.com/mmcloughlin/geohash //
// Licensed under the Apache License, Version 2.0 (the "License");
// The MIT License (MIT) // you may not use this file except in compliance with the License.
// Copyright (c) 2015 Michael McLoughlin // You may obtain a copy of the License at
// Permission is hereby granted, free of charge, to any person obtaining a copy //
// of this software and associated documentation files (the "Software"), to deal // http://www.apache.org/licenses/LICENSE-2.0
// in the Software without restriction, including without limitation the rights //
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // Unless required by applicable law or agreed to in writing, software
// copies of the Software, and to permit persons to whom the Software is // distributed under the License is distributed on an "AS IS" BASIS,
// furnished to do so, subject to the following conditions: // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// The above copyright notice and this permission notice shall be included in all // limitations under the License.
// copies or substantial portions of the Software. // This implementation is inspired from the geohash-js
// ref: https://github.com/davetroy/geohash-js
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
package geo package geo
import (
"math"
)
// encoding encapsulates an encoding defined by a given base32 alphabet. // encoding encapsulates an encoding defined by a given base32 alphabet.
type encoding struct { type encoding struct {
enc string enc string
@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding {
return e return e
} }
// Decode string into bits of a 64-bit word. The string s may be at most 12 // base32encoding with the Geohash alphabet.
// characters.
func (e *encoding) decode(s string) uint64 {
x := uint64(0)
for i := 0; i < len(s); i++ {
x = (x << 5) | uint64(e.dec[s[i]])
}
return x
}
// Encode bits of 64-bit word into a string.
func (e *encoding) encode(x uint64) string {
b := [12]byte{}
for i := 0; i < 12; i++ {
b[11-i] = e.enc[x&0x1f]
x >>= 5
}
return string(b[:])
}
// Base32Encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz") var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
// BoundingBox returns the region encoded by the given string geohash. var masks = []uint64{16, 8, 4, 2, 1}
func geoBoundingBox(hash string) geoBox {
bits := uint(5 * len(hash))
inthash := base32encoding.decode(hash)
return geoBoundingBoxIntWithPrecision(inthash, bits)
}
// Box represents a rectangle in latitude/longitude space. // DecodeGeoHash decodes the string geohash faster with
type geoBox struct { // higher precision. This api is in experimental phase.
minLat float64 func DecodeGeoHash(geoHash string) (float64, float64) {
maxLat float64 even := true
minLng float64 lat := []float64{-90.0, 90.0}
maxLng float64 lon := []float64{-180.0, 180.0}
}
// Round returns a point inside the box, making an effort to round to minimal for i := 0; i < len(geoHash); i++ {
// precision. cd := uint64(base32encoding.dec[geoHash[i]])
func (b geoBox) round() (lat, lng float64) { for j := 0; j < 5; j++ {
x := maxDecimalPower(b.maxLat - b.minLat) if even {
lat = math.Ceil(b.minLat/x) * x if cd&masks[j] > 0 {
x = maxDecimalPower(b.maxLng - b.minLng) lon[0] = (lon[0] + lon[1]) / 2
lng = math.Ceil(b.minLng/x) * x } else {
return lon[1] = (lon[0] + lon[1]) / 2
}
// precalculated for performance
var exp232 = math.Exp2(32)
// errorWithPrecision returns the error range in latitude and longitude for in
// integer geohash with bits of precision.
func errorWithPrecision(bits uint) (latErr, lngErr float64) {
b := int(bits)
latBits := b / 2
lngBits := b - latBits
latErr = math.Ldexp(180.0, -latBits)
lngErr = math.Ldexp(360.0, -lngBits)
return
}
// minDecimalPlaces returns the minimum number of decimal places such that
// there must exist an number with that many places within any range of width
// r. This is intended for returning minimal precision coordinates inside a
// box.
func maxDecimalPower(r float64) float64 {
m := int(math.Floor(math.Log10(r)))
return math.Pow10(m)
}
// Encode the position of x within the range -r to +r as a 32-bit integer.
func encodeRange(x, r float64) uint32 {
p := (x + r) / (2 * r)
return uint32(p * exp232)
}
// Decode the 32-bit range encoding X back to a value in the range -r to +r.
func decodeRange(X uint32, r float64) float64 {
p := float64(X) / exp232
x := 2*r*p - r
return x
}
// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
// ignored, and may take any value.
func squash(X uint64) uint32 {
X &= 0x5555555555555555
X = (X | (X >> 1)) & 0x3333333333333333
X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
X = (X | (X >> 8)) & 0x0000ffff0000ffff
X = (X | (X >> 16)) & 0x00000000ffffffff
return uint32(X)
}
// Deinterleave the bits of X into 32-bit words containing the even and odd
// bitlevels of X, respectively.
func deinterleave(X uint64) (uint32, uint32) {
return squash(X), squash(X >> 1)
}
// BoundingBoxIntWithPrecision returns the region encoded by the integer
// geohash with the specified precision.
func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
fullHash := hash << (64 - bits)
latInt, lngInt := deinterleave(fullHash)
lat := decodeRange(latInt, 90)
lng := decodeRange(lngInt, 180)
latErr, lngErr := errorWithPrecision(bits)
return geoBox{
minLat: lat,
maxLat: lat + latErr,
minLng: lng,
maxLng: lng + lngErr,
} }
} else {
if cd&masks[j] > 0 {
lat[0] = (lat[0] + lat[1]) / 2
} else {
lat[1] = (lat[0] + lat[1]) / 2
}
}
even = !even
}
}
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
} }
// ---------------------------------------------------------------------- func EncodeGeoHash(lat, lon float64) string {
even := true
lats := []float64{-90.0, 90.0}
lons := []float64{-180.0, 180.0}
precision := 12
var ch, bit uint64
var geoHash string
// Decode the string geohash to a (lat, lng) point. for len(geoHash) < precision {
func GeoHashDecode(hash string) (lat, lng float64) { if even {
box := geoBoundingBox(hash) mid := (lons[0] + lons[1]) / 2
return box.round() if lon > mid {
ch |= masks[bit]
lons[0] = mid
} else {
lons[1] = mid
}
} else {
mid := (lats[0] + lats[1]) / 2
if lat > mid {
ch |= masks[bit]
lats[0] = mid
} else {
lats[1] = mid
}
}
even = !even
if bit < 4 {
bit++
} else {
geoHash += string(base32encoding.enc[ch])
ch = 0
bit = 0
}
}
return geoHash
} }

View file

@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
} }
} else { } else {
// geohash // geohash
lat, lon = GeoHashDecode(geoStr) lat, lon = DecodeGeoHash(geoStr)
foundLat = true foundLat = true
foundLon = true foundLon = true
} }

View file

@ -117,6 +117,8 @@ func (b *Batch) String() string {
// be re-used in the future. // be re-used in the future.
func (b *Batch) Reset() { func (b *Batch) Reset() {
b.internal.Reset() b.internal.Reset()
b.lastDocSize = 0
b.totalSize = 0
} }
func (b *Batch) Merge(o *Batch) { func (b *Batch) Merge(o *Batch) {

View file

@ -121,6 +121,10 @@ type IndexReaderOnly interface {
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error) FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
} }
// IndexReaderContains is the interface for index readers that can hand out
// a FieldDictContains for a given field.
type IndexReaderContains interface {
// FieldDictContains returns a FieldDictContains for the named field, or
// an error if one could not be obtained.
FieldDictContains(field string) (FieldDictContains, error)
}
// FieldTerms contains the terms used by a document, keyed by field // FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string type FieldTerms map[string][]string
@ -230,6 +234,10 @@ type FieldDict interface {
Close() error Close() error
} }
// FieldDictContains is the interface for key membership lookups
// (presumably against a single field's term dictionary; confirm against the
// implementations).
type FieldDictContains interface {
// Contains reports whether key is present, or returns an error if the
// lookup itself failed.
Contains(key []byte) (bool, error)
}
// DocIDReader is the interface exposing enumeration of documents identifiers. // DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources. // Close the reader to release associated resources.
type DocIDReader interface { type DocIDReader interface {

View file

@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
fileSegments++ fileSegments++
} }
} }
} }
// before the newMerge introduction, need to clean the newly // before the newMerge introduction, need to clean the newly
@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
} }
} }
} }
// In case where all the docs in the newly merged segment getting // In case where all the docs in the newly merged segment getting
// deleted by the time we reach here, can skip the introduction. // deleted by the time we reach here, can skip the introduction.
if nextMerge.new != nil && if nextMerge.new != nil &&
@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
newSnapshot.updateSize() newSnapshot.updateSize()
s.rootLock.Lock() s.rootLock.Lock()
// swap in new index snapshot // swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch newSnapshot.epoch = s.nextSnapshotEpoch
@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
} }
newSnapshot.updateSize() newSnapshot.updateSize()
// swap in new snapshot // swap in new snapshot
rootPrev := s.root rootPrev := s.root
s.root = newSnapshot s.root = newSnapshot

View file

@ -18,6 +18,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"os" "os"
"strings"
"sync/atomic" "sync/atomic"
"time" "time"
@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1) atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil return nil
} }
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1) atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks))) atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now // process tasks in serial for now
var notifications []chan *IndexSnapshot var notifications []chan *IndexSnapshot
var filenames []string
for _, task := range resultMergePlan.Tasks { for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 { if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
segmentsToMerge = append(segmentsToMerge, zapSeg) segmentsToMerge = append(segmentsToMerge, zapSeg)
docsToDrop = append(docsToDrop, segSnapshot.deleted) docsToDrop = append(docsToDrop, segSnapshot.deleted)
} }
// track the files getting merged for unsetting the
// removal ineligibility. This helps to unflip files
// even with fast merger, slow persister work flows.
path := zapSeg.Path()
filenames = append(filenames,
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
} }
} }
} }
@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
return err return err
} }
err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
if err != nil {
s.unmarkIneligibleForRemoval(filename)
return fmt.Errorf("merge validation failed: %v", err)
}
oldNewDocNums = make(map[uint64][]uint64) oldNewDocNums = make(map[uint64][]uint64)
for i, segNewDocNums := range newDocNums { for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
} }
} }
// Once all the newly merged segment introductions are done,
// it's safe to unflip the removal ineligibility for the replaced
// older segments.
for _, f := range filenames {
s.unmarkIneligibleForRemoval(f)
}
return nil return nil
} }
@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotMemMergeErr, 1) atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err return nil, 0, err
} }
err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
if err != nil {
return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
}
// update persisted stats // update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())

View file

@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64 var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher var ew *epochWatcher
var unpersistedCallbacks []index.BatchCallback
po, err := s.parsePersisterOptions() po, err := s.parsePersisterOptions()
if err != nil { if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
@ -111,7 +114,6 @@ OUTER:
if ew != nil && ew.epoch > lastMergedEpoch { if ew != nil && ew.epoch > lastMergedEpoch {
lastMergedEpoch = ew.epoch lastMergedEpoch = ew.epoch
} }
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers, po) lastMergedEpoch, persistWatchers, po)
@ -150,11 +152,25 @@ OUTER:
_ = ourSnapshot.DecRef() _ = ourSnapshot.DecRef()
break OUTER break OUTER
} }
// save this current snapshot's persistedCallbacks, to invoke during
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef() _ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER continue OUTER
} }
if unpersistedCallbacks != nil {
// in the event of this being a retry attempt for persisting a snapshot
// that had earlier failed, prepend the persistedCallbacks associated
// with earlier segment(s) to the latest persistedCallbacks
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
unpersistedCallbacks = nil
}
for i := range ourPersistedCallbacks { for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err) ourPersistedCallbacks[i](err)
} }
@ -179,7 +195,6 @@ OUTER:
s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
if changed { if changed {
s.removeOldData()
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
continue OUTER continue OUTER
} }
@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext return watchersNext
} }
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { lastMergedEpoch uint64, persistWatchers []*epochWatcher,
po *persisterOptions) (uint64, []*epochWatcher) {
// first, let the watchers proceed if they lag behind // First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// check the merger lag by counting the segment files on disk, // Check the merger lag by counting the segment files on disk,
numFilesOnDisk, _ := s.diskFileStats()
// On finding fewer files on disk, persister takes a short pause // On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next // for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop. // memory merge cum persist loop.
// On finding too many files on disk, persister pause until the merger
// catches up to reduce the segment file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
numFilesOnDisk, _ := s.diskFileStats()
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 { po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
select { select {
@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM
return lastMergedEpoch, persistWatchers return lastMergedEpoch, persistWatchers
} }
// Finding too many files on disk could be due to two reasons.
// 1. Too many older snapshots awaiting the clean up.
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
s.removeOldData()
numFilesOnDisk, _ = s.diskFileStats()
}
// The persister pauses until the merger catches up and reduces the
// segment file count to under the threshold.
// But if there is memory pressure, skip these sleep maneuvers.
OUTER: OUTER:
for po.PersisterNapUnderNumFiles > 0 && for po.PersisterNapUnderNumFiles > 0 &&
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
} }
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
rv := &IndexSnapshot{ rv := &IndexSnapshot{
parent: s, parent: s,
internal: make(map[string][]byte), internal: make(map[string][]byte),
refs: 1, refs: 1,
creator: "loadSnapshot", creator: "loadSnapshot",
} }
var running uint64 var running uint64
c := snapshot.Cursor() c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() { for k, _ := c.First(); k != nil; k, _ = c.Next() {
@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
running += segmentSnapshot.segment.Count() running += segmentSnapshot.segment.Count()
} }
} }
return rv, nil return rv, nil
} }
@ -750,13 +774,12 @@ func (s *Scorch) removeOldData() {
if err != nil { if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err)) s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
} }
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
if removed > 0 {
err = s.removeOldZapFiles() err = s.removeOldZapFiles()
if err != nil { if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err)) s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
} }
}
} }
// NumSnapshotsToKeep represents how many recent, old snapshots to // NumSnapshotsToKeep represents how many recent, old snapshots to

View file

@ -41,12 +41,14 @@ const Version uint8 = 2
var ErrClosed = fmt.Errorf("scorch closed") var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct { type Scorch struct {
nextSegmentID uint64
stats Stats
iStats internalStats
readOnly bool readOnly bool
version uint8 version uint8
config map[string]interface{} config map[string]interface{}
analysisQueue *index.AnalysisQueue analysisQueue *index.AnalysisQueue
stats Stats
nextSegmentID uint64
path string path string
unsafeBatch bool unsafeBatch bool
@ -73,8 +75,6 @@ type Scorch struct {
onEvent func(event Event) onEvent func(event Event)
onAsyncError func(err error) onAsyncError func(err error)
iStats internalStats
pauseLock sync.RWMutex pauseLock sync.RWMutex
pauseCount uint64 pauseCount uint64
@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis? // FIXME could sort ids list concurrent with analysis?
if len(batch.IndexOps) > 0 { if numUpdates > 0 {
go func() { go func() {
for _, doc := range batch.IndexOps { for _, doc := range batch.IndexOps {
if doc != nil { if doc != nil {
@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["CurOnDiskBytes"] = numBytesUsedDisk m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk m["CurOnDiskFiles"] = numFilesOnDisk
s.rootLock.RLock()
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
s.rootLock.RUnlock()
// TODO: consider one day removing these backwards compatible // TODO: consider one day removing these backwards compatible
// names for apps using the old names // names for apps using the old names
m["updates"] = m["TotUpdates"] m["updates"] = m["TotUpdates"]

View file

@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
return &EmptyDictionaryIterator{} return &EmptyDictionaryIterator{}
} }
// Contains always returns false with a nil error, whatever key is given.
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
return false, nil
}
type EmptyDictionaryIterator struct{} type EmptyDictionaryIterator struct{}
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil return nil, nil
} }
// Contains always returns false with a nil error, whatever key is given.
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
return false, nil
}
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil return nil, nil
} }

View file

@ -19,7 +19,10 @@
package segment package segment
import "fmt" import (
"errors"
"fmt"
)
const ( const (
MaxVarintSize = 9 MaxVarintSize = 9
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
} }
return b[length:], v, nil return b[length:], v, nil
} }
// ------------------------------------------------------------
// MemUvarintReader decodes unsigned varints directly out of an in-memory
// byte slice, tracking its own read position.
type MemUvarintReader struct {
	C int // index of next byte to read from S
	S []byte
}

// NewMemUvarintReader returns a reader positioned at the first byte of s.
// The slice is referenced, not copied.
func NewMemUvarintReader(s []byte) *MemUvarintReader {
	return &MemUvarintReader{S: s}
}

// Len returns the number of unread bytes.
func (r *MemUvarintReader) Len() int {
	n := len(r.S) - r.C
	if n < 0 {
		// SkipBytes may have moved the cursor past the end of S.
		return 0
	}
	return n
}

var (
	// ErrMemUvarintReaderOverflow is returned by ReadUvarint when the
	// encoded value does not fit in a uint64.
	ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")

	// ErrMemUvarintReaderTruncated is returned by ReadUvarint when S ends
	// before the terminating byte of a varint has been seen.
	ErrMemUvarintReaderTruncated = errors.New("MemUvarintReader truncated input")
)

// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
//
// It returns ErrMemUvarintReaderOverflow if the encoded value exceeds 64
// bits (the offending bytes are still consumed), and
// ErrMemUvarintReaderTruncated if the input runs out before the final byte
// of the varint (the read position is then left unchanged) instead of
// panicking with an index-out-of-range.
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
	var (
		x uint64
		s uint
	)

	// Work on local copies; r.C is only written back once a complete
	// varint has been consumed.
	C, S := r.C, r.S

	for {
		// A single unsigned comparison rejects both a cursor at/after the
		// end of S and a negative cursor.
		if uint(C) >= uint(len(S)) {
			return 0, ErrMemUvarintReaderTruncated
		}

		b := S[C]
		C++

		if b < 0x80 {
			r.C = C

			// why 63?  The original code had an 'i += 1' loop var and
			// checked for i > 9 || i == 9 ...; but, we no longer
			// check for the i var, but instead check here for s,
			// which is incremented by 7.  So, 7*9 == 63.
			//
			// why the "extra" >= check?  The normal case is that s <
			// 63, so we check this single >= guard first so that we
			// hit the normal, nil-error return pathway sooner.
			if s >= 63 && (s > 63 || s == 63 && b > 1) {
				return 0, ErrMemUvarintReaderOverflow
			}

			return x | uint64(b)<<s, nil
		}

		// Continuation byte: accumulate its low 7 bits.
		x |= uint64(b&0x7f) << s
		s += 7
	}
}

// SkipUvarint skips ahead one encoded uint64. If the input ends before the
// terminating byte, the reader is left at the end of S rather than
// panicking; a following ReadUvarint then reports
// ErrMemUvarintReaderTruncated.
func (r *MemUvarintReader) SkipUvarint() {
	for uint(r.C) < uint(len(r.S)) {
		b := r.S[r.C]
		r.C++

		if b < 0x80 {
			return
		}
	}
}

// SkipBytes skips a count number of bytes. The new position is not
// validated against the length of S.
func (r *MemUvarintReader) SkipBytes(count int) {
	r.C = r.C + count
}

// Reset points the reader at s and rewinds the read position to its start.
func (r *MemUvarintReader) Reset(s []byte) {
	r.C = 0
	r.S = s
}

View file

@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
s = s.Sub[0] s = s.Sub[0]
} }
if s.Op == syntax.OpLiteral { if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
return string(s.Rune) return string(s.Rune)
} }

View file

@ -59,6 +59,8 @@ type TermDictionary interface {
AutomatonIterator(a vellum.Automaton, AutomatonIterator(a vellum.Automaton,
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
Contains(key []byte) (bool, error)
} }
type DictionaryIterator interface { type DictionaryIterator interface {

View file

@ -16,6 +16,7 @@ package zap
import ( import (
"bufio" "bufio"
"github.com/couchbase/vellum"
"math" "math"
"os" "os"
) )
@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
docValueOffset: docValueOffset, docValueOffset: docValueOffset,
dictLocs: dictLocs, dictLocs: dictLocs,
fieldDvReaders: make(map[uint16]*docValueReader), fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
} }
sb.updateSize() sb.updateSize()

View file

@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
return rv return rv
} }
// Contains reports whether key is present in this dictionary's FST.
//
// d.fst may be nil (RangeIterator guards the same field before using it);
// in that case no key is reported as present and the error is nil, rather
// than calling through the nil FST.
func (d *Dictionary) Contains(key []byte) (bool, error) {
	if d.fst == nil {
		return false, nil
	}
	return d.fst.Contains(key)
}
// Iterator returns an iterator for this dictionary // Iterator returns an iterator for this dictionary
func (d *Dictionary) Iterator() segment.DictionaryIterator { func (d *Dictionary) Iterator() segment.DictionaryIterator {
rv := &DictionaryIterator{ rv := &DictionaryIterator{
@ -143,12 +147,15 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
} }
// need to increment the end position to be inclusive // need to increment the end position to be inclusive
endBytes := []byte(end) var endBytes []byte
if len(end) > 0 {
endBytes = []byte(end)
if endBytes[len(endBytes)-1] < 0xff { if endBytes[len(endBytes)-1] < 0xff {
endBytes[len(endBytes)-1]++ endBytes[len(endBytes)-1]++
} else { } else {
endBytes = append(endBytes, 0xff) endBytes = append(endBytes, 0xff)
} }
}
if d.fst != nil { if d.fst != nil {
itr, err := d.fst.Iterator([]byte(start), endBytes) itr, err := d.fst.Iterator([]byte(start), endBytes)

View file

@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
type docVisitState struct { type docVisitState struct {
dvrs map[uint16]*docValueReader dvrs map[uint16]*docValueReader
segment *Segment segment *SegmentBase
} }
type docValueReader struct { type docValueReader struct {
@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields // get the docValue offset for the given fields
if fieldDvLocStart == fieldNotUninverted { if fieldDvLocStart == fieldNotUninverted {
return nil, fmt.Errorf("loadFieldDocValueReader: "+ // no docValues found, nothing to do
"no docValues found for field: %s", field) return nil, nil
} }
// read the number of chunks, and chunk offsets position // read the number of chunks, and chunk offsets position
@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
// acquire position of chunk offsets // acquire position of chunk offsets
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
} else {
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
} }
fdvIter := &docValueReader{ fdvIter := &docValueReader{
@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
// VisitDocumentFieldTerms is an implementation of the // VisitDocumentFieldTerms is an implementation of the
// DocumentFieldTermVisitable interface // DocumentFieldTermVisitable interface
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
segment.DocVisitState, error) { segment.DocVisitState, error) {
dvs, ok := dvsIn.(*docVisitState) dvs, ok := dvsIn.(*docVisitState)
@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
// check if the chunk is already loaded // check if the chunk is already loaded
if docInChunk != dvr.curChunkNumber() { if docInChunk != dvr.curChunkNumber() {
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) err := dvr.loadDvChunk(docInChunk, s)
if err != nil { if err != nil {
return dvs, err return dvs, err
} }
@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
// VisitableDocValueFields returns the list of fields with // VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the // persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method. // VisitDocumentFieldTerms method.
func (s *Segment) VisitableDocValueFields() ([]string, error) { func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
return s.fieldDvNames, nil return s.fieldDvNames, nil
} }

View file

@ -31,6 +31,14 @@ import (
var DefaultFileMergerBufferSize = 1024 * 1024 var DefaultFileMergerBufferSize = 1024 * 1024
// ValidateMerge can be set by applications to perform additional checks
// on a new segment produced by a merge. By default it does nothing and
// returns nil.
//
// segments and memSegments are the merge inputs: callers should provide
// EITHER segments or memSegments, but not both. drops carries the bitmaps
// passed alongside those inputs (presumably the per-input deleted
// documents; confirm against the callers), and newSegment is the segment
// the merge produced.
//
// This API is experimental and may be removed at any time.
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
return nil
}
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// Merge takes a slice of zap segments and bit masks describing which // Merge takes a slice of zap segments and bit masks describing which

View file

@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0 var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment. By default it does
// nothing and returns nil.
//
// The hook receives one field per call.
//
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field document.Field) error {
return nil
}
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded // AnalysisResultsToSegmentBase produces an in-memory zap-encoded
// SegmentBase from analysis results // SegmentBase from analysis results
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() (
if opts.IncludeDocValues() { if opts.IncludeDocValues() {
s.IncludeDocValues[fieldID] = true s.IncludeDocValues[fieldID] = true
} }
err := ValidateDocFields(field)
if err != nil {
return 0, err
}
} }
var curr int var curr int

View file

@ -15,10 +15,8 @@
package zap package zap
import ( import (
"bytes"
"encoding/binary" "encoding/binary"
"fmt" "fmt"
"io"
"math" "math"
"reflect" "reflect"
@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
} }
rv.postings = p rv.postings = p
rv.includeFreqNorm = includeFreq || includeNorm rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
rv.includeLocs = includeLocs rv.includeLocs = includeLocs
if p.normBits1Hit != 0 { if p.normBits1Hit != 0 {
@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
// Count returns the number of items on this postings list // Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 { func (p *PostingsList) Count() uint64 {
var n uint64 var n, e uint64
if p.normBits1Hit != 0 { if p.normBits1Hit != 0 {
n = 1 n = 1
if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
e = 1
}
} else if p.postings != nil { } else if p.postings != nil {
n = p.postings.GetCardinality() n = p.postings.GetCardinality()
}
var e uint64
if p.except != nil { if p.except != nil {
e = p.except.GetCardinality() e = p.postings.AndCardinality(p.except)
} }
if n <= e {
return 0
} }
return n - e return n - e
} }
@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error {
// PostingsIterator provides a way to iterate through the postings list // PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct { type PostingsIterator struct {
postings *PostingsList postings *PostingsList
all roaring.IntIterable all roaring.IntPeekable
Actual roaring.IntIterable Actual roaring.IntPeekable
ActualBM *roaring.Bitmap ActualBM *roaring.Bitmap
currChunk uint32 currChunk uint32
currChunkFreqNorm []byte currChunkFreqNorm []byte
currChunkLoc []byte currChunkLoc []byte
freqNormReader *bytes.Reader freqNormReader *segment.MemUvarintReader
locReader *bytes.Reader locReader *segment.MemUvarintReader
freqChunkOffsets []uint64 freqChunkOffsets []uint64
freqChunkStart uint64 freqChunkStart uint64
@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e end += e
i.currChunkFreqNorm = i.postings.sb.mem[start:end] i.currChunkFreqNorm = i.postings.sb.mem[start:end]
if i.freqNormReader == nil { if i.freqNormReader == nil {
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm) i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
} else { } else {
i.freqNormReader.Reset(i.currChunkFreqNorm) i.freqNormReader.Reset(i.currChunkFreqNorm)
} }
@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e end += e
i.currChunkLoc = i.postings.sb.mem[start:end] i.currChunkLoc = i.postings.sb.mem[start:end]
if i.locReader == nil { if i.locReader == nil {
i.locReader = bytes.NewReader(i.currChunkLoc) i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
} else { } else {
i.locReader.Reset(i.currChunkLoc) i.locReader.Reset(i.currChunkLoc)
} }
@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
return 1, i.normBits1Hit, false, nil return 1, i.normBits1Hit, false, nil
} }
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader) freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil { if err != nil {
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err) return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
} }
freq, hasLocs := decodeFreqHasLocs(freqHasLocs) freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
normBits, err := binary.ReadUvarint(i.freqNormReader) normBits, err := i.freqNormReader.ReadUvarint()
if err != nil { if err != nil {
return 0, 0, false, fmt.Errorf("error reading norm: %v", err) return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
} }
return freq, normBits, hasLocs, err return freq, normBits, hasLocs, nil
}
func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
if i.normBits1Hit != 0 {
return false, nil
}
freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil {
return false, fmt.Errorf("error reading freqHasLocs: %v", err)
}
i.freqNormReader.SkipUvarint() // Skip normBits.
return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
} }
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 { func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
@ -449,59 +462,54 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
} }
// readLocation processes all the integers on the stream representing a single // readLocation processes all the integers on the stream representing a single
// location. if you care about it, pass in a non-nil location struct, and we // location.
// will fill it. if you don't care about it, pass in nil and we safely consume
// the contents.
func (i *PostingsIterator) readLocation(l *Location) error { func (i *PostingsIterator) readLocation(l *Location) error {
// read off field // read off field
fieldID, err := binary.ReadUvarint(i.locReader) fieldID, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading location field: %v", err) return fmt.Errorf("error reading location field: %v", err)
} }
// read off pos // read off pos
pos, err := binary.ReadUvarint(i.locReader) pos, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading location pos: %v", err) return fmt.Errorf("error reading location pos: %v", err)
} }
// read off start // read off start
start, err := binary.ReadUvarint(i.locReader) start, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading location start: %v", err) return fmt.Errorf("error reading location start: %v", err)
} }
// read off end // read off end
end, err := binary.ReadUvarint(i.locReader) end, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading location end: %v", err) return fmt.Errorf("error reading location end: %v", err)
} }
// read off num array pos // read off num array pos
numArrayPos, err := binary.ReadUvarint(i.locReader) numArrayPos, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading location num array pos: %v", err) return fmt.Errorf("error reading location num array pos: %v", err)
} }
// group these together for less branching
if l != nil {
l.field = i.postings.sb.fieldsInv[fieldID] l.field = i.postings.sb.fieldsInv[fieldID]
l.pos = pos l.pos = pos
l.start = start l.start = start
l.end = end l.end = end
if cap(l.ap) < int(numArrayPos) { if cap(l.ap) < int(numArrayPos) {
l.ap = make([]uint64, int(numArrayPos)) l.ap = make([]uint64, int(numArrayPos))
} else { } else {
l.ap = l.ap[:int(numArrayPos)] l.ap = l.ap[:int(numArrayPos)]
} }
}
// read off array positions // read off array positions
for k := 0; k < int(numArrayPos); k++ { for k := 0; k < int(numArrayPos); k++ {
ap, err := binary.ReadUvarint(i.locReader) ap, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading array position: %v", err) return fmt.Errorf("error reading array position: %v", err)
} }
if l != nil {
l.ap[k] = ap l.ap[k] = ap
} }
}
return nil return nil
} }
@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
} }
rv.locs = i.nextSegmentLocs[:0] rv.locs = i.nextSegmentLocs[:0]
numLocsBytes, err := binary.ReadUvarint(i.locReader) numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err) return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
} }
@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() (
if hasLocs { if hasLocs {
startLoc := len(i.currChunkLoc) - i.locReader.Len() startLoc := len(i.currChunkLoc) - i.locReader.Len()
numLocsBytes, err := binary.ReadUvarint(i.locReader) numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return 0, 0, 0, nil, nil, return 0, 0, 0, nil, nil,
fmt.Errorf("error reading location nextBytes numLocs: %v", err) fmt.Errorf("error reading location nextBytes numLocs: %v", err)
} }
// skip over all the location bytes // skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) i.locReader.SkipBytes(int(numLocsBytes))
if err != nil {
return 0, 0, 0, nil, nil, err
}
endLoc := len(i.currChunkLoc) - i.locReader.Len() endLoc := len(i.currChunkLoc) - i.locReader.Len()
bytesLoc = i.currChunkLoc[startLoc:endLoc] bytesLoc = i.currChunkLoc[startLoc:endLoc]
@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
return i.nextDocNumAtOrAfterClean(atOrAfter) return i.nextDocNumAtOrAfterClean(atOrAfter)
} }
n := i.Actual.Next() i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
for uint64(n) < atOrAfter && i.Actual.HasNext() {
n = i.Actual.Next() if !i.Actual.HasNext() {
}
if uint64(n) < atOrAfter {
// couldn't find anything // couldn't find anything
return 0, false, nil return 0, false, nil
} }
n := i.Actual.Next()
allN := i.all.Next() allN := i.all.Next()
nChunk := n / i.postings.sb.chunkFactor nChunk := n / i.postings.sb.chunkFactor
@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
// no deletions) where the all bitmap is the same as the actual bitmap // no deletions) where the all bitmap is the same as the actual bitmap
func (i *PostingsIterator) nextDocNumAtOrAfterClean( func (i *PostingsIterator) nextDocNumAtOrAfterClean(
atOrAfter uint64) (uint64, bool, error) { atOrAfter uint64) (uint64, bool, error) {
n := i.Actual.Next()
if !i.includeFreqNorm { if !i.includeFreqNorm {
for uint64(n) < atOrAfter && i.Actual.HasNext() { i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
n = i.Actual.Next()
}
if uint64(n) < atOrAfter { if !i.Actual.HasNext() {
return 0, false, nil // couldn't find anything return 0, false, nil // couldn't find anything
} }
return uint64(n), true, nil return uint64(i.Actual.Next()), true, nil
} }
// freq-norm's needed, so maintain freq-norm chunk reader // freq-norm's needed, so maintain freq-norm chunk reader
sameChunkNexts := 0 // # of times we called Next() in the same chunk sameChunkNexts := 0 // # of times we called Next() in the same chunk
n := i.Actual.Next()
nChunk := n / i.postings.sb.chunkFactor nChunk := n / i.postings.sb.chunkFactor
for uint64(n) < atOrAfter && i.Actual.HasNext() { for uint64(n) < atOrAfter && i.Actual.HasNext() {
@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
} }
// read off freq/offsets even though we don't care about them // read off freq/offsets even though we don't care about them
_, _, hasLocs, err := i.readFreqNormHasLocs() hasLocs, err := i.skipFreqNormReadHasLocs()
if err != nil { if err != nil {
return err return err
} }
if i.includeLocs && hasLocs { if i.includeLocs && hasLocs {
numLocsBytes, err := binary.ReadUvarint(i.locReader) numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil { if err != nil {
return fmt.Errorf("error reading location numLocsBytes: %v", err) return fmt.Errorf("error reading location numLocsBytes: %v", err)
} }
// skip over all the location bytes // skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent) i.locReader.SkipBytes(int(numLocsBytes))
if err != nil {
return err
}
} }
return nil return nil

View file

@ -20,8 +20,8 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"reflect"
"sync" "sync"
"unsafe"
"github.com/RoaringBitmap/roaring" "github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int
func init() { func init() {
var sb SegmentBase var sb SegmentBase
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
} }
// Open returns a zap impl of a segment // Open returns a zap impl of a segment
@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) {
mem: mm[0 : len(mm)-FooterSize], mem: mm[0 : len(mm)-FooterSize],
fieldsMap: make(map[string]uint16), fieldsMap: make(map[string]uint16),
fieldDvReaders: make(map[uint16]*docValueReader), fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
}, },
f: f, f: f,
mm: mm, mm: mm,
@ -101,6 +102,9 @@ type SegmentBase struct {
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
fieldDvNames []string // field names cached in fieldDvReaders fieldDvNames []string // field names cached in fieldDvReaders
size uint64 size uint64
m sync.Mutex
fieldFSTs map[uint16]*vellum.FST
} }
func (sb *SegmentBase) Size() int { func (sb *SegmentBase) Size() int {
@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
dictStart := sb.dictLocs[rv.fieldID] dictStart := sb.dictLocs[rv.fieldID]
if dictStart > 0 { if dictStart > 0 {
var ok bool
sb.m.Lock()
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
// read the length of the vellum data // read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64]) vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen] fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
if fstBytes != nil {
rv.fst, err = vellum.Load(fstBytes) rv.fst, err = vellum.Load(fstBytes)
if err != nil { if err != nil {
sb.m.Unlock()
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
} }
sb.fieldFSTs[rv.fieldID] = rv.fst
}
sb.m.Unlock()
rv.fstReader, err = rv.fst.Reader() rv.fstReader, err = rv.fst.Reader()
if err != nil { if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
} }
}
} }
} }
@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
} }
func (s *SegmentBase) loadDvReaders() error { func (s *SegmentBase) loadDvReaders() error {
if s.docValueOffset == fieldNotUninverted { if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
return nil return nil
} }
@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error {
} }
read += uint64(n) read += uint64(n)
fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
if err != nil {
return err
}
if fieldDvReader != nil { if fieldDvReader != nil {
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
s.fieldDvNames = append(s.fieldDvNames, field) s.fieldDvNames = append(s.fieldDvNames, field)

View file

@ -28,13 +28,14 @@ import (
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment" "github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum" "github.com/couchbase/vellum"
lev2 "github.com/couchbase/vellum/levenshtein2" lev "github.com/couchbase/vellum/levenshtein"
) )
// re usable, threadsafe levenshtein builders // re usable, threadsafe levenshtein builders
var lb1, lb2 *lev2.LevenshteinAutomatonBuilder var lb1, lb2 *lev.LevenshteinAutomatonBuilder
type asynchSegmentResult struct { type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator dictItr segment.DictionaryIterator
index int index int
@ -51,11 +52,11 @@ func init() {
var is interface{} = IndexSnapshot{} var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
var err error var err error
lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
if err != nil { if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
} }
lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
if err != nil { if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
} }
@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() {
} }
} }
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
randomLookup bool) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult) results := make(chan *asynchSegmentResult)
for index, segment := range i.segment { for index, segment := range i.segment {
@ -134,9 +137,13 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
dict, err := segment.segment.Dictionary(field) dict, err := segment.segment.Dictionary(field)
if err != nil { if err != nil {
results <- &asynchSegmentResult{err: err} results <- &asynchSegmentResult{err: err}
} else {
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else { } else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)} results <- &asynchSegmentResult{dictItr: makeItr(dict)}
} }
}
}(index, segment) }(index, segment)
} }
@ -150,6 +157,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if asr.err != nil && err == nil { if asr.err != nil && err == nil {
err = asr.err err = asr.err
} else { } else {
if !randomLookup {
next, err2 := asr.dictItr.Next() next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil { if err2 != nil && err == nil {
err = err2 err = err2
@ -160,14 +168,22 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
curr: *next, curr: *next,
}) })
} }
} else {
rv.cursors = append(rv.cursors, &segmentDictCursor{
dict: asr.dict,
})
}
} }
} }
// after ensuring we've read all items on channel // after ensuring we've read all items on channel
if err != nil { if err != nil {
return nil, err return nil, err
} }
if !randomLookup {
// prepare heap // prepare heap
heap.Init(rv) heap.Init(rv)
}
return rv, nil return rv, nil
} }
@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.Iterator() return i.Iterator()
}) }, false)
} }
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
endTerm []byte) (index.FieldDict, error) { endTerm []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RangeIterator(string(startTerm), string(endTerm)) return i.RangeIterator(string(startTerm), string(endTerm))
}) }, false)
} }
func (i *IndexSnapshot) FieldDictPrefix(field string, func (i *IndexSnapshot) FieldDictPrefix(field string,
termPrefix []byte) (index.FieldDict, error) { termPrefix []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.PrefixIterator(string(termPrefix)) return i.PrefixIterator(string(termPrefix))
}) }, false)
} }
func (i *IndexSnapshot) FieldDictRegexp(field string, func (i *IndexSnapshot) FieldDictRegexp(field string,
@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd) return i.AutomatonIterator(a, prefixBeg, prefixEnd)
}) }, false)
} }
func (i *IndexSnapshot) getLevAutomaton(term string, func (i *IndexSnapshot) getLevAutomaton(term string,
@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd) return i.AutomatonIterator(a, prefixBeg, prefixEnd)
}) }, false)
} }
func (i *IndexSnapshot) FieldDictOnly(field string, func (i *IndexSnapshot) FieldDictOnly(field string,
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.OnlyIterator(onlyTerms, includeCount) return i.OnlyIterator(onlyTerms, includeCount)
}) }, false)
}
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
return i.newIndexSnapshotFieldDict(field, nil, true)
} }
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {

View file

@ -22,6 +22,7 @@ import (
) )
type segmentDictCursor struct { type segmentDictCursor struct {
dict segment.TermDictionary
itr segment.DictionaryIterator itr segment.DictionaryIterator
curr index.DictEntry curr index.DictEntry
} }
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
func (i *IndexSnapshotFieldDict) Close() error { func (i *IndexSnapshotFieldDict) Close() error {
return nil return nil
} }
// Contains reports whether key is present in the dictionary of at least one
// of the cursors held by this field dict.
//
// A hit from any cursor is definitive and is returned immediately with a nil
// error. If no cursor reports a hit, the first lookup error encountered (if
// any) is returned, so that a failed lookup is not silently reported as
// "not found". With no cursors the result is (false, nil).
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
	var firstErr error

	for _, cursor := range i.cursors {
		found, err := cursor.dict.Contains(key)
		if found {
			return true, nil
		}
		// Remember the first failure but keep probing: another cursor
		// may still be able to answer positively.
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}

	return false, firstErr
}

View file

@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
} }
type cachedDocs struct { type cachedDocs struct {
size uint64
m sync.Mutex // As the cache is asynchronously prepared, need a lock m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field cache map[string]*cachedFieldDocs // Keyed by field
size uint64
} }
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {

View file

@ -107,6 +107,9 @@ type Stats struct {
TotFileMergeIntroductionsDone uint64 TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductionsSkipped uint64 TotFileMergeIntroductionsSkipped uint64
CurFilesIneligibleForRemoval uint64
TotSnapshotsRemovedFromMetaStore uint64
TotMemMergeBeg uint64 TotMemMergeBeg uint64
TotMemMergeErr uint64 TotMemMergeErr uint64
TotMemMergeDone uint64 TotMemMergeDone uint64

View file

@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock // do analysis before acquiring write lock
analysisStart := time.Now() analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult) resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan) aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
return return
} }
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
// start a writer for this update // start a writer for this update
indexStart := time.Now() indexStart := time.Now()
var kvwriter store.KVWriter var kvwriter store.KVWriter
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil { if err == nil {
atomic.AddUint64(&udc.stats.updates, 1) atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
} else { } else {
atomic.AddUint64(&udc.stats.errors, 1) atomic.AddUint64(&udc.stats.errors, 1)
} }
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
} }
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
defer persistedCallback(err)
}
analysisStart := time.Now() analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
} }
} }
if len(batch.IndexOps) > 0 { if numUpdates > 0 {
go func() { go func() {
for _, doc := range batch.IndexOps { for _, doc := range batch.IndexOps {
if doc != nil { if doc != nil {
@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&udc.stats.errors, 1) atomic.AddUint64(&udc.stats.errors, 1)
} }
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
persistedCallback(err)
}
return return
} }

View file

@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Sort: req.Sort.Copy(), Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations, IncludeLocations: req.IncludeLocations,
Score: req.Score, Score: req.Score,
SearchAfter: req.SearchAfter,
SearchBefore: req.SearchBefore,
} }
return &rv return &rv
} }
@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
searchStart := time.Now() searchStart := time.Now()
asyncResults := make(chan *asyncSearchResult, len(indexes)) asyncResults := make(chan *asyncSearchResult, len(indexes))
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
// run search on each index in separate go routine // run search on each index in separate go routine
var waitGroup sync.WaitGroup var waitGroup sync.WaitGroup
@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
// sort all hits with the requested order // sort all hits with the requested order
if len(req.Sort) > 0 { if len(req.Sort) > 0 {
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits) sorter := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(sorter) sort.Sort(sorter)
} }
@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
sr.Facets.Fixup(name, fr.Size) sr.Facets.Fixup(name, fr.Size)
} }
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
// fix up original request // fix up original request
sr.Request = req sr.Request = req
searchDuration := time.Since(searchStart) searchDuration := time.Since(searchStart)
@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock() defer f.index.mutex.RUnlock()
return f.fieldDict.Close() return f.fieldDict.Close()
} }
type multiSearchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
return &multiSearchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *multiSearchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View file

@ -19,6 +19,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"os" "os"
"sort"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed return nil, ErrorIndexClosed
} }
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort) var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
var coll *collector.TopNCollector
if req.SearchAfter != nil {
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
} else {
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
}
// open a reader for this search // open a reader for this search
indexReader, err := i.i.Reader() indexReader, err := i.i.Reader()
@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
facetsBuilder.Add(facetName, facetBuilder) facetsBuilder.Add(facetName, facetBuilder)
} }
} }
collector.SetFacetsBuilder(facetsBuilder) coll.SetFacetsBuilder(facetsBuilder)
} }
memNeeded := memNeededForSearch(req, searcher, collector) memNeeded := memNeededForSearch(req, searcher, coll)
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
err = cbF(memNeeded) err = cbF(memNeeded)
@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
} }
} }
err = collector.Collect(ctx, searcher, indexReader) err = coll.Collect(ctx, searcher, indexReader)
if err != nil { if err != nil {
return nil, err return nil, err
} }
hits := collector.Results() hits := coll.Results()
var highlighter highlight.Highlighter var highlighter highlight.Highlighter
@ -542,8 +556,54 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
} }
for _, hit := range hits { for _, hit := range hits {
if i.name != "" {
hit.Index = i.name
}
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
if err != nil {
return nil, err
}
}
atomic.AddUint64(&i.stats.searches, 1)
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
if Config.SlowSearchLogThreshold > 0 &&
searchDuration > Config.SlowSearchLogThreshold {
logger.Printf("slow search took %s - %v", searchDuration, req)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
return &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
},
Request: req,
Hits: hits,
Total: coll.Total(),
MaxScore: coll.MaxScore(),
Took: searchDuration,
Facets: coll.FacetResults(),
}, nil
}
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
indexName string, r index.IndexReader,
highlighter highlight.Highlighter) error {
if len(req.Fields) > 0 || highlighter != nil { if len(req.Fields) > 0 || highlighter != nil {
doc, err := indexReader.Document(hit.ID) doc, err := r.Document(hit.ID)
if err == nil && doc != nil { if err == nil && doc != nil {
if len(req.Fields) > 0 { if len(req.Fields) > 0 {
fieldsToLoad := deDuplicate(req.Fields) fieldsToLoad := deDuplicate(req.Fields)
@ -601,35 +661,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
} else if doc == nil { } else if doc == nil {
// unexpected case, a doc ID that was found as a search hit // unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup // was unable to be found during document lookup
return nil, ErrorIndexReadInconsistency return ErrorIndexReadInconsistency
}
}
if i.name != "" {
hit.Index = i.name
} }
} }
atomic.AddUint64(&i.stats.searches, 1) return nil
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
if Config.SlowSearchLogThreshold > 0 &&
searchDuration > Config.SlowSearchLogThreshold {
logger.Printf("slow search took %s - %v", searchDuration, req)
}
return &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
},
Request: req,
Hits: hits,
Total: collector.Total(),
MaxScore: collector.MaxScore(),
Took: searchDuration,
Facets: collector.FacetResults(),
}, nil
} }
// Fields returns the name of all the fields this // Fields returns the name of all the fields this
@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string {
} }
return ret return ret
} }
// searchHitSorter adapts a collection of search hits to sort.Interface so
// that the hits can be reordered in place according to a search.SortOrder.
type searchHitSorter struct {
	hits          search.DocumentMatchCollection
	sort          search.SortOrder
	cachedScoring []bool
	cachedDesc    []bool
}

// newSearchHitSorter returns a sorter over hits driven by the given sort
// order. The per-key "is score" and "is descending" flags are computed once
// here and handed to every comparison made by Less.
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
	sorter := &searchHitSorter{sort: sort, hits: hits}
	sorter.cachedScoring = sort.CacheIsScore()
	sorter.cachedDesc = sort.CacheDescending()
	return sorter
}

// Len reports how many hits are being sorted.
func (m *searchHitSorter) Len() int {
	return len(m.hits)
}

// Swap exchanges the hits at positions i and j.
func (m *searchHitSorter) Swap(i, j int) {
	m.hits[j], m.hits[i] = m.hits[i], m.hits[j]
}

// Less reports whether the hit at position i orders before the hit at
// position j, i.e. whether the sort order's comparison is negative.
func (m *searchHitSorter) Less(i, j int) bool {
	left, right := m.hits[i], m.hits[j]
	return m.sort.Compare(m.cachedScoring, m.cachedDesc, left, right) < 0
}

View file

@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if !propertyValue.IsNil() { if !propertyValue.IsNil() {
switch property := property.(type) { switch property := property.(type) {
case encoding.TextMarshaler: case encoding.TextMarshaler:
// ONLY process TextMarshaler if there is an explicit mapping
txt, err := property.MarshalText() // AND all of the fields are of type text
if err == nil && subDocMapping != nil { // OTHERWISE process field without TextMarshaler
// index by explicit mapping if subDocMapping != nil {
allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields { for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" { if fieldMapping.Type != "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context) allFieldsText = false
break
}
}
txt, err := property.MarshalText()
if err == nil && allFieldsText {
txtStr := string(txt)
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(txtStr, pathString, path, indexes, context)
}
return
} }
} }
} else {
dm.walkDocument(property, path, indexes, context) dm.walkDocument(property, path, indexes, context)
}
default: default:
dm.walkDocument(property, path, indexes, context) dm.walkDocument(property, path, indexes, context)
} }

View file

@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
type PrefixCoded []byte type PrefixCoded []byte
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) { func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
return rv, err
}
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
rv PrefixCoded, preallocRest []byte, err error) {
if shift > 63 { if shift > 63 {
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift) return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
} }
nChars := ((63 - shift) / 7) + 1 nChars := ((63 - shift) / 7) + 1
rv := make(PrefixCoded, nChars+1)
size := int(nChars + 1)
if len(prealloc) >= size {
rv = PrefixCoded(prealloc[0:size])
preallocRest = prealloc[size:]
} else {
rv = make(PrefixCoded, size)
}
rv[0] = ShiftStartInt64 + byte(shift) rv[0] = ShiftStartInt64 + byte(shift)
sortableBits := int64(uint64(in) ^ 0x8000000000000000) sortableBits := int64(uint64(in) ^ 0x8000000000000000)
@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
nChars-- nChars--
sortableBits = int64(uint64(sortableBits) >> 7) sortableBits = int64(uint64(sortableBits) >> 7)
} }
return rv, nil
return rv, preallocRest, nil
} }
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded { func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {

View file

@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) {
// result score explanations. // result score explanations.
// Sort describes the desired order for the results to be returned. // Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed // Score controls the kind of scoring performed
// SearchAfter supports deep paging by providing a minimum sort key
// SearchBefore supports deep paging by providing a maximum sort key
// //
// A special field named "*" can be used to return all fields. // A special field named "*" can be used to return all fields.
type SearchRequest struct { type SearchRequest struct {
@ -275,6 +277,8 @@ type SearchRequest struct {
Sort search.SortOrder `json:"sort"` Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"` IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"` Score string `json:"score,omitempty"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
} }
func (r *SearchRequest) Validate() error { func (r *SearchRequest) Validate() error {
@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error {
} }
} }
if r.SearchAfter != nil && r.SearchBefore != nil {
return fmt.Errorf("cannot use search after and search before together")
}
if r.SearchAfter != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search after with from !=0")
}
if len(r.SearchAfter) != len(r.Sort) {
return fmt.Errorf("search after must have same size as sort order")
}
}
if r.SearchBefore != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search before with from !=0")
}
if len(r.SearchBefore) != len(r.Sort) {
return fmt.Errorf("search before must have same size as sort order")
}
}
return r.Facets.Validate() return r.Facets.Validate()
} }
@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
r.Sort = order r.Sort = order
} }
// SetSearchAfter sets the request to skip over hits with a sort
// value less than the provided sort after key.
//
// Validate rejects a request that sets SearchAfter together with
// SearchBefore, that has a non-zero From, or whose after key does not
// have exactly one entry per element of Sort.
func (r *SearchRequest) SetSearchAfter(after []string) {
r.SearchAfter = after
}
// SetSearchBefore sets the request to skip over hits with a sort
// value greater than the provided sort before key.
//
// Validate rejects a request that sets SearchBefore together with
// SearchAfter, that has a non-zero From, or whose before key does not
// have exactly one entry per element of Sort.
func (r *SearchRequest) SetSearchBefore(before []string) {
r.SearchBefore = before
}
// UnmarshalJSON deserializes a JSON representation of // UnmarshalJSON deserializes a JSON representation of
// a SearchRequest // a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error { func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Sort []json.RawMessage `json:"sort"` Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"` IncludeLocations bool `json:"includeLocations"`
Score string `json:"score"` Score string `json:"score"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
} }
err := json.Unmarshal(input, &temp) err := json.Unmarshal(input, &temp)
@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Facets = temp.Facets r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations r.IncludeLocations = temp.IncludeLocations
r.Score = temp.Score r.Score = temp.Score
r.SearchAfter = temp.SearchAfter
r.SearchBefore = temp.SearchBefore
r.Query, err = query.ParseQuery(temp.Q) r.Query, err = query.ParseQuery(temp.Q)
if err != nil { if err != nil {
return err return err

View file

@ -69,6 +69,7 @@ type TopNCollector struct {
lowestMatchOutsideResults *search.DocumentMatch lowestMatchOutsideResults *search.DocumentMatch
updateFieldVisitor index.DocumentFieldTermVisitor updateFieldVisitor index.DocumentFieldTermVisitor
dvReader index.DocValueReader dvReader index.DocValueReader
searchAfter *search.DocumentMatch
} }
// CheckDoneEvery controls how frequently we check the context deadline // CheckDoneEvery controls how frequently we check the context deadline
@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024)
// skipping over the first 'skip' hits // skipping over the first 'skip' hits
// ordering hits by the provided sort order // ordering hits by the provided sort order
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector { func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
return newTopNCollector(size, skip, sort)
}
// NewTopNCollectorAfter builds a collector to find the top 'size' hits
// that sort after the provided 'after' sort key,
// ordering hits by the provided sort order.
//
// The collector is created with a skip of 0. The 'after' key is stored as
// the Sort of a placeholder DocumentMatch (searchAfter) for candidate hits
// to be compared against.
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
rv := newTopNCollector(size, 0, sort)
rv.searchAfter = &search.DocumentMatch{
Sort: after,
}
return rv
}
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
hc := &TopNCollector{size: size, skip: skip, sort: sort} hc := &TopNCollector{size: size, skip: skip, sort: sort}
// pre-allocate space on the store to avoid reslicing // pre-allocate space on the store to avoid reslicing
@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
searchContext := &search.SearchContext{ searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
Collector: hc, Collector: hc,
IndexReader: reader,
} }
hc.dvReader, err = reader.DocValueReader(hc.neededFields) hc.dvReader, err = reader.DocValueReader(hc.neededFields)
@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler(
if d == nil { if d == nil {
return nil return nil
} }
// support search after based pagination,
// if this hit is <= the search after sort key
// we should skip it
if hc.searchAfter != nil {
// exact sort order matches use hit number to break tie
// but we want to allow for exact match, so we pretend
hc.searchAfter.HitNumber = d.HitNumber
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
return nil
}
}
// optimization, we track lowest sorting hit already removed from heap // optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if // with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed // this hit would have been added and then immediately removed

View file

@ -41,6 +41,14 @@ type BleveQueryTime struct {
time.Time time.Time
} }
// MinRFC3339CompatibleTime and MaxRFC3339CompatibleTime are the inclusive
// bounds of the date range accepted for queries when the query date-time
// format is RFC 3339; dates outside this window are rejected as an overflow
// before they are converted with UnixNano.
//
// They are built with time.Date rather than parsed from strings in an init
// function, so there is no parse error to discard and the values are already
// set before any init function runs. The instants are the same as the RFC 3339
// strings "1677-12-01T00:00:00Z" and "2262-04-11T11:59:59Z".
var (
	MinRFC3339CompatibleTime = time.Date(1677, time.December, 1, 0, 0, 0, 0, time.UTC)
	MaxRFC3339CompatibleTime = time.Date(2262, time.April, 11, 11, 59, 59, 0, time.UTC)
)
func queryTimeFromString(t string) (time.Time, error) { func queryTimeFromString(t string) (time.Time, error) {
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser) dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
if err != nil { if err != nil {
@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
min := math.Inf(-1) min := math.Inf(-1)
max := math.Inf(1) max := math.Inf(1)
if !q.Start.IsZero() { if !q.Start.IsZero() {
min = numeric.Int64ToFloat64(q.Start.UnixNano()) if !isDatetimeCompatible(q.Start) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
}
startInt64 := q.Start.UnixNano()
min = numeric.Int64ToFloat64(startInt64)
} }
if !q.End.IsZero() { if !q.End.IsZero() {
max = numeric.Int64ToFloat64(q.End.UnixNano()) if !isDatetimeCompatible(q.End) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
}
endInt64 := q.End.UnixNano()
max = numeric.Int64ToFloat64(endInt64)
} }
return &min, &max, nil return &min, &max, nil
@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
} }
return nil return nil
} }
// isDatetimeCompatible reports whether t may be used as a date range
// endpoint. The range check only applies while the query date-time format is
// RFC 3339; in that case t must lie within
// [MinRFC3339CompatibleTime, MaxRFC3339CompatibleTime]. For any other format
// every time is accepted.
func isDatetimeCompatible(t BleveQueryTime) bool {
	if QueryDateTimeFormat != time.RFC3339 {
		return true
	}
	return !t.Before(MinRFC3339CompatibleTime) && !t.After(MaxRFC3339CompatibleTime)
}

View file

@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
if len(ss) < 1 { if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i) return searcher.NewMatchNoneSearcher(i)
} else if len(ss) == 1 && int(q.Min) == ss[0].Min() {
// apply optimization only if both conditions below are satisfied:
// - disjunction searcher has only 1 child searcher
// - parent searcher's min setting is equal to child searcher's min
return ss[0], nil
} }
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)

View file

@ -0,0 +1,94 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// GeoBoundingPolygonQuery is a query described by a list of polygon points,
// an optional field name and an optional boost.
type GeoBoundingPolygonQuery struct {
// Points holds the polygon points; serialized as "polygon_points".
Points []geo.Point `json:"polygon_points"`
// FieldVal names the field to search; when empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost handed to the searcher.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoBoundingPolygonQuery creates a GeoBoundingPolygonQuery for the given
// polygon points. The field and boost are left unset; the points slice is
// stored as-is, not copied.
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
return &GeoBoundingPolygonQuery{
Points: points}
}
// SetBoost sets the boost of the query to b.
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
// convert into a local so its address can be stored in the pointer field
boost := Boost(b)
q.BoostVal = &boost
}
// Boost returns the boost value of the query, as reported by BoostVal.Value().
// NOTE(review): BoostVal may be nil if SetBoost was never called; this relies
// on (*Boost).Value accepting a nil receiver - confirm against its definition.
func (q *GeoBoundingPolygonQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *GeoBoundingPolygonQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the name of the field this query searches; it is empty when
// no field has been set.
func (q *GeoBoundingPolygonQuery) Field() string {
return q.FieldVal
}
// Searcher builds the geo bounded polygon searcher for this query against
// the given index reader. When the query names no field, the mapping's
// default search field is used instead.
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
	m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	targetField := q.FieldVal
	if targetField == "" {
		targetField = m.DefaultSearchField()
	}

	boost := q.BoostVal.Value()
	return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, targetField, boost, options)
}
// Validate always returns nil: no checks are performed on the query's
// points, field or boost.
func (q *GeoBoundingPolygonQuery) Validate() error {
return nil
}
// UnmarshalJSON deserializes a JSON representation of a
// GeoBoundingPolygonQuery.
//
// Each entry of "polygon_points" is decoded generically and then interpreted
// by geo.ExtractGeoPoint, so every point format that function understands is
// accepted. An error is returned if the JSON is malformed or if any point is
// not in a recognized format.
//
// The receiver is only modified once the whole input has been decoded
// successfully; a failed call leaves q untouched.
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
	tmp := struct {
		Points   []interface{} `json:"polygon_points"`
		FieldVal string        `json:"field,omitempty"`
		BoostVal *Boost        `json:"boost,omitempty"`
	}{}
	if err := json.Unmarshal(data, &tmp); err != nil {
		return err
	}

	// Collect into a local slice so that a bad point part-way through the
	// list does not leave q holding a truncated polygon.
	points := make([]geo.Point, 0, len(tmp.Points))
	for _, raw := range tmp.Points {
		// now use our generic point parsing code from the geo package
		lon, lat, found := geo.ExtractGeoPoint(raw)
		if !found {
			return fmt.Errorf("geo polygon point: %v is not in a valid format", raw)
		}
		points = append(points, geo.Point{Lon: lon, Lat: lat})
	}

	q.Points = points
	q.FieldVal = tmp.FieldVal
	q.BoostVal = tmp.BoostVal
	return nil
}

View file

@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
} }
return &rv, nil return &rv, nil
} }
_, hasPoints := tmp["polygon_points"]
if hasPoints {
var rv GeoBoundingPolygonQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("unknown query type") return nil, fmt.Errorf("unknown query type")
} }

View file

@ -40,6 +40,7 @@ type TermQueryScorer struct {
idf float64 idf float64
options search.SearcherOptions options search.SearcherOptions
idfExplanation *search.Explanation idfExplanation *search.Explanation
includeScore bool
queryNorm float64 queryNorm float64
queryWeight float64 queryWeight float64
queryWeightExplanation *search.Explanation queryWeightExplanation *search.Explanation
@ -70,6 +71,7 @@ func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
options: options, options: options,
queryWeight: 1.0, queryWeight: 1.0,
includeScore: options.Score != "none",
} }
if options.Explain { if options.Explain {
@ -113,9 +115,10 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
} }
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch { func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
if s.includeScore || s.options.Explain {
var scoreExplanation *search.Explanation var scoreExplanation *search.Explanation
// need to compute score
var tf float64 var tf float64
if termMatch.Freq < MaxSqrtCache { if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)] tf = SqrtCache[int(termMatch.Freq)]
@ -157,12 +160,16 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
} }
} }
rv := ctx.DocumentMatchPool.Get() if s.includeScore {
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
rv.Score = score rv.Score = score
}
if s.options.Explain { if s.options.Explain {
rv.Expl = scoreExplanation rv.Expl = scoreExplanation
} }
}
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
if len(termMatch.Vectors) > 0 { if len(termMatch.Vectors) > 0 {
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {

View file

@ -17,6 +17,7 @@ package search
import ( import (
"fmt" "fmt"
"reflect" "reflect"
"sort"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size" "github.com/blevesearch/bleve/size"
@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
return true return true
} }
// Compare orders two sets of array positions lexicographically. It returns
// -1 if ap sorts before other, 1 if ap sorts after other, and 0 if they are
// equal. Elements are compared pairwise from the front; when one is a strict
// prefix of the other, the shorter one sorts first.
func (ap ArrayPositions) Compare(other ArrayPositions) int {
	shared := len(ap)
	if len(other) < shared {
		shared = len(other)
	}

	for idx := 0; idx < shared; idx++ {
		switch {
		case ap[idx] < other[idx]:
			return -1
		case ap[idx] > other[idx]:
			return 1
		}
	}

	// the common prefix is identical, so length decides
	switch {
	case len(ap) > len(other):
		return 1
	case len(ap) < len(other):
		return -1
	}
	return 0
}
type Location struct { type Location struct {
// Pos is the position of the term within the field, starting at 1 // Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"` Pos uint64 `json:"pos"`
@ -68,6 +87,46 @@ func (l *Location) Size() int {
type Locations []*Location type Locations []*Location
// Len reports the number of locations; part of sort.Interface.
func (p Locations) Len() int {
	return len(p)
}

// Swap exchanges the locations at positions i and j; part of sort.Interface.
func (p Locations) Swap(i, j int) {
	p[j], p[i] = p[i], p[j]
}

// Less orders locations by their array positions first and, when those
// compare equal, by their position within the field; part of sort.Interface.
func (p Locations) Less(i, j int) bool {
	left, right := p[i], p[j]
	if c := left.ArrayPositions.Compare(right.ArrayPositions); c != 0 {
		return c < 0
	}
	return left.Pos < right.Pos
}
// Dedupe sorts the locations and drops every location that is identical
// (same Pos, Start, End and ArrayPositions) to the one kept just before it.
//
// It is destructive: p is sorted and compacted in place, and the returned
// slice shares p's backing array.
func (p Locations) Dedupe() Locations { // destructive!
	if len(p) <= 1 {
		return p
	}

	sort.Sort(p)

	last := 0 // index of the most recently kept location
	for _, candidate := range p[1:] {
		kept := p[last]
		sameSpan := kept.Pos == candidate.Pos &&
			kept.Start == candidate.Start &&
			kept.End == candidate.End
		if sameSpan && kept.ArrayPositions.Equals(candidate.ArrayPositions) {
			// duplicate of the last kept entry, skip it
			continue
		}
		last++
		p[last] = candidate
	}

	return p[:last+1]
}
type TermLocationMap map[string]Locations type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) { func (t TermLocationMap) AddLocation(term string, location *Location) {
@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
var lastField string var lastField string
var tlm TermLocationMap var tlm TermLocationMap
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations { for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field { if lastField != ftl.Field {
@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
} }
tlm[ftl.Term] = append(tlm[ftl.Term], loc) locs := tlm[ftl.Term]
// if the loc is before or at the last location, then there
// might be duplicates that need to be deduplicated
if !needsDedupe && len(locs) > 0 {
last := locs[len(locs)-1]
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
needsDedupe = true
}
}
tlm[ftl.Term] = append(locs, loc)
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{ Location: Location{
@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
}, },
} }
} }
if needsDedupe {
for _, tlm := range dm.Locations {
for term, locs := range tlm {
tlm[term] = locs.Dedupe()
}
}
}
} }
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
@ -279,6 +359,7 @@ type SearcherOptions struct {
type SearchContext struct { type SearchContext struct {
DocumentMatchPool *DocumentMatchPool DocumentMatchPool *DocumentMatchPool
Collector Collector Collector Collector
IndexReader index.IndexReader
} }
func (sc *SearchContext) Size() int { func (sc *SearchContext) Size() int {

View file

@ -45,6 +45,7 @@ type BooleanSearcher struct {
scorer *scorer.ConjunctionQueryScorer scorer *scorer.ConjunctionQueryScorer
matches []*search.DocumentMatch matches []*search.DocumentMatch
initialized bool initialized bool
done bool
} }
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) { func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized { if !s.initialized {
err := s.initSearchers(ctx) err := s.initSearchers(ctx)
if err != nil { if err != nil {
@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
} }
} }
if rv == nil {
s.done = true
}
return rv, nil return rv, nil
} }
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized { if !s.initialized {
err := s.initSearchers(ctx) err := s.initSearchers(ctx)
if err != nil { if err != nil {
@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
} }
} }
// Advance the searchers only if the currentID cursor is trailing the lookup ID, // Advance the searcher only if the cursor is trailing the lookup ID
// additionally if the mustNotSearcher has been initialized, ensure that the if s.currentID == nil || s.currentID.Compare(ID) < 0 {
// cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
// currentID) is trailing the lookup ID as well - for in the case where currentID
// is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
// advance the currentID or the currMustNot cursors.
if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
(s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
var err error var err error
if s.mustSearcher != nil { if s.mustSearcher != nil {
if s.currMust != nil { if s.currMust != nil {
@ -362,6 +369,10 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
} }
if s.mustNotSearcher != nil { if s.mustNotSearcher != nil {
// Additional check for mustNotSearcher, whose cursor isn't tracked by
// currentID to prevent it from moving when the searcher's tracked
// position is already ahead of or at the requested ID.
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
if s.currMustNot != nil { if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot) ctx.DocumentMatchPool.Put(s.currMustNot)
} }
@ -370,6 +381,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
return nil, err return nil, err
} }
} }
}
if s.mustSearcher != nil && s.currMust != nil { if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.IndexInternalID s.currentID = s.currMust.IndexInternalID

View file

@ -22,6 +22,11 @@ import (
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
// filterFunc reports whether the given key passes a filter.
type filterFunc func(key []byte) bool
// GeoBitsShift1 is geo.GeoBits shifted left by one bit.
var GeoBitsShift1 = (geo.GeoBits << 1)
// GeoBitsShift1Minus1 is GeoBitsShift1 - 1; NewGeoBoundingBoxSearcher passes
// it to ComputeGeoRange as the starting shift.
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat, func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64, maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) ( options search.SearcherOptions, checkBoundaries bool) (
@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
} }
// do math to produce list of terms needed for this search // do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1, onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries) minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
if err != nil {
return nil, err
}
var onBoundarySearcher search.Searcher var onBoundarySearcher search.Searcher
dvReader, err := indexReader.DocValueReader([]string{field}) dvReader, err := indexReader.DocValueReader([]string{field})
@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2 var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint, func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64, sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
checkBoundaries bool) ( indexReader index.IndexReader, field string) (
onBoundary [][]byte, notOnBoundary [][]byte) { onBoundary [][]byte, notOnBoundary [][]byte, err error) {
split := term | uint64(0x1)<<shift preallocBytesLen := 32
var upperMax uint64 preallocBytes := make([]byte, preallocBytesLen)
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
func relateAndRecurse(start, end uint64, res uint, makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
sminLon, sminLat, smaxLon, smaxLat float64, if len(preallocBytes) <= 0 {
checkBoundaries bool) ( preallocBytesLen = preallocBytesLen * 2
onBoundary [][]byte, notOnBoundary [][]byte) { preallocBytes = make([]byte, preallocBytesLen)
}
rv, preallocBytes, err =
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
return rv
}
var fieldDict index.FieldDictContains
var isIndexed filterFunc
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
defer func() {
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
}()
if isIndexed == nil {
isIndexed = func(term []byte) bool {
if indexReader != nil {
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
if err != nil || reader == nil {
return false
}
if reader.Count() == 0 {
_ = reader.Close()
return false
}
_ = reader.Close()
}
return true
}
}
var computeGeoRange func(term uint64, shift uint) // declare for recursion
relateAndRecurse := func(start, end uint64, res, level uint) {
minLon := geo.MortonUnhashLon(start) minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start) minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end) maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end) maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 && within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat, geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel && if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat, geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) { sminLon, sminLat, smaxLon, smaxLat)) {
codedTerm := makePrefixCoded(int64(start), res)
if isIndexed(codedTerm) {
if !within && checkBoundaries { if !within && checkBoundaries {
return [][]byte{ onBoundary = append(onBoundary, codedTerm)
numeric.MustNewPrefixCodedInt64(int64(start), res), } else {
}, nil notOnBoundary = append(notOnBoundary, codedTerm)
} }
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
} }
} else if level < geoDetailLevel && } else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat, geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) { sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat, computeGeoRange(start, res-1)
checkBoundaries)
} }
return nil, nil }
computeGeoRange = func(term uint64, shift uint) {
if err != nil {
return
}
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
level := (GeoBitsShift1 - shift) >> 1
relateAndRecurse(term, lowerMax, shift, level)
relateAndRecurse(split, upperMax, shift, level)
}
computeGeoRange(term, shift)
if err != nil {
return nil, nil, err
}
return onBoundary, notOnBoundary, err
} }
func buildRectFilter(dvReader index.DocValueReader, field string, func buildRectFilter(dvReader index.DocValueReader, field string,

View file

@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// build a searcher for the box // build a searcher for the box
boxSearcher, err := boxSearcher(indexReader, boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options) field, boost, options, false)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// two boxes joined through a disjunction searcher // two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader, func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64, topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) ( field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) { search.Searcher, error) {
if bottomRightLon < topLeftLon { if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts // cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader, leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat, -180, bottomRightLat, bottomRightLon, topLeftLat,
field, boost, options, false) field, boost, options, checkBoundaries)
if err != nil { if err != nil {
return nil, err return nil, err
} }
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader, rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false) topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
checkBoundaries)
if err != nil { if err != nil {
_ = leftSearcher.Close() _ = leftSearcher.Close()
return nil, err return nil, err
@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader,
// build geoboundingbox searcher for that bounding box // build geoboundingbox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader, boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost, topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false) options, checkBoundaries)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -0,0 +1,110 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"math"
)
// NewGeoBoundedPolygonSearcher builds a searcher for documents whose geo
// point stored in field falls inside polygon.
//
// It works in two stages: a bounding-box searcher over the rectangle that
// encloses the polygon (with boundary checking enabled) selects candidate
// documents, and a filtering searcher then tests every candidate against the
// polygon itself using the doc values of field.
//
// An error is returned when the bounding rectangle cannot be computed, when
// the bounding-box searcher cannot be built, or when the doc value reader for
// field cannot be obtained.
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
	polygon []geo.Point, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	// compute the bounding box enclosing the polygon
	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
		geo.BoundingRectangleForPolygon(polygon)
	if err != nil {
		return nil, err
	}

	// build a searcher for the bounding box on the polygon; the local is not
	// named boxSearcher so it does not shadow the function being called
	rectSearcher, err := boxSearcher(indexReader,
		topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
		field, boost, options, true)
	if err != nil {
		return nil, err
	}

	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		// the bounding-box searcher is already open at this point; close it
		// so that it is not leaked when we bail out. The Close error is
		// deliberately dropped: the DocValueReader error is the one to report.
		_ = rectSearcher.Close()
		return nil, err
	}

	// wrap it in a filtering searcher that checks for the polygon inclusivity
	return NewFilteringSearcher(rectSearcher,
		buildPolygonFilter(dvReader, field, polygon)), nil
}
const float64EqualityThreshold = 1e-6
func almostEqual(a, b float64) bool {
return math.Abs(a-b) <= float64EqualityThreshold
}
// buildPolygonFilter returns a FilterFunc that reports whether the geo point
// of a document match lies inside polygon. It is based on the ray-casting
// technique as referred here:
// https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
//
// The point is decoded from the document's doc values read through dvReader:
// only prefix-coded terms with shift 0 are considered, and if several such
// terms are visited the last one wins. The filter returns false when the doc
// values cannot be visited, when no usable term is found, or when polygon is
// empty.
//
// A point that matches one of the polygon vertices (within the almostEqual
// tolerance) is always accepted. The polygon may be given either closed
// (first vertex repeated at the end) or open; the edge from the last vertex
// back to the first is always taken into account.
func buildPolygonFilter(dvReader index.DocValueReader, field string,
	polygon []geo.Point) FilterFunc {
	// rayCrossesEdge reports whether the ray cast from (lon, lat) towards
	// increasing longitude crosses the edge running from prev to cur. An edge
	// whose end points share the same latitude never counts as crossed, so
	// the division below is only evaluated with a non-zero denominator.
	rayCrossesEdge := func(lon, lat float64, prev, cur geo.Point) bool {
		return (cur.Lat > lat) != (prev.Lat > lat) &&
			lon < (prev.Lon-cur.Lon)*(lat-cur.Lat)/
				(prev.Lat-cur.Lat)+cur.Lon
	}

	return func(d *search.DocumentMatch) bool {
		var lon, lat float64
		var found bool

		err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
			// only consider the values which are shifted 0
			prefixCoded := numeric.PrefixCoded(term)
			shift, err := prefixCoded.Shift()
			if err == nil && shift == 0 {
				i64, err := prefixCoded.Int64()
				if err == nil {
					lon = geo.MortonUnhashLon(uint64(i64))
					lat = geo.MortonUnhashLat(uint64(i64))
					found = true
				}
			}
		})
		if err != nil || !found {
			return false
		}

		nVertices := len(polygon)
		if nVertices == 0 {
			// nothing can be inside an empty polygon; this also keeps the
			// polygon[0] access below from panicking
			return false
		}

		// Note: this approach works for points which are strictly inside
		// the polygon. ie it might fail for certain points on the polygon boundaries.
		var inside bool

		// check for a direct vertex match
		if almostEqual(polygon[0].Lat, lat) &&
			almostEqual(polygon[0].Lon, lon) {
			return true
		}

		for i := 1; i < nVertices; i++ {
			if almostEqual(polygon[i].Lat, lat) &&
				almostEqual(polygon[i].Lon, lon) {
				return true
			}
			if rayCrossesEdge(lon, lat, polygon[i-1], polygon[i]) {
				inside = !inside
			}
		}

		// Close the ring: test the edge from the last vertex back to the
		// first. When the caller already repeated the first vertex at the
		// end, both end points have the same latitude and the edge is never
		// counted, so closed polygons behave exactly as before.
		if rayCrossesEdge(lon, lat, polygon[nVertices-1], polygon[0]) {
			inside = !inside
		}

		return inside
	}
}

View file

@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
if !*inclusiveMax && maxInt64 != math.MinInt64 { if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64-- maxInt64--
} }
var fieldDict index.FieldDictContains
var isIndexed filterFunc
var err error
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
// FIXME hard-coded precision, should match field declaration // FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4) termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate() terms := termRanges.Enumerate(isIndexed)
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
if len(terms) < 1 { if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with // cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132 // commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options, return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true) true)
} }
var err error
// for upside_down
if isIndexed == nil {
terms, err = filterCandidateTerms(indexReader, terms, field) terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil { if err != nil {
return nil, err return nil, err
} }
}
if tooManyClauses(len(terms)) { if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms)) return nil, tooManyClausesErr(len(terms))
} }
@ -125,11 +154,17 @@ type termRange struct {
endTerm []byte endTerm []byte
} }
func (t *termRange) Enumerate() [][]byte { func (t *termRange) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte var rv [][]byte
next := t.startTerm next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 { for bytes.Compare(next, t.endTerm) <= 0 {
if filter != nil {
if filter(next) {
rv = append(rv, next) rv = append(rv, next)
}
} else {
rv = append(rv, next)
}
next = incrementBytes(next) next = incrementBytes(next)
} }
return rv return rv
@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
type termRanges []*termRange type termRanges []*termRange
func (tr termRanges) Enumerate() [][]byte { func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte var rv [][]byte
for _, tri := range tr { for _, tri := range tr {
trie := tri.Enumerate() trie := tri.Enumerate(filter)
rv = append(rv, trie...) rv = append(rv, trie...)
} }
return rv return rv

View file

@ -38,6 +38,8 @@ type SearchSort interface {
RequiresScoring() bool RequiresScoring() bool
RequiresFields() []string RequiresFields() []string
Reverse()
Copy() SearchSort Copy() SearchSort
} }
@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool {
return rv return rv
} }
// Reverse calls Reverse on every sort contained in the order.
func (so SortOrder) Reverse() {
	for i := range so {
		so[i].Reverse()
	}
}
// SortFieldType lets you control some internal sort behavior // SortFieldType lets you control some internal sort behavior
// normally leaving this to the zero-value of SortFieldAuto is fine // normally leaving this to the zero-value of SortFieldAuto is fine
type SortFieldType int type SortFieldType int
@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort {
return &rv return &rv
} }
// Reverse inverts the sort in place: the Desc flag is toggled and the
// placement of documents missing the field is swapped, so that
// SortFieldMissingFirst becomes SortFieldMissingLast and any other value
// becomes SortFieldMissingFirst.
func (s *SortField) Reverse() {
	s.Desc = !s.Desc
	switch s.Missing {
	case SortFieldMissingFirst:
		s.Missing = SortFieldMissingLast
	default:
		s.Missing = SortFieldMissingFirst
	}
}
// SortDocID will sort results by the document identifier // SortDocID will sort results by the document identifier
type SortDocID struct { type SortDocID struct {
Desc bool Desc bool
@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort {
return &rv return &rv
} }
// Reverse inverts the sort direction in place by toggling Desc.
func (s *SortDocID) Reverse() {
s.Desc = !s.Desc
}
// SortScore will sort results by the document match score // SortScore will sort results by the document match score
type SortScore struct { type SortScore struct {
Desc bool Desc bool
@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort {
return &rv return &rv
} }
// Reverse inverts the sort direction in place by toggling Desc.
func (s *SortScore) Reverse() {
s.Desc = !s.Desc
}
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0)) var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates SearchSort instance for sorting documents by // NewSortGeoDistance creates SearchSort instance for sorting documents by
@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort {
return &rv return &rv
} }
// Reverse inverts the sort direction in place by toggling Desc.
func (s *SortGeoDistance) Reverse() {
s.Desc = !s.Desc
}
type BytesSlice [][]byte type BytesSlice [][]byte
func (p BytesSlice) Len() int { return len(p) } func (p BytesSlice) Len() int { return len(p) }

View file

@ -1,10 +1,9 @@
language: go language: go
go: go:
- 1.4 - 1.7
script: script:
- go get golang.org/x/tools/cmd/vet
- go get golang.org/x/tools/cmd/cover - go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls - go get github.com/mattn/goveralls
- go test -v -covermode=count -coverprofile=profile.out - go test -v -covermode=count -coverprofile=profile.out

View file

@ -18,7 +18,7 @@ import (
"bytes" "bytes"
) )
// Iterator represents a means of visity key/value pairs in order. // Iterator represents a means of visiting key/value pairs in order.
type Iterator interface { type Iterator interface {
// Current() returns the key/value pair currently pointed to. // Current() returns the key/value pair currently pointed to.
@ -186,29 +186,42 @@ func (i *FSTIterator) Next() error {
} }
func (i *FSTIterator) next(lastOffset int) error { func (i *FSTIterator) next(lastOffset int) error {
// remember where we started // remember where we started with keysStack in this next() call
i.nextStart = append(i.nextStart[:0], i.keysStack...) i.nextStart = append(i.nextStart[:0], i.keysStack...)
nextOffset := lastOffset + 1 nextOffset := lastOffset + 1
allowCompare := false
OUTER: OUTER:
for true { for true {
curr := i.statesStack[len(i.statesStack)-1] curr := i.statesStack[len(i.statesStack)-1]
autCurr := i.autStatesStack[len(i.autStatesStack)-1] autCurr := i.autStatesStack[len(i.autStatesStack)-1]
if curr.Final() && i.aut.IsMatch(autCurr) && if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare {
bytes.Compare(i.keysStack, i.nextStart) > 0 { // check to see if new keystack might have gone too far
if i.endKeyExclusive != nil &&
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
return ErrIteratorDone
}
cmp := bytes.Compare(i.keysStack, i.nextStart)
if cmp > 0 {
// in final state greater than start key // in final state greater than start key
return nil return nil
} }
}
numTrans := curr.NumTransitions() numTrans := curr.NumTransitions()
INNER: INNER:
for nextOffset < numTrans { for nextOffset < numTrans {
t := curr.TransitionAt(nextOffset) t := curr.TransitionAt(nextOffset)
autNext := i.aut.Accept(autCurr, t) autNext := i.aut.Accept(autCurr, t)
if !i.aut.CanMatch(autNext) { if !i.aut.CanMatch(autNext) {
// TODO: potential optimization to skip nextOffset
// forwards more directly to something that the
// automaton likes rather than a linear scan?
nextOffset += 1 nextOffset += 1
continue INNER continue INNER
} }
@ -234,30 +247,41 @@ OUTER:
i.valsStack = append(i.valsStack, v) i.valsStack = append(i.valsStack, v)
i.autStatesStack = append(i.autStatesStack, autNext) i.autStatesStack = append(i.autStatesStack, autNext)
// check to see if new keystack might have gone too far
if i.endKeyExclusive != nil &&
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
return ErrIteratorDone
}
nextOffset = 0 nextOffset = 0
allowCompare = true
continue OUTER continue OUTER
} }
// no more transitions, so need to backtrack and stack pop
if len(i.statesStack) <= 1 { if len(i.statesStack) <= 1 {
// stack len is 1 (root), can't go back further, we're done // stack len is 1 (root), can't go back further, we're done
break break
} }
// no transitions, and still room to pop // if the top of the stack represents a linear chain of states
i.statesStack = i.statesStack[:len(i.statesStack)-1] // (i.e., a suffix of nodes linked by single transitions),
i.keysStack = i.keysStack[:len(i.keysStack)-1] // then optimize by popping the suffix in one shot without
// going back all the way to the OUTER loop
var popNum int
for j := len(i.statesStack) - 1; j > 0; j-- {
if i.statesStack[j].NumTransitions() != 1 {
popNum = len(i.statesStack) - 1 - j
break
}
}
if popNum < 1 { // always pop at least 1 entry from the stacks
popNum = 1
}
nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1 nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1
allowCompare = false
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1] i.statesStack = i.statesStack[:len(i.statesStack)-popNum]
i.valsStack = i.valsStack[:len(i.valsStack)-1] i.keysStack = i.keysStack[:len(i.keysStack)-popNum]
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1] i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum]
i.valsStack = i.valsStack[:len(i.valsStack)-popNum]
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum]
} }
return ErrIteratorDone return ErrIteratorDone

10
vendor/github.com/couchbase/vellum/go.mod generated vendored Normal file
View file

@ -0,0 +1,10 @@
module github.com/couchbase/vellum
go 1.12
require (
github.com/edsrzf/mmap-go v1.0.0
github.com/spf13/cobra v0.0.5
github.com/willf/bitset v1.1.10
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect
)

39
vendor/github.com/couchbase/vellum/go.sum generated vendored Normal file
View file

@ -0,0 +1,39 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
package levenshtein2 package levenshtein
import ( import (
"fmt" "fmt"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
package levenshtein2 package levenshtein
import ( import (
"fmt" "fmt"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
package levenshtein2 package levenshtein
import "fmt" import "fmt"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
package levenshtein2 package levenshtein
import ( import (
"math" "math"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
package levenshtein2 package levenshtein
import ( import (
"crypto/md5" "crypto/md5"

View file

@ -75,8 +75,15 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) {
Rune0: [2]rune{r, r}, Rune0: [2]rune{r, r},
} }
next.Rune = next.Rune0[0:2] next.Rune = next.Rune0[0:2]
return c.c(&next) // try to find more folded runes
for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) {
next.Rune = append(next.Rune, r1, r1)
} }
err = c.c(&next)
if err != nil {
return err
}
} else {
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc( c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes) r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil { if err != nil {
@ -86,6 +93,7 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) {
c.compileUtf8Ranges(seq) c.compileUtf8Ranges(seq)
} }
} }
}
case syntax.OpAnyChar: case syntax.OpAnyChar:
next := syntax.Regexp{ next := syntax.Regexp{
Op: syntax.OpCharClass, Op: syntax.OpCharClass,

12
vendor/github.com/etcd-io/bbolt/bolt_riscv64.go generated vendored Normal file
View file

@ -0,0 +1,12 @@
// +build riscv64
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
// The value is 2^48 - 1 bytes.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
// The value is 2^31 - 1.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
// Set to true for this architecture (riscv64, per the build tag of this file).
var brokenUnaligned = true

View file

@ -121,6 +121,7 @@ type DB struct {
AllocSize int AllocSize int
path string path string
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File file *os.File
dataref []byte // mmap'ed readonly, write throws SEGV dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte data *[maxMapSize]byte
@ -199,10 +200,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
db.readOnly = true db.readOnly = true
} }
db.openFile = options.OpenFile
if db.openFile == nil {
db.openFile = os.OpenFile
}
// Open data file and separate sync handler for metadata writes. // Open data file and separate sync handler for metadata writes.
db.path = path db.path = path
var err error var err error
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { if db.file, err = db.openFile(db.path, flag|os.O_CREATE, mode); err != nil {
_ = db.close() _ = db.close()
return nil, err return nil, err
} }
@ -1054,6 +1060,10 @@ type Options struct {
// set directly on the DB itself when returned from Open(), but this option // set directly on the DB itself when returned from Open(), but this option
// is useful in APIs which expose Options but not the underlying DB. // is useful in APIs which expose Options but not the underlying DB.
NoSync bool NoSync bool
// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)
} }
// DefaultOptions represent the options used if nil options are passed into Open(). // DefaultOptions represent the options used if nil options are passed into Open().

View file

@ -349,6 +349,28 @@ func (f *freelist) reload(p *page) {
f.readIDs(a) f.readIDs(a)
} }
// noSyncReload rebuilds the available free list from pgids, leaving out
// every page id that currently sits in one of the pending lists, and hands
// the remaining ids to readIDs.
func (f *freelist) noSyncReload(pgids []pgid) {
	// Gather the ids of all pages that are pending in some transaction.
	pendingIDs := make(map[pgid]bool)
	for _, txp := range f.pending {
		for _, id := range txp.ids {
			pendingIDs[id] = true
		}
	}

	// Walk the supplied ids in order and keep only those that are not
	// pending; the slice stays nil when nothing qualifies.
	var available []pgid
	for _, id := range pgids {
		if pendingIDs[id] {
			continue
		}
		available = append(available, id)
	}

	f.readIDs(available)
}
// reindex rebuilds the free cache based on available and pending free lists. // reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() { func (f *freelist) reindex() {
ids := f.getFreePageIDs() ids := f.getFreePageIDs()

View file

@ -254,18 +254,37 @@ func (tx *Tx) Rollback() error {
if tx.db == nil { if tx.db == nil {
return ErrTxClosed return ErrTxClosed
} }
tx.rollback() tx.nonPhysicalRollback()
return nil return nil
} }
// nonPhysicalRollback backs the user-facing Rollback call. It undoes the
// transaction's freelist changes for writable transactions and closes the
// transaction, without reloading the free pages from disk. It is a no-op on
// a transaction that has no database attached.
func (tx *Tx) nonPhysicalRollback() {
	db := tx.db
	if db == nil {
		return
	}

	// only writable transactions have freelist changes to undo
	if tx.writable {
		db.freelist.rollback(tx.meta.txid)
	}

	tx.close()
}
// rollback needs to reload the free pages from disk in case some system error happens like fsync error.
func (tx *Tx) rollback() { func (tx *Tx) rollback() {
if tx.db == nil { if tx.db == nil {
return return
} }
if tx.writable { if tx.writable {
tx.db.freelist.rollback(tx.meta.txid) tx.db.freelist.rollback(tx.meta.txid)
if !tx.db.hasSyncedFreelist() {
// Reconstruct free page list by scanning the DB to get the whole free page list.
// Note: scanning the whole db is heavy if your db size is large in NoSyncFreeList mode.
tx.db.freelist.noSyncReload(tx.db.freepages())
} else {
// Read free page list from freelist page.
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
} }
}
tx.close() tx.close()
} }
@ -315,7 +334,7 @@ func (tx *Tx) Copy(w io.Writer) error {
// If err == nil then exactly tx.Size() bytes will be written into the writer. // If err == nil then exactly tx.Size() bytes will be written into the writer.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// Attempt to open reader with WriteFlag // Attempt to open reader with WriteFlag
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
if err != nil { if err != nil {
return 0, err return 0, err
} }
@ -369,7 +388,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// A reader transaction is maintained during the copy so it is safe to continue // A reader transaction is maintained during the copy so it is safe to continue
// using the database while a copy is in progress. // using the database while a copy is in progress.
func (tx *Tx) CopyFile(path string, mode os.FileMode) error { func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
if err != nil { if err != nil {
return err return err
} }

View file

@ -1,3 +1,5 @@
The MIT license.
Copyright (c) 2014 the go-unsnap-stream authors. Copyright (c) 2014 the go-unsnap-stream authors.
Permission is hereby granted, free of charge, to any person obtaining a copy of Permission is hereby granted, free of charge, to any person obtaining a copy of
@ -7,6 +9,9 @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so, the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions: subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
@ -14,5 +19,3 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Permission is explicitly granted to relicense this material under new terms of
your choice when integrating this library with another library or project.

View file

@ -7,7 +7,9 @@ Note that the *streaming or framing format* for snappy is different from snappy
Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4]. Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4].
For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C. Update to the previous paragraph: Hooray! Good news: Thanks to @nigeltao, we have since learned that the [github.com/golang/snappy](https://github.com/golang/snappy) package now provides the snappy streaming format too. Even though the type level descriptions are a little misleading because they don't mention that they are for the stream format, the [snappy package header documentation](https://godoc.org/github.com/golang/snappy) points out that the [snappy.Reader](https://godoc.org/github.com/golang/snappy#Reader) and [snappy.Writer](https://godoc.org/github.com/golang/snappy#Writer) types do indeed provide stream (vs block) handling. Although I have not benchmarked, you should probably prefer that package as it will likely be maintained more than I have time to devote, and also perhaps better integrated with the underlying snappy as they share the same repo.
For binary compatibility with the [python implementation](https://pypi.python.org/pypi/python-snappy) in [4], one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C.
However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead. However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead.

View file

@ -7,6 +7,7 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"os" "os"
"strings"
"hash/crc32" "hash/crc32"
@ -188,10 +189,15 @@ func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedS
// continue below, processing the nread bytes // continue below, processing the nread bytes
err = nil err = nil
} }
} else {
// may be an odd already closed... don't panic on that
if strings.Contains(err.Error(), "file already closed") {
err = nil
} else { } else {
panic(err) panic(err)
} }
} }
}
// flag for printing chunk size alignment messages // flag for printing chunk size alignment messages
verbose := false verbose := false

View file

@ -5,6 +5,8 @@ import (
"reflect" "reflect"
) )
const resumableDefault = false
var ( var (
// ErrShortBytes is returned when the // ErrShortBytes is returned when the
// slice being decoded is too short to // slice being decoded is too short to
@ -30,95 +32,236 @@ type Error interface {
Resumable() bool Resumable() bool
} }
// contextError allows msgp Error instances to be enhanced with additional
// context about their origin.
//
// It embeds Error, so every contextError also provides Error() and
// Resumable(). WrapError dispatches to withContext for any error that
// satisfies this interface instead of wrapping it in a generic wrapper.
type contextError interface {
Error
// withContext must not modify the error instance - it must clone and
// return a new error with the context added.
//
// ctx is the already-formatted context string to attach.
withContext(ctx string) error
}
// Cause unwraps an error produced by wrapping with additional context.
//
// When e is an errWrapped carrying a non-nil cause, that cause is returned.
// Every other input (including nil, and an errWrapped with a nil cause) is
// returned unchanged.
func Cause(e error) error {
	wrapped, isWrapped := e.(errWrapped)
	if !isWrapped || wrapped.cause == nil {
		return e
	}
	return wrapped.cause
}
// Resumable reports the resumability of e.
//
// If e implements the Error interface, the answer is delegated to its
// Resumable method. Any other error (including nil) yields resumableDefault.
func Resumable(e error) bool {
	msgpErr, ok := e.(Error)
	if !ok {
		return resumableDefault
	}
	return msgpErr.Resumable()
}
// WrapError attaches context to err so that the part of the serialized type
// that caused the problem can be identified. The context values are joined
// into a single string by ctxString. The original error can be recovered
// with Cause().
//
// The input error is never modified; a new error value is returned, except
// in the errShort case below where the input itself is handed back.
//
// Dispatch, in order:
//   - errShort is returned untouched, because ErrShortBytes must keep its
//     identity for backward compatibility with the public API.
//   - a contextError is asked to clone itself with the added context.
//   - anything else is wrapped in an errWrapped.
func WrapError(err error, ctx ...interface{}) error {
	if _, short := err.(errShort); short {
		return err
	}
	if withCtx, ok := err.(contextError); ok {
		return withCtx.withContext(ctxString(ctx))
	}
	return errWrapped{cause: err, ctx: ctxString(ctx)}
}
// ctxString renders each element of ctx with the %v verb and joins the
// results with "/" into one string. An empty or nil slice yields "".
func ctxString(ctx []interface{}) string {
	var joined string
	for i := range ctx {
		if i != 0 {
			joined += "/"
		}
		joined += fmt.Sprintf("%v", ctx[i])
	}
	return joined
}
// addCtx prepends add to an existing context string ctx, separated by "/".
// When ctx is empty, add is returned on its own.
func addCtx(ctx, add string) string {
	if ctx == "" {
		return add
	}
	return add + "/" + ctx
}
// errWrapped is the generic wrapper WrapError uses for errors that cannot
// carry context themselves. It pairs the original error (cause) with a
// context string (ctx); Cause() returns the original error again.
type errWrapped struct {
	cause error
	ctx   string
}

// Error returns the cause's message, followed by " at <ctx>" when a
// context string is present.
func (e errWrapped) Error() string {
	if e.ctx == "" {
		return e.cause.Error()
	}
	return fmt.Sprintf("%s at %s", e.cause, e.ctx)
}

// Resumable defers to the cause when it implements Error; otherwise it
// reports resumableDefault.
func (e errWrapped) Resumable() bool {
	inner, ok := e.cause.(Error)
	if !ok {
		return resumableDefault
	}
	return inner.Resumable()
}
type errShort struct{} type errShort struct{}
func (e errShort) Error() string { return "msgp: too few bytes left to read object" } func (e errShort) Error() string { return "msgp: too few bytes left to read object" }
func (e errShort) Resumable() bool { return false } func (e errShort) Resumable() bool { return false }
type errFatal struct{} type errFatal struct {
ctx string
}
func (f errFatal) Error() string {
out := "msgp: fatal decoding error (unreachable code)"
if f.ctx != "" {
out += " at " + f.ctx
}
return out
}
func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" }
func (f errFatal) Resumable() bool { return false } func (f errFatal) Resumable() bool { return false }
// withContext returns a copy of f whose context has ctx prepended (via
// addCtx) to any context already recorded. The receiver is a value, so the
// caller's instance is left untouched.
func (f errFatal) withContext(ctx string) error {
	withCtx := f
	withCtx.ctx = addCtx(f.ctx, ctx)
	return withCtx
}
// ArrayError is an error returned // ArrayError is an error returned
// when decoding a fix-sized array // when decoding a fix-sized array
// of the wrong size // of the wrong size
type ArrayError struct { type ArrayError struct {
Wanted uint32 Wanted uint32
Got uint32 Got uint32
ctx string
} }
// Error implements the error interface // Error implements the error interface
func (a ArrayError) Error() string { func (a ArrayError) Error() string {
return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got) out := fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got)
if a.ctx != "" {
out += " at " + a.ctx
}
return out
} }
// Resumable is always 'true' for ArrayErrors // Resumable is always 'true' for ArrayErrors
func (a ArrayError) Resumable() bool { return true } func (a ArrayError) Resumable() bool { return true }
// withContext returns a copy of a whose context has ctx prepended (via
// addCtx) to any context already recorded. Wanted and Got are carried over
// unchanged; the caller's instance is not modified.
func (a ArrayError) withContext(ctx string) error {
	withCtx := a
	withCtx.ctx = addCtx(a.ctx, ctx)
	return withCtx
}
// IntOverflow is returned when a call // IntOverflow is returned when a call
// would downcast an integer to a type // would downcast an integer to a type
// with too few bits to hold its value. // with too few bits to hold its value.
type IntOverflow struct { type IntOverflow struct {
Value int64 // the value of the integer Value int64 // the value of the integer
FailedBitsize int // the bit size that the int64 could not fit into FailedBitsize int // the bit size that the int64 could not fit into
ctx string
} }
// Error implements the error interface // Error implements the error interface
func (i IntOverflow) Error() string { func (i IntOverflow) Error() string {
return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize) str := fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize)
if i.ctx != "" {
str += " at " + i.ctx
}
return str
} }
// Resumable is always 'true' for overflows // Resumable is always 'true' for overflows
func (i IntOverflow) Resumable() bool { return true } func (i IntOverflow) Resumable() bool { return true }
// withContext returns a copy of i whose context has ctx prepended (via
// addCtx) to any context already recorded. Value and FailedBitsize are
// carried over unchanged; the caller's instance is not modified.
func (i IntOverflow) withContext(ctx string) error {
	withCtx := i
	withCtx.ctx = addCtx(i.ctx, ctx)
	return withCtx
}
// UintOverflow is returned when a call // UintOverflow is returned when a call
// would downcast an unsigned integer to a type // would downcast an unsigned integer to a type
// with too few bits to hold its value // with too few bits to hold its value
type UintOverflow struct { type UintOverflow struct {
Value uint64 // value of the uint Value uint64 // value of the uint
FailedBitsize int // the bit size that couldn't fit the value FailedBitsize int // the bit size that couldn't fit the value
ctx string
} }
// Error implements the error interface // Error implements the error interface
func (u UintOverflow) Error() string { func (u UintOverflow) Error() string {
return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize) str := fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize)
if u.ctx != "" {
str += " at " + u.ctx
}
return str
} }
// Resumable is always 'true' for overflows // Resumable is always 'true' for overflows
func (u UintOverflow) Resumable() bool { return true } func (u UintOverflow) Resumable() bool { return true }
// withContext returns a copy of u whose context has ctx prepended (via
// addCtx) to any context already recorded. Value and FailedBitsize are
// carried over unchanged; the caller's instance is not modified.
func (u UintOverflow) withContext(ctx string) error {
	withCtx := u
	withCtx.ctx = addCtx(u.ctx, ctx)
	return withCtx
}
// UintBelowZero is returned when a call // UintBelowZero is returned when a call
// would cast a signed integer below zero // would cast a signed integer below zero
// to an unsigned integer. // to an unsigned integer.
type UintBelowZero struct { type UintBelowZero struct {
Value int64 // value of the incoming int Value int64 // value of the incoming int
ctx string
} }
// Error implements the error interface // Error implements the error interface
func (u UintBelowZero) Error() string { func (u UintBelowZero) Error() string {
return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value) str := fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value)
if u.ctx != "" {
str += " at " + u.ctx
}
return str
} }
// Resumable is always 'true' for overflows // Resumable is always 'true' for overflows
func (u UintBelowZero) Resumable() bool { return true } func (u UintBelowZero) Resumable() bool { return true }
// withContext returns a copy of u whose context has ctx prepended (via
// addCtx) to any context already recorded. Value is carried over unchanged
// and the caller's instance is not modified.
//
// The context is accumulated rather than overwritten so that nested
// wrapping keeps the full path, matching the withContext methods of the
// other error types in this file (ArrayError, IntOverflow, UintOverflow,
// TypeError, ...).
func (u UintBelowZero) withContext(ctx string) error {
	u.ctx = addCtx(u.ctx, ctx)
	return u
}
// A TypeError is returned when a particular // A TypeError is returned when a particular
// decoding method is unsuitable for decoding // decoding method is unsuitable for decoding
// a particular MessagePack value. // a particular MessagePack value.
type TypeError struct { type TypeError struct {
Method Type // Type expected by method Method Type // Type expected by method
Encoded Type // Type actually encoded Encoded Type // Type actually encoded
ctx string
} }
// Error implements the error interface // Error implements the error interface
func (t TypeError) Error() string { func (t TypeError) Error() string {
return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method) out := fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method)
if t.ctx != "" {
out += " at " + t.ctx
}
return out
} }
// Resumable returns 'true' for TypeErrors // Resumable returns 'true' for TypeErrors
func (t TypeError) Resumable() bool { return true } func (t TypeError) Resumable() bool { return true }
// withContext returns a copy of t whose context has ctx prepended (via
// addCtx) to any context already recorded. Method and Encoded are carried
// over unchanged; the caller's instance is not modified.
func (t TypeError) withContext(ctx string) error {
	withCtx := t
	withCtx.ctx = addCtx(t.ctx, ctx)
	return withCtx
}
// returns either InvalidPrefixError or // returns either InvalidPrefixError or
// TypeError depending on whether or not // TypeError depending on whether or not
// the prefix is recognized // the prefix is recognized
@ -148,10 +291,24 @@ func (i InvalidPrefixError) Resumable() bool { return false }
// to a function that takes `interface{}`. // to a function that takes `interface{}`.
type ErrUnsupportedType struct { type ErrUnsupportedType struct {
T reflect.Type T reflect.Type
ctx string
} }
// Error implements error // Error implements error
func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported", e.T) } func (e *ErrUnsupportedType) Error() string {
out := fmt.Sprintf("msgp: type %q not supported", e.T)
if e.ctx != "" {
out += " at " + e.ctx
}
return out
}
// Resumable returns 'true' for ErrUnsupportedType // Resumable returns 'true' for ErrUnsupportedType
func (e *ErrUnsupportedType) Resumable() bool { return true } func (e *ErrUnsupportedType) Resumable() bool { return true }
// withContext returns a pointer to a fresh copy of *e whose context has ctx
// prepended (via addCtx) to any context already recorded. The value e
// points to is not modified, even though the receiver is a pointer.
func (e *ErrUnsupportedType) withContext(ctx string) error {
	clone := new(ErrUnsupportedType)
	*clone = *e
	clone.ctx = addCtx(e.ctx, ctx)
	return clone
}

View file

@ -685,7 +685,7 @@ func (mw *Writer) WriteIntf(v interface{}) error {
case reflect.Map: case reflect.Map:
return mw.writeMap(val) return mw.writeMap(val)
} }
return &ErrUnsupportedType{val.Type()} return &ErrUnsupportedType{T: val.Type()}
} }
func (mw *Writer) writeMap(v reflect.Value) (err error) { func (mw *Writer) writeMap(v reflect.Value) (err error) {

Some files were not shown because too many files have changed in this diff Show more