Upgrade blevesearch to v0.8.1 (#9177)

For #1441

a91b427b59
This commit is contained in:
Mura Li 2019-11-27 17:23:33 +08:00 committed by Lauris BH
parent b50dee5a61
commit 9591185c8f
180 changed files with 43400 additions and 41105 deletions

21
go.mod
View file

@ -16,14 +16,14 @@ require (
gitea.com/macaron/session v0.0.0-20190821211443-122c47c5f705
gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7
github.com/PuerkitoBio/goquery v1.5.0
github.com/RoaringBitmap/roaring v0.4.7 // indirect
github.com/RoaringBitmap/roaring v0.4.21 // indirect
github.com/bgentry/speakeasy v0.1.0 // indirect
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3
github.com/blevesearch/bleve v0.8.1
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f // indirect
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc // indirect
github.com/blevesearch/go-porterstemmer v1.0.2 // indirect
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 // indirect
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe // indirect
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd // indirect
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect
github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect
@ -31,14 +31,13 @@ require (
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/editorconfig/editorconfig-core-go/v2 v2.1.1
github.com/emirpasic/gods v1.12.0
github.com/etcd-io/bbolt v1.3.2 // indirect
github.com/etcd-io/bbolt v1.3.3 // indirect
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
github.com/gliderlabs/ssh v0.2.2
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd // indirect
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e // indirect
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
github.com/go-openapi/jsonreference v0.19.3 // indirect
github.com/go-openapi/runtime v0.19.5 // indirect
github.com/go-redis/redis v6.15.2+incompatible
@ -68,12 +67,10 @@ require (
github.com/mattn/go-sqlite3 v1.11.0
github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae // indirect
github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc
github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5
github.com/niklasfasching/go-org v0.1.8
github.com/oliamb/cutter v0.2.2
github.com/philhofer/fwd v1.0.0 // indirect
github.com/pkg/errors v0.8.1
github.com/pquerna/otp v0.0.0-20160912161815-54653902c20e
github.com/prometheus/client_golang v1.1.0
@ -90,19 +87,17 @@ require (
github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect
github.com/stretchr/testify v1.4.0
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 // indirect
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 // indirect
github.com/tstranex/u2f v1.0.0
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1
github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e
github.com/unknwon/i18n v0.0.0-20190805065654-5c6446a380b6
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141
github.com/urfave/cli v1.20.0
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 // indirect
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53
golang.org/x/crypto v0.0.0-20191117063200-497ca9f6d64f
golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sys v0.0.0-20191010194322-b09406accb47
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2
golang.org/x/text v0.3.2
golang.org/x/tools v0.0.0-20190910221609-7f5965fd7709 // indirect
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect

55
go.sum
View file

@ -46,8 +46,8 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/RoaringBitmap/roaring v0.4.7 h1:eGUudvFzvF7Kxh7JjYvXfI1f7l22/2duFby7r5+d4oc=
github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w=
github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8=
github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68=
@ -72,14 +72,14 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 h1:vinCy/rcjbtxWnMiw11CbMKcuyNi+y4L4MbZUpk7m4M=
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw=
github.com/blevesearch/bleve v0.8.1 h1:20zBREtGe8dvBxCC+717SaxKcUVQOWk3/Fm75vabKpU=
github.com/blevesearch/bleve v0.8.1/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw=
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA=
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ=
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f h1:J9ZVHbB2X6JNxbKw/f3Y4E9Xq+Ro+zPiivzgmi3RTvg=
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA=
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc h1:7OfDAkuAGx71ruzOIFqCkHqGIsVZU0C7PMw5u1bIrwU=
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8=
github.com/blevesearch/go-porterstemmer v1.0.2 h1:qe7n69gBd1OLY5sHKnxQHIbzn0LNJA4hpAf+5XDxV2I=
github.com/blevesearch/go-porterstemmer v1.0.2/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA=
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f h1:kqbi9lqXLLs+zfWlgo1PIiRQ86n33K1JKotjj4rSYOg=
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8=
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 h1:NGpwhs9FOwddM6TptNrq2ycby4s24TcppSe5uG4DA/Q=
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA=
@ -92,6 +92,7 @@ github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkE
github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
@ -102,10 +103,11 @@ github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d h1:XMf4E1U+b
github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d/go.mod h1:srVSlQLB8iXBVXHgnqemxUXqN6FCvClgCMPCsjBDR7c=
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b h1:bZ9rKU2/V8sY+NulSfxDOnXTWcs1rySqdF1sVepihvo=
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs=
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe h1:2o6Y7KMjJNsuMTF8f2H2eTKRhqH7+bQbjr+D+LnhE5M=
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe/go.mod h1:prYTC8EgTu3gwbqJihkud9zRXISvyulAplQ6exdCo1g=
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd h1:zeuJhcG3f8eePshH3KxkNE+Xtl53pVln9MOUPMyr/1w=
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd/go.mod h1:xbc8Ff/oG7h2ejd7AlwOpfd+6QZntc92ygpAOfGwcKY=
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8=
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/cupcake/rdb v0.0.0-20161107195141-43ba34106c76/go.mod h1:vYwsqCOLxGiisLwp9rITslkFNpZD5rz43tf41QFkTWY=
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8=
@ -135,8 +137,8 @@ github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
github.com/etcd-io/bbolt v1.3.2 h1:RLRQ0TKLX7DlBRXAJHvbmXL17Q3KNnTBtZ9B6Qo+/Y0=
github.com/etcd-io/bbolt v1.3.2/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
github.com/etcd-io/bbolt v1.3.3 h1:gSJmxrs37LgTqR/oyJBWok6k6SvXEUerFTbltIhXkBM=
github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a h1:M1bRpaZAn4GSsqu3hdK2R8H0AH9O6vqCTCbm2oAFGfE=
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a/go.mod h1:MkKY/CB98aVE4VxO63X5vTQKUgcn+3XP15LMASe3lYs=
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ=
@ -154,10 +156,11 @@ github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0=
github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q=
github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q=
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd h1:r04MMPyLHj/QwZuMJ5+7tJcBr1AQjpiAK/rZWRrQT7o=
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e h1:SiEs4J3BKVIeaWrH3tKaz3QLZhJ68iJ/A4xrzIoE5+Y=
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI=
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
@ -279,6 +282,8 @@ github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORR
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg=
@ -304,6 +309,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/issue9/assert v1.3.2 h1:IaTa37u4m1fUuTH9K9ldO5IONKVDXjLiUO1T9vj0OF0=
github.com/issue9/assert v1.3.2/go.mod h1:9Ger+iz8X7r1zMYYwEhh++2wMGWcNN2oVI+zIQXxcio=
github.com/issue9/identicon v0.0.0-20160320065130-d36b54562f4c h1:A/PDn117UYld5mlxe58EpMguqpkeTMw5/FCo0ZPS/Ko=
@ -467,6 +473,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qq
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
@ -504,11 +512,13 @@ github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU=
github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4=
@ -529,8 +539,8 @@ github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 h1:HOxvxvnntLiPn1
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 h1:ZVvr38DYEyOPyelySqvF0I9I++85NnUMsWkroBDS4fs=
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
@ -538,6 +548,7 @@ github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ=
github.com/tstranex/u2f v1.0.0/go.mod h1:eahSLaqAS0zsIEv80+vXT7WanXs7MQQDg3j3wGBSayo=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 h1:SpoCl3+Pta5/ubQyF+Fmx65obtpfkyzeaOIneCE3MTw=
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1/go.mod h1:QaSeRctcea9fK6piJpAMCCPKxzJ01+xFcr2k1m3WRPU=
@ -549,8 +560,8 @@ github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 h1:Z79lyIznnziKA
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141/go.mod h1:TBwoao3Q4Eb/cp+dHbXDfRTrZSsj/k7kLr2j1oWRWC0=
github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 h1:E8u341JM/N8LCnPXBV6ZFD1RKo/j+qHl1XOqSV+GstA=
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
@ -574,6 +585,7 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180820150726-614d502a4dac/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@ -642,6 +654,7 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -660,6 +673,8 @@ golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 h1:/J2nHFg1MTqaRLFO7M+J78ASNsJoz3r0cvHBPQ77fsE=
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=

20
vendor/github.com/RoaringBitmap/roaring/.drone.yml generated vendored Normal file
View file

@ -0,0 +1,20 @@
kind: pipeline
name: default
workspace:
base: /go
path: src/github.com/RoaringBitmap/roaring
steps:
- name: test
image: golang
commands:
- go get -t
- go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build

View file

@ -8,10 +8,12 @@ install:
notifications:
email: false
go:
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- "1.7.x"
- "1.8.x"
- "1.9.x"
- "1.10.x"
- "1.11.x"
- "1.12.x"
- tip
# whitelist
@ -21,10 +23,14 @@ branches:
script:
- goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test
- go test -race -run TestConcurrent*
- go build -tags appengine
- go test -tags appengine
- GOARCH=arm64 go build
- GOARCH=386 go build
- GOARCH=386 go test
- GOARCH=arm go build
- GOARCH=arm64 go build
matrix:
allow_failures:
- go: tip

View file

@ -7,4 +7,5 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp)
Maciej Biłas (@maciej)
Maciej Biłas (@maciej),
Joe Nall (@joenall)

View file

@ -9,4 +9,8 @@ Will Glynn (@willglynn),
Brent Pedersen (@brentp),
Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd)
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard),
Alexander Petrov (@alldroll)

View file

@ -200,3 +200,36 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================================================
Portions of runcontainer.go are from the Go standard library, which is licensed
under:
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,4 +1,4 @@
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
@ -63,7 +63,7 @@ qa: fmtcheck test vet lint
# Get the dependencies
deps:
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey
GOPATH=$(GOPATH) go get github.com/stretchr/testify
GOPATH=$(GOPATH) go get github.com/willf/bitset
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
GOPATH=$(GOPATH) go get github.com/mschoch/smat
@ -97,18 +97,8 @@ nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...
rle:
cp rle.go rle16.go
perl -pi -e 's/32/16/g' rle16.go
cp rle_test.go rle16_test.go
perl -pi -e 's/32/16/g' rle16_test.go
backrle:
cp rle16.go rle.go
perl -pi -e 's/16/32/g' rle.go
perl -pi -e 's/2032/2016/g' rle.go
ser: rle
ser:
go generate
cover:

View file

@ -1,4 +1,5 @@
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
=============
This is a go version of the Roaring bitmap data structure.
@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
[lucene]: https://lucene.apache.org/
[solr]: https://lucene.apache.org/solr/
[elasticsearch]: https://www.elastic.co/products/elasticsearch
[druid]: http://druid.io/
[druid]: https://druid.apache.org/
[spark]: https://spark.apache.org/
[opensearchserver]: http://www.opensearchserver.com
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
they include
- github.com/smartystreets/goconvey/convey
- github.com/willf/bitset
- github.com/mschoch/smat
- github.com/glycerine/go-unsnap-stream
@ -133,6 +133,7 @@ func main() {
if rb1.Equals(newrb) {
fmt.Println("I wrote the content to a byte stream and read it back.")
}
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
}
```
@ -206,7 +207,7 @@ You can use roaring with gore:
- go get -u github.com/motemen/gore
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
- go get github/RoaringBitmap/roaring
- go get github.com/RoaringBitmap/roaring
```go
$ gore

View file

@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
}
}
func (ac *arrayContainer) getShortIterator() shortIterable {
func (ac *arrayContainer) getShortIterator() shortPeekable {
return &shortIterator{ac.content, 0}
}
func (ac *arrayContainer) getReverseIterator() shortIterable {
return &reverseIterator{ac.content, len(ac.content) - 1}
}
func (ac *arrayContainer) getManyIterator() manyIterable {
return &manyIterator{ac.content, 0}
return &shortIterator{ac.content, 0}
}
func (ac *arrayContainer) minimum() uint16 {
@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,endx)
func (ac *arrayContainer) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
//p("arrayContainer.not(): exiting early with ac.clone()")
return ac.clone()
}
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
//p("arrayContainer.notClose(): exiting early with ac.clone()")
return ac.clone()
}
// determine the span of array indices to be affected^M
startIndex := binarySearch(ac.content, uint16(firstOfRange))
//p("startIndex=%v", startIndex)
if startIndex < 0 {
startIndex = -startIndex - 1
}
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
//p("lastIndex=%v", lastIndex)
if lastIndex < 0 {
lastIndex = -lastIndex - 2
}
@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
newValuesInRange := spanToBeFlipped - currentValuesInRange
cardinalityChange := newValuesInRange - currentValuesInRange
newCardinality := len(ac.content) + cardinalityChange
//p("new card is %v", newCardinality)
if newCardinality > arrayDefaultMaxSize {
//p("new card over arrayDefaultMaxSize, so returning bitmap")
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
}
func (ac *arrayContainer) and(a container) container {
//p("ac.and() called")
switch x := a.(type) {
case *arrayContainer:
return ac.andArray(x)
@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container {
return ac.iandBitmap(x)
case *runContainer16:
if x.isFull() {
return ac.clone()
return ac
}
return x.andArray(ac)
}
@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
//p("ac.inotClose() starting")
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
return ac
}
@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
if cardinalityChange > 0 {
if newCardinality > len(ac.content) {
if newCardinality > arrayDefaultMaxSize {
//p("ac.inotClose() converting to bitmap and doing inot there")
bcRet := ac.toBitmapContainer()
bcRet.inot(firstOfRange, lastOfRange+1)
*ac = *bcRet.toArrayContainer()
@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
}
}
ac.content = ac.content[:newCardinality]
//p("bottom of ac.inotClose(): returning ac")
return ac
}
@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container {
func (ac *arrayContainer) containerType() contype {
return arrayContype
}
func (ac *arrayContainer) addOffset(x uint16) []container {
low := &arrayContainer{}
high := &arrayContainer{}
for _, val := range ac.content {
y := uint32(val) + uint32(x)
if highbits(y) > 0 {
high.content = append(high.content, lowbits(y))
} else {
low.content = append(low.content, lowbits(y))
}
}
return []container{low, high}
}

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 1
// write "content"
@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 1
@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *arrayContainer) Msgsize() (s int) {
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
return

View file

@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
func (bcsi *bitmapContainerShortIterator) peekNext() uint16 {
return uint16(bcsi.i)
}
func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) {
if bcsi.hasNext() && bcsi.peekNext() < minval {
bcsi.i = bcsi.ptr.NextSetBit(int(minval))
}
}
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
}
func (bc *bitmapContainer) getShortIterator() shortIterable {
func (bc *bitmapContainer) getShortIterator() shortPeekable {
return newBitmapContainerShortIterator(bc)
}
type reverseBitmapContainerShortIterator struct {
ptr *bitmapContainer
i int
}
func (bcsi *reverseBitmapContainerShortIterator) next() uint16 {
if bcsi.i == -1 {
panic("reverseBitmapContainerShortIterator.next() going beyond what is available")
}
j := bcsi.i
bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1)
return uint16(j)
}
func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator {
if a.cardinality == 0 {
return &reverseBitmapContainerShortIterator{a, -1}
}
return &reverseBitmapContainerShortIterator{a, int(a.maximum())}
}
func (bc *bitmapContainer) getReverseIterator() shortIterable {
return newReverseBitmapContainerShortIterator(bc)
}
type bitmapContainerManyIterator struct {
ptr *bitmapContainer
base int
@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
for n < len(buf) {
if bitset == 0 {
base += 1
base++
if base >= len(bcmi.ptr.bitmap) {
bcmi.base = base
bcmi.bitset = bitset
@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int {
func bitmapEquals(a, b []uint64) bool {
if len(a) != len(b) {
//p("bitmaps differ on length. len(a)=%v; len(b)=%v", len(a), len(b))
return false
}
for i, v := range a {
if v != b[i] {
//p("bitmaps differ on element i=%v", i)
return false
}
}
//p("bitmapEquals returning true")
return true
}
@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui
func (bc *bitmapContainer) equals(o container) bool {
srb, ok := o.(*bitmapContainer)
if ok {
//p("bitmapContainers.equals: both are bitmapContainers")
if srb.cardinality != bc.cardinality {
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality)
return false
}
return bitmapEquals(bc.bitmap, srb.bitmap)
@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container {
// iremove returns true if i was found.
func (bc *bitmapContainer) iremove(i uint16) bool {
/* branchless code
w := bc.bitmap[i>>6]
mask := uint64(1) << (i % 64)
neww := w &^ mask
bc.cardinality -= int((w ^ neww) >> (i % 64))
bc.bitmap[i>>6] = neww */
if bc.contains(i) {
bc.cardinality--
bc.bitmap[i/64] &^= (uint64(1) << (i % 64))
@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container
// flip all values in range [firstOfRange,endx)
func (bc *bitmapContainer) inot(firstOfRange, endx int) container {
p("bc.inot() called with [%v, %v)", firstOfRange, endx)
if endx-firstOfRange == maxCapacity {
//p("endx-firstOfRange == maxCapacity")
flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.cardinality = maxCapacity - bc.cardinality
//p("bc.cardinality is now %v", bc.cardinality)
} else if endx-firstOfRange > maxCapacity/2 {
//p("endx-firstOfRange > maxCapacity/2")
flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.computeCardinality()
} else {
@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container {
func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container {
answer := bc
c := value2.getCardinality()
for k := 0; k < c; k++ {
for k := 0; k+3 < c; k += 4 {
content := (*[4]uint16)(unsafe.Pointer(&value2.content[k]))
vc0 := content[0]
i0 := uint(vc0) >> 6
answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64))
vc1 := content[1]
i1 := uint(vc1) >> 6
answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64))
vc2 := content[2]
i2 := uint(vc2) >> 6
answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64))
vc3 := content[3]
i3 := uint(vc3) >> 6
answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64))
}
for k := c &^ 3; k < c; k++ {
vc := value2.content[k]
i := uint(vc) >> 6
answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64))
}
answer.cardinality = invalidCardinality
return answer
}
@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container {
}
func (bc *bitmapContainer) iandNot(a container) container {
//p("bitmapContainer.iandNot() starting")
switch x := a.(type) {
case *arrayContainer:
return bc.iandNotArray(x)
@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container {
return ac
}
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer {
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container {
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap))
for k := 0; k < len(bc.bitmap); k++ {
bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k]
}
bc.cardinality = newCardinality
if bc.getCardinality() <= arrayDefaultMaxSize {
return bc.toArrayContainer()
}
return bc
}
@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int {
return -1
}
func (bc *bitmapContainer) PrevSetBit(i int) int {
if i < 0 {
return -1
}
x := i / 64
if x >= len(bc.bitmap) {
return -1
}
w := bc.bitmap[x]
b := i % 64
w = w << uint(63-b)
if w != 0 {
return i - countLeadingZeros(w)
}
x--
for ; x >= 0; x-- {
if bc.bitmap[x] != 0 {
return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x])
}
}
return -1
}
// reference the java implementation
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892
//
@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
func (bc *bitmapContainer) containerType() contype {
return bitmapContype
}
func (bc *bitmapContainer) addOffset(x uint16) []container {
low := newBitmapContainer()
high := newBitmapContainer()
b := uint32(x) >> 6
i := uint32(x) % 64
end := uint32(1024) - b
if i == 0 {
copy(low.bitmap[b:], bc.bitmap[:end])
copy(high.bitmap[:b], bc.bitmap[end:])
} else {
low.bitmap[b] = bc.bitmap[0] << i
for k := uint32(1); k < end; k++ {
newval := bc.bitmap[k] << i
if newval == 0 {
newval = bc.bitmap[k-1] >> (64 - i)
}
low.bitmap[b+k] = newval
}
for k := end; k < 1024; k++ {
newval := bc.bitmap[k] << i
if newval == 0 {
newval = bc.bitmap[k-1] >> (64 - i)
}
high.bitmap[k-end] = newval
}
high.bitmap[b] = bc.bitmap[1023] >> (64 - i)
}
low.computeCardinality()
high.computeCardinality()
return []container{low, high}
}

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "cardinality"
@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainer) Msgsize() (s int) {
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size))
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "ptr"
@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainerShortIterator) Msgsize() (s int) {
s = 1 + 4
if z.ptr == nil {

161
vendor/github.com/RoaringBitmap/roaring/byte_input.go generated vendored Normal file
View file

@ -0,0 +1,161 @@
package roaring
import (
"encoding/binary"
"io"
)
type byteInput interface {
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
next(n int) ([]byte, error)
// readUInt32 reads uint32 with LittleEndian order
readUInt32() (uint32, error)
// readUInt16 reads uint16 with LittleEndian order
readUInt16() (uint16, error)
// getReadBytes returns read bytes
getReadBytes() int64
// skipBytes skips exactly n bytes
skipBytes(n int) error
}
func newByteInputFromReader(reader io.Reader) byteInput {
return &byteInputAdapter{
r: reader,
readBytes: 0,
}
}
func newByteInput(buf []byte) byteInput {
return &byteBuffer{
buf: buf,
off: 0,
}
}
type byteBuffer struct {
buf []byte
off int
}
// next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *byteBuffer) next(n int) ([]byte, error) {
m := len(b.buf) - b.off
if n > m {
return nil, io.ErrUnexpectedEOF
}
data := b.buf[b.off : b.off+n]
b.off += n
return data, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteBuffer) readUInt32() (uint32, error) {
if len(b.buf)-b.off < 4 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint32(b.buf[b.off:])
b.off += 4
return v, nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteBuffer) readUInt16() (uint16, error) {
if len(b.buf)-b.off < 2 {
return 0, io.ErrUnexpectedEOF
}
v := binary.LittleEndian.Uint16(b.buf[b.off:])
b.off += 2
return v, nil
}
// getReadBytes returns read bytes
func (b *byteBuffer) getReadBytes() int64 {
return int64(b.off)
}
// skipBytes skips exactly n bytes
func (b *byteBuffer) skipBytes(n int) error {
m := len(b.buf) - b.off
if n > m {
return io.ErrUnexpectedEOF
}
b.off += n
return nil
}
// reset resets the given buffer with a new byte slice
func (b *byteBuffer) reset(buf []byte) {
b.buf = buf
b.off = 0
}
type byteInputAdapter struct {
r io.Reader
readBytes int
}
// next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
if err != nil {
return nil, err
}
return buf, nil
}
// readUInt32 reads uint32 with LittleEndian order
func (b *byteInputAdapter) readUInt32() (uint32, error) {
buf, err := b.next(4)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint32(buf), nil
}
// readUInt16 reads uint16 with LittleEndian order
func (b *byteInputAdapter) readUInt16() (uint16, error) {
buf, err := b.next(2)
if err != nil {
return 0, err
}
return binary.LittleEndian.Uint16(buf), nil
}
// getReadBytes returns read bytes
func (b *byteInputAdapter) getReadBytes() int64 {
return int64(b.readBytes)
}
// skipBytes skips exactly n bytes
func (b *byteInputAdapter) skipBytes(n int) error {
_, err := b.next(n)
return err
}
// reset resets the given buffer with a new stream
func (b *byteInputAdapter) reset(stream io.Reader) {
b.r = stream
b.readBytes = 0
}

11
vendor/github.com/RoaringBitmap/roaring/clz.go generated vendored Normal file
View file

@ -0,0 +1,11 @@
// +build go1.9
// "go1.9", from Go version 1.9 onward
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
package roaring
import "math/bits"
func countLeadingZeros(x uint64) int {
return bits.LeadingZeros64(x)
}

36
vendor/github.com/RoaringBitmap/roaring/clz_compat.go generated vendored Normal file
View file

@ -0,0 +1,36 @@
// +build !go1.9
package roaring
// LeadingZeroBits returns the number of consecutive most significant zero
// bits of x.
func countLeadingZeros(i uint64) int {
if i == 0 {
return 64
}
n := 1
x := uint32(i >> 32)
if x == 0 {
n += 32
x = uint32(i)
}
if (x >> 16) == 0 {
n += 16
x <<= 16
}
if (x >> 24) == 0 {
n += 8
x <<= 8
}
if x>>28 == 0 {
n += 4
x <<= 4
}
if x>>30 == 0 {
n += 2
x <<= 2
}
n -= int(x >> 31)
return n
}

16
vendor/github.com/RoaringBitmap/roaring/go.mod generated vendored Normal file
View file

@ -0,0 +1,16 @@
module github.com/RoaringBitmap/roaring
go 1.12
require (
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
github.com/golang/snappy v0.0.1 // indirect
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae
github.com/philhofer/fwd v1.0.0 // indirect
github.com/stretchr/testify v1.4.0
github.com/tinylib/msgp v1.1.0
github.com/willf/bitset v1.1.10
)

30
vendor/github.com/RoaringBitmap/roaring/go.sum generated vendored Normal file
View file

@ -0,0 +1,30 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -4,12 +4,7 @@ type manyIterable interface {
nextMany(hs uint32, buf []uint32) int
}
type manyIterator struct {
slice []uint16
loc int
}
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
n := 0
l := si.loc
s := si.slice

View file

@ -143,8 +143,8 @@ func toBitmapContainer(c container) container {
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
expectedKeys := -1
appendedKeys := 0
keys := make([]uint16, 0)
containers := make([]container, 0)
var keys []uint16
var containers []container
for appendedKeys != expectedKeys {
select {
case item := <-resultChan:
@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
// (if it is set to 0, a default number of workers is chosen)
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
var lKey uint16 = MaxUint16
var hKey uint16 = 0
var hKey uint16
bitmapsFiltered := bitmaps[:0]
for _, b := range bitmaps {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,163 +0,0 @@
package roaring
import (
"fmt"
)
// common to rle32.go and rle16.go
// rleVerbose controls whether p() prints show up.
// The testing package sets this based on
// testing.Verbose().
var rleVerbose bool
// p is a shorthand for fmt.Printf with beginning and
// trailing newlines. p() makes it easy
// to add diagnostic print statements.
func p(format string, args ...interface{}) {
if rleVerbose {
fmt.Printf("\n"+format+"\n", args...)
}
}
// MaxUint32 is the largest uint32 value.
const MaxUint32 = 4294967295
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
const MaxUint16 = 65535
// searchOptions allows us to accelerate runContainer32.search with
// prior knowledge of (mostly lower) bounds. This is used by Union
// and Intersect.
type searchOptions struct {
// start here instead of at 0
startIndex int64
// upper bound instead of len(rc.iv);
// endxIndex == 0 means ignore the bound and use
// endxIndex == n ==len(rc.iv) which is also
// naturally the default for search()
// when opt = nil.
endxIndex int64
}
// And finds the intersection of rc and b.
func (rc *runContainer32) And(b *Bitmap) *Bitmap {
out := NewBitmap()
for _, p := range rc.iv {
for i := p.start; i <= p.last; i++ {
if b.Contains(i) {
out.Add(i)
}
}
}
return out
}
// Xor returns the exclusive-or of rc and b.
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
if out.Contains(v) {
out.RemoveRange(uint64(v), uint64(v+1))
} else {
out.Add(v)
}
}
}
return out
}
// Or returns the union of rc and b.
func (rc *runContainer32) Or(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
for v := p.start; v <= p.last; v++ {
out.Add(v)
}
}
return out
}
// trial is used in the randomized testing of runContainers
type trial struct {
n int
percentFill float64
ntrial int
// only in the union test
// only subtract test
percentDelete float64
// only in 067 randomized operations
// we do this + 1 passes
numRandomOpsPass int
// allow sampling range control
// only recent tests respect this.
srang *interval16
}
// And finds the intersection of rc and b.
func (rc *runContainer16) And(b *Bitmap) *Bitmap {
out := NewBitmap()
for _, p := range rc.iv {
plast := p.last()
for i := p.start; i <= plast; i++ {
if b.Contains(uint32(i)) {
out.Add(uint32(i))
}
}
}
return out
}
// Xor returns the exclusive-or of rc and b.
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
plast := p.last()
for v := p.start; v <= plast; v++ {
w := uint32(v)
if out.Contains(w) {
out.RemoveRange(uint64(w), uint64(w+1))
} else {
out.Add(w)
}
}
}
return out
}
// Or returns the union of rc and b.
func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
out := b.Clone()
for _, p := range rc.iv {
plast := p.last()
for v := p.start; v <= plast; v++ {
out.Add(uint32(v))
}
}
return out
}
//func (rc *runContainer32) and(container) container {
// panic("TODO. not yet implemented")
//}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer16. This is for the
// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
func (rc *runContainer16) serializedSizeInBytes() int {
// number of runs in one uint16, then each run
// needs two more uint16
return 2 + len(rc.iv)*4
}
// serializedSizeInBytes returns the number of bytes of memory
// required by this runContainer32.
func (rc *runContainer32) serializedSizeInBytes() int {
return 4 + len(rc.iv)*8
}

View file

@ -1,695 +0,0 @@
package roaring
///////////////////////////////////////////////////
//
// container interface methods for runContainer16
//
///////////////////////////////////////////////////
import (
"fmt"
)
// compile time verify we meet interface requirements
var _ container = &runContainer16{}
func (rc *runContainer16) clone() container {
return newRunContainer16CopyIv(rc.iv)
}
func (rc *runContainer16) minimum() uint16 {
return rc.iv[0].start // assume not empty
}
func (rc *runContainer16) maximum() uint16 {
return rc.iv[len(rc.iv)-1].last() // assume not empty
}
func (rc *runContainer16) isFull() bool {
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
}
func (rc *runContainer16) and(a container) container {
if rc.isFull() {
return a.clone()
}
switch c := a.(type) {
case *runContainer16:
return rc.intersect(c)
case *arrayContainer:
return rc.andArray(c)
case *bitmapContainer:
return rc.andBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) andCardinality(a container) int {
switch c := a.(type) {
case *runContainer16:
return int(rc.intersectCardinality(c))
case *arrayContainer:
return rc.andArrayCardinality(c)
case *bitmapContainer:
return rc.andBitmapContainerCardinality(c)
}
panic("unsupported container type")
}
// andBitmapContainer finds the intersection of rc and b.
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
bc2 := newBitmapContainerFromRun(rc)
return bc2.andBitmap(bc)
}
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
pos := 0
answer := 0
maxpos := ac.getCardinality()
if maxpos == 0 {
return 0 // won't happen in actual code
}
v := ac.content[pos]
mainloop:
for _, p := range rc.iv {
for v < p.start {
pos++
if pos == maxpos {
break mainloop
}
v = ac.content[pos]
}
for v <= p.last() {
answer++
pos++
if pos == maxpos {
break mainloop
}
v = ac.content[pos]
}
}
return answer
}
func (rc *runContainer16) iand(a container) container {
if rc.isFull() {
return a.clone()
}
switch c := a.(type) {
case *runContainer16:
return rc.inplaceIntersect(c)
case *arrayContainer:
return rc.andArray(c)
case *bitmapContainer:
return rc.iandBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
// TODO: optimize by doing less allocation, possibly?
// sect will be new
sect := rc.intersect(rc2)
*rc = *sect
return rc
}
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
isect := rc.andBitmapContainer(bc)
*rc = *newRunContainer16FromContainer(isect)
return rc
}
func (rc *runContainer16) andArray(ac *arrayContainer) container {
if len(rc.iv) == 0 {
return newArrayContainer()
}
acCardinality := ac.getCardinality()
c := newArrayContainerCapacity(acCardinality)
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
iv := rc.iv[rlePos]
arrayVal := ac.content[arrayPos]
for iv.last() < arrayVal {
rlePos++
if rlePos == len(rc.iv) {
return c
}
iv = rc.iv[rlePos]
}
if iv.start > arrayVal {
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
} else {
c.content = append(c.content, arrayVal)
arrayPos++
}
}
return c
}
func (rc *runContainer16) andNot(a container) container {
switch c := a.(type) {
case *arrayContainer:
return rc.andNotArray(c)
case *bitmapContainer:
return rc.andNotBitmap(c)
case *runContainer16:
return rc.andNotRunContainer16(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
k := 0
var val int64
for _, p := range rc.iv {
n := p.runlen()
for j := int64(0); j < n; j++ {
val = int64(p.start) + j
x[k+i] = uint32(val) | mask
k++
}
}
}
func (rc *runContainer16) getShortIterator() shortIterable {
return rc.newRunIterator16()
}
func (rc *runContainer16) getManyIterator() manyIterable {
return rc.newManyRunIterator16()
}
// add the values in the range [firstOfRange, endx). endx
// is still abe to express 2^16 because it is an int not an uint16.
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
}
addme := newRunContainer16TakeOwnership([]interval16{
{
start: uint16(firstOfRange),
length: uint16(endx - 1 - firstOfRange),
},
})
*rc = *rc.union(addme)
return rc
}
// remove the values in the range [firstOfRange,endx)
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
" nothing to do.", firstOfRange, endx))
//return rc
}
x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
rc.isubtract(x)
return rc
}
// not flip the values in the range [firstOfRange,endx)
func (rc *runContainer16) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
}
return rc.Not(firstOfRange, endx)
}
// Not flips the values in the range [firstOfRange,endx).
// This is not inplace. Only the returned value has the flipped bits.
//
// Currently implemented as (!A intersect B) union (A minus B),
// where A is rc, and B is the supplied [firstOfRange, endx) interval.
//
// TODO(time optimization): convert this to a single pass
// algorithm by copying AndNotRunContainer16() and modifying it.
// Current routine is correct but
// makes 2 more passes through the arrays than should be
// strictly necessary. Measure both ways though--this may not matter.
//
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
}
if firstOfRange >= endx {
return rc.Clone()
}
a := rc
// algo:
// (!A intersect B) union (A minus B)
nota := a.invert()
bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
b := newRunContainer16TakeOwnership(bs)
notAintersectB := nota.intersect(b)
aMinusB := a.AndNotRunContainer16(b)
rc2 := notAintersectB.union(aMinusB)
return rc2
}
// equals is now logical equals; it does not require the
// same underlying container type.
func (rc *runContainer16) equals(o container) bool {
srb, ok := o.(*runContainer16)
if !ok {
// maybe value instead of pointer
val, valok := o.(*runContainer16)
if valok {
srb = val
ok = true
}
}
if ok {
// Check if the containers are the same object.
if rc == srb {
return true
}
if len(srb.iv) != len(rc.iv) {
return false
}
for i, v := range rc.iv {
if v != srb.iv[i] {
return false
}
}
return true
}
// use generic comparison
if o.getCardinality() != rc.getCardinality() {
return false
}
rit := rc.getShortIterator()
bit := o.getShortIterator()
//k := 0
for rit.hasNext() {
if bit.next() != rit.next() {
return false
}
//k++
}
return true
}
func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
rc.Add(x)
return rc
}
func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
return rc.Add(x)
}
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
rc.removeKey(x)
return rc
}
func (rc *runContainer16) iremove(x uint16) bool {
return rc.removeKey(x)
}
func (rc *runContainer16) or(a container) container {
if rc.isFull() {
return rc.clone()
}
switch c := a.(type) {
case *runContainer16:
return rc.union(c)
case *arrayContainer:
return rc.orArray(c)
case *bitmapContainer:
return rc.orBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) orCardinality(a container) int {
switch c := a.(type) {
case *runContainer16:
return int(rc.unionCardinality(c))
case *arrayContainer:
return rc.orArrayCardinality(c)
case *bitmapContainer:
return rc.orBitmapContainerCardinality(c)
}
panic("unsupported container type")
}
// orBitmapContainer finds the union of rc and bc.
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
bc2 := newBitmapContainerFromRun(rc)
return bc2.iorBitmap(bc)
}
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
answer := 0
for i := range rc.iv {
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
}
//bc.computeCardinality()
return answer
}
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
}
// orArray finds the union of rc and ac.
func (rc *runContainer16) orArray(ac *arrayContainer) container {
bc1 := newBitmapContainerFromRun(rc)
bc2 := ac.toBitmapContainer()
return bc1.orBitmap(bc2)
}
// orArray finds the union of rc and ac.
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
}
func (rc *runContainer16) ior(a container) container {
if rc.isFull() {
return rc
}
switch c := a.(type) {
case *runContainer16:
return rc.inplaceUnion(c)
case *arrayContainer:
return rc.iorArray(c)
case *bitmapContainer:
return rc.iorBitmapContainer(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv))
for _, p := range rc2.iv {
last := int64(p.last())
for i := int64(p.start); i <= last; i++ {
rc.Add(uint16(i))
}
}
return rc
}
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
it := bc.getShortIterator()
for it.hasNext() {
rc.Add(it.next())
}
return rc
}
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
it := ac.getShortIterator()
for it.hasNext() {
rc.Add(it.next())
}
return rc
}
// lazyIOR is described (not yet implemented) in
// this nice note from @lemire on
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
//
// Description of lazyOR and lazyIOR from @lemire:
//
// Lazy functions are optional and can be simply
// wrapper around non-lazy functions.
//
// The idea of "laziness" is as follows. It is
// inspired by the concept of lazy evaluation
// you might be familiar with (functional programming
// and all that). So a roaring bitmap is
// such that all its containers are, in some
// sense, chosen to use as little memory as
// possible. This is nice. Also, all bitsets
// are "cardinality aware" so that you can do
// fast rank/select queries, or query the
// cardinality of the whole bitmap... very fast,
// without latency.
//
// However, imagine that you are aggregating 100
// bitmaps together. So you OR the first two, then OR
// that with the third one and so forth. Clearly,
// intermediate bitmaps don't need to be as
// compressed as possible, right? They can be
// in a "dirty state". You only need the end
// result to be in a nice state... which you
// can achieve by calling repairAfterLazy at the end.
//
// The Java/C code does something special for
// the in-place lazy OR runs. The idea is that
// instead of taking two run containers and
// generating a new one, we actually try to
// do the computation in-place through a
// technique invented by @gssiyankai (pinging him!).
// What you do is you check whether the host
// run container has lots of extra capacity.
// If it does, you move its data at the end of
// the backing array, and then you write
// the answer at the beginning. What this
// trick does is minimize memory allocations.
//
func (rc *runContainer16) lazyIOR(a container) container {
// not lazy at the moment
// TODO: make it lazy
return rc.ior(a)
/*
switch c := a.(type) {
case *arrayContainer:
return rc.lazyIorArray(c)
case *bitmapContainer:
return rc.lazyIorBitmap(c)
case *runContainer16:
return rc.lazyIorRun16(c)
}
panic("unsupported container type")
*/
}
// lazyOR is described above in lazyIOR.
func (rc *runContainer16) lazyOR(a container) container {
// not lazy at the moment
// TODO: make it lazy
return rc.or(a)
/*
switch c := a.(type) {
case *arrayContainer:
return rc.lazyOrArray(c)
case *bitmapContainer:
return rc.lazyOrBitmap(c)
case *runContainer16:
return rc.lazyOrRunContainer16(c)
}
panic("unsupported container type")
*/
}
func (rc *runContainer16) intersects(a container) bool {
// TODO: optimize by doing inplace/less allocation, possibly?
isect := rc.and(a)
return isect.getCardinality() > 0
}
func (rc *runContainer16) xor(a container) container {
switch c := a.(type) {
case *arrayContainer:
return rc.xorArray(c)
case *bitmapContainer:
return rc.xorBitmap(c)
case *runContainer16:
return rc.xorRunContainer16(c)
}
panic("unsupported container type")
}
func (rc *runContainer16) iandNot(a container) container {
switch c := a.(type) {
case *arrayContainer:
return rc.iandNotArray(c)
case *bitmapContainer:
return rc.iandNotBitmap(c)
case *runContainer16:
return rc.iandNotRunContainer16(c)
}
panic("unsupported container type")
}
// flip the values in the range [firstOfRange,endx)
func (rc *runContainer16) inot(firstOfRange, endx int) container {
if firstOfRange >= endx {
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
}
// TODO: minimize copies, do it all inplace; not() makes a copy.
rc = rc.Not(firstOfRange, endx)
return rc
}
func (rc *runContainer16) getCardinality() int {
return int(rc.cardinality())
}
func (rc *runContainer16) rank(x uint16) int {
n := int64(len(rc.iv))
xx := int64(x)
w, already, _ := rc.search(xx, nil)
if w < 0 {
return 0
}
if !already && w == n-1 {
return rc.getCardinality()
}
var rnk int64
if !already {
for i := int64(0); i <= w; i++ {
rnk += rc.iv[i].runlen()
}
return int(rnk)
}
for i := int64(0); i < w; i++ {
rnk += rc.iv[i].runlen()
}
rnk += int64(x-rc.iv[w].start) + 1
return int(rnk)
}
func (rc *runContainer16) selectInt(x uint16) int {
return rc.selectInt16(x)
}
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
return rc.AndNotRunContainer16(b)
}
func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
rcb := rc.toBitmapContainer()
acb := ac.toBitmapContainer()
return rcb.andNotBitmap(acb)
}
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
rcb := rc.toBitmapContainer()
return rcb.andNotBitmap(bc)
}
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
p("run16 toBitmap starting; rc has %v ranges", len(rc.iv))
bc := newBitmapContainer()
for i := range rc.iv {
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
}
bc.computeCardinality()
return bc
}
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
rcb := rc.toBitmapContainer()
x2b := x2.toBitmapContainer()
rcb.iandNotBitmapSurely(x2b)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
*rc = *rc2
return rc
}
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
rcb := rc.toBitmapContainer()
acb := ac.toBitmapContainer()
rcb.iandNotBitmapSurely(acb)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
*rc = *rc2
return rc
}
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
rcb := rc.toBitmapContainer()
rcb.iandNotBitmapSurely(bc)
// TODO: check size and optimize the return value
// TODO: is inplace modification really required? If not, elide the copy.
rc2 := newRunContainer16FromBitmapContainer(rcb)
*rc = *rc2
return rc
}
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
rcb := rc.toBitmapContainer()
x2b := x2.toBitmapContainer()
return rcb.xorBitmap(x2b)
}
func (rc *runContainer16) xorArray(ac *arrayContainer) container {
rcb := rc.toBitmapContainer()
acb := ac.toBitmapContainer()
return rcb.xorBitmap(acb)
}
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
rcb := rc.toBitmapContainer()
return rcb.xorBitmap(bc)
}
// convert to bitmap or array *if needed*
func (rc *runContainer16) toEfficientContainer() container {
// runContainer16SerializedSizeInBytes(numRuns)
sizeAsRunContainer := rc.getSizeInBytes()
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
card := int(rc.cardinality())
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return rc
}
if card <= arrayDefaultMaxSize {
return rc.toArrayContainer()
}
bc := newBitmapContainerFromRun(rc)
return bc
}
func (rc *runContainer16) toArrayContainer() *arrayContainer {
ac := newArrayContainer()
for i := range rc.iv {
ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
}
return ac
}
func newRunContainer16FromContainer(c container) *runContainer16 {
switch x := c.(type) {
case *runContainer16:
return x.Clone()
case *arrayContainer:
return newRunContainer16FromArray(x)
case *bitmapContainer:
return newRunContainer16FromBitmapContainer(x)
}
panic("unsupported container type")
}

View file

@ -6,12 +6,12 @@
package roaring
import (
"bufio"
"bytes"
"encoding/base64"
"fmt"
"io"
"strconv"
"sync"
)
// Bitmap represents a compressed bitmap where you can add integers.
@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
return rb.highlowcontainer.toBytes()
}
// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
// Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
// version of this bitmap to stream. The format is not
// compatible with the WriteTo() format, and is
// experimental: it may produce smaller on disk
@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
return rb.highlowcontainer.readFrom(stream)
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
stream := byteInputAdapterPool.Get().(*byteInputAdapter)
stream.reset(reader)
p, err = rb.highlowcontainer.readFrom(stream)
byteInputAdapterPool.Put(stream)
return
}
// FromBuffer creates a bitmap from its serialized version stored in buffer
@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
// You should *not* change the copy-on-write status of the resulting
// bitmaps (SetCopyOnWrite).
//
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) {
return rb.highlowcontainer.fromBuffer(buf)
// If buf becomes unavailable, then a bitmap created with
// FromBuffer would be effectively broken. Furthermore, any
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
stream := byteBufferPool.Get().(*byteBuffer)
stream.reset(buf)
p, err = rb.highlowcontainer.readFrom(stream)
byteBufferPool.Put(stream)
return
}
var (
byteBufferPool = sync.Pool{
New: func() interface{} {
return &byteBuffer{}
},
}
byteInputAdapterPool = sync.Pool{
New: func() interface{} {
return &byteInputAdapter{}
},
}
)
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
func (rb *Bitmap) RunOptimize() {
rb.highlowcontainer.runOptimize()
@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool {
return rb.highlowcontainer.hasRunCompression()
}
// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
// Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
// version of this bitmap from stream. The format is
// expected is that written by the WriteToMsgpack()
// call; see additional notes there.
@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
}
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
// (same as ToBytes)
func (rb *Bitmap) MarshalBinary() ([]byte, error) {
var buf bytes.Buffer
writer := bufio.NewWriter(&buf)
_, err := rb.WriteTo(writer)
if err != nil {
return nil, err
}
err = writer.Flush()
if err != nil {
return nil, err
}
return buf.Bytes(), nil
return rb.ToBytes()
}
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
func (rb *Bitmap) UnmarshalBinary(data []byte) error {
var buf bytes.Buffer
_, err := buf.Write(data)
if err != nil {
return err
}
reader := bufio.NewReader(&buf)
_, err = rb.ReadFrom(reader)
r := bytes.NewReader(data)
_, err := rb.ReadFrom(r)
return err
}
@ -215,10 +233,20 @@ type IntIterable interface {
Next() uint32
}
// IntPeekable allows you to look at the next value without advancing and
// advance as long as the next value is smaller than minval
type IntPeekable interface {
IntIterable
// PeekNext peeks the next value without advancing the iterator
PeekNext() uint32
// AdvanceIfNeeded advances as long as the next value is smaller than minval
AdvanceIfNeeded(minval uint32)
}
type intIterator struct {
pos int
hs uint32
iter shortIterable
iter shortPeekable
highlowcontainer *roaringArray
}
@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 {
return x
}
// PeekNext peeks the next value without advancing the iterator
func (ii *intIterator) PeekNext() uint32 {
return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
}
// AdvanceIfNeeded advances as long as the next value is smaller than minval
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
to := minval >> 16
for ii.HasNext() && (ii.hs>>16) < to {
ii.pos++
ii.init()
}
if ii.HasNext() && (ii.hs>>16) == to {
ii.iter.advanceIfNeeded(lowbits(minval))
if !ii.iter.hasNext() {
ii.pos++
ii.init()
}
}
}
func newIntIterator(a *Bitmap) *intIterator {
p := new(intIterator)
p.pos = 0
@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator {
return p
}
type intReverseIterator struct {
pos int
hs uint32
iter shortIterable
highlowcontainer *roaringArray
}
// HasNext returns true if there are more integers to iterate over
func (ii *intReverseIterator) HasNext() bool {
return ii.pos >= 0
}
func (ii *intReverseIterator) init() {
if ii.pos >= 0 {
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator()
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
} else {
ii.iter = nil
}
}
// Next returns the next integer
func (ii *intReverseIterator) Next() uint32 {
x := uint32(ii.iter.next()) | ii.hs
if !ii.iter.hasNext() {
ii.pos = ii.pos - 1
ii.init()
}
return x
}
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
p := new(intReverseIterator)
p.highlowcontainer = &a.highlowcontainer
p.pos = a.highlowcontainer.size() - 1
p.init()
return p
}
// ManyIntIterable allows you to iterate over the values in a Bitmap
type ManyIntIterable interface {
// pass in a buffer to fill up with values, returns how many values were returned
@ -325,12 +416,20 @@ func (rb *Bitmap) String() string {
return buffer.String()
}
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order
func (rb *Bitmap) Iterator() IntIterable {
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
return newIntIterator(rb)
}
// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order
// ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ReverseIterator() IntIterable {
return newIntReverseIterator(rb)
}
// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) ManyIterator() ManyIntIterable {
return newManyIntIterator(rb)
}
@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool {
return false
}
// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
containerOffset := highbits(offset)
inOffset := lowbits(offset)
if inOffset == 0 {
answer = x.Clone()
for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
key := answer.highlowcontainer.getKeyAtIndex(pos)
key += containerOffset
answer.highlowcontainer.keys[pos] = key
}
} else {
answer = New()
for pos := 0; pos < x.highlowcontainer.size(); pos++ {
key := x.highlowcontainer.getKeyAtIndex(pos)
key += containerOffset
c := x.highlowcontainer.getContainerAtIndex(pos)
offsetted := c.addOffset(inOffset)
if offsetted[0].getCardinality() > 0 {
curSize := answer.highlowcontainer.size()
lastkey := uint16(0)
if curSize > 0 {
lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1)
}
if curSize > 0 && lastkey == key {
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
orrseult := prev.ior(offsetted[0])
answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult)
} else {
answer.highlowcontainer.appendContainer(key, offsetted[0], false)
}
}
if offsetted[1].getCardinality() > 0 {
answer.highlowcontainer.appendContainer(key+1, offsetted[1], false)
}
}
}
return answer
}
// Add the integer x to the bitmap
func (rb *Bitmap) Add(x uint32) {
hb := highbits(x)
@ -794,11 +933,6 @@ main:
}
}
/*func (rb *Bitmap) Or(x2 *Bitmap) {
results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage
rb.highlowcontainer = results.highlowcontainer
}*/
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap
func (rb *Bitmap) AndNot(x2 *Bitmap) {
pos1 := 0
@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
return
}
hbStart := highbits(uint32(rangeStart))
lbStart := lowbits(uint32(rangeStart))
hbLast := highbits(uint32(rangeEnd - 1))
lbLast := lowbits(uint32(rangeEnd - 1))
hbStart := uint32(highbits(uint32(rangeStart)))
lbStart := uint32(lowbits(uint32(rangeStart)))
hbLast := uint32(highbits(uint32(rangeEnd - 1)))
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
var max uint32 = maxLowBit
for hb := hbStart; hb <= hbLast; hb++ {
@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
containerLast = uint32(lbLast)
}
i := rb.highlowcontainer.getIndex(hb)
i := rb.highlowcontainer.getIndex(uint16(hb))
if i >= 0 {
c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1)
@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
}
} else { // *think* the range of ones must never be
// empty.
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast)))
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
}
}
}
@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) {
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
var max uint32 = maxLowBit
for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ {
for hb := hbStart; hb <= hbLast; hb++ {
containerStart := uint32(0)
if hb == uint16(hbStart) {
if hb == hbStart {
containerStart = lbStart
}
containerLast := max
if hb == uint16(hbLast) {
if hb == hbLast {
containerLast = lbLast
}
i := rb.highlowcontainer.getIndex(hb)
i := rb.highlowcontainer.getIndex(uint16(hb))
if i >= 0 {
c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1)
rb.highlowcontainer.setContainerAtIndex(i, c)
} else { // *think* the range of ones must never be
// empty.
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast)))
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
}
}
}
@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
}
answer := NewBitmap()
hbStart := highbits(uint32(rangeStart))
lbStart := lowbits(uint32(rangeStart))
hbLast := highbits(uint32(rangeEnd - 1))
lbLast := lowbits(uint32(rangeEnd - 1))
hbStart := uint32(highbits(uint32(rangeStart)))
lbStart := uint32(lowbits(uint32(rangeStart)))
hbLast := uint32(highbits(uint32(rangeEnd - 1)))
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
// copy the containers before the active area
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart)
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart))
var max uint32 = maxLowBit
for hb := hbStart; hb <= hbLast; hb++ {
@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
containerLast = uint32(lbLast)
}
i := bm.highlowcontainer.getIndex(hb)
j := answer.highlowcontainer.getIndex(hb)
i := bm.highlowcontainer.getIndex(uint16(hb))
j := answer.highlowcontainer.getIndex(uint16(hb))
if i >= 0 {
c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1)
if c.getCardinality() > 0 {
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c)
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c)
}
} else { // *think* the range of ones must never be
// empty.
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb,
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb),
rangeOfOnes(int(containerStart), int(containerLast)))
}
}
// copy the containers after the active area.
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast)
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast))
return answer
}
@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) {
return rb.highlowcontainer.copyOnWrite
}
// CloneCopyOnWriteContainers clones all containers which have
// needCopyOnWrite set to true.
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to, after
// calling FromBuffer.
// More generally this function is useful if you call FromBuffer
// to construct a bitmap with a backing array buf
// and then later discard the buf array. Note that you should call
// CloneCopyOnWriteContainers on all bitmaps that were derived
// from the 'FromBuffer' bitmap since they map have dependencies
// on the buf array as well.
func (rb *Bitmap) CloneCopyOnWriteContainers() {
rb.highlowcontainer.cloneCopyOnWriteContainers()
}
// FlipInt calls Flip after casting the parameters (convenience method)
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
return Flip(bm, uint64(rangeStart), uint64(rangeEnd))

View file

@ -4,16 +4,16 @@ import (
"bytes"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
snappy "github.com/glycerine/go-unsnap-stream"
"github.com/tinylib/msgp/msgp"
"io"
)
//go:generate msgp -unexported
type container interface {
addOffset(uint16) []container
clone() container
and(container) container
andCardinality(container) int
@ -37,7 +37,8 @@ type container interface {
not(start, final int) container // range is [firstOfRange,lastOfRange)
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container
getShortIterator() shortIterable
getShortIterator() shortPeekable
getReverseIterator() shortIterable
getManyIterator() manyIterable
contains(i uint16) bool
maximum() uint16
@ -61,7 +62,6 @@ type container interface {
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
selectInt(x uint16) int // selectInt returns the xth integer in the container
serializedSizeInBytes() int
readFrom(io.Reader) (int, error)
writeTo(io.Writer) (int, error)
numberOfRuns() int
@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray {
return &sa
}
// clone all containers which have needCopyOnWrite set to true
// This can be used to make sure it is safe to munmap a []byte
// that the roaring array may still have a reference to.
func (ra *roaringArray) cloneCopyOnWriteContainers() {
for i, needCopyOnWrite := range ra.needCopyOnWrite {
if needCopyOnWrite {
ra.containers[i] = ra.containers[i].clone()
ra.needCopyOnWrite[i] = false
}
}
}
// unused function:
//func (ra *roaringArray) containsKey(x uint16) bool {
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 {
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) toBytes() ([]byte, error) {
stream := &bytes.Buffer{}
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
hasRun := ra.hasRunCompression()
isRunSizeInBytes := 0
cookieSize := 8
@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
}
}
_, err := stream.Write(buf[:nw])
written, err := w.Write(buf[:nw])
if err != nil {
return nil, err
return n, err
}
for i, c := range ra.containers {
_ = i
_, err := c.writeTo(stream)
n += int64(written)
for _, c := range ra.containers {
written, err := c.writeTo(w)
if err != nil {
return nil, err
return n, err
}
n += int64(written)
}
return stream.Bytes(), nil
return n, nil
}
//
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
//
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) {
by, err := ra.toBytes()
if err != nil {
return 0, err
}
n, err := out.Write(by)
if err == nil && n < len(by) {
err = io.ErrShortWrite
}
return int64(n), err
func (ra *roaringArray) toBytes() ([]byte, error) {
var buf bytes.Buffer
_, err := ra.writeTo(&buf)
return buf.Bytes(), err
}
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
pos := 0
if len(buf) < 8 {
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
cookie, err := stream.readUInt32()
if err != nil {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
cookie := binary.LittleEndian.Uint32(buf)
pos += 4
var size uint32 // number of containers
haveRunContainers := false
var size uint32
var isRunBitmap []byte
// cookie header
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1) // number of containers
size = uint32(uint16(cookie>>16) + 1)
// create is-run-container bitmap
isRunBitmapSize := (int(size) + 7) / 8
if pos+isRunBitmapSize > len(buf) {
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
isRunBitmap, err = stream.next(isRunBitmapSize)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
}
isRunBitmap = buf[pos : pos+isRunBitmapSize]
pos += isRunBitmapSize
} else if cookie == serialCookieNoRunContainer {
size = binary.LittleEndian.Uint32(buf[pos:])
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
// keycard - is {key, cardinality} tuple slice
if pos+2*2*int(size) > len(buf) {
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
}
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
pos += 2 * 2 * int(size)
size, err = stream.readUInt32()
if !haveRunContainers || size >= noOffsetThreshold {
pos += 4 * int(size)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
} else {
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
}
// descriptive header
buf, err := stream.next(2 * 2 * int(size))
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
}
keycard := byteSliceAsUint16Slice(buf)
if isRunBitmap == nil || size >= noOffsetThreshold {
if err := stream.skipBytes(int(size) * 4); err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
}
}
// Allocate slices upfront as number of containers is known
@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
} else {
ra.containers = make([]container, size)
}
if cap(ra.keys) >= int(size) {
ra.keys = ra.keys[:size]
} else {
ra.keys = make([]uint16, size)
}
if cap(ra.needCopyOnWrite) >= int(size) {
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
} else {
@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
}
for i := uint32(0); i < size; i++ {
key := uint16(keycard[2*i])
key := keycard[2*i]
card := int(keycard[2*i+1]) + 1
ra.keys[i] = key
ra.needCopyOnWrite[i] = true
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
// run container
nr := binary.LittleEndian.Uint16(buf[pos:])
pos += 2
if pos+int(nr)*4 > len(buf) {
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
nr, err := stream.readUInt16()
if err != nil {
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
}
buf, err := stream.next(int(nr) * 4)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
}
nb := runContainer16{
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
iv: byteSliceAsInterval16Slice(buf),
card: int64(card),
}
pos += int(nr) * 4
ra.containers[i] = &nb
} else if card > arrayDefaultMaxSize {
// bitmap container
buf, err := stream.next(arrayDefaultMaxSize * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
}
nb := bitmapContainer{
cardinality: card,
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
bitmap: byteSliceAsUint64Slice(buf),
}
pos += arrayDefaultMaxSize * 2
ra.containers[i] = &nb
} else {
// array container
nb := arrayContainer{
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
buf, err := stream.next(card * 2)
if err != nil {
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
}
pos += card * 2
nb := arrayContainer{
byteSliceAsUint16Slice(buf),
}
ra.containers[i] = &nb
}
}
return int64(pos), nil
}
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
pos := 0
var cookie uint32
err := binary.Read(stream, binary.LittleEndian, &cookie)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
pos += 4
var size uint32
haveRunContainers := false
var isRun *bitmapContainer
if cookie&0x0000FFFF == serialCookie {
haveRunContainers = true
size = uint32(uint16(cookie>>16) + 1)
bytesToRead := (int(size) + 7) / 8
numwords := (bytesToRead + 7) / 8
by := make([]byte, bytesToRead, numwords*8)
nr, err := io.ReadFull(stream, by)
if err != nil {
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
}
pos += bytesToRead
by = by[:cap(by)]
isRun = newBitmapContainer()
for i := 0; i < numwords; i++ {
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
by = by[8:]
}
} else if cookie == serialCookieNoRunContainer {
err = binary.Read(stream, binary.LittleEndian, &size)
if err != nil {
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
}
pos += 4
} else {
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
}
if size > (1 << 16) {
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
}
// descriptive header
keycard := make([]uint16, 2*size, 2*size)
err = binary.Read(stream, binary.LittleEndian, keycard)
if err != nil {
return 0, err
}
pos += 2 * 2 * int(size)
// offset header
if !haveRunContainers || size >= noOffsetThreshold {
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
pos += 4 * int(size)
}
for i := uint32(0); i < size; i++ {
key := int(keycard[2*i])
card := int(keycard[2*i+1]) + 1
if haveRunContainers && isRun.contains(uint16(i)) {
nb := newRunContainer16()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(uint16(key), nb, false)
} else if card > arrayDefaultMaxSize {
nb := newBitmapContainer()
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
nb.cardinality = card
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
} else {
nb := newArrayContainerSize(card)
nr, err := nb.readFrom(stream)
if err != nil {
return 0, err
}
pos += nr
ra.appendContainer(keycard[2*i], nb, false)
}
}
return int64(pos), nil
return stream.getReadBytes(), nil
}
func (ra *roaringArray) hasRunCompression() bool {

View file

@ -8,7 +8,7 @@ import (
"github.com/tinylib/msgp/msgp"
)
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "t"
@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *containerSerz) Msgsize() (s int) {
s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize()
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
{
var zajw uint8
@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteUint8(uint8(z))
if err != nil {
@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z contype) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
o = msgp.AppendUint8(o, uint8(z))
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
{
var zwht uint8
@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z contype) Msgsize() (s int) {
s = msgp.Uint8Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4
// write "keys"
@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 4
@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *roaringArray) Msgsize() (s int) {
s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize
for zxhx := range z.conserz {

View file

@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 5
// write "runstart"
@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 5
@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *addHelper16) Msgsize() (s int) {
s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3
if z.rc == nil {
@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) {
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "start"
@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z interval16) Msgsize() (s int) {
s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "iv"
@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *runContainer16) Msgsize() (s int) {
s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
if err != nil {
return
}
case "curSeq":
z.curSeq, err = dc.ReadInt64()
if err != nil {
return
}
default:
err = dc.Skip()
if err != nil {
@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4
// map header, size 3
// write "rc"
err = en.Append(0x84, 0xa2, 0x72, 0x63)
err = en.Append(0x83, 0xa2, 0x72, 0x63)
if err != nil {
return err
}
@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
if err != nil {
return
}
// write "curSeq"
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
if err != nil {
return err
}
err = en.WriteInt64(z.curSeq)
if err != nil {
return
}
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 4
// map header, size 3
// string "rc"
o = append(o, 0x84, 0xa2, 0x72, 0x63)
o = append(o, 0x83, 0xa2, 0x72, 0x63)
if z.rc == nil {
o = msgp.AppendNil(o)
} else {
@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
// string "curPosInIndex"
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78)
o = msgp.AppendUint16(o, z.curPosInIndex)
// string "curSeq"
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
o = msgp.AppendInt64(o, z.curSeq)
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
if err != nil {
return
}
case "curSeq":
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts)
if err != nil {
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *runIterator16) Msgsize() (s int) {
s = 1 + 3
if z.rc == nil {
@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) {
} else {
s += z.rc.Msgsize()
}
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
var zjpj uint32
zjpj, err = dc.ReadArrayHeader()
@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteArrayHeader(uint32(len(z)))
if err != nil {
@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
o = msgp.AppendArrayHeader(o, uint32(len(z)))
@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
var zgmo uint32
zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts)
@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z uint16Slice) Msgsize() (s int) {
s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size))
return

View file

@ -2,8 +2,6 @@ package roaring
import (
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/tinylib/msgp/msgp"
@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
return stream.Write(buf)
}
func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) {
bts, err := b.MarshalMsg(nil)
if err != nil {
return 0, err
}
return stream.Write(bts)
}
func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
bts, err := b.MarshalMsg(nil)
if err != nil {
@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
return stream.Write(bts)
}
func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b)
return 0, err
}
func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
err := msgp.Decode(stream, b)
return 0, err
}
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
b.iv = b.iv[:0]
b.card = 0
var numRuns uint16
err := binary.Read(stream, binary.LittleEndian, &numRuns)
if err != nil {
return 0, err
}
nr := int(numRuns)
encRun := make([]uint16, 2*nr)
by := make([]byte, 4*nr)
err = binary.Read(stream, binary.LittleEndian, &by)
if err != nil {
return 0, err
}
for i := range encRun {
if len(by) < 2 {
return 0, errCorruptedStream
}
encRun[i] = binary.LittleEndian.Uint16(by)
by = by[2:]
}
for i := 0; i < nr; i++ {
if i > 0 && b.iv[i-1].last() >= encRun[i*2] {
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2])
}
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]})
b.card += int64(encRun[i*2+1]) + 1
}
return 0, err
}

View file

@ -4,6 +4,7 @@ package roaring
import (
"encoding/binary"
"errors"
"io"
)
@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
}
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if b.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
// Write set
buf := make([]byte, 8*len(b.bitmap))
for i, v := range b.bitmap {
@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
return by
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
by := make([]byte, len(slice)*2)
for i, v := range slice {
binary.LittleEndian.PutUint16(by[i*2:], v)
}
return by
}
func byteSliceAsUint16Slice(slice []byte) []uint16 {
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")

View file

@ -3,8 +3,10 @@
package roaring
import (
"errors"
"io"
"reflect"
"runtime"
"unsafe"
)
@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
}
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
if bc.cardinality <= arrayDefaultMaxSize {
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
}
buf := uint64SliceAsByteSlice(bc.bitmap)
return stream.Write(buf)
}
// readFrom reads an arrayContainer from stream.
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
// *before* you call readFrom. We can't guess the size in the stream
// by this point.
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
buf := uint16SliceAsByteSlice(ac.content)
return io.ReadFull(stream, buf)
}
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
buf := uint64SliceAsByteSlice(bc.bitmap)
n, err := io.ReadFull(stream, buf)
bc.computeCardinality()
return n, err
}
func uint64SliceAsByteSlice(slice []uint64) []byte {
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
header.Len *= 8
header.Cap *= 8
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return *(*[]byte)(unsafe.Pointer(&header))
return result
}
func uint16SliceAsByteSlice(slice []uint16) []byte {
@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
header.Len *= 2
header.Cap *= 2
// instantiate result and use KeepAlive so data isn't unmapped.
result := *(*[]byte)(unsafe.Pointer(&header))
runtime.KeepAlive(&slice)
// return it
return *(*[]byte)(unsafe.Pointer(&header))
return result
}
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
// Deserialization code follows
func byteSliceAsUint16Slice(slice []byte) []uint16 {
////
// These methods (byteSliceAsUint16Slice,...) do not make copies,
// they are pointer-based (unsafe). The caller is responsible to
// ensure that the input slice does not get garbage collected, deleted
// or modified while you hold the returned slince.
////
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
if len(slice)%2 != 0 {
panic("Slice size should be divisible by 2")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 2
header.Cap /= 2
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 2
rHeader.Cap = bHeader.Cap / 2
// return it
return *(*[]uint16)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsUint64Slice(slice []byte) []uint64 {
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
if len(slice)%8 != 0 {
panic("Slice size should be divisible by 8")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 8
header.Cap /= 8
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 8
rHeader.Cap = bHeader.Cap / 8
// return it
return *(*[]uint64)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}
func byteSliceAsInterval16Slice(slice []byte) []interval16 {
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
if len(slice)%4 != 0 {
panic("Slice size should be divisible by 4")
}
// reference: https://go101.org/article/unsafe.html
// make a new slice header
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
// update its capacity and length
header.Len /= 4
header.Cap /= 4
// transfer the data from the given slice to a new variable (our result)
rHeader.Data = bHeader.Data
rHeader.Len = bHeader.Len / 4
rHeader.Cap = bHeader.Cap / 4
// return it
return *(*[]interval16)(unsafe.Pointer(&header))
// instantiate result and use KeepAlive so data isn't unmapped.
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
// return result
return
}

View file

@ -5,6 +5,12 @@ type shortIterable interface {
next() uint16
}
type shortPeekable interface {
shortIterable
peekNext() uint16
advanceIfNeeded(minval uint16)
}
type shortIterator struct {
slice []uint16
loc int
@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 {
si.loc++
return a
}
func (si *shortIterator) peekNext() uint16 {
return si.slice[si.loc]
}
func (si *shortIterator) advanceIfNeeded(minval uint16) {
if si.hasNext() && si.peekNext() < minval {
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
}
}
type reverseIterator struct {
slice []uint16
loc int
}
func (si *reverseIterator) hasNext() bool {
return si.loc >= 0
}
func (si *reverseIterator) next() uint16 {
a := si.slice[si.loc]
si.loc--
return a
}

View file

@ -14,6 +14,17 @@ const (
serialCookie = 12347 // runs, arrays, and bitmaps
noOffsetThreshold = 4
// MaxUint32 is the largest uint32 value.
MaxUint32 = 4294967295
// MaxRange is One more than the maximum allowed bitmap bit index. For use as an upper
// bound for ranges.
MaxRange uint64 = MaxUint32 + 1
// MaxUint16 is the largest 16 bit unsigned int.
// This is the largest value an interval16 can store.
MaxUint16 = 65535
// Compute wordSizeInBytes, the size of a word in bytes.
_m = ^uint64(0)
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) {
endword := (end - 1) / 64
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
for i := firstword; i < endword; i++ {
//p("flipBitmapRange on i=%v", i)
bitmap[i] = ^bitmap[i]
}
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 {
}
return b
}
func maxInt(a, b int) int {
if a > b {
return a
}
return b
}
func maxUint16(a, b uint16) uint16 {
if a > b {
return a
}
return b
}
func minUint16(a, b uint16) uint16 {
if a < b {
return a
}
return b
}

View file

@ -3,9 +3,9 @@ sudo: false
language: go
go:
- "1.9.x"
- "1.10.x"
- "1.11.x"
- "1.12.x"
script:
- go get golang.org/x/tools/cmd/cover
@ -15,7 +15,12 @@ script:
- gvt restore
- go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/)
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/)
- go test ./test -v -indexType scorch
- if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
else
errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
fi
- docs/project-code-coverage.sh
- docs/build_children.sh

View file

@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
return fieldLength, tokenFreqs
}
func (t *TextField) Analyzer() *analysis.Analyzer {
return t.analyzer
}
func (t *TextField) Value() []byte {
return t.value
}

View file

@ -37,6 +37,12 @@ var geoTolerance = 1E-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
// Point represents a geo point.
type Point struct {
Lon float64
Lat float64
}
// MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat.
func MortonHash(lon, lat float64) uint64 {
@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error {
}
return nil
}
func BoundingRectangleForPolygon(polygon []Point) (
float64, float64, float64, float64, error) {
err := checkLongitude(polygon[0].Lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(polygon[0].Lat)
if err != nil {
return 0, 0, 0, 0, err
}
maxY, minY := polygon[0].Lat, polygon[0].Lat
maxX, minX := polygon[0].Lon, polygon[0].Lon
for i := 1; i < len(polygon); i++ {
err := checkLongitude(polygon[i].Lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(polygon[i].Lat)
if err != nil {
return 0, 0, 0, 0, err
}
maxY = math.Max(maxY, polygon[i].Lat)
minY = math.Min(minY, polygon[i].Lat)
maxX = math.Max(maxX, polygon[i].Lon)
minX = math.Min(minX, polygon[i].Lon)
}
return minX, maxY, maxX, minY, nil
}

View file

@ -1,32 +1,21 @@
// The code here was obtained from:
// https://github.com/mmcloughlin/geohash
// The MIT License (MIT)
// Copyright (c) 2015 Michael McLoughlin
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This implementation is inspired from the geohash-js
// ref: https://github.com/davetroy/geohash-js
package geo
import (
"math"
)
// encoding encapsulates an encoding defined by a given base32 alphabet.
type encoding struct {
enc string
@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding {
return e
}
// Decode string into bits of a 64-bit word. The string s may be at most 12
// characters.
func (e *encoding) decode(s string) uint64 {
x := uint64(0)
for i := 0; i < len(s); i++ {
x = (x << 5) | uint64(e.dec[s[i]])
}
return x
}
// Encode bits of 64-bit word into a string.
func (e *encoding) encode(x uint64) string {
b := [12]byte{}
for i := 0; i < 12; i++ {
b[11-i] = e.enc[x&0x1f]
x >>= 5
}
return string(b[:])
}
// Base32Encoding with the Geohash alphabet.
// base32encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
// BoundingBox returns the region encoded by the given string geohash.
func geoBoundingBox(hash string) geoBox {
bits := uint(5 * len(hash))
inthash := base32encoding.decode(hash)
return geoBoundingBoxIntWithPrecision(inthash, bits)
}
var masks = []uint64{16, 8, 4, 2, 1}
// Box represents a rectangle in latitude/longitude space.
type geoBox struct {
minLat float64
maxLat float64
minLng float64
maxLng float64
}
// DecodeGeoHash decodes the string geohash faster with
// higher precision. This api is in experimental phase.
func DecodeGeoHash(geoHash string) (float64, float64) {
even := true
lat := []float64{-90.0, 90.0}
lon := []float64{-180.0, 180.0}
// Round returns a point inside the box, making an effort to round to minimal
// precision.
func (b geoBox) round() (lat, lng float64) {
x := maxDecimalPower(b.maxLat - b.minLat)
lat = math.Ceil(b.minLat/x) * x
x = maxDecimalPower(b.maxLng - b.minLng)
lng = math.Ceil(b.minLng/x) * x
return
}
// precalculated for performance
var exp232 = math.Exp2(32)
// errorWithPrecision returns the error range in latitude and longitude for in
// integer geohash with bits of precision.
func errorWithPrecision(bits uint) (latErr, lngErr float64) {
b := int(bits)
latBits := b / 2
lngBits := b - latBits
latErr = math.Ldexp(180.0, -latBits)
lngErr = math.Ldexp(360.0, -lngBits)
return
}
// minDecimalPlaces returns the minimum number of decimal places such that
// there must exist an number with that many places within any range of width
// r. This is intended for returning minimal precision coordinates inside a
// box.
func maxDecimalPower(r float64) float64 {
m := int(math.Floor(math.Log10(r)))
return math.Pow10(m)
}
// Encode the position of x within the range -r to +r as a 32-bit integer.
func encodeRange(x, r float64) uint32 {
p := (x + r) / (2 * r)
return uint32(p * exp232)
}
// Decode the 32-bit range encoding X back to a value in the range -r to +r.
func decodeRange(X uint32, r float64) float64 {
p := float64(X) / exp232
x := 2*r*p - r
return x
}
// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
// ignored, and may take any value.
func squash(X uint64) uint32 {
X &= 0x5555555555555555
X = (X | (X >> 1)) & 0x3333333333333333
X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
X = (X | (X >> 8)) & 0x0000ffff0000ffff
X = (X | (X >> 16)) & 0x00000000ffffffff
return uint32(X)
}
// Deinterleave the bits of X into 32-bit words containing the even and odd
// bitlevels of X, respectively.
func deinterleave(X uint64) (uint32, uint32) {
return squash(X), squash(X >> 1)
}
// BoundingBoxIntWithPrecision returns the region encoded by the integer
// geohash with the specified precision.
func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
fullHash := hash << (64 - bits)
latInt, lngInt := deinterleave(fullHash)
lat := decodeRange(latInt, 90)
lng := decodeRange(lngInt, 180)
latErr, lngErr := errorWithPrecision(bits)
return geoBox{
minLat: lat,
maxLat: lat + latErr,
minLng: lng,
maxLng: lng + lngErr,
for i := 0; i < len(geoHash); i++ {
cd := uint64(base32encoding.dec[geoHash[i]])
for j := 0; j < 5; j++ {
if even {
if cd&masks[j] > 0 {
lon[0] = (lon[0] + lon[1]) / 2
} else {
lon[1] = (lon[0] + lon[1]) / 2
}
} else {
if cd&masks[j] > 0 {
lat[0] = (lat[0] + lat[1]) / 2
} else {
lat[1] = (lat[0] + lat[1]) / 2
}
}
even = !even
}
}
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
}
// ----------------------------------------------------------------------
func EncodeGeoHash(lat, lon float64) string {
even := true
lats := []float64{-90.0, 90.0}
lons := []float64{-180.0, 180.0}
precision := 12
var ch, bit uint64
var geoHash string
// Decode the string geohash to a (lat, lng) point.
func GeoHashDecode(hash string) (lat, lng float64) {
box := geoBoundingBox(hash)
return box.round()
for len(geoHash) < precision {
if even {
mid := (lons[0] + lons[1]) / 2
if lon > mid {
ch |= masks[bit]
lons[0] = mid
} else {
lons[1] = mid
}
} else {
mid := (lats[0] + lats[1]) / 2
if lat > mid {
ch |= masks[bit]
lats[0] = mid
} else {
lats[1] = mid
}
}
even = !even
if bit < 4 {
bit++
} else {
geoHash += string(base32encoding.enc[ch])
ch = 0
bit = 0
}
}
return geoHash
}

View file

@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
} else {
// geohash
lat, lon = GeoHashDecode(geoStr)
lat, lon = DecodeGeoHash(geoStr)
foundLat = true
foundLon = true
}

View file

@ -117,6 +117,8 @@ func (b *Batch) String() string {
// be re-used in the future.
func (b *Batch) Reset() {
b.internal.Reset()
b.lastDocSize = 0
b.totalSize = 0
}
func (b *Batch) Merge(o *Batch) {

View file

@ -121,6 +121,10 @@ type IndexReaderOnly interface {
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
}
type IndexReaderContains interface {
FieldDictContains(field string) (FieldDictContains, error)
}
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string
@ -230,6 +234,10 @@ type FieldDict interface {
Close() error
}
type FieldDictContains interface {
Contains(key []byte) (bool, error)
}
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {

View file

@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
fileSegments++
}
}
}
// before the newMerge introduction, need to clean the newly
@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
}
}
// In case where all the docs in the newly merged segment getting
// deleted by the time we reach here, can skip the introduction.
if nextMerge.new != nil &&
@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
newSnapshot.updateSize()
s.rootLock.Lock()
// swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch
@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
}
newSnapshot.updateSize()
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot

View file

@ -18,6 +18,7 @@ import (
"encoding/json"
"fmt"
"os"
"strings"
"sync/atomic"
"time"
@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil
}
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now
var notifications []chan *IndexSnapshot
var filenames []string
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
segmentsToMerge = append(segmentsToMerge, zapSeg)
docsToDrop = append(docsToDrop, segSnapshot.deleted)
}
// track the files getting merged for unsetting the
// removal ineligibility. This helps to unflip files
// even with fast merger, slow persister work flows.
path := zapSeg.Path()
filenames = append(filenames,
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
}
}
}
@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
return err
}
err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
if err != nil {
s.unmarkIneligibleForRemoval(filename)
return fmt.Errorf("merge validation failed: %v", err)
}
oldNewDocNums = make(map[uint64][]uint64)
for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
}
}
// once all the newly merged segment introductions are done,
// its safe to unflip the removal ineligibility for the replaced
// older segments
for _, f := range filenames {
s.unmarkIneligibleForRemoval(f)
}
return nil
}
@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err
}
err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
if err != nil {
return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
}
// update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())

View file

@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher
var unpersistedCallbacks []index.BatchCallback
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
@ -111,7 +114,6 @@ OUTER:
if ew != nil && ew.epoch > lastMergedEpoch {
lastMergedEpoch = ew.epoch
}
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers, po)
@ -150,11 +152,25 @@ OUTER:
_ = ourSnapshot.DecRef()
break OUTER
}
// save this current snapshot's persistedCallbacks, to invoke during
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
}
if unpersistedCallbacks != nil {
// in the event of this being a retry attempt for persisting a snapshot
// that had earlier failed, prepend the persistedCallbacks associated
// with earlier segment(s) to the latest persistedCallbacks
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
unpersistedCallbacks = nil
}
for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err)
}
@ -179,7 +195,6 @@ OUTER:
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
if changed {
s.removeOldData()
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
continue OUTER
}
@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext
}
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) {
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
po *persisterOptions) (uint64, []*epochWatcher) {
// first, let the watchers proceed if they lag behind
// First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// check the merger lag by counting the segment files on disk,
// Check the merger lag by counting the segment files on disk,
numFilesOnDisk, _ := s.diskFileStats()
// On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop.
// On finding too many files on disk, persister pause until the merger
// catches up to reduce the segment file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
numFilesOnDisk, _ := s.diskFileStats()
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
select {
@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM
return lastMergedEpoch, persistWatchers
}
// Finding too many files on disk could be due to two reasons.
// 1. Too many older snapshots awaiting the clean up.
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
s.removeOldData()
numFilesOnDisk, _ = s.diskFileStats()
}
// Persister pause until the merger catches up to reduce the segment
// file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
OUTER:
for po.PersisterNapUnderNumFiles > 0 &&
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
}
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
rv := &IndexSnapshot{
parent: s,
internal: make(map[string][]byte),
refs: 1,
creator: "loadSnapshot",
}
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
running += segmentSnapshot.segment.Count()
}
}
return rv, nil
}
@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() {
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
}
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
if removed > 0 {
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
}

View file

@ -41,12 +41,14 @@ const Version uint8 = 2
var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct {
nextSegmentID uint64
stats Stats
iStats internalStats
readOnly bool
version uint8
config map[string]interface{}
analysisQueue *index.AnalysisQueue
stats Stats
nextSegmentID uint64
path string
unsafeBatch bool
@ -73,8 +75,6 @@ type Scorch struct {
onEvent func(event Event)
onAsyncError func(err error)
iStats internalStats
pauseLock sync.RWMutex
pauseCount uint64
@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis?
if len(batch.IndexOps) > 0 {
if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk
s.rootLock.RLock()
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
s.rootLock.RUnlock()
// TODO: consider one day removing these backwards compatible
// names for apps using the old names
m["updates"] = m["TotUpdates"]

View file

@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
return &EmptyDictionaryIterator{}
}
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
return false, nil
}
type EmptyDictionaryIterator struct{}
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil
}
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
return false, nil
}
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil
}

View file

@ -19,7 +19,10 @@
package segment
import "fmt"
import (
"errors"
"fmt"
)
const (
MaxVarintSize = 9
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
}
return b[length:], v, nil
}
// ------------------------------------------------------------
type MemUvarintReader struct {
C int // index of next byte to read from S
S []byte
}
func NewMemUvarintReader(s []byte) *MemUvarintReader {
return &MemUvarintReader{S: s}
}
// Len returns the number of unread bytes.
func (r *MemUvarintReader) Len() int {
n := len(r.S) - r.C
if n < 0 {
return 0
}
return n
}
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")
// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
var x uint64
var s uint
var C = r.C
var S = r.S
for {
b := S[C]
C++
if b < 0x80 {
r.C = C
// why 63? The original code had an 'i += 1' loop var and
// checked for i > 9 || i == 9 ...; but, we no longer
// check for the i var, but instead check here for s,
// which is incremented by 7. So, 7*9 == 63.
//
// why the "extra" >= check? The normal case is that s <
// 63, so we check this single >= guard first so that we
// hit the normal, nil-error return pathway sooner.
if s >= 63 && (s > 63 || s == 63 && b > 1) {
return 0, ErrMemUvarintReaderOverflow
}
return x | uint64(b)<<s, nil
}
x |= uint64(b&0x7f) << s
s += 7
}
}
// SkipUvarint skips ahead one encoded uint64.
func (r *MemUvarintReader) SkipUvarint() {
for {
b := r.S[r.C]
r.C++
if b < 0x80 {
return
}
}
}
// SkipBytes skips a count number of bytes.
func (r *MemUvarintReader) SkipBytes(count int) {
r.C = r.C + count
}
func (r *MemUvarintReader) Reset(s []byte) {
r.C = 0
r.S = s
}

View file

@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
s = s.Sub[0]
}
if s.Op == syntax.OpLiteral {
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
return string(s.Rune)
}

View file

@ -59,6 +59,8 @@ type TermDictionary interface {
AutomatonIterator(a vellum.Automaton,
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
Contains(key []byte) (bool, error)
}
type DictionaryIterator interface {

View file

@ -16,6 +16,7 @@ package zap
import (
"bufio"
"github.com/couchbase/vellum"
"math"
"os"
)
@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
docValueOffset: docValueOffset,
dictLocs: dictLocs,
fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
}
sb.updateSize()

View file

@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
return rv
}
func (d *Dictionary) Contains(key []byte) (bool, error) {
return d.fst.Contains(key)
}
// Iterator returns an iterator for this dictionary
func (d *Dictionary) Iterator() segment.DictionaryIterator {
rv := &DictionaryIterator{
@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
}
// need to increment the end position to be inclusive
endBytes := []byte(end)
if endBytes[len(endBytes)-1] < 0xff {
endBytes[len(endBytes)-1]++
} else {
endBytes = append(endBytes, 0xff)
var endBytes []byte
if len(end) > 0 {
endBytes = []byte(end)
if endBytes[len(endBytes)-1] < 0xff {
endBytes[len(endBytes)-1]++
} else {
endBytes = append(endBytes, 0xff)
}
}
if d.fst != nil {

View file

@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
type docVisitState struct {
dvrs map[uint16]*docValueReader
segment *Segment
segment *SegmentBase
}
type docValueReader struct {
@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields
if fieldDvLocStart == fieldNotUninverted {
return nil, fmt.Errorf("loadFieldDocValueReader: "+
"no docValues found for field: %s", field)
// no docValues found, nothing to do
return nil, nil
}
// read the number of chunks, and chunk offsets position
@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
// acquire position of chunk offsets
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
} else {
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
}
fdvIter := &docValueReader{
@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
// VisitDocumentFieldTerms is an implementation of the
// DocumentFieldTermVisitable interface
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
segment.DocVisitState, error) {
dvs, ok := dvsIn.(*docVisitState)
@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
// check if the chunk is already loaded
if docInChunk != dvr.curChunkNumber() {
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
err := dvr.loadDvChunk(docInChunk, s)
if err != nil {
return dvs, err
}
@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
// VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method.
func (s *Segment) VisitableDocValueFields() ([]string, error) {
func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
return s.fieldDvNames, nil
}

View file

@ -31,6 +31,14 @@ import (
var DefaultFileMergerBufferSize = 1024 * 1024
// ValidateMerge can be set by applications to perform additional checks
// on a new segment produced by a merge, by default this does nothing.
// Caller should provide EITHER segments or memSegments, but not both.
// This API is experimental and may be removed at any time.
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
return nil
}
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// Merge takes a slice of zap segments and bit masks describing which

View file

@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment, by default it does
// nothing.
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field document.Field) error {
return nil
}
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
// SegmentBase from analysis results
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() (
if opts.IncludeDocValues() {
s.IncludeDocValues[fieldID] = true
}
err := ValidateDocFields(field)
if err != nil {
return 0, err
}
}
var curr int

View file

@ -15,10 +15,8 @@
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"reflect"
@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
}
rv.postings = p
rv.includeFreqNorm = includeFreq || includeNorm
rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
rv.includeLocs = includeLocs
if p.normBits1Hit != 0 {
@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
var n uint64
var n, e uint64
if p.normBits1Hit != 0 {
n = 1
if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
e = 1
}
} else if p.postings != nil {
n = p.postings.GetCardinality()
}
var e uint64
if p.except != nil {
e = p.except.GetCardinality()
}
if n <= e {
return 0
if p.except != nil {
e = p.postings.AndCardinality(p.except)
}
}
return n - e
}
@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error {
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
postings *PostingsList
all roaring.IntIterable
Actual roaring.IntIterable
all roaring.IntPeekable
Actual roaring.IntPeekable
ActualBM *roaring.Bitmap
currChunk uint32
currChunkFreqNorm []byte
currChunkLoc []byte
freqNormReader *bytes.Reader
locReader *bytes.Reader
freqNormReader *segment.MemUvarintReader
locReader *segment.MemUvarintReader
freqChunkOffsets []uint64
freqChunkStart uint64
@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
if i.freqNormReader == nil {
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
} else {
i.freqNormReader.Reset(i.currChunkFreqNorm)
}
@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e
i.currChunkLoc = i.postings.sb.mem[start:end]
if i.locReader == nil {
i.locReader = bytes.NewReader(i.currChunkLoc)
i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
} else {
i.locReader.Reset(i.currChunkLoc)
}
@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
return 1, i.normBits1Hit, false, nil
}
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil {
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
}
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
normBits, err := binary.ReadUvarint(i.freqNormReader)
normBits, err := i.freqNormReader.ReadUvarint()
if err != nil {
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
}
return freq, normBits, hasLocs, err
return freq, normBits, hasLocs, nil
}
func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
if i.normBits1Hit != 0 {
return false, nil
}
freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil {
return false, fmt.Errorf("error reading freqHasLocs: %v", err)
}
i.freqNormReader.SkipUvarint() // Skip normBits.
return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
}
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
}
// readLocation processes all the integers on the stream representing a single
// location. if you care about it, pass in a non-nil location struct, and we
// will fill it. if you don't care about it, pass in nil and we safely consume
// the contents.
// location.
func (i *PostingsIterator) readLocation(l *Location) error {
// read off field
fieldID, err := binary.ReadUvarint(i.locReader)
fieldID, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location field: %v", err)
}
// read off pos
pos, err := binary.ReadUvarint(i.locReader)
pos, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location pos: %v", err)
}
// read off start
start, err := binary.ReadUvarint(i.locReader)
start, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location start: %v", err)
}
// read off end
end, err := binary.ReadUvarint(i.locReader)
end, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location end: %v", err)
}
// read off num array pos
numArrayPos, err := binary.ReadUvarint(i.locReader)
numArrayPos, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location num array pos: %v", err)
}
// group these together for less branching
if l != nil {
l.field = i.postings.sb.fieldsInv[fieldID]
l.pos = pos
l.start = start
l.end = end
if cap(l.ap) < int(numArrayPos) {
l.ap = make([]uint64, int(numArrayPos))
} else {
l.ap = l.ap[:int(numArrayPos)]
}
l.field = i.postings.sb.fieldsInv[fieldID]
l.pos = pos
l.start = start
l.end = end
if cap(l.ap) < int(numArrayPos) {
l.ap = make([]uint64, int(numArrayPos))
} else {
l.ap = l.ap[:int(numArrayPos)]
}
// read off array positions
for k := 0; k < int(numArrayPos); k++ {
ap, err := binary.ReadUvarint(i.locReader)
ap, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading array position: %v", err)
}
if l != nil {
l.ap[k] = ap
}
l.ap[k] = ap
}
return nil
@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
}
rv.locs = i.nextSegmentLocs[:0]
numLocsBytes, err := binary.ReadUvarint(i.locReader)
numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
}
@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() (
if hasLocs {
startLoc := len(i.currChunkLoc) - i.locReader.Len()
numLocsBytes, err := binary.ReadUvarint(i.locReader)
numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return 0, 0, 0, nil, nil,
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
}
// skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
if err != nil {
return 0, 0, 0, nil, nil, err
}
i.locReader.SkipBytes(int(numLocsBytes))
endLoc := len(i.currChunkLoc) - i.locReader.Len()
bytesLoc = i.currChunkLoc[startLoc:endLoc]
@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
return i.nextDocNumAtOrAfterClean(atOrAfter)
}
n := i.Actual.Next()
for uint64(n) < atOrAfter && i.Actual.HasNext() {
n = i.Actual.Next()
}
if uint64(n) < atOrAfter {
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
if !i.Actual.HasNext() {
// couldn't find anything
return 0, false, nil
}
n := i.Actual.Next()
allN := i.all.Next()
nChunk := n / i.postings.sb.chunkFactor
@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
// no deletions) where the all bitmap is the same as the actual bitmap
func (i *PostingsIterator) nextDocNumAtOrAfterClean(
atOrAfter uint64) (uint64, bool, error) {
n := i.Actual.Next()
if !i.includeFreqNorm {
for uint64(n) < atOrAfter && i.Actual.HasNext() {
n = i.Actual.Next()
}
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
if uint64(n) < atOrAfter {
if !i.Actual.HasNext() {
return 0, false, nil // couldn't find anything
}
return uint64(n), true, nil
return uint64(i.Actual.Next()), true, nil
}
// freq-norm's needed, so maintain freq-norm chunk reader
sameChunkNexts := 0 // # of times we called Next() in the same chunk
n := i.Actual.Next()
nChunk := n / i.postings.sb.chunkFactor
for uint64(n) < atOrAfter && i.Actual.HasNext() {
@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
}
// read off freq/offsets even though we don't care about them
_, _, hasLocs, err := i.readFreqNormHasLocs()
hasLocs, err := i.skipFreqNormReadHasLocs()
if err != nil {
return err
}
if i.includeLocs && hasLocs {
numLocsBytes, err := binary.ReadUvarint(i.locReader)
numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location numLocsBytes: %v", err)
}
// skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
if err != nil {
return err
}
i.locReader.SkipBytes(int(numLocsBytes))
}
return nil

View file

@ -20,8 +20,8 @@ import (
"fmt"
"io"
"os"
"reflect"
"sync"
"unsafe"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int
func init() {
var sb SegmentBase
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
}
// Open returns a zap impl of a segment
@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) {
mem: mm[0 : len(mm)-FooterSize],
fieldsMap: make(map[string]uint16),
fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
},
f: f,
mm: mm,
@ -101,6 +102,9 @@ type SegmentBase struct {
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
m sync.Mutex
fieldFSTs map[uint16]*vellum.FST
}
func (sb *SegmentBase) Size() int {
@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
dictStart := sb.dictLocs[rv.fieldID]
if dictStart > 0 {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
if fstBytes != nil {
var ok bool
sb.m.Lock()
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
rv.fst, err = vellum.Load(fstBytes)
if err != nil {
sb.m.Unlock()
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
}
rv.fstReader, err = rv.fst.Reader()
if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
}
sb.fieldFSTs[rv.fieldID] = rv.fst
}
sb.m.Unlock()
rv.fstReader, err = rv.fst.Reader()
if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
}
}
}
@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
}
func (s *SegmentBase) loadDvReaders() error {
if s.docValueOffset == fieldNotUninverted {
if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
return nil
}
@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error {
}
read += uint64(n)
fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
if err != nil {
return err
}
if fieldDvReader != nil {
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
s.fieldDvNames = append(s.fieldDvNames, field)

View file

@ -28,13 +28,14 @@ import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
lev2 "github.com/couchbase/vellum/levenshtein2"
lev "github.com/couchbase/vellum/levenshtein"
)
// re usable, threadsafe levenshtein builders
var lb1, lb2 *lev2.LevenshteinAutomatonBuilder
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator
index int
@ -51,11 +52,11 @@ func init() {
var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
var err error
lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true)
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
}
lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true)
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
}
@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() {
}
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
randomLookup bool) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
}
}
}(index, segment)
}
@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if asr.err != nil && err == nil {
err = asr.err
} else {
next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil {
err = err2
}
if next != nil {
if !randomLookup {
next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil {
err = err2
}
if next != nil {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
curr: *next,
})
}
} else {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
curr: *next,
dict: asr.dict,
})
}
}
@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
return nil, err
}
// prepare heap
heap.Init(rv)
if !randomLookup {
// prepare heap
heap.Init(rv)
}
return rv, nil
}
@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.Iterator()
})
}, false)
}
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
endTerm []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RangeIterator(string(startTerm), string(endTerm))
})
}, false)
}
func (i *IndexSnapshot) FieldDictPrefix(field string,
termPrefix []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.PrefixIterator(string(termPrefix))
})
}, false)
}
func (i *IndexSnapshot) FieldDictRegexp(field string,
@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
})
}, false)
}
func (i *IndexSnapshot) getLevAutomaton(term string,
@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
})
}, false)
}
func (i *IndexSnapshot) FieldDictOnly(field string,
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.OnlyIterator(onlyTerms, includeCount)
})
}, false)
}
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
return i.newIndexSnapshotFieldDict(field, nil, true)
}
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {

View file

@ -22,6 +22,7 @@ import (
)
type segmentDictCursor struct {
dict segment.TermDictionary
itr segment.DictionaryIterator
curr index.DictEntry
}
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
func (i *IndexSnapshotFieldDict) Close() error {
return nil
}
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
if len(i.cursors) == 0 {
return false, nil
}
for _, cursor := range i.cursors {
if found, _ := cursor.dict.Contains(key); found {
return true, nil
}
}
return false, nil
}

View file

@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
}
type cachedDocs struct {
size uint64
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
size uint64
}
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {

View file

@ -107,6 +107,9 @@ type Stats struct {
TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductionsSkipped uint64
CurFilesIneligibleForRemoval uint64
TotSnapshotsRemovedFromMetaStore uint64
TotMemMergeBeg uint64
TotMemMergeErr uint64
TotMemMergeDone uint64

View file

@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
return
}
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
// start a writer for this update
indexStart := time.Now()
var kvwriter store.KVWriter
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
}
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
defer persistedCallback(err)
}
analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}
if len(batch.IndexOps) > 0 {
if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&udc.stats.errors, 1)
}
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
persistedCallback(err)
}
return
}

View file

@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
Score: req.Score,
SearchAfter: req.SearchAfter,
SearchBefore: req.SearchBefore,
}
return &rv
}
@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
searchStart := time.Now()
asyncResults := make(chan *asyncSearchResult, len(indexes))
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
// run search on each index in separate go routine
var waitGroup sync.WaitGroup
@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
// sort all hits with the requested order
if len(req.Sort) > 0 {
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
sorter := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(sorter)
}
@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
sr.Facets.Fixup(name, fr.Size)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
// fix up original request
sr.Request = req
searchDuration := time.Since(searchStart)
@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}
type multiSearchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
return &multiSearchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *multiSearchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View file

@ -19,6 +19,7 @@ import (
"encoding/json"
"fmt"
"os"
"sort"
"sync"
"sync/atomic"
"time"
@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed
}
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
var coll *collector.TopNCollector
if req.SearchAfter != nil {
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
} else {
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
}
// open a reader for this search
indexReader, err := i.i.Reader()
@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
facetsBuilder.Add(facetName, facetBuilder)
}
}
collector.SetFacetsBuilder(facetsBuilder)
coll.SetFacetsBuilder(facetsBuilder)
}
memNeeded := memNeededForSearch(req, searcher, collector)
memNeeded := memNeededForSearch(req, searcher, coll)
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
err = cbF(memNeeded)
@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
err = collector.Collect(ctx, searcher, indexReader)
err = coll.Collect(ctx, searcher, indexReader)
if err != nil {
return nil, err
}
hits := collector.Results()
hits := coll.Results()
var highlighter highlight.Highlighter
@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
for _, hit := range hits {
if len(req.Fields) > 0 || highlighter != nil {
doc, err := indexReader.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
for _, docF := range doc.Fields {
if f == "*" || docF.Name() == f {
var value interface{}
switch docF := docF.(type) {
case *document.TextField:
value = string(docF.Value())
case *document.NumericField:
num, err := docF.Number()
if err == nil {
value = num
}
case *document.DateTimeField:
datetime, err := docF.DateTime()
if err == nil {
value = datetime.Format(time.RFC3339)
}
case *document.BooleanField:
boolean, err := docF.Boolean()
if err == nil {
value = boolean
}
case *document.GeoPointField:
lon, err := docF.Lon()
if err == nil {
lat, err := docF.Lat()
if err == nil {
value = []float64{lon, lat}
}
}
}
if value != nil {
hit.AddFieldValue(docF.Name(), value)
}
}
}
}
}
if highlighter != nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
}
}
} else if doc == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return nil, ErrorIndexReadInconsistency
}
}
if i.name != "" {
hit.Index = i.name
}
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
if err != nil {
return nil, err
}
}
atomic.AddUint64(&i.stats.searches, 1)
@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
logger.Printf("slow search took %s - %v", searchDuration, req)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
return &SearchResult{
Status: &SearchStatus{
Total: 1,
@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
},
Request: req,
Hits: hits,
Total: collector.Total(),
MaxScore: collector.MaxScore(),
Total: coll.Total(),
MaxScore: coll.MaxScore(),
Took: searchDuration,
Facets: collector.FacetResults(),
Facets: coll.FacetResults(),
}, nil
}
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
indexName string, r index.IndexReader,
highlighter highlight.Highlighter) error {
if len(req.Fields) > 0 || highlighter != nil {
doc, err := r.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
for _, docF := range doc.Fields {
if f == "*" || docF.Name() == f {
var value interface{}
switch docF := docF.(type) {
case *document.TextField:
value = string(docF.Value())
case *document.NumericField:
num, err := docF.Number()
if err == nil {
value = num
}
case *document.DateTimeField:
datetime, err := docF.DateTime()
if err == nil {
value = datetime.Format(time.RFC3339)
}
case *document.BooleanField:
boolean, err := docF.Boolean()
if err == nil {
value = boolean
}
case *document.GeoPointField:
lon, err := docF.Lon()
if err == nil {
lat, err := docF.Lat()
if err == nil {
value = []float64{lon, lat}
}
}
}
if value != nil {
hit.AddFieldValue(docF.Name(), value)
}
}
}
}
}
if highlighter != nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
}
}
} else if doc == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return ErrorIndexReadInconsistency
}
}
return nil
}
// Fields returns the name of all the fields this
// Index has operated on.
func (i *indexImpl) Fields() (fields []string, err error) {
@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string {
}
return ret
}
type searchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
return &searchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *searchHitSorter) Len() int { return len(m.hits) }
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *searchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View file

@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
// ONLY process TextMarshaler if there is an explicit mapping
// AND all of the fiels are of type text
// OTHERWISE process field without TextMarshaler
if subDocMapping != nil {
allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
if fieldMapping.Type != "text" {
allFieldsText = false
break
}
}
} else {
dm.walkDocument(property, path, indexes, context)
txt, err := property.MarshalText()
if err == nil && allFieldsText {
txtStr := string(txt)
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(txtStr, pathString, path, indexes, context)
}
return
}
}
dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}

View file

@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
type PrefixCoded []byte
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
return rv, err
}
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
rv PrefixCoded, preallocRest []byte, err error) {
if shift > 63 {
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
}
nChars := ((63 - shift) / 7) + 1
rv := make(PrefixCoded, nChars+1)
size := int(nChars + 1)
if len(prealloc) >= size {
rv = PrefixCoded(prealloc[0:size])
preallocRest = prealloc[size:]
} else {
rv = make(PrefixCoded, size)
}
rv[0] = ShiftStartInt64 + byte(shift)
sortableBits := int64(uint64(in) ^ 0x8000000000000000)
@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
nChars--
sortableBits = int64(uint64(sortableBits) >> 7)
}
return rv, nil
return rv, preallocRest, nil
}
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {

View file

@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) {
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed
// SearchAfter supports deep paging by providing a minimum sort key
// SearchBefore supports deep paging by providing a maximum sort key
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
@ -275,6 +277,8 @@ type SearchRequest struct {
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
}
func (r *SearchRequest) Validate() error {
@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error {
}
}
if r.SearchAfter != nil && r.SearchBefore != nil {
return fmt.Errorf("cannot use search after and search before together")
}
if r.SearchAfter != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search after with from !=0")
}
if len(r.SearchAfter) != len(r.Sort) {
return fmt.Errorf("search after must have same size as sort order")
}
}
if r.SearchBefore != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search before with from !=0")
}
if len(r.SearchBefore) != len(r.Sort) {
return fmt.Errorf("search before must have same size as sort order")
}
}
return r.Facets.Validate()
}
@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
r.Sort = order
}
// SetSearchAfter sets the request to skip over hits with a sort
// value less than the provided sort after key
func (r *SearchRequest) SetSearchAfter(after []string) {
r.SearchAfter = after
}
// SetSearchBefore sets the request to skip over hits with a sort
// value greater than the provided sort before key
func (r *SearchRequest) SetSearchBefore(before []string) {
r.SearchBefore = before
}
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
}
err := json.Unmarshal(input, &temp)
@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
r.Score = temp.Score
r.SearchAfter = temp.SearchAfter
r.SearchBefore = temp.SearchBefore
r.Query, err = query.ParseQuery(temp.Q)
if err != nil {
return err

View file

@ -69,6 +69,7 @@ type TopNCollector struct {
lowestMatchOutsideResults *search.DocumentMatch
updateFieldVisitor index.DocumentFieldTermVisitor
dvReader index.DocValueReader
searchAfter *search.DocumentMatch
}
// CheckDoneEvery controls how frequently we check the context deadline
@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024)
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
return newTopNCollector(size, skip, sort)
}
// NewTopNCollector builds a collector to find the top 'size' hits
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
rv := newTopNCollector(size, 0, sort)
rv.searchAfter = &search.DocumentMatch{
Sort: after,
}
return rv
}
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
hc := &TopNCollector{size: size, skip: skip, sort: sort}
// pre-allocate space on the store to avoid reslicing
@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
Collector: hc,
IndexReader: reader,
}
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler(
if d == nil {
return nil
}
// support search after based pagination,
// if this hit is <= the search after sort key
// we should skip it
if hc.searchAfter != nil {
// exact sort order matches use hit number to break tie
// but we want to allow for exact match, so we pretend
hc.searchAfter.HitNumber = d.HitNumber
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
return nil
}
}
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed

View file

@ -41,6 +41,14 @@ type BleveQueryTime struct {
time.Time
}
var MinRFC3339CompatibleTime time.Time
var MaxRFC3339CompatibleTime time.Time
func init() {
MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z")
MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z")
}
func queryTimeFromString(t string) (time.Time, error) {
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
if err != nil {
@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
min := math.Inf(-1)
max := math.Inf(1)
if !q.Start.IsZero() {
min = numeric.Int64ToFloat64(q.Start.UnixNano())
if !isDatetimeCompatible(q.Start) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
}
startInt64 := q.Start.UnixNano()
min = numeric.Int64ToFloat64(startInt64)
}
if !q.End.IsZero() {
max = numeric.Int64ToFloat64(q.End.UnixNano())
if !isDatetimeCompatible(q.End) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
}
endInt64 := q.End.UnixNano()
max = numeric.Int64ToFloat64(endInt64)
}
return &min, &max, nil
@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
}
return nil
}
func isDatetimeCompatible(t BleveQueryTime) bool {
if QueryDateTimeFormat == time.RFC3339 &&
(t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) {
return false
}
return true
}

View file

@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
} else if len(ss) == 1 && int(q.Min) == ss[0].Min() {
// apply optimization only if both conditions below are satisfied:
// - disjunction searcher has only 1 child searcher
// - parent searcher's min setting is equal to child searcher's min
return ss[0], nil
}
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)

View file

@ -0,0 +1,94 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type GeoBoundingPolygonQuery struct {
Points []geo.Point `json:"polygon_points"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
return &GeoBoundingPolygonQuery{
Points: points}
}
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *GeoBoundingPolygonQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *GeoBoundingPolygonQuery) SetField(f string) {
q.FieldVal = f
}
func (q *GeoBoundingPolygonQuery) Field() string {
return q.FieldVal
}
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options)
}
func (q *GeoBoundingPolygonQuery) Validate() error {
return nil
}
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
Points []interface{} `json:"polygon_points"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
q.Points = make([]geo.Point, 0, len(tmp.Points))
for _, i := range tmp.Points {
// now use our generic point parsing code from the geo package
lon, lat, found := geo.ExtractGeoPoint(i)
if !found {
return fmt.Errorf("geo polygon point: %v is not in a valid format", i)
}
q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
}
q.FieldVal = tmp.FieldVal
q.BoostVal = tmp.BoostVal
return nil
}

View file

@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasPoints := tmp["polygon_points"]
if hasPoints {
var rv GeoBoundingPolygonQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("unknown query type")
}

View file

@ -40,6 +40,7 @@ type TermQueryScorer struct {
idf float64
options search.SearcherOptions
idfExplanation *search.Explanation
includeScore bool
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int {
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: string(queryTerm),
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
options: options,
queryWeight: 1.0,
queryTerm: string(queryTerm),
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
options: options,
queryWeight: 1.0,
includeScore: options.Score != "none",
}
if options.Explain {
@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
var scoreExplanation *search.Explanation
// need to compute score
var tf float64
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
score := tf * termMatch.Norm * s.idf
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
if s.includeScore || s.options.Explain {
var scoreExplanation *search.Explanation
var tf float64
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
score := tf * termMatch.Norm * s.idf
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
}
if s.includeScore {
rv.Score = score
}
if s.options.Explain {
rv.Expl = scoreExplanation
}
}
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
rv.Score = score
if s.options.Explain {
rv.Expl = scoreExplanation
}
if len(termMatch.Vectors) > 0 {
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {

View file

@ -17,6 +17,7 @@ package search
import (
"fmt"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
return true
}
func (ap ArrayPositions) Compare(other ArrayPositions) int {
for i, p := range ap {
if i >= len(other) {
return 1
}
if p < other[i] {
return -1
}
if p > other[i] {
return 1
}
}
if len(ap) < len(other) {
return -1
}
return 0
}
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
@ -68,6 +87,46 @@ func (l *Location) Size() int {
type Locations []*Location
func (p Locations) Len() int { return len(p) }
func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p Locations) Less(i, j int) bool {
c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
if c < 0 {
return true
}
if c > 0 {
return false
}
return p[i].Pos < p[j].Pos
}
func (p Locations) Dedupe() Locations { // destructive!
if len(p) <= 1 {
return p
}
sort.Sort(p)
slow := 0
for _, pfast := range p {
pslow := p[slow]
if pslow.Pos == pfast.Pos &&
pslow.Start == pfast.Start &&
pslow.End == pfast.End &&
pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
continue // duplicate, so only move fast ahead
}
slow++
p[slow] = pfast
}
return p[:slow+1]
}
type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) {
@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
var lastField string
var tlm TermLocationMap
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field {
@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
}
tlm[ftl.Term] = append(tlm[ftl.Term], loc)
locs := tlm[ftl.Term]
// if the loc is before or at the last location, then there
// might be duplicates that need to be deduplicated
if !needsDedupe && len(locs) > 0 {
last := locs[len(locs)-1]
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
needsDedupe = true
}
}
tlm[ftl.Term] = append(locs, loc)
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{
@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
},
}
}
if needsDedupe {
for _, tlm := range dm.Locations {
for term, locs := range tlm {
tlm[term] = locs.Dedupe()
}
}
}
}
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
@ -279,6 +359,7 @@ type SearcherOptions struct {
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
Collector Collector
IndexReader index.IndexReader
}
func (sc *SearchContext) Size() int {

View file

@ -45,6 +45,7 @@ type BooleanSearcher struct {
scorer *scorer.ConjunctionQueryScorer
matches []*search.DocumentMatch
initialized bool
done bool
}
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
}
}
if rv == nil {
s.done = true
}
return rv, nil
}
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
}
// Advance the searchers only if the currentID cursor is trailing the lookup ID,
// additionally if the mustNotSearcher has been initialized, ensure that the
// cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
// currentID) is trailing the lookup ID as well - for in the case where currentID
// is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
// advance the currentID or the currMustNot cursors.
if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
(s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
// Advance the searcher only if the cursor is trailing the lookup ID
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
var err error
if s.mustSearcher != nil {
if s.currMust != nil {
@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
if s.mustNotSearcher != nil {
if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot)
}
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
// Additional check for mustNotSearcher, whose cursor isn't tracked by
// currentID to prevent it from moving when the searcher's tracked
// position is already ahead of or at the requested ID.
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot)
}
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
}

View file

@ -22,6 +22,11 @@ import (
"github.com/blevesearch/bleve/search"
)
type filterFunc func(key []byte) bool
var GeoBitsShift1 = (geo.GeoBits << 1)
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
}
// do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
minLon, minLat, maxLon, maxLat, checkBoundaries)
onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
if err != nil {
return nil, err
}
var onBoundarySearcher search.Searcher
dvReader, err := indexReader.DocValueReader([]string{field})
@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
indexReader index.IndexReader, field string) (
onBoundary [][]byte, notOnBoundary [][]byte, err error) {
preallocBytesLen := 32
preallocBytes := make([]byte, preallocBytesLen)
func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
if len(preallocBytes) <= 0 {
preallocBytesLen = preallocBytesLen * 2
preallocBytes = make([]byte, preallocBytesLen)
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
rv, preallocBytes, err =
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
return rv
}
return nil, nil
var fieldDict index.FieldDictContains
var isIndexed filterFunc
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
defer func() {
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
}()
if isIndexed == nil {
isIndexed = func(term []byte) bool {
if indexReader != nil {
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
if err != nil || reader == nil {
return false
}
if reader.Count() == 0 {
_ = reader.Close()
return false
}
_ = reader.Close()
}
return true
}
}
var computeGeoRange func(term uint64, shift uint) // declare for recursion
relateAndRecurse := func(start, end uint64, res, level uint) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
codedTerm := makePrefixCoded(int64(start), res)
if isIndexed(codedTerm) {
if !within && checkBoundaries {
onBoundary = append(onBoundary, codedTerm)
} else {
notOnBoundary = append(notOnBoundary, codedTerm)
}
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
computeGeoRange(start, res-1)
}
}
computeGeoRange = func(term uint64, shift uint) {
if err != nil {
return
}
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
level := (GeoBitsShift1 - shift) >> 1
relateAndRecurse(term, lowerMax, shift, level)
relateAndRecurse(split, upperMax, shift, level)
}
computeGeoRange(term, shift)
if err != nil {
return nil, nil, err
}
return onBoundary, notOnBoundary, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,

View file

@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options)
field, boost, options, false)
if err != nil {
return nil, err
}
@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) (
field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) {
if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat,
field, boost, options, false)
field, boost, options, checkBoundaries)
if err != nil {
return nil, err
}
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
checkBoundaries)
if err != nil {
_ = leftSearcher.Close()
return nil, err
@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader,
// build geoboundinggox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false)
options, checkBoundaries)
if err != nil {
return nil, err
}

View file

@ -0,0 +1,110 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"math"
)
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
polygon []geo.Point, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// compute the bounding box enclosing the polygon
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.BoundingRectangleForPolygon(polygon)
if err != nil {
return nil, err
}
// build a searcher for the bounding box on the polygon
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options, true)
if err != nil {
return nil, err
}
dvReader, err := indexReader.DocValueReader([]string{field})
if err != nil {
return nil, err
}
// wrap it in a filtering searcher that checks for the polygon inclusivity
return NewFilteringSearcher(boxSearcher,
buildPolygonFilter(dvReader, field, polygon)), nil
}
const float64EqualityThreshold = 1e-6
func almostEqual(a, b float64) bool {
return math.Abs(a-b) <= float64EqualityThreshold
}
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique as referred
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
polygon []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
// Note: this approach works for points which are strictly inside
// the polygon. ie it might fail for certain points on the polygon boundaries.
if err == nil && found {
nVertices := len(polygon)
var inside bool
// check for a direct vertex match
if almostEqual(polygon[0].Lat, lat) &&
almostEqual(polygon[0].Lon, lon) {
return true
}
for i := 1; i < nVertices; i++ {
if almostEqual(polygon[i].Lat, lat) &&
almostEqual(polygon[i].Lon, lon) {
return true
}
if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) &&
lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/
(polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon {
inside = !inside
}
}
return inside
}
return false
}
}

View file

@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64--
}
var fieldDict index.FieldDictContains
var isIndexed filterFunc
var err error
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
terms := termRanges.Enumerate(isIndexed)
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
var err error
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
// for upside_down
if isIndexed == nil {
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
}
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
}
@ -125,11 +154,17 @@ type termRange struct {
endTerm []byte
}
func (t *termRange) Enumerate() [][]byte {
func (t *termRange) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte
next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 {
rv = append(rv, next)
if filter != nil {
if filter(next) {
rv = append(rv, next)
}
} else {
rv = append(rv, next)
}
next = incrementBytes(next)
}
return rv
@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
type termRanges []*termRange
func (tr termRanges) Enumerate() [][]byte {
func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte
for _, tri := range tr {
trie := tri.Enumerate()
trie := tri.Enumerate(filter)
rv = append(rv, trie...)
}
return rv

View file

@ -38,6 +38,8 @@ type SearchSort interface {
RequiresScoring() bool
RequiresFields() []string
Reverse()
Copy() SearchSort
}
@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool {
return rv
}
func (so SortOrder) Reverse() {
for _, soi := range so {
soi.Reverse()
}
}
// SortFieldType lets you control some internal sort behavior
// normally leaving this to the zero-value of SortFieldAuto is fine
type SortFieldType int
@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort {
return &rv
}
func (s *SortField) Reverse() {
s.Desc = !s.Desc
if s.Missing == SortFieldMissingFirst {
s.Missing = SortFieldMissingLast
} else {
s.Missing = SortFieldMissingFirst
}
}
// SortDocID will sort results by the document identifier
type SortDocID struct {
Desc bool
@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort {
return &rv
}
func (s *SortDocID) Reverse() {
s.Desc = !s.Desc
}
// SortScore will sort results by the document match score
type SortScore struct {
Desc bool
@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort {
return &rv
}
func (s *SortScore) Reverse() {
s.Desc = !s.Desc
}
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates SearchSort instance for sorting documents by
@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort {
return &rv
}
func (s *SortGeoDistance) Reverse() {
s.Desc = !s.Desc
}
type BytesSlice [][]byte
func (p BytesSlice) Len() int { return len(p) }

View file

@ -1,10 +1,9 @@
language: go
go:
- 1.4
- 1.7
script:
- go get golang.org/x/tools/cmd/vet
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
- go test -v -covermode=count -coverprofile=profile.out

View file

@ -18,7 +18,7 @@ import (
"bytes"
)
// Iterator represents a means of visity key/value pairs in order.
// Iterator represents a means of visiting key/value pairs in order.
type Iterator interface {
// Current() returns the key/value pair currently pointed to.
@ -186,20 +186,29 @@ func (i *FSTIterator) Next() error {
}
func (i *FSTIterator) next(lastOffset int) error {
// remember where we started
// remember where we started with keysStack in this next() call
i.nextStart = append(i.nextStart[:0], i.keysStack...)
nextOffset := lastOffset + 1
allowCompare := false
OUTER:
for true {
curr := i.statesStack[len(i.statesStack)-1]
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
if curr.Final() && i.aut.IsMatch(autCurr) &&
bytes.Compare(i.keysStack, i.nextStart) > 0 {
// in final state greater than start key
return nil
if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare {
// check to see if new keystack might have gone too far
if i.endKeyExclusive != nil &&
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
return ErrIteratorDone
}
cmp := bytes.Compare(i.keysStack, i.nextStart)
if cmp > 0 {
// in final state greater than start key
return nil
}
}
numTrans := curr.NumTransitions()
@ -207,8 +216,12 @@ OUTER:
INNER:
for nextOffset < numTrans {
t := curr.TransitionAt(nextOffset)
autNext := i.aut.Accept(autCurr, t)
if !i.aut.CanMatch(autNext) {
// TODO: potential optimization to skip nextOffset
// forwards more directly to something that the
// automaton likes rather than a linear scan?
nextOffset += 1
continue INNER
}
@ -234,30 +247,41 @@ OUTER:
i.valsStack = append(i.valsStack, v)
i.autStatesStack = append(i.autStatesStack, autNext)
// check to see if new keystack might have gone too far
if i.endKeyExclusive != nil &&
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
return ErrIteratorDone
}
nextOffset = 0
allowCompare = true
continue OUTER
}
// no more transitions, so need to backtrack and stack pop
if len(i.statesStack) <= 1 {
// stack len is 1 (root), can't go back further, we're done
break
}
// no transitions, and still room to pop
i.statesStack = i.statesStack[:len(i.statesStack)-1]
i.keysStack = i.keysStack[:len(i.keysStack)-1]
// if the top of the stack represents a linear chain of states
// (i.e., a suffix of nodes linked by single transitions),
// then optimize by popping the suffix in one shot without
// going back all the way to the OUTER loop
var popNum int
for j := len(i.statesStack) - 1; j > 0; j-- {
if i.statesStack[j].NumTransitions() != 1 {
popNum = len(i.statesStack) - 1 - j
break
}
}
if popNum < 1 { // always pop at least 1 entry from the stacks
popNum = 1
}
nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1
nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1
allowCompare = false
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
i.valsStack = i.valsStack[:len(i.valsStack)-1]
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
i.statesStack = i.statesStack[:len(i.statesStack)-popNum]
i.keysStack = i.keysStack[:len(i.keysStack)-popNum]
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum]
i.valsStack = i.valsStack[:len(i.valsStack)-popNum]
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum]
}
return ErrIteratorDone

10
vendor/github.com/couchbase/vellum/go.mod generated vendored Normal file
View file

@ -0,0 +1,10 @@
module github.com/couchbase/vellum
go 1.12
require (
github.com/edsrzf/mmap-go v1.0.0
github.com/spf13/cobra v0.0.5
github.com/willf/bitset v1.1.10
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect
)

39
vendor/github.com/couchbase/vellum/go.sum generated vendored Normal file
View file

@ -0,0 +1,39 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein2
package levenshtein
import (
"fmt"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein2
package levenshtein
import (
"fmt"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein2
package levenshtein
import "fmt"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein2
package levenshtein
import (
"math"

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein2
package levenshtein
import (
"crypto/md5"

View file

@ -75,15 +75,23 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) {
Rune0: [2]rune{r, r},
}
next.Rune = next.Rune0[0:2]
return c.c(&next)
}
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil {
return err
}
for _, seq := range c.sequences {
c.compileUtf8Ranges(seq)
// try to find more folded runes
for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) {
next.Rune = append(next.Rune, r1, r1)
}
err = c.c(&next)
if err != nil {
return err
}
} else {
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
if err != nil {
return err
}
for _, seq := range c.sequences {
c.compileUtf8Ranges(seq)
}
}
}
case syntax.OpAnyChar:

12
vendor/github.com/etcd-io/bbolt/bolt_riscv64.go generated vendored Normal file
View file

@ -0,0 +1,12 @@
// +build riscv64
package bbolt
// maxMapSize represents the largest mmap size supported by Bolt.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF
// Are unaligned load/stores broken on this arch?
var brokenUnaligned = true

View file

@ -121,6 +121,7 @@ type DB struct {
AllocSize int
path string
openFile func(string, int, os.FileMode) (*os.File, error)
file *os.File
dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte
@ -199,10 +200,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
db.readOnly = true
}
db.openFile = options.OpenFile
if db.openFile == nil {
db.openFile = os.OpenFile
}
// Open data file and separate sync handler for metadata writes.
db.path = path
var err error
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
if db.file, err = db.openFile(db.path, flag|os.O_CREATE, mode); err != nil {
_ = db.close()
return nil, err
}
@ -1054,6 +1060,10 @@ type Options struct {
// set directly on the DB itself when returned from Open(), but this option
// is useful in APIs which expose Options but not the underlying DB.
NoSync bool
// OpenFile is used to open files. It defaults to os.OpenFile. This option
// is useful for writing hermetic tests.
OpenFile func(string, int, os.FileMode) (*os.File, error)
}
// DefaultOptions represent the options used if nil options are passed into Open().

View file

@ -349,6 +349,28 @@ func (f *freelist) reload(p *page) {
f.readIDs(a)
}
// noSyncReload reads the freelist from pgids and filters out pending items.
func (f *freelist) noSyncReload(pgids []pgid) {
// Build a cache of only pending pages.
pcache := make(map[pgid]bool)
for _, txp := range f.pending {
for _, pendingID := range txp.ids {
pcache[pendingID] = true
}
}
// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
var a []pgid
for _, id := range pgids {
if !pcache[id] {
a = append(a, id)
}
}
f.readIDs(a)
}
// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
ids := f.getFreePageIDs()

View file

@ -254,17 +254,36 @@ func (tx *Tx) Rollback() error {
if tx.db == nil {
return ErrTxClosed
}
tx.rollback()
tx.nonPhysicalRollback()
return nil
}
// nonPhysicalRollback is called when user calls Rollback directly, in this case we do not need to reload the free pages from disk.
func (tx *Tx) nonPhysicalRollback() {
if tx.db == nil {
return
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
}
tx.close()
}
// rollback needs to reload the free pages from disk in case some system error happens like fsync error.
func (tx *Tx) rollback() {
if tx.db == nil {
return
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
if !tx.db.hasSyncedFreelist() {
// Reconstruct free page list by scanning the DB to get the whole free page list.
// Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode.
tx.db.freelist.noSyncReload(tx.db.freepages())
} else {
// Read free page list from freelist page.
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
}
}
tx.close()
}
@ -315,7 +334,7 @@ func (tx *Tx) Copy(w io.Writer) error {
// If err == nil then exactly tx.Size() bytes will be written into the writer.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// Attempt to open reader with WriteFlag
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
if err != nil {
return 0, err
}
@ -369,7 +388,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// A reader transaction is maintained during the copy so it is safe to continue
// using the database while a copy is in progress.
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
if err != nil {
return err
}

View file

@ -1,3 +1,5 @@
The MIT license.
Copyright (c) 2014 the go-unsnap-stream authors.
Permission is hereby granted, free of charge, to any person obtaining a copy of
@ -7,6 +9,9 @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
@ -14,5 +19,3 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Permission is explicitly granted to relicense this material under new terms of
your choice when integrating this library with another library or project.

View file

@ -7,7 +7,9 @@ Note that the *streaming or framing format* for snappy is different from snappy
Strangely, though the streaming format was first proposed in Go[1][2], it was never upated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4].
For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatiblity, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C.
Update to the previous paragraph: Horray! Good news: Thanks to @nigeltao, we have since learned that the [github.com/golang/snappy](https://github.com/golang/snappy) package now provides the snappy streaming format too. Even though the type level descriptions are a little misleading because they don't mention that they are for the stream format, the [snappy package header documentation](https://godoc.org/github.com/golang/snappy) points out that the [snappy.Reader](https://godoc.org/github.com/golang/snappy#Reader) and [snappy.Writer](https://godoc.org/github.com/golang/snappy#Writer) types do indeed provide stream (vs block) handling. Although I have not benchmarked, you should probably prefer that package as it will likely be maintained more than I have time to devote, and also perhaps better integrated with the underlying snappy as they share the same repo.
For binary compatibility with the [python implementation](https://pypi.python.org/pypi/python-snappy) in [4], one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatiblity, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C.
However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead.
@ -17,4 +19,4 @@ However, while the c-snappy was useful for checking compatibility, it introduced
[3] https://github.com/kubo/snzip
[4] https://pypi.python.org/pypi/python-snappy
[4] https://pypi.python.org/pypi/python-snappy

View file

@ -7,6 +7,7 @@ import (
"io"
"io/ioutil"
"os"
"strings"
"hash/crc32"
@ -189,7 +190,12 @@ func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedS
err = nil
}
} else {
panic(err)
// may be an odd already closed... don't panic on that
if strings.Contains(err.Error(), "file already closed") {
err = nil
} else {
panic(err)
}
}
}

View file

@ -5,6 +5,8 @@ import (
"reflect"
)
const resumableDefault = false
var (
// ErrShortBytes is returned when the
// slice being decoded is too short to
@ -26,99 +28,240 @@ type Error interface {
// Resumable returns whether
// or not the error means that
// the stream of data is malformed
// and the information is unrecoverable.
// and the information is unrecoverable.
Resumable() bool
}
// contextError allows msgp Error instances to be enhanced with additional
// context about their origin.
type contextError interface {
Error
// withContext must not modify the error instance - it must clone and
// return a new error with the context added.
withContext(ctx string) error
}
// Cause returns the underlying cause of an error that has been wrapped
// with additional context.
func Cause(e error) error {
out := e
if e, ok := e.(errWrapped); ok && e.cause != nil {
out = e.cause
}
return out
}
// Resumable returns whether or not the error means that the stream of data is
// malformed and the information is unrecoverable.
func Resumable(e error) bool {
if e, ok := e.(Error); ok {
return e.Resumable()
}
return resumableDefault
}
// WrapError wraps an error with additional context that allows the part of the
// serialized type that caused the problem to be identified. Underlying errors
// can be retrieved using Cause()
//
// The input error is not modified - a new error should be returned.
//
// ErrShortBytes is not wrapped with any context due to backward compatibility
// issues with the public API.
//
func WrapError(err error, ctx ...interface{}) error {
switch e := err.(type) {
case errShort:
return e
case contextError:
return e.withContext(ctxString(ctx))
default:
return errWrapped{cause: err, ctx: ctxString(ctx)}
}
}
// ctxString converts the incoming interface{} slice into a single string.
func ctxString(ctx []interface{}) string {
out := ""
for idx, cv := range ctx {
if idx > 0 {
out += "/"
}
out += fmt.Sprintf("%v", cv)
}
return out
}
func addCtx(ctx, add string) string {
if ctx != "" {
return add + "/" + ctx
} else {
return add
}
}
// errWrapped allows arbitrary errors passed to WrapError to be enhanced with
// context and unwrapped with Cause()
type errWrapped struct {
cause error
ctx string
}
func (e errWrapped) Error() string {
if e.ctx != "" {
return fmt.Sprintf("%s at %s", e.cause, e.ctx)
} else {
return e.cause.Error()
}
}
func (e errWrapped) Resumable() bool {
if e, ok := e.cause.(Error); ok {
return e.Resumable()
}
return resumableDefault
}
type errShort struct{}
func (e errShort) Error() string { return "msgp: too few bytes left to read object" }
func (e errShort) Resumable() bool { return false }
type errFatal struct{}
type errFatal struct {
ctx string
}
func (f errFatal) Error() string {
out := "msgp: fatal decoding error (unreachable code)"
if f.ctx != "" {
out += " at " + f.ctx
}
return out
}
func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" }
func (f errFatal) Resumable() bool { return false }
func (f errFatal) withContext(ctx string) error { f.ctx = addCtx(f.ctx, ctx); return f }
// ArrayError is an error returned
// when decoding a fix-sized array
// of the wrong size
type ArrayError struct {
Wanted uint32
Got uint32
ctx string
}
// Error implements the error interface
func (a ArrayError) Error() string {
return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got)
out := fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got)
if a.ctx != "" {
out += " at " + a.ctx
}
return out
}
// Resumable is always 'true' for ArrayErrors
func (a ArrayError) Resumable() bool { return true }
func (a ArrayError) withContext(ctx string) error { a.ctx = addCtx(a.ctx, ctx); return a }
// IntOverflow is returned when a call
// would downcast an integer to a type
// with too few bits to hold its value.
type IntOverflow struct {
Value int64 // the value of the integer
FailedBitsize int // the bit size that the int64 could not fit into
ctx string
}
// Error implements the error interface
func (i IntOverflow) Error() string {
return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize)
str := fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize)
if i.ctx != "" {
str += " at " + i.ctx
}
return str
}
// Resumable is always 'true' for overflows
func (i IntOverflow) Resumable() bool { return true }
func (i IntOverflow) withContext(ctx string) error { i.ctx = addCtx(i.ctx, ctx); return i }
// UintOverflow is returned when a call
// would downcast an unsigned integer to a type
// with too few bits to hold its value
type UintOverflow struct {
Value uint64 // value of the uint
FailedBitsize int // the bit size that couldn't fit the value
ctx string
}
// Error implements the error interface
func (u UintOverflow) Error() string {
return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize)
str := fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize)
if u.ctx != "" {
str += " at " + u.ctx
}
return str
}
// Resumable is always 'true' for overflows
func (u UintOverflow) Resumable() bool { return true }
func (u UintOverflow) withContext(ctx string) error { u.ctx = addCtx(u.ctx, ctx); return u }
// UintBelowZero is returned when a call
// would cast a signed integer below zero
// to an unsigned integer.
type UintBelowZero struct {
Value int64 // value of the incoming int
ctx string
}
// Error implements the error interface
func (u UintBelowZero) Error() string {
return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value)
str := fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value)
if u.ctx != "" {
str += " at " + u.ctx
}
return str
}
// Resumable is always 'true' for overflows
func (u UintBelowZero) Resumable() bool { return true }
func (u UintBelowZero) withContext(ctx string) error {
u.ctx = ctx
return u
}
// A TypeError is returned when a particular
// decoding method is unsuitable for decoding
// a particular MessagePack value.
type TypeError struct {
Method Type // Type expected by method
Encoded Type // Type actually encoded
ctx string
}
// Error implements the error interface
func (t TypeError) Error() string {
return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method)
out := fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method)
if t.ctx != "" {
out += " at " + t.ctx
}
return out
}
// Resumable returns 'true' for TypeErrors
func (t TypeError) Resumable() bool { return true }
func (t TypeError) withContext(ctx string) error { t.ctx = addCtx(t.ctx, ctx); return t }
// returns either InvalidPrefixError or
// TypeError depending on whether or not
// the prefix is recognized
@ -148,10 +291,24 @@ func (i InvalidPrefixError) Resumable() bool { return false }
// to a function that takes `interface{}`.
type ErrUnsupportedType struct {
T reflect.Type
ctx string
}
// Error implements error
func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported", e.T) }
func (e *ErrUnsupportedType) Error() string {
out := fmt.Sprintf("msgp: type %q not supported", e.T)
if e.ctx != "" {
out += " at " + e.ctx
}
return out
}
// Resumable returns 'true' for ErrUnsupportedType
func (e *ErrUnsupportedType) Resumable() bool { return true }
func (e *ErrUnsupportedType) withContext(ctx string) error {
o := *e
o.ctx = addCtx(o.ctx, ctx)
return &o
}

View file

@ -685,7 +685,7 @@ func (mw *Writer) WriteIntf(v interface{}) error {
case reflect.Map:
return mw.writeMap(val)
}
return &ErrUnsupportedType{val.Type()}
return &ErrUnsupportedType{T: val.Type()}
}
func (mw *Writer) writeMap(v reflect.Value) (err error) {

Some files were not shown because too many files have changed in this diff Show more