Платформа ЦРНП "Мирокод" для разработки проектов
https://git.mirocod.ru
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
695 lines
17 KiB
695 lines
17 KiB
package roaring |
|
|
|
/////////////////////////////////////////////////// |
|
// |
|
// container interface methods for runContainer16 |
|
// |
|
/////////////////////////////////////////////////// |
|
|
|
import ( |
|
"fmt" |
|
) |
|
|
|
// compile time verify we meet interface requirements |
|
var _ container = &runContainer16{} |
|
|
|
func (rc *runContainer16) clone() container { |
|
return newRunContainer16CopyIv(rc.iv) |
|
} |
|
|
|
func (rc *runContainer16) minimum() uint16 { |
|
return rc.iv[0].start // assume not empty |
|
} |
|
|
|
func (rc *runContainer16) maximum() uint16 { |
|
return rc.iv[len(rc.iv)-1].last() // assume not empty |
|
} |
|
|
|
func (rc *runContainer16) isFull() bool { |
|
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16)) |
|
} |
|
|
|
func (rc *runContainer16) and(a container) container { |
|
if rc.isFull() { |
|
return a.clone() |
|
} |
|
switch c := a.(type) { |
|
case *runContainer16: |
|
return rc.intersect(c) |
|
case *arrayContainer: |
|
return rc.andArray(c) |
|
case *bitmapContainer: |
|
return rc.andBitmapContainer(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
func (rc *runContainer16) andCardinality(a container) int { |
|
switch c := a.(type) { |
|
case *runContainer16: |
|
return int(rc.intersectCardinality(c)) |
|
case *arrayContainer: |
|
return rc.andArrayCardinality(c) |
|
case *bitmapContainer: |
|
return rc.andBitmapContainerCardinality(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
// andBitmapContainer finds the intersection of rc and b. |
|
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container { |
|
bc2 := newBitmapContainerFromRun(rc) |
|
return bc2.andBitmap(bc) |
|
} |
|
|
|
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int { |
|
pos := 0 |
|
answer := 0 |
|
maxpos := ac.getCardinality() |
|
if maxpos == 0 { |
|
return 0 // won't happen in actual code |
|
} |
|
v := ac.content[pos] |
|
mainloop: |
|
for _, p := range rc.iv { |
|
for v < p.start { |
|
pos++ |
|
if pos == maxpos { |
|
break mainloop |
|
} |
|
v = ac.content[pos] |
|
} |
|
for v <= p.last() { |
|
answer++ |
|
pos++ |
|
if pos == maxpos { |
|
break mainloop |
|
} |
|
v = ac.content[pos] |
|
} |
|
} |
|
return answer |
|
} |
|
|
|
func (rc *runContainer16) iand(a container) container { |
|
if rc.isFull() { |
|
return a.clone() |
|
} |
|
switch c := a.(type) { |
|
case *runContainer16: |
|
return rc.inplaceIntersect(c) |
|
case *arrayContainer: |
|
return rc.andArray(c) |
|
case *bitmapContainer: |
|
return rc.iandBitmapContainer(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container { |
|
// TODO: optimize by doing less allocation, possibly? |
|
|
|
// sect will be new |
|
sect := rc.intersect(rc2) |
|
*rc = *sect |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container { |
|
isect := rc.andBitmapContainer(bc) |
|
*rc = *newRunContainer16FromContainer(isect) |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) andArray(ac *arrayContainer) container { |
|
if len(rc.iv) == 0 { |
|
return newArrayContainer() |
|
} |
|
|
|
acCardinality := ac.getCardinality() |
|
c := newArrayContainerCapacity(acCardinality) |
|
|
|
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; { |
|
iv := rc.iv[rlePos] |
|
arrayVal := ac.content[arrayPos] |
|
|
|
for iv.last() < arrayVal { |
|
rlePos++ |
|
if rlePos == len(rc.iv) { |
|
return c |
|
} |
|
iv = rc.iv[rlePos] |
|
} |
|
|
|
if iv.start > arrayVal { |
|
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start) |
|
} else { |
|
c.content = append(c.content, arrayVal) |
|
arrayPos++ |
|
} |
|
} |
|
return c |
|
} |
|
|
|
func (rc *runContainer16) andNot(a container) container { |
|
switch c := a.(type) { |
|
case *arrayContainer: |
|
return rc.andNotArray(c) |
|
case *bitmapContainer: |
|
return rc.andNotBitmap(c) |
|
case *runContainer16: |
|
return rc.andNotRunContainer16(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) { |
|
k := 0 |
|
var val int64 |
|
for _, p := range rc.iv { |
|
n := p.runlen() |
|
for j := int64(0); j < n; j++ { |
|
val = int64(p.start) + j |
|
x[k+i] = uint32(val) | mask |
|
k++ |
|
} |
|
} |
|
} |
|
|
|
func (rc *runContainer16) getShortIterator() shortIterable { |
|
return rc.newRunIterator16() |
|
} |
|
|
|
func (rc *runContainer16) getManyIterator() manyIterable { |
|
return rc.newManyRunIterator16() |
|
} |
|
|
|
// add the values in the range [firstOfRange, endx). endx |
|
// is still abe to express 2^16 because it is an int not an uint16. |
|
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container { |
|
|
|
if firstOfRange >= endx { |
|
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx)) |
|
} |
|
addme := newRunContainer16TakeOwnership([]interval16{ |
|
{ |
|
start: uint16(firstOfRange), |
|
length: uint16(endx - 1 - firstOfRange), |
|
}, |
|
}) |
|
*rc = *rc.union(addme) |
|
return rc |
|
} |
|
|
|
// remove the values in the range [firstOfRange,endx) |
|
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container { |
|
if firstOfRange >= endx { |
|
panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+ |
|
" nothing to do.", firstOfRange, endx)) |
|
//return rc |
|
} |
|
x := newInterval16Range(uint16(firstOfRange), uint16(endx-1)) |
|
rc.isubtract(x) |
|
return rc |
|
} |
|
|
|
// not flip the values in the range [firstOfRange,endx) |
|
func (rc *runContainer16) not(firstOfRange, endx int) container { |
|
if firstOfRange >= endx { |
|
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) |
|
} |
|
|
|
return rc.Not(firstOfRange, endx) |
|
} |
|
|
|
// Not flips the values in the range [firstOfRange,endx). |
|
// This is not inplace. Only the returned value has the flipped bits. |
|
// |
|
// Currently implemented as (!A intersect B) union (A minus B), |
|
// where A is rc, and B is the supplied [firstOfRange, endx) interval. |
|
// |
|
// TODO(time optimization): convert this to a single pass |
|
// algorithm by copying AndNotRunContainer16() and modifying it. |
|
// Current routine is correct but |
|
// makes 2 more passes through the arrays than should be |
|
// strictly necessary. Measure both ways though--this may not matter. |
|
// |
|
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 { |
|
|
|
if firstOfRange >= endx { |
|
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange)) |
|
} |
|
|
|
if firstOfRange >= endx { |
|
return rc.Clone() |
|
} |
|
|
|
a := rc |
|
// algo: |
|
// (!A intersect B) union (A minus B) |
|
|
|
nota := a.invert() |
|
|
|
bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))} |
|
b := newRunContainer16TakeOwnership(bs) |
|
|
|
notAintersectB := nota.intersect(b) |
|
|
|
aMinusB := a.AndNotRunContainer16(b) |
|
|
|
rc2 := notAintersectB.union(aMinusB) |
|
return rc2 |
|
} |
|
|
|
// equals is now logical equals; it does not require the |
|
// same underlying container type. |
|
func (rc *runContainer16) equals(o container) bool { |
|
srb, ok := o.(*runContainer16) |
|
|
|
if !ok { |
|
// maybe value instead of pointer |
|
val, valok := o.(*runContainer16) |
|
if valok { |
|
srb = val |
|
ok = true |
|
} |
|
} |
|
if ok { |
|
// Check if the containers are the same object. |
|
if rc == srb { |
|
return true |
|
} |
|
|
|
if len(srb.iv) != len(rc.iv) { |
|
return false |
|
} |
|
|
|
for i, v := range rc.iv { |
|
if v != srb.iv[i] { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// use generic comparison |
|
if o.getCardinality() != rc.getCardinality() { |
|
return false |
|
} |
|
rit := rc.getShortIterator() |
|
bit := o.getShortIterator() |
|
|
|
//k := 0 |
|
for rit.hasNext() { |
|
if bit.next() != rit.next() { |
|
return false |
|
} |
|
//k++ |
|
} |
|
return true |
|
} |
|
|
|
func (rc *runContainer16) iaddReturnMinimized(x uint16) container { |
|
rc.Add(x) |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iadd(x uint16) (wasNew bool) { |
|
return rc.Add(x) |
|
} |
|
|
|
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container { |
|
rc.removeKey(x) |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iremove(x uint16) bool { |
|
return rc.removeKey(x) |
|
} |
|
|
|
func (rc *runContainer16) or(a container) container { |
|
if rc.isFull() { |
|
return rc.clone() |
|
} |
|
switch c := a.(type) { |
|
case *runContainer16: |
|
return rc.union(c) |
|
case *arrayContainer: |
|
return rc.orArray(c) |
|
case *bitmapContainer: |
|
return rc.orBitmapContainer(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
func (rc *runContainer16) orCardinality(a container) int { |
|
switch c := a.(type) { |
|
case *runContainer16: |
|
return int(rc.unionCardinality(c)) |
|
case *arrayContainer: |
|
return rc.orArrayCardinality(c) |
|
case *bitmapContainer: |
|
return rc.orBitmapContainerCardinality(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
// orBitmapContainer finds the union of rc and bc. |
|
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container { |
|
bc2 := newBitmapContainerFromRun(rc) |
|
return bc2.iorBitmap(bc) |
|
} |
|
|
|
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int { |
|
answer := 0 |
|
for i := range rc.iv { |
|
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1) |
|
} |
|
//bc.computeCardinality() |
|
return answer |
|
} |
|
|
|
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int { |
|
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc) |
|
} |
|
|
|
// orArray finds the union of rc and ac. |
|
func (rc *runContainer16) orArray(ac *arrayContainer) container { |
|
bc1 := newBitmapContainerFromRun(rc) |
|
bc2 := ac.toBitmapContainer() |
|
return bc1.orBitmap(bc2) |
|
} |
|
|
|
// orArray finds the union of rc and ac. |
|
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int { |
|
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac) |
|
} |
|
|
|
func (rc *runContainer16) ior(a container) container { |
|
if rc.isFull() { |
|
return rc |
|
} |
|
switch c := a.(type) { |
|
case *runContainer16: |
|
return rc.inplaceUnion(c) |
|
case *arrayContainer: |
|
return rc.iorArray(c) |
|
case *bitmapContainer: |
|
return rc.iorBitmapContainer(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container { |
|
p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv)) |
|
for _, p := range rc2.iv { |
|
last := int64(p.last()) |
|
for i := int64(p.start); i <= last; i++ { |
|
rc.Add(uint16(i)) |
|
} |
|
} |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container { |
|
|
|
it := bc.getShortIterator() |
|
for it.hasNext() { |
|
rc.Add(it.next()) |
|
} |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iorArray(ac *arrayContainer) container { |
|
it := ac.getShortIterator() |
|
for it.hasNext() { |
|
rc.Add(it.next()) |
|
} |
|
return rc |
|
} |
|
|
|
// lazyIOR is described (not yet implemented) in |
|
// this nice note from @lemire on |
|
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737 |
|
// |
|
// Description of lazyOR and lazyIOR from @lemire: |
|
// |
|
// Lazy functions are optional and can be simply |
|
// wrapper around non-lazy functions. |
|
// |
|
// The idea of "laziness" is as follows. It is |
|
// inspired by the concept of lazy evaluation |
|
// you might be familiar with (functional programming |
|
// and all that). So a roaring bitmap is |
|
// such that all its containers are, in some |
|
// sense, chosen to use as little memory as |
|
// possible. This is nice. Also, all bitsets |
|
// are "cardinality aware" so that you can do |
|
// fast rank/select queries, or query the |
|
// cardinality of the whole bitmap... very fast, |
|
// without latency. |
|
// |
|
// However, imagine that you are aggregating 100 |
|
// bitmaps together. So you OR the first two, then OR |
|
// that with the third one and so forth. Clearly, |
|
// intermediate bitmaps don't need to be as |
|
// compressed as possible, right? They can be |
|
// in a "dirty state". You only need the end |
|
// result to be in a nice state... which you |
|
// can achieve by calling repairAfterLazy at the end. |
|
// |
|
// The Java/C code does something special for |
|
// the in-place lazy OR runs. The idea is that |
|
// instead of taking two run containers and |
|
// generating a new one, we actually try to |
|
// do the computation in-place through a |
|
// technique invented by @gssiyankai (pinging him!). |
|
// What you do is you check whether the host |
|
// run container has lots of extra capacity. |
|
// If it does, you move its data at the end of |
|
// the backing array, and then you write |
|
// the answer at the beginning. What this |
|
// trick does is minimize memory allocations. |
|
// |
|
func (rc *runContainer16) lazyIOR(a container) container { |
|
// not lazy at the moment |
|
// TODO: make it lazy |
|
return rc.ior(a) |
|
|
|
/* |
|
switch c := a.(type) { |
|
case *arrayContainer: |
|
return rc.lazyIorArray(c) |
|
case *bitmapContainer: |
|
return rc.lazyIorBitmap(c) |
|
case *runContainer16: |
|
return rc.lazyIorRun16(c) |
|
} |
|
panic("unsupported container type") |
|
*/ |
|
} |
|
|
|
// lazyOR is described above in lazyIOR. |
|
func (rc *runContainer16) lazyOR(a container) container { |
|
|
|
// not lazy at the moment |
|
// TODO: make it lazy |
|
return rc.or(a) |
|
|
|
/* |
|
switch c := a.(type) { |
|
case *arrayContainer: |
|
return rc.lazyOrArray(c) |
|
case *bitmapContainer: |
|
return rc.lazyOrBitmap(c) |
|
case *runContainer16: |
|
return rc.lazyOrRunContainer16(c) |
|
} |
|
panic("unsupported container type") |
|
*/ |
|
} |
|
|
|
func (rc *runContainer16) intersects(a container) bool { |
|
// TODO: optimize by doing inplace/less allocation, possibly? |
|
isect := rc.and(a) |
|
return isect.getCardinality() > 0 |
|
} |
|
|
|
func (rc *runContainer16) xor(a container) container { |
|
switch c := a.(type) { |
|
case *arrayContainer: |
|
return rc.xorArray(c) |
|
case *bitmapContainer: |
|
return rc.xorBitmap(c) |
|
case *runContainer16: |
|
return rc.xorRunContainer16(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
func (rc *runContainer16) iandNot(a container) container { |
|
switch c := a.(type) { |
|
case *arrayContainer: |
|
return rc.iandNotArray(c) |
|
case *bitmapContainer: |
|
return rc.iandNotBitmap(c) |
|
case *runContainer16: |
|
return rc.iandNotRunContainer16(c) |
|
} |
|
panic("unsupported container type") |
|
} |
|
|
|
// flip the values in the range [firstOfRange,endx) |
|
func (rc *runContainer16) inot(firstOfRange, endx int) container { |
|
if firstOfRange >= endx { |
|
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange)) |
|
} |
|
// TODO: minimize copies, do it all inplace; not() makes a copy. |
|
rc = rc.Not(firstOfRange, endx) |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) getCardinality() int { |
|
return int(rc.cardinality()) |
|
} |
|
|
|
func (rc *runContainer16) rank(x uint16) int { |
|
n := int64(len(rc.iv)) |
|
xx := int64(x) |
|
w, already, _ := rc.search(xx, nil) |
|
if w < 0 { |
|
return 0 |
|
} |
|
if !already && w == n-1 { |
|
return rc.getCardinality() |
|
} |
|
var rnk int64 |
|
if !already { |
|
for i := int64(0); i <= w; i++ { |
|
rnk += rc.iv[i].runlen() |
|
} |
|
return int(rnk) |
|
} |
|
for i := int64(0); i < w; i++ { |
|
rnk += rc.iv[i].runlen() |
|
} |
|
rnk += int64(x-rc.iv[w].start) + 1 |
|
return int(rnk) |
|
} |
|
|
|
func (rc *runContainer16) selectInt(x uint16) int { |
|
return rc.selectInt16(x) |
|
} |
|
|
|
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container { |
|
return rc.AndNotRunContainer16(b) |
|
} |
|
|
|
func (rc *runContainer16) andNotArray(ac *arrayContainer) container { |
|
rcb := rc.toBitmapContainer() |
|
acb := ac.toBitmapContainer() |
|
return rcb.andNotBitmap(acb) |
|
} |
|
|
|
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container { |
|
rcb := rc.toBitmapContainer() |
|
return rcb.andNotBitmap(bc) |
|
} |
|
|
|
func (rc *runContainer16) toBitmapContainer() *bitmapContainer { |
|
p("run16 toBitmap starting; rc has %v ranges", len(rc.iv)) |
|
bc := newBitmapContainer() |
|
for i := range rc.iv { |
|
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) |
|
} |
|
bc.computeCardinality() |
|
return bc |
|
} |
|
|
|
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container { |
|
rcb := rc.toBitmapContainer() |
|
x2b := x2.toBitmapContainer() |
|
rcb.iandNotBitmapSurely(x2b) |
|
// TODO: check size and optimize the return value |
|
// TODO: is inplace modification really required? If not, elide the copy. |
|
rc2 := newRunContainer16FromBitmapContainer(rcb) |
|
*rc = *rc2 |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container { |
|
rcb := rc.toBitmapContainer() |
|
acb := ac.toBitmapContainer() |
|
rcb.iandNotBitmapSurely(acb) |
|
// TODO: check size and optimize the return value |
|
// TODO: is inplace modification really required? If not, elide the copy. |
|
rc2 := newRunContainer16FromBitmapContainer(rcb) |
|
*rc = *rc2 |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container { |
|
rcb := rc.toBitmapContainer() |
|
rcb.iandNotBitmapSurely(bc) |
|
// TODO: check size and optimize the return value |
|
// TODO: is inplace modification really required? If not, elide the copy. |
|
rc2 := newRunContainer16FromBitmapContainer(rcb) |
|
*rc = *rc2 |
|
return rc |
|
} |
|
|
|
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container { |
|
rcb := rc.toBitmapContainer() |
|
x2b := x2.toBitmapContainer() |
|
return rcb.xorBitmap(x2b) |
|
} |
|
|
|
func (rc *runContainer16) xorArray(ac *arrayContainer) container { |
|
rcb := rc.toBitmapContainer() |
|
acb := ac.toBitmapContainer() |
|
return rcb.xorBitmap(acb) |
|
} |
|
|
|
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container { |
|
rcb := rc.toBitmapContainer() |
|
return rcb.xorBitmap(bc) |
|
} |
|
|
|
// convert to bitmap or array *if needed* |
|
func (rc *runContainer16) toEfficientContainer() container { |
|
|
|
// runContainer16SerializedSizeInBytes(numRuns) |
|
sizeAsRunContainer := rc.getSizeInBytes() |
|
sizeAsBitmapContainer := bitmapContainerSizeInBytes() |
|
card := int(rc.cardinality()) |
|
sizeAsArrayContainer := arrayContainerSizeInBytes(card) |
|
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) { |
|
return rc |
|
} |
|
if card <= arrayDefaultMaxSize { |
|
return rc.toArrayContainer() |
|
} |
|
bc := newBitmapContainerFromRun(rc) |
|
return bc |
|
} |
|
|
|
func (rc *runContainer16) toArrayContainer() *arrayContainer { |
|
ac := newArrayContainer() |
|
for i := range rc.iv { |
|
ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1) |
|
} |
|
return ac |
|
} |
|
|
|
func newRunContainer16FromContainer(c container) *runContainer16 { |
|
|
|
switch x := c.(type) { |
|
case *runContainer16: |
|
return x.Clone() |
|
case *arrayContainer: |
|
return newRunContainer16FromArray(x) |
|
case *bitmapContainer: |
|
return newRunContainer16FromBitmapContainer(x) |
|
} |
|
panic("unsupported container type") |
|
}
|
|
|