Skip to content

Commit

Permalink
Merge pull request #417 from RoaringBitmap/faster-faster-iandnot
Browse files Browse the repository at this point in the history
Faster faster iandnot
  • Loading branch information
lemire authored Mar 22, 2024
2 parents a550de6 + 34fc2bf commit c99a062
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 4 deletions.
52 changes: 48 additions & 4 deletions arraycontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -664,10 +664,54 @@ func (ac *arrayContainer) iandNot(a container) container {
}

// iandNotRun16 removes from ac, in place, every value that falls inside one
// of the runs of rc, and returns ac.
//
// ac.content and rc.iv are each walked once, front to back, so the work is
// linear in len(ac.content) + len(rc.iv) and no temporary container is
// allocated. The walk relies on both ac.content and rc.iv being in ascending
// order.
//
// Each run is treated as the closed interval [start, start+length].
// NOTE(review): start+length is computed in uint16 and is assumed not to
// wrap around — confirm against the interval16 invariants.
func (ac *arrayContainer) iandNotRun16(rc *runContainer16) container {
	// Fast path: nothing to remove from, or nothing to remove.
	if ac.isEmpty() || rc.isEmpty() {
		return ac
	}
	// Fast path: a full run container removes every value.
	if rc.isFull() {
		ac.content = ac.content[:0]
		return ac
	}

	// The current run covers the closed interval [runStart, runEnd].
	runIdx := 0
	runStart := rc.iv[runIdx].start
	runEnd := runStart + rc.iv[runIdx].length

	// Values are read at index i and the survivors are written at index pos.
	// pos <= i always holds, so a write only ever lands on a slot that has
	// already been read.
	pos, i := 0, 0

scan:
	for i < len(ac.content) {
		v := ac.content[i]
		switch {
		case v < runStart:
			// v lies before the current run, so no run contains it: keep it.
			ac.content[pos] = v
			pos++
			i++
		case v <= runEnd:
			// v is inside the current run: drop it.
			i++
		case runIdx+1 < len(rc.iv):
			// v lies after the current run. Move on to the next run and
			// test the same v again (i is deliberately not advanced).
			runIdx++
			runStart = rc.iv[runIdx].start
			runEnd = runStart + rc.iv[runIdx].length
		default:
			// The runs are exhausted: every remaining value is kept.
			break scan
		}
	}

	// Slide the untouched tail ac.content[i:] down to pos. The built-in copy
	// handles the overlapping ranges correctly.
	pos += copy(ac.content[pos:], ac.content[i:])

	// Shrink the slice to the surviving values.
	ac.content = ac.content[:pos]
	return ac
}

Expand Down
22 changes: 22 additions & 0 deletions arraycontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestArrayContainerTransition(t *testing.T) {
Expand Down Expand Up @@ -332,6 +333,27 @@ func TestArrayContainerEtc070(t *testing.T) {
assert.Equal(t, 1, ac10.numberOfRuns())
}

// TestArrayContainerIAndNot subtracts a run container made of several ranges
// from an array container via iandNot and checks that only the values lying
// outside every range (188 and 289) are left.
func TestArrayContainerIAndNot(t *testing.T) {
	// Left operand: six values added through the container interface.
	var minuend container = newArrayContainer()
	minuend.iadd(12)
	minuend.iadd(27)
	minuend.iadd(32)
	minuend.iadd(88)
	minuend.iadd(188)
	minuend.iadd(289)

	// Right operand: four ranges, deliberately added out of order.
	var subtrahend container = newRunContainer16Range(0, 15)
	subtrahend = subtrahend.iaddRange(1500, 2000)
	subtrahend = subtrahend.iaddRange(55, 100)
	subtrahend = subtrahend.iaddRange(25, 50)

	minuend = minuend.iandNot(subtrahend)

	// The result is expected to still be an array container.
	remaining := minuend.(*arrayContainer).content
	require.ElementsMatch(t, []uint16{188, 289}, remaining)
	require.Equal(t, 2, minuend.getCardinality())
}

func TestArrayContainerIand(t *testing.T) {
a := NewBitmap()
a.AddRange(0, 200000)
Expand Down
96 changes: 96 additions & 0 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package roaring
import (
"bytes"
"fmt"
"math"
"math/rand"
"testing"

Expand Down Expand Up @@ -1132,3 +1133,98 @@ func BenchmarkAndAny(b *testing.B) {
runSet("small-filters", genOne(r, largeSize, domain), genMulti(r, filtersNum, smallSize, domain))
runSet("equal", genOne(r, defaultSize, domain), genMulti(r, filtersNum, defaultSize, domain))
}

// BenchmarkAndNot times the and-not of two bitmaps for every pairing of the
// three input generators ("run", "array", "bitmap"), once through the
// in-place method (Bitmap.AndNot) and once through the package-level AndNot.
//
// Sub-benchmarks are named "inPlace=<bool>/left=<gen>/right=<gen>". All input
// construction happens while the timer is stopped; only the AndNot calls are
// timed.
func BenchmarkAndNot(b *testing.B) {
	type generator struct {
		name string
		f    func() *Bitmap
	}

	// Number of distinct serialized inputs prepared per side; the b.N
	// operands cycle through them.
	const poolSize = 1000

	generators := []generator{
		{
			// 100 random ranges with both ends below math.MaxUint16,
			// followed by RunOptimize.
			name: "run",
			f: func() *Bitmap {
				bm := NewBitmap()
				for n := 0; n < 100; n++ {
					lo := rand.Intn(math.MaxUint16)
					hi := lo + rand.Intn(math.MaxUint16-lo)
					bm.AddRange(uint64(lo), uint64(hi))
				}
				bm.RunOptimize()
				return bm
			},
		},
		{
			// arrayDefaultMaxSize/2 random values below math.MaxUint16
			// (duplicates are possible).
			name: "array",
			f: func() *Bitmap {
				bm := NewBitmap()
				for n := 0; n < arrayDefaultMaxSize/2; n++ {
					bm.Add(uint32(rand.Intn(math.MaxUint16)))
				}
				return bm
			},
		},
		{
			// 1024 random 64-bit words handed to FromDense.
			name: "bitmap",
			f: func() *Bitmap {
				words := make([]uint64, 1024)
				for k := range words {
					words[k] = rand.Uint64()
				}
				return FromDense(words, false)
			},
		},
	}

	// serialize draws poolSize bitmaps from gen and returns their ToBytes
	// encodings, aborting the benchmark on the first error.
	serialize := func(b *testing.B, gen generator) [][]byte {
		encoded := make([][]byte, poolSize)
		for k := range encoded {
			data, err := gen.f().ToBytes()
			if err != nil {
				b.Fatal(err)
			}
			encoded[k] = data
		}
		return encoded
	}

	// load decodes count bitmaps, cycling through pool, and returns a Clone
	// of each decoded bitmap.
	// NOTE(review): presumably the Clone detaches each bitmap from the shared
	// serialized buffer so in-place operations cannot touch it — confirm
	// against FromBuffer's contract.
	load := func(b *testing.B, pool [][]byte, count int) []*Bitmap {
		bitmaps := make([]*Bitmap, count)
		for k := range bitmaps {
			decoded := NewBitmap()
			if _, err := decoded.FromBuffer(pool[k%len(pool)]); err != nil {
				b.Fatal(err)
			}
			bitmaps[k] = decoded.Clone()
		}
		return bitmaps
	}

	for _, inPlace := range []bool{true, false} {
		for _, leftGen := range generators {
			for _, rightGen := range generators {
				label := fmt.Sprintf("inPlace=%v/left=%s/right=%s", inPlace, leftGen.name, rightGen.name)
				b.Run(label, func(b *testing.B) {
					// Everything up to StartTimer is untimed setup.
					b.StopTimer()
					serializedLefts := serialize(b, leftGen)
					serializedRights := serialize(b, rightGen)

					// One fresh operand pair per iteration, so an in-place
					// call never sees an already-modified left side.
					lefts := load(b, serializedLefts, b.N)
					rights := load(b, serializedRights, b.N)
					b.StartTimer()

					for k := range lefts {
						if inPlace {
							lefts[k].AndNot(rights[k])
						} else {
							_ = AndNot(lefts[k], rights[k])
						}
					}
				})
			}
		}
	}
}
1 change: 1 addition & 0 deletions runcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
// runContainer16 stores a set of uint16 integers using run-length
// encoding: the set is held as a list of intervals rather than as
// individual values.
type runContainer16 struct {
// iv holds the runs as a slice of sorted, non-overlapping, non-adjacent
// intervals.
// NOTE(review): arrayContainer.iandNotRun16 reads each interval16 as the
// closed range [start, start+length] — confirm against interval16's
// definition.
iv []interval16
}

Expand Down

0 comments on commit c99a062

Please sign in to comment.