Browse Source

MB-33455: improve ComputeGeoRange() performance

This change improves ComputeGeoRange() performance by avoiding trips
to the allocator by using pre-allocated []byte slices.  It also avoids
creation of interim [][]byte slices by append()'ing directly to
returned output slices.

Parts of this change...

- before this change, ComputeGeoRange() would invoke relateAndRecurse(),
  which would recursively invoke ComputeGeoRange().  Five of the
  recursive call parameters... sminLon, sminLat, smaxLon, smaxLat,
  and checkBoundaries... would be passed down on each invocation.

  - in this change, ComputeGeoRange() was refactored to have two, internal
    helper closures: relateAndRecurse() and computeGeoRange(), so that
    those five parameters would be part of the closure and would no
    longer need to be passed on each invocation.

- NewPrefixCodedInt64Prealloc() API was added to the numeric package
  to allow the caller to optionally provide a pre-allocated []byte slice.

  - relateAndRecurse() now uses this NewPrefixCodedInt64Prealloc() API,
    via an internal helper closure function, makePrefixCoded().

  - makePrefixCoded() manages a preallocated []byte slice; when that
    slice is used up, it allocates a replacement []byte slice that is
    2x the size of the last one it allocated.

- the previous relateAndRecurse() would return [][]byte slices
  of the terms that were on-the-boundary and not-on-the-boundary.
  The caller, ComputeGeoRange(), would then append() those slices to
  its own on-boundary/not-on-boundary slices, and then return those
  onwards to its caller.  All these interim slices, then, became garbage.

  - In this commit, since relateAndRecurse() and computeGeoRange() are
    now closures, they append() directly onto the final output
    slices of the top-level ComputeGeoRange(), reducing garbage
    and trips to the allocator.

Before the change...

  BenchmarkComputeGeoRangePt01-8 100000     18516 ns/op      2005 B/op     63 allocs/op
  BenchmarkComputeGeoRangePt1-8   10000    108882 ns/op     96267 B/op    736 allocs/op
  BenchmarkComputeGeoRange10-8       50  35883331 ns/op  35812513 B/op 184543 allocs/op
  BenchmarkComputeGeoRange100-8      10 192568538 ns/op 187524856 B/op 926510 allocs/op

After the change...

  BenchmarkComputeGeoRangePt01-8 100000     12447 ns/op       280 B/op      7 allocs/op
  BenchmarkComputeGeoRangePt1-8   30000     47053 ns/op     16416 B/op     28 allocs/op
  BenchmarkComputeGeoRange10-8      100  11836988 ns/op   8503406 B/op     76 allocs/op
  BenchmarkComputeGeoRange100-8      20  72555603 ns/op  42778777 B/op     89 allocs/op

See also: https://issues.couchbase.com/browse/MB-33455
tags/v0.8.0
Steve Yen 7 months ago
parent
commit
7740f389de

+ 18
- 3
numeric/prefix_coded.go View File

@@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
type PrefixCoded []byte

// NewPrefixCodedInt64 encodes in at the given shift and returns the result
// as a PrefixCoded value. It delegates to NewPrefixCodedInt64Prealloc with a
// nil preallocated buffer and discards the leftover-buffer return value.
// Any error from NewPrefixCodedInt64Prealloc is returned unchanged.
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
	encoded, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
	return encoded, err
}

func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
rv PrefixCoded, preallocRest []byte, err error) {
if shift > 63 {
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
}

nChars := ((63 - shift) / 7) + 1
rv := make(PrefixCoded, nChars+1)

size := int(nChars + 1)
if len(prealloc) >= size {
rv = PrefixCoded(prealloc[0:size])
preallocRest = prealloc[size:]
} else {
rv = make(PrefixCoded, size)
}

rv[0] = ShiftStartInt64 + byte(shift)

sortableBits := int64(uint64(in) ^ 0x8000000000000000)
@@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
nChars--
sortableBits = int64(uint64(sortableBits) >> 7)
}
return rv, nil

return rv, preallocRest, nil
}

func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {

+ 65
- 49
search/searcher/search_geoboundingbox.go View File

@@ -22,6 +22,9 @@ import (
"github.com/blevesearch/bleve/search"
)

var (
	// GeoBitsShift1 is geo.GeoBits shifted left by one bit, computed once
	// at package initialization so callers need not repeat the shift.
	GeoBitsShift1 = geo.GeoBits << 1

	// GeoBitsShift1Minus1 is GeoBitsShift1 - 1; it is the shift value
	// handed to ComputeGeoRange for a top-level invocation.
	GeoBitsShift1Minus1 = GeoBitsShift1 - 1
)

func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
@@ -36,7 +39,7 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
}

// do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries)

var onBoundarySearcher search.Searcher
@@ -94,59 +97,72 @@ var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2

func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
preallocBytesLen := 32
preallocBytes := make([]byte, preallocBytesLen)

func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)

level := ((geo.GeoBits << 1) - res) >> 1

within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
if len(preallocBytes) <= 0 {
preallocBytesLen = preallocBytesLen * 2
preallocBytes = make([]byte, preallocBytesLen)
}

var err error
rv, preallocBytes, err =
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
if err != nil {
panic(err)
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
return rv
}

var computeGeoRange func(term uint64, shift uint) // declare for recursion

relateAndRecurse := func(start, end uint64, res, level uint) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)

within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
onBoundary = append(onBoundary, makePrefixCoded(int64(start), res))
} else {
notOnBoundary = append(notOnBoundary, makePrefixCoded(int64(start), res))
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
computeGeoRange(start, res-1)
}
}
return nil, nil

computeGeoRange = func(term uint64, shift uint) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}

lowerMax := split - 1

level := (GeoBitsShift1 - shift) >> 1

relateAndRecurse(term, lowerMax, shift, level)
relateAndRecurse(split, upperMax, shift, level)
}

computeGeoRange(term, shift)

return onBoundary, notOnBoundary
}

func buildRectFilter(dvReader index.DocValueReader, field string,

+ 43
- 0
search/searcher/search_geoboundingbox_test.go View File

@@ -198,3 +198,46 @@ func setupGeo(t *testing.T) index.Index {

return i
}

// --------------------------------------------------------------------

// BenchmarkComputeGeoRangePt01 benchmarks ComputeGeoRange for the box
// spanning -0.01..0.01 in both longitude and latitude, which is expected to
// yield 4 on-boundary terms and 0 not-on-boundary terms.
func BenchmarkComputeGeoRangePt01(b *testing.B) {
	const wantOn, wantOff = 4, 0
	benchmarkComputeGeoRange(b, -0.01, -0.01, 0.01, 0.01, wantOn, wantOff)
}

// BenchmarkComputeGeoRangePt1 benchmarks ComputeGeoRange for the box
// spanning -0.1..0.1 in both longitude and latitude, which is expected to
// yield 56 on-boundary terms and 144 not-on-boundary terms.
func BenchmarkComputeGeoRangePt1(b *testing.B) {
	const wantOn, wantOff = 56, 144
	benchmarkComputeGeoRange(b, -0.1, -0.1, 0.1, 0.1, wantOn, wantOff)
}

// BenchmarkComputeGeoRange10 benchmarks ComputeGeoRange for the box
// spanning -10..10 in both longitude and latitude, which is expected to
// yield 5464 on-boundary terms and 53704 not-on-boundary terms.
func BenchmarkComputeGeoRange10(b *testing.B) {
	const wantOn, wantOff = 5464, 53704
	benchmarkComputeGeoRange(b, -10.0, -10.0, 10.0, 10.0, wantOn, wantOff)
}

// BenchmarkComputeGeoRange100 benchmarks ComputeGeoRange for the box
// spanning -100..100 in both longitude and latitude, which is expected to
// yield 32768 on-boundary terms and 258560 not-on-boundary terms.
func BenchmarkComputeGeoRange100(b *testing.B) {
	const wantOn, wantOff = 32768, 258560
	benchmarkComputeGeoRange(b, -100.0, -100.0, 100.0, 100.0, wantOn, wantOff)
}

// --------------------------------------------------------------------

// benchmarkComputeGeoRange is the shared driver for the ComputeGeoRange
// benchmarks. It runs ComputeGeoRange b.N times over the bounding box
// (minLon, minLat)-(maxLon, maxLat), starting from term 0 and shift
// GeoBitsShift1Minus1 with boundary checking enabled.
//
// onBoundary and offBoundary are the expected lengths of the two returned
// term slices. Every iteration verifies them, so a behavioral regression
// fails the benchmark instead of silently producing a misleading timing.
func benchmarkComputeGeoRange(b *testing.B,
	minLon, minLat, maxLon, maxLat float64, onBoundary, offBoundary int) {
	checkBoundaries := true

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		onBoundaryRes, offBoundaryRes :=
			ComputeGeoRange(0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries)
		if len(onBoundaryRes) != onBoundary || len(offBoundaryRes) != offBoundary {
			// Report actual and expected counts so a failure can be
			// diagnosed without re-running under a debugger.
			b.Fatalf("boundaries not matching: got %d on-boundary and %d not-on-boundary terms, want %d and %d",
				len(onBoundaryRes), len(offBoundaryRes), onBoundary, offBoundary)
		}
	}
}

Loading…
Cancel
Save