Browse Source

index API & scorch use index.Regexp instead of a stringified regexp

In this optimization and bleve "non-porcelain" index API change, the
index.IndexReaderRegexp API is changed to accept an index.Regexp
instance instead of the string representation of a regexp.

This allows scorch to leverage the LiteralPrefix() information of the
regexp instance (which is not implemented by the vellum.regexp API),
so that the FST dictionary searches can be more selective by
invoking...

   d.fst.Search(r, prefixBeg, prefixEnd)

instead of the previous...

   d.fst.Search(r, nil, nil)

See also: https://issues.couchbase.com/browse/MB-30264
tags/v0.8.0
Steve Yen 1 year ago
parent
commit
96657413a7
8 changed files with 22 additions and 11 deletions
  1. +1
    -1
      index/index.go
  2. +1
    -1
      index/scorch/segment/empty.go
  3. +1
    -1
      index/scorch/segment/segment.go
  4. +14
    -3
      index/scorch/segment/zap/dict.go
  5. +2
    -2
      index/scorch/snapshot_index.go
  6. +1
    -1
      index/scorch/snapshot_segment.go
  7. +1
    -1
      index_meta.go
  8. +1
    -1
      search/searcher/search_regexp.go

+ 1
- 1
index/index.go View File

@@ -110,7 +110,7 @@ type Regexp interface {
}

type IndexReaderRegexp interface {
FieldDictRegexp(field string, regex []byte) (FieldDict, error)
FieldDictRegexp(field string, regex Regexp) (FieldDict, error)
}

type IndexReaderFuzzy interface {


+ 1
- 1
index/scorch/segment/empty.go View File

@@ -80,7 +80,7 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
return &EmptyDictionaryIterator{}
}

func (e *EmptyDictionary) RegexpIterator(start string) DictionaryIterator {
func (e *EmptyDictionary) RegexpIterator(r index.Regexp) DictionaryIterator {
return &EmptyDictionaryIterator{}
}



+ 1
- 1
index/scorch/segment/segment.go View File

@@ -51,7 +51,7 @@ type TermDictionary interface {
Iterator() DictionaryIterator
PrefixIterator(prefix string) DictionaryIterator
RangeIterator(start, end string) DictionaryIterator
RegexpIterator(regex string) DictionaryIterator
RegexpIterator(regex index.Regexp) DictionaryIterator
FuzzyIterator(term string, fuzziness int) DictionaryIterator
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
}


+ 14
- 3
index/scorch/segment/zap/dict.go View File

@@ -178,15 +178,26 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator

// RegexpIterator returns an iterator which only visits terms matching
// the specified regex
func (d *Dictionary) RegexpIterator(regex string) segment.DictionaryIterator {
func (d *Dictionary) RegexpIterator(rIn index.Regexp) segment.DictionaryIterator {
prefixTerm, complete := rIn.LiteralPrefix()
if complete {
return d.PrefixIterator(prefixTerm)
}

rv := &DictionaryIterator{
d: d,
}

if d.fst != nil {
r, err := regexp.New(regex)
r, err := regexp.New(rIn.String())
if err == nil {
itr, err2 := d.fst.Search(r, nil, nil)
var prefixBeg, prefixEnd []byte
if prefixTerm != "" {
prefixBeg = []byte(prefixTerm)
prefixEnd = incrementBytes(prefixEnd)
}

itr, err2 := d.fst.Search(r, prefixBeg, prefixEnd)
if err2 == nil {
rv.itr = itr
} else if err2 != nil && err2 != vellum.ErrIteratorDone {


+ 2
- 2
index/scorch/snapshot_index.go View File

@@ -179,9 +179,9 @@ func (i *IndexSnapshot) FieldDictPrefix(field string,
}

func (i *IndexSnapshot) FieldDictRegexp(field string,
termRegex []byte) (index.FieldDict, error) {
termRegex index.Regexp) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RegexpIterator(string(termRegex))
return i.RegexpIterator(termRegex)
})
}



+ 1
- 1
index/scorch/snapshot_segment.go View File

@@ -52,7 +52,7 @@ func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.Dic
return s.d.RangeIterator(start, end)
}

func (s *SegmentDictionarySnapshot) RegexpIterator(regex string) segment.DictionaryIterator {
func (s *SegmentDictionarySnapshot) RegexpIterator(regex index.Regexp) segment.DictionaryIterator {
return s.d.RegexpIterator(regex)
}



+ 1
- 1
index_meta.go View File

@@ -19,7 +19,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"github.com/blevesearch/bleve/index/upsidedown"
)



+ 1
- 1
search/searcher/search_regexp.go View File

@@ -29,7 +29,7 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp,
search.Searcher, error) {
var candidateTerms []string
if ir, ok := indexReader.(index.IndexReaderRegexp); ok {
fieldDict, err := ir.FieldDictRegexp(field, []byte(pattern.String()))
fieldDict, err := ir.FieldDictRegexp(field, pattern)
if err != nil {
return nil, err
}


Loading…
Cancel
Save