You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

609 lines
17KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bleve
  15. import (
  16. "encoding/json"
  17. "fmt"
  18. "reflect"
  19. "time"
  20. "github.com/blevesearch/bleve/analysis"
  21. "github.com/blevesearch/bleve/analysis/datetime/optional"
  22. "github.com/blevesearch/bleve/document"
  23. "github.com/blevesearch/bleve/registry"
  24. "github.com/blevesearch/bleve/search"
  25. "github.com/blevesearch/bleve/search/collector"
  26. "github.com/blevesearch/bleve/search/query"
  27. "github.com/blevesearch/bleve/size"
  28. )
  29. var reflectStaticSizeSearchResult int
  30. var reflectStaticSizeSearchStatus int
  31. func init() {
  32. var sr SearchResult
  33. reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size())
  34. var ss SearchStatus
  35. reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size())
  36. }
  37. var cache = registry.NewCache()
  38. const defaultDateTimeParser = optional.Name
  // numericRange is one named bucket of a numeric facet. Min and Max are
  // pointers so that either bound can be left unset; unset bounds (and an
  // empty name) are omitted from the JSON form.
  type numericRange struct {
  	Name string   `json:"name,omitempty"`
  	Min  *float64 `json:"min,omitempty"`
  	Max  *float64 `json:"max,omitempty"`
  }
  // dateTimeRange is one named bucket of a date facet. Each boundary can be
  // held either as a time.Time (Start/End) or as an unparsed string
  // (startString/endString); the string form is only consulted when the
  // corresponding time value is zero.
  type dateTimeRange struct {
  	Name  string    `json:"name,omitempty"`
  	Start time.Time `json:"start,omitempty"`
  	End   time.Time `json:"end,omitempty"`

  	// Unparsed boundaries, kept until a date/time parser is available.
  	startString *string
  	endString   *string
  }
  51. func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time) {
  52. start = dr.Start
  53. if dr.Start.IsZero() && dr.startString != nil {
  54. s, err := dateTimeParser.ParseDateTime(*dr.startString)
  55. if err == nil {
  56. start = s
  57. }
  58. }
  59. end = dr.End
  60. if dr.End.IsZero() && dr.endString != nil {
  61. e, err := dateTimeParser.ParseDateTime(*dr.endString)
  62. if err == nil {
  63. end = e
  64. }
  65. }
  66. return start, end
  67. }
  68. func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
  69. var temp struct {
  70. Name string `json:"name,omitempty"`
  71. Start *string `json:"start,omitempty"`
  72. End *string `json:"end,omitempty"`
  73. }
  74. err := json.Unmarshal(input, &temp)
  75. if err != nil {
  76. return err
  77. }
  78. dr.Name = temp.Name
  79. if temp.Start != nil {
  80. dr.startString = temp.Start
  81. }
  82. if temp.End != nil {
  83. dr.endString = temp.End
  84. }
  85. return nil
  86. }
  87. func (dr *dateTimeRange) MarshalJSON() ([]byte, error) {
  88. rv := map[string]interface{}{
  89. "name": dr.Name,
  90. "start": dr.Start,
  91. "end": dr.End,
  92. }
  93. if dr.Start.IsZero() && dr.startString != nil {
  94. rv["start"] = dr.startString
  95. }
  96. if dr.End.IsZero() && dr.endString != nil {
  97. rv["end"] = dr.endString
  98. }
  99. return json.Marshal(rv)
  100. }
  101. // A FacetRequest describes a facet or aggregation
  102. // of the result document set you would like to be
  103. // built.
  104. type FacetRequest struct {
  105. Size int `json:"size"`
  106. Field string `json:"field"`
  107. NumericRanges []*numericRange `json:"numeric_ranges,omitempty"`
  108. DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
  109. }
  110. func (fr *FacetRequest) Validate() error {
  111. nrCount := len(fr.NumericRanges)
  112. drCount := len(fr.DateTimeRanges)
  113. if nrCount > 0 && drCount > 0 {
  114. return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
  115. }
  116. if nrCount > 0 {
  117. nrNames := map[string]interface{}{}
  118. for _, nr := range fr.NumericRanges {
  119. if _, ok := nrNames[nr.Name]; ok {
  120. return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
  121. }
  122. nrNames[nr.Name] = struct{}{}
  123. if nr.Min == nil && nr.Max == nil {
  124. return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
  125. }
  126. }
  127. } else {
  128. dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
  129. if err != nil {
  130. return err
  131. }
  132. drNames := map[string]interface{}{}
  133. for _, dr := range fr.DateTimeRanges {
  134. if _, ok := drNames[dr.Name]; ok {
  135. return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
  136. }
  137. drNames[dr.Name] = struct{}{}
  138. start, end := dr.ParseDates(dateTimeParser)
  139. if start.IsZero() && end.IsZero() {
  140. return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
  141. }
  142. }
  143. }
  144. return nil
  145. }
  146. // NewFacetRequest creates a facet on the specified
  147. // field that limits the number of entries to the
  148. // specified size.
  149. func NewFacetRequest(field string, size int) *FacetRequest {
  150. return &FacetRequest{
  151. Field: field,
  152. Size: size,
  153. }
  154. }
  155. // AddDateTimeRange adds a bucket to a field
  156. // containing date values. Documents with a
  157. // date value falling into this range are tabulated
  158. // as part of this bucket/range.
  159. func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
  160. if fr.DateTimeRanges == nil {
  161. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  162. }
  163. fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
  164. }
  165. // AddDateTimeRangeString adds a bucket to a field
  166. // containing date values.
  167. func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
  168. if fr.DateTimeRanges == nil {
  169. fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
  170. }
  171. fr.DateTimeRanges = append(fr.DateTimeRanges,
  172. &dateTimeRange{Name: name, startString: start, endString: end})
  173. }
  174. // AddNumericRange adds a bucket to a field
  175. // containing numeric values. Documents with a
  176. // numeric value falling into this range are
  177. // tabulated as part of this bucket/range.
  178. func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
  179. if fr.NumericRanges == nil {
  180. fr.NumericRanges = make([]*numericRange, 0, 1)
  181. }
  182. fr.NumericRanges = append(fr.NumericRanges, &numericRange{Name: name, Min: min, Max: max})
  183. }
  184. // FacetsRequest groups together all the
  185. // FacetRequest objects for a single query.
  186. type FacetsRequest map[string]*FacetRequest
  187. func (fr FacetsRequest) Validate() error {
  188. for _, v := range fr {
  189. err := v.Validate()
  190. if err != nil {
  191. return err
  192. }
  193. }
  194. return nil
  195. }
  // HighlightRequest describes how field matches should be highlighted.
  // Style optionally selects an alternate highlight style (nil when unset)
  // and Fields lists the fields registered through AddField.
  type HighlightRequest struct {
  	Style  *string  `json:"style"`
  	Fields []string `json:"fields"`
  }

  // NewHighlight creates a default HighlightRequest: no style and no
  // fields.
  func NewHighlight() *HighlightRequest {
  	return new(HighlightRequest)
  }

  // NewHighlightWithStyle creates a HighlightRequest that uses the given
  // alternate style.
  func NewHighlightWithStyle(style string) *HighlightRequest {
  	h := NewHighlight()
  	h.Style = &style
  	return h
  }

  // AddField appends field to the list of fields in the request.
  func (h *HighlightRequest) AddField(field string) {
  	// append allocates the slice on first use.
  	h.Fields = append(h.Fields, field)
  }
  220. // A SearchRequest describes all the parameters
  221. // needed to search the index.
  222. // Query is required.
  223. // Size/From describe how much and which part of the
  224. // result set to return.
  225. // Highlight describes optional search result
  226. // highlighting.
  227. // Fields describes a list of field values which
  228. // should be retrieved for result documents, provided they
  229. // were stored while indexing.
  230. // Facets describe the set of facets to be computed.
  231. // Explain triggers inclusion of additional search
  232. // result score explanations.
  233. // Sort describes the desired order for the results to be returned.
  234. // Score controls the kind of scoring performed
  235. // SearchAfter supports deep paging by providing a minimum sort key
  236. // SearchBefore supports deep paging by providing a maximum sort key
  237. //
  238. // A special field named "*" can be used to return all fields.
  239. type SearchRequest struct {
  240. Query query.Query `json:"query"`
  241. Size int `json:"size"`
  242. From int `json:"from"`
  243. Highlight *HighlightRequest `json:"highlight"`
  244. Fields []string `json:"fields"`
  245. Facets FacetsRequest `json:"facets"`
  246. Explain bool `json:"explain"`
  247. Sort search.SortOrder `json:"sort"`
  248. IncludeLocations bool `json:"includeLocations"`
  249. Score string `json:"score,omitempty"`
  250. SearchAfter []string `json:"search_after"`
  251. SearchBefore []string `json:"search_before"`
  252. }
  253. func (r *SearchRequest) Validate() error {
  254. if srq, ok := r.Query.(query.ValidatableQuery); ok {
  255. err := srq.Validate()
  256. if err != nil {
  257. return err
  258. }
  259. }
  260. if r.SearchAfter != nil && r.SearchBefore != nil {
  261. return fmt.Errorf("cannot use search after and search before together")
  262. }
  263. if r.SearchAfter != nil {
  264. if r.From != 0 {
  265. return fmt.Errorf("cannot use search after with from !=0")
  266. }
  267. if len(r.SearchAfter) != len(r.Sort) {
  268. return fmt.Errorf("search after must have same size as sort order")
  269. }
  270. }
  271. if r.SearchBefore != nil {
  272. if r.From != 0 {
  273. return fmt.Errorf("cannot use search before with from !=0")
  274. }
  275. if len(r.SearchBefore) != len(r.Sort) {
  276. return fmt.Errorf("search before must have same size as sort order")
  277. }
  278. }
  279. return r.Facets.Validate()
  280. }
  281. // AddFacet adds a FacetRequest to this SearchRequest
  282. func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
  283. if r.Facets == nil {
  284. r.Facets = make(FacetsRequest, 1)
  285. }
  286. r.Facets[facetName] = f
  287. }
  288. // SortBy changes the request to use the requested sort order
  289. // this form uses the simplified syntax with an array of strings
  290. // each string can either be a field name
  291. // or the magic value _id and _score which refer to the doc id and search score
  292. // any of these values can optionally be prefixed with - to reverse the order
  293. func (r *SearchRequest) SortBy(order []string) {
  294. so := search.ParseSortOrderStrings(order)
  295. r.Sort = so
  296. }
  297. // SortByCustom changes the request to use the requested sort order
  298. func (r *SearchRequest) SortByCustom(order search.SortOrder) {
  299. r.Sort = order
  300. }
  301. // SetSearchAfter sets the request to skip over hits with a sort
  302. // value less than the provided sort after key
  303. func (r *SearchRequest) SetSearchAfter(after []string) {
  304. r.SearchAfter = after
  305. }
  306. // SetSearchBefore sets the request to skip over hits with a sort
  307. // value greater than the provided sort before key
  308. func (r *SearchRequest) SetSearchBefore(before []string) {
  309. r.SearchBefore = before
  310. }
  311. // UnmarshalJSON deserializes a JSON representation of
  312. // a SearchRequest
  313. func (r *SearchRequest) UnmarshalJSON(input []byte) error {
  314. var temp struct {
  315. Q json.RawMessage `json:"query"`
  316. Size *int `json:"size"`
  317. From int `json:"from"`
  318. Highlight *HighlightRequest `json:"highlight"`
  319. Fields []string `json:"fields"`
  320. Facets FacetsRequest `json:"facets"`
  321. Explain bool `json:"explain"`
  322. Sort []json.RawMessage `json:"sort"`
  323. IncludeLocations bool `json:"includeLocations"`
  324. Score string `json:"score"`
  325. SearchAfter []string `json:"search_after"`
  326. SearchBefore []string `json:"search_before"`
  327. }
  328. err := json.Unmarshal(input, &temp)
  329. if err != nil {
  330. return err
  331. }
  332. if temp.Size == nil {
  333. r.Size = 10
  334. } else {
  335. r.Size = *temp.Size
  336. }
  337. if temp.Sort == nil {
  338. r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
  339. } else {
  340. r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
  341. if err != nil {
  342. return err
  343. }
  344. }
  345. r.From = temp.From
  346. r.Explain = temp.Explain
  347. r.Highlight = temp.Highlight
  348. r.Fields = temp.Fields
  349. r.Facets = temp.Facets
  350. r.IncludeLocations = temp.IncludeLocations
  351. r.Score = temp.Score
  352. r.SearchAfter = temp.SearchAfter
  353. r.SearchBefore = temp.SearchBefore
  354. r.Query, err = query.ParseQuery(temp.Q)
  355. if err != nil {
  356. return err
  357. }
  358. if r.Size < 0 {
  359. r.Size = 10
  360. }
  361. if r.From < 0 {
  362. r.From = 0
  363. }
  364. return nil
  365. }
  366. // NewSearchRequest creates a new SearchRequest
  367. // for the Query, using default values for all
  368. // other search parameters.
  369. func NewSearchRequest(q query.Query) *SearchRequest {
  370. return NewSearchRequestOptions(q, 10, 0, false)
  371. }
  372. // NewSearchRequestOptions creates a new SearchRequest
  373. // for the Query, with the requested size, from
  374. // and explanation search parameters.
  375. // By default results are ordered by score, descending.
  376. func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *SearchRequest {
  377. return &SearchRequest{
  378. Query: q,
  379. Size: size,
  380. From: from,
  381. Explain: explain,
  382. Sort: search.SortOrder{&search.SortScore{Desc: true}},
  383. }
  384. }
  // IndexErrMap tracks errors by the name of the index where they occurred.
  type IndexErrMap map[string]error

  // MarshalJSON serializes the map as a JSON object from index name to the
  // error's message string. Every value must be a non-nil error, since its
  // Error method is called.
  func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
  	messages := make(map[string]string, len(iem))
  	for index, indexErr := range iem {
  		messages[index] = indexErr.Error()
  	}
  	return json.Marshal(messages)
  }

  // UnmarshalJSON decodes a JSON object from index name to message and adds
  // one error per entry to the receiver; existing entries with other names
  // are kept. Only the message text is restored, not the original error
  // type.
  //
  // The method has a value receiver and writes into the receiver map, so
  // the map must already be allocated when the input is non-empty (writing
  // to a nil map panics).
  func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
  	var messages map[string]string
  	if err := json.Unmarshal(data, &messages); err != nil {
  		return err
  	}
  	for index, msg := range messages {
  		iem[index] = fmt.Errorf("%s", msg)
  	}
  	return nil
  }
  406. // SearchStatus is a secion in the SearchResult reporting how many
  407. // underlying indexes were queried, how many were successful/failed
  408. // and a map of any errors that were encountered
  409. type SearchStatus struct {
  410. Total int `json:"total"`
  411. Failed int `json:"failed"`
  412. Successful int `json:"successful"`
  413. Errors IndexErrMap `json:"errors,omitempty"`
  414. }
  415. // Merge will merge together multiple SearchStatuses during a MultiSearch
  416. func (ss *SearchStatus) Merge(other *SearchStatus) {
  417. ss.Total += other.Total
  418. ss.Failed += other.Failed
  419. ss.Successful += other.Successful
  420. if len(other.Errors) > 0 {
  421. if ss.Errors == nil {
  422. ss.Errors = make(map[string]error)
  423. }
  424. for otherIndex, otherError := range other.Errors {
  425. ss.Errors[otherIndex] = otherError
  426. }
  427. }
  428. }
  429. // A SearchResult describes the results of executing
  430. // a SearchRequest.
  431. type SearchResult struct {
  432. Status *SearchStatus `json:"status"`
  433. Request *SearchRequest `json:"request"`
  434. Hits search.DocumentMatchCollection `json:"hits"`
  435. Total uint64 `json:"total_hits"`
  436. MaxScore float64 `json:"max_score"`
  437. Took time.Duration `json:"took"`
  438. Facets search.FacetResults `json:"facets"`
  439. }
  440. func (sr *SearchResult) Size() int {
  441. sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr +
  442. reflectStaticSizeSearchStatus
  443. for _, entry := range sr.Hits {
  444. if entry != nil {
  445. sizeInBytes += entry.Size()
  446. }
  447. }
  448. for k, v := range sr.Facets {
  449. sizeInBytes += size.SizeOfString + len(k) +
  450. v.Size()
  451. }
  452. return sizeInBytes
  453. }
  454. func (sr *SearchResult) String() string {
  455. rv := ""
  456. if sr.Total > 0 {
  457. if sr.Request.Size > 0 {
  458. rv = fmt.Sprintf("%d matches, showing %d through %d, took %s\n", sr.Total, sr.Request.From+1, sr.Request.From+len(sr.Hits), sr.Took)
  459. for i, hit := range sr.Hits {
  460. rv += fmt.Sprintf("%5d. %s (%f)\n", i+sr.Request.From+1, hit.ID, hit.Score)
  461. for fragmentField, fragments := range hit.Fragments {
  462. rv += fmt.Sprintf("\t%s\n", fragmentField)
  463. for _, fragment := range fragments {
  464. rv += fmt.Sprintf("\t\t%s\n", fragment)
  465. }
  466. }
  467. for otherFieldName, otherFieldValue := range hit.Fields {
  468. if _, ok := hit.Fragments[otherFieldName]; !ok {
  469. rv += fmt.Sprintf("\t%s\n", otherFieldName)
  470. rv += fmt.Sprintf("\t\t%v\n", otherFieldValue)
  471. }
  472. }
  473. }
  474. } else {
  475. rv = fmt.Sprintf("%d matches, took %s\n", sr.Total, sr.Took)
  476. }
  477. } else {
  478. rv = "No matches"
  479. }
  480. if len(sr.Facets) > 0 {
  481. rv += fmt.Sprintf("Facets:\n")
  482. for fn, f := range sr.Facets {
  483. rv += fmt.Sprintf("%s(%d)\n", fn, f.Total)
  484. for _, t := range f.Terms {
  485. rv += fmt.Sprintf("\t%s(%d)\n", t.Term, t.Count)
  486. }
  487. if f.Other != 0 {
  488. rv += fmt.Sprintf("\tOther(%d)\n", f.Other)
  489. }
  490. }
  491. }
  492. return rv
  493. }
  494. // Merge will merge together multiple SearchResults during a MultiSearch
  495. func (sr *SearchResult) Merge(other *SearchResult) {
  496. sr.Status.Merge(other.Status)
  497. sr.Hits = append(sr.Hits, other.Hits...)
  498. sr.Total += other.Total
  499. if other.MaxScore > sr.MaxScore {
  500. sr.MaxScore = other.MaxScore
  501. }
  502. if sr.Facets == nil && len(other.Facets) != 0 {
  503. sr.Facets = other.Facets
  504. return
  505. }
  506. sr.Facets.Merge(other.Facets)
  507. }
  508. // MemoryNeededForSearchResult is an exported helper function to determine the RAM
  509. // needed to accommodate the results for a given search request.
  510. func MemoryNeededForSearchResult(req *SearchRequest) uint64 {
  511. if req == nil {
  512. return 0
  513. }
  514. numDocMatches := req.Size + req.From
  515. if req.Size+req.From > collector.PreAllocSizeSkipCap {
  516. numDocMatches = collector.PreAllocSizeSkipCap
  517. }
  518. estimate := 0
  519. // overhead from the SearchResult structure
  520. var sr SearchResult
  521. estimate += sr.Size()
  522. var dm search.DocumentMatch
  523. sizeOfDocumentMatch := dm.Size()
  524. // overhead from results
  525. estimate += numDocMatches * sizeOfDocumentMatch
  526. // overhead from facet results
  527. if req.Facets != nil {
  528. var fr search.FacetResult
  529. estimate += len(req.Facets) * fr.Size()
  530. }
  531. // highlighting, store
  532. var d document.Document
  533. if len(req.Fields) > 0 || req.Highlight != nil {
  534. for i := 0; i < (req.Size + req.From); i++ {
  535. estimate += (req.Size + req.From) * d.Size()
  536. }
  537. }
  538. return uint64(estimate)
  539. }