Unified Storage: Remove wildcard queries (#101441)

* WIP adding custom analyzer so we can do substring search efficiently

* Adding unit tests for title search

* formatting

* adds more title search unit tests

* organize helpers

* fixes issue caused by having two title mappings

* Removes camelcase token filter since it prevents you from searching for a substring of chars and numbers. Adds regression test.

* adds back mapping for title_phrase

* use simple analyzer for input query string so it doesn't filter out english stop words

* ran bleve tests, table snapshots updated

* ignore linter for "unused" test functions. They are very helpful for troubleshooting search. Keeping them.

* only log total hits and query cost if result not nil

* fixes failing test - one more field because there are two title mappings now

* fix test

* fixes test - only take first item when it's the title

* Adds separate internal field for title ngram mapping.

When searching with a query, results are sorted by score desc.

When searching without a query, results are sorted by title (the lowercased title_phrase field) ascending.

Adjusts ngram max to be 10.

Text queries are a disjunction of an exact match, phrase match, and a match. Boosted to have priority in that order.

Adds more unit tests for searching.

* linter

* fix test

* ran tests - generated new test dash json

* sort by title phrase instead of title

* fix test - not relying on /apis/dashboard/search to apply title sorting anymore
pull/101812/head
owensmallwood 4 months ago committed by GitHub
parent da8f26a07c
commit 827da46c51
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 8
      pkg/registry/apis/dashboard/search.go
  2. 12
      pkg/registry/apis/dashboard/search_test.go
  3. 7
      pkg/storage/unified/resource/document.go
  4. 1
      pkg/storage/unified/resource/document_test.go
  5. 48
      pkg/storage/unified/search/bleve.go
  6. 28
      pkg/storage/unified/search/bleve_mappings.go
  7. 7
      pkg/storage/unified/search/bleve_mappings_test.go
  8. 426
      pkg/storage/unified/search/bleve_search_test.go
  9. 48
      pkg/storage/unified/search/custom_analyzers.go
  10. 1
      pkg/storage/unified/search/testdata/doc/dashboard-aaa-out.json
  11. 1
      pkg/storage/unified/search/testdata/doc/folder-aaa-out.json
  12. 1
      pkg/storage/unified/search/testdata/doc/folder-bbb-out.json
  13. 1
      pkg/storage/unified/search/testdata/doc/playlist-aaa-out.json
  14. 1
      pkg/storage/unified/search/testdata/doc/report-aaa-out.json

@ -369,6 +369,10 @@ func (s *SearchHandler) DoSearch(w http.ResponseWriter, r *http.Request) {
return
}
if result != nil {
s.log.Debug("search result hits and cost", "total_hits", result.TotalHits, "query_cost", result.QueryCost)
}
parsedResults, err := dashboardsearch.ParseResults(result, searchRequest.Offset)
if err != nil {
errhttp.Write(ctx, err, w)
@ -378,8 +382,8 @@ func (s *SearchHandler) DoSearch(w http.ResponseWriter, r *http.Request) {
if len(searchRequest.SortBy) == 0 {
// default sort by resource descending ( folders then dashboards ) then title
sort.Slice(parsedResults.Hits, func(i, j int) bool {
return parsedResults.Hits[i].Resource > parsedResults.Hits[j].Resource ||
(parsedResults.Hits[i].Resource == parsedResults.Hits[j].Resource && strings.ToLower(parsedResults.Hits[i].Title) < strings.ToLower(parsedResults.Hits[j].Title))
// Just sorting by resource for now. The rest should be sorted by search score already
return parsedResults.Hits[i].Resource > parsedResults.Hits[j].Resource
})
}

@ -276,7 +276,7 @@ func TestSearchHandler(t *testing.T) {
}
})
t.Run("Sort - default sort by resource then title", func(t *testing.T) {
t.Run("Sort - default sort by resource", func(t *testing.T) {
rows := make([]*resource.ResourceTableRow, len(mockResults))
for i, r := range mockResults {
rows[i] = &resource.ResourceTableRow{
@ -334,7 +334,7 @@ func TestSearchHandler(t *testing.T) {
err := json.NewDecoder(resp.Body).Decode(p)
require.NoError(t, err)
assert.Equal(t, len(mockResults), len(p.Hits))
assert.Equal(t, mockResults[3].Value, p.Hits[0].Title)
assert.Equal(t, mockResults[2].Value, p.Hits[0].Title)
assert.Equal(t, mockResults[1].Value, p.Hits[3].Title)
})
}
@ -647,14 +647,14 @@ var mockResults = []MockResult{
Value: "Dashboard 2",
},
{
Name: "f2",
Name: "f1",
Resource: "folder",
Value: "Folder 2",
Value: "Folder 1",
},
{
Name: "f1",
Name: "f2",
Resource: "folder",
Value: "Folder 1",
Value: "Folder 2",
},
}

@ -62,6 +62,9 @@ type IndexableDocument struct {
// The generic display name
Title string `json:"title,omitempty"`
// internal field for searching title with ngrams
TitleNgram string `json:"title_ngram,omitempty"`
// internal sort field for title ( don't set this directly )
TitlePhrase string `json:"title_phrase,omitempty"`
@ -169,7 +172,8 @@ func NewIndexableDocument(key *ResourceKey, rv int64, obj utils.GrafanaMetaAcces
Key: key,
RV: rv,
Name: key.Name,
Title: title, // We always want *something* to display
Title: title, // We always want *something* to display
TitleNgram: title,
TitlePhrase: strings.ToLower(title), // Lowercase for case-insensitive sorting
Labels: obj.GetLabels(),
Folder: obj.GetFolder(),
@ -264,6 +268,7 @@ const SEARCH_FIELD_NAMESPACE = "namespace"
const SEARCH_FIELD_NAME = "name"
const SEARCH_FIELD_RV = "rv"
const SEARCH_FIELD_TITLE = "title"
const SEARCH_FIELD_TITLE_NGRAM = "title_ngram"
const SEARCH_FIELD_TITLE_PHRASE = "title_phrase" // filtering/sorting on title by full phrase
const SEARCH_FIELD_DESCRIPTION = "description"
const SEARCH_FIELD_TAGS = "tags"

@ -37,6 +37,7 @@ func TestStandardDocumentBuilder(t *testing.T) {
"rv": 10,
"title": "test playlist unified storage",
"title_phrase": "test playlist unified storage",
"title_ngram": "test playlist unified storage",
"created": 1717236672000,
"createdBy": "user:ABC",
"updatedBy": "user:XYZ",

@ -13,6 +13,8 @@ import (
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
@ -118,7 +120,10 @@ func (b *bleveBackend) BuildIndex(ctx context.Context,
var index bleve.Index
build := true
mapper := getBleveMappings(fields)
mapper, err := getBleveMappings(fields)
if err != nil {
return nil, err
}
if size > b.opts.FileThreshold {
resourceDir := filepath.Join(b.opts.Root, key.Namespace,
@ -596,12 +601,26 @@ func (b *bleveIndex) toBleveSearchRequest(ctx context.Context, req *resource.Res
// Add a text query
if req.Query != "" && req.Query != "*" {
searchrequest.Fields = append(searchrequest.Fields, resource.SEARCH_FIELD_SCORE)
// mimic the behavior of the sql search
query := strings.ToLower(req.Query)
if !strings.Contains(query, "*") {
query = "*" + query + "*"
}
queries = append(queries, bleve.NewWildcardQuery(query))
// There are multiple ways to match the query string to documents. The following queries are ordered by priority:
// Query 1: Match the exact query string (highest boost)
queryExact := bleve.NewMatchQuery(req.Query)
queryExact.SetBoost(10.0)
queryExact.Analyzer = keyword.Name // don't analyze the query input - treat it as a single token
// Query 2: Phrase query with standard analyzer (boosted below the exact match, above the plain match)
queryPhrase := bleve.NewMatchPhraseQuery(req.Query)
// The boost must be applied to the phrase query; setting it on queryExact again
// would overwrite the exact-match boost and leave the phrase query unboosted.
queryPhrase.SetBoost(5.0)
queryPhrase.Analyzer = standard.Name
// Query 3: Match query with standard analyzer (default boost)
queryAnalyzed := bleve.NewMatchQuery(req.Query)
queryAnalyzed.Analyzer = standard.Name
// At least one of the queries must match
searchQuery := bleve.NewDisjunctionQuery(queryExact, queryAnalyzed, queryPhrase)
queries = append(queries, searchQuery)
}
switch len(queries) {
@ -664,11 +683,18 @@ func (b *bleveIndex) toBleveSearchRequest(ctx context.Context, req *resource.Res
sorting := getSortFields(req)
searchrequest.SortBy(sorting)
// Always sort by *something*, otherwise the order is unstable
// When no sort fields are provided, sort by score if there is a query, otherwise sort by title
if len(sorting) == 0 {
searchrequest.Sort = append(searchrequest.Sort, &search.SortDocID{
Desc: false,
})
if req.Query != "" && req.Query != "*" {
searchrequest.Sort = append(searchrequest.Sort, &search.SortScore{
Desc: true,
})
} else {
searchrequest.Sort = append(searchrequest.Sort, &search.SortField{
Field: resource.SEARCH_FIELD_TITLE_PHRASE,
Desc: false,
})
}
}
return searchrequest, nil

@ -3,15 +3,22 @@ package search
import (
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/grafana/grafana/pkg/storage/unified/resource"
)
func getBleveMappings(fields resource.SearchableDocumentFields) mapping.IndexMapping {
func getBleveMappings(fields resource.SearchableDocumentFields) (mapping.IndexMapping, error) {
mapper := bleve.NewIndexMapping()
err := RegisterCustomAnalyzers(mapper)
if err != nil {
return nil, err
}
mapper.DefaultMapping = getBleveDocMappings(fields)
return mapper
return mapper, nil
}
func getBleveDocMappings(_ resource.SearchableDocumentFields) *mapping.DocumentMapping {
@ -24,15 +31,22 @@ func getBleveDocMappings(_ resource.SearchableDocumentFields) *mapping.DocumentM
}
mapper.AddFieldMappingsAt(resource.SEARCH_FIELD_NAME, nameMapping)
// for searching by title - uses an edge ngram token filter
titleSearchMapping := bleve.NewTextFieldMapping()
titleSearchMapping.Analyzer = TITLE_ANALYZER
titleSearchMapping.Store = true
mapper.AddFieldMappingsAt(resource.SEARCH_FIELD_TITLE_NGRAM, titleSearchMapping)
// mapping for title to search on words/tokens larger than the ngram size
titleWordMapping := bleve.NewTextFieldMapping()
titleWordMapping.Analyzer = standard.Name
titleWordMapping.Store = true
mapper.AddFieldMappingsAt(resource.SEARCH_FIELD_TITLE, titleWordMapping)
// for filtering/sorting by title full phrase
titlePhraseMapping := bleve.NewKeywordFieldMapping()
mapper.AddFieldMappingsAt(resource.SEARCH_FIELD_TITLE_PHRASE, titlePhraseMapping)
// for searching by title
// TODO: do we still need this since we have SEARCH_FIELD_TITLE_PHRASE?
titleSearchMapping := bleve.NewTextFieldMapping()
mapper.AddFieldMappingsAt(resource.SEARCH_FIELD_TITLE, titleSearchMapping)
descriptionMapping := &mapping.FieldMapping{
Name: resource.SEARCH_FIELD_DESCRIPTION,
Type: "text",

@ -12,7 +12,8 @@ import (
)
func TestDocumentMapping(t *testing.T) {
mappings := getBleveMappings(nil)
mappings, err := getBleveMappings(nil)
require.NoError(t, err)
data := resource.IndexableDocument{
Title: "title",
Description: "descr",
@ -37,7 +38,7 @@ func TestDocumentMapping(t *testing.T) {
}
doc := document.NewDocument("id")
err := mappings.MapDocument(doc, data)
err = mappings.MapDocument(doc, data)
require.NoError(t, err)
for _, f := range doc.Fields {
@ -46,5 +47,5 @@ func TestDocumentMapping(t *testing.T) {
fmt.Printf("DOC: fields %d\n", len(doc.Fields))
fmt.Printf("DOC: size %d\n", doc.Size())
require.Equal(t, 14, len(doc.Fields))
require.Equal(t, 15, len(doc.Fields))
}

@ -0,0 +1,426 @@
package search
import (
"context"
"fmt"
"log"
"os"
"testing"
"github.com/blevesearch/bleve/v2"
"github.com/grafana/grafana/pkg/apimachinery/identity"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/store/kind/dashboard"
"github.com/grafana/grafana/pkg/services/user"
"github.com/grafana/grafana/pkg/storage/unified/resource"
"github.com/stretchr/testify/require"
)
// TestCanSearchByTitle exercises title search against a fresh dashboards index
// per subtest: default ordering without a query, relative ranking of exact,
// phrase and analyzed matches, prefix matching on mixed letters and digits,
// and queries that must not match anything.
func TestCanSearchByTitle(t *testing.T) {
	key := &resource.ResourceKey{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
	}

	// newDoc builds a document whose title, n-gram and phrase fields all hold
	// the same text and whose key name equals the document name. Callers
	// adjust individual fields afterwards where a subtest needs a variation.
	newDoc := func(name, title string) *resource.IndexableDocument {
		return &resource.IndexableDocument{
			RV:   1,
			Name: name,
			Key: &resource.ResourceKey{
				Name:      name,
				Namespace: key.Namespace,
				Group:     key.Group,
				Resource:  key.Resource,
			},
			Title:       title,
			TitleNgram:  title,
			TitlePhrase: title,
		}
	}

	// mustWrite indexes the documents one at a time and stops at the first error.
	mustWrite := func(t *testing.T, index resource.ResourceIndex, docs ...*resource.IndexableDocument) {
		t.Helper()
		for _, d := range docs {
			require.NoError(t, index.Write(d))
		}
	}

	// requireHits runs a text query and asserts the total number of hits.
	requireHits := func(t *testing.T, index resource.ResourceIndex, text string, want int64) {
		t.Helper()
		res, err := index.Search(context.Background(), nil, newQuery(text), nil)
		require.NoError(t, err)
		require.Equal(t, want, res.TotalHits)
	}

	// Every ranking case indexes "name1" and then "name2", expects both
	// documents to be returned, and expects "name2" to be the first row.
	rankingCases := []struct {
		name        string
		firstTitle  string // title of document "name1"
		secondTitle string // title of document "name2"
		query       string
	}{
		{
			name:        "when query is empty, sort documents by title instead of search score",
			firstTitle:  "bbb",
			secondTitle: "aaa",
			query:       "",
		},
		{
			name:        "will boost phrase match query over match query results",
			firstTitle:  "I want to say a hello",
			secondTitle: "we want hello",
			query:       "want hello",
		},
		{
			name:        "will prioritize matches",
			firstTitle:  "Asserts Dashboards",
			secondTitle: "New dashboard 10",
			query:       "New dash",
		},
		{
			name:        "will boost exact match query over match phrase query results",
			firstTitle:  "we want hello pls",
			secondTitle: "we want hello",
			query:       "we want hello",
		},
	}
	for _, tc := range rankingCases {
		t.Run(tc.name, func(t *testing.T) {
			index := newTestDashboardsIndex(t)
			mustWrite(t, index,
				newDoc("name1", tc.firstTitle),
				newDoc("name2", tc.secondTitle),
			)

			res, err := index.Search(context.Background(), nil, newQuery(tc.query), nil)
			require.NoError(t, err)
			require.Equal(t, int64(2), res.TotalHits)
			require.Equal(t, "name2", res.Results.Rows[0].Key.Name)
		})
	}

	t.Run("title with numbers will match document", func(t *testing.T) {
		index := newTestDashboardsIndex(t)

		doc := newDoc("name1", "A123456")
		doc.Key.Name = "aaa"
		doc.TitlePhrase = "" // this document is indexed without a phrase field
		mustWrite(t, index, doc)

		// a prefix mixing letters and digits, then the whole title
		for _, text := range []string{"A12", "A123456"} {
			requireHits(t, index, text, 1)
		}
	})

	t.Run("title search will match document", func(t *testing.T) {
		index := newTestDashboardsIndex(t)

		doc := newDoc("name1", "I want to say a wonderfully Hello to the WORLD! Hello-world")
		doc.Key.Name = "aaa"
		mustWrite(t, index, doc)

		matching := []string{
			"I want to say a wonderfully Hello to the WORLD! Hello-world", // entire phrase
			"hello",       // single word, different case than the title
			"wonderfully", // word longer than the ngram max size
			"world",       // word at the end
			"worl",        // word prefix (edge ngram)
			"hello world", // multiple, non-consecutive words
			"hello-world", // term with a hyphen
		}
		for _, text := range matching {
			requireHits(t, index, text, 1)
		}
	})

	t.Run("title search will NOT match documents", func(t *testing.T) {
		index := newTestDashboardsIndex(t)

		sentence := newDoc("name1", "I want to say a wonderful Hello to the WORLD! Hello-world")
		sentence.TitlePhrase = ""
		code := newDoc("name2", "A0456")
		code.TitlePhrase = ""
		hyphenated := newDoc("name3", "mash-A02382-10")
		mustWrite(t, index, sentence, code, hyphenated)

		missing := []string{
			"cats", // word that doesn't exist
			"ma",   // shorter than 3 chars (ngram min)
			"A01",  // substring that doesn't exist
		}
		for _, text := range missing {
			requireHits(t, index, text, 0)
		}
	})
}
// newQuery builds a search request for dashboards in the "default" namespace
// with the given query text and a limit of 100000 results.
func newQuery(query string) *resource.ResourceSearchRequest {
	target := &resource.ResourceKey{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
	}

	req := &resource.ResourceSearchRequest{
		Limit: 100000,
		Query: query,
	}
	req.Options = &resource.ListOptions{Key: target}
	return req
}
// newTestDashboardsIndex builds an empty dashboards index for the "default"
// namespace on a bleve backend rooted in a scratch directory. The directory
// is removed when the calling test (or subtest) finishes. Any setup failure
// aborts the test via require.
func newTestDashboardsIndex(t *testing.T) resource.ResourceIndex {
	t.Helper()

	key := &resource.ResourceKey{
		Namespace: "default",
		Group:     "dashboard.grafana.app",
		Resource:  "dashboards",
	}

	tmpdir, err := os.MkdirTemp("", "grafana-bleve-test")
	require.NoError(t, err)
	// Every subtest creates its own index; without cleanup each run would
	// leave a "grafana-bleve-test*" directory behind in the OS temp dir.
	t.Cleanup(func() {
		_ = os.RemoveAll(tmpdir) // best effort: a leftover temp dir must not fail the test
	})

	backend, err := NewBleveBackend(BleveOptions{
		Root:          tmpdir,
		FileThreshold: 9999, // use in-memory for tests
	}, tracing.NewNoopTracerService(), featuremgmt.WithFeatures(featuremgmt.FlagUnifiedStorageSearchPermissionFiltering))
	require.NoError(t, err)

	// AVOID NPE in test
	resource.NewIndexMetrics(backend.opts.Root, backend)

	rv := int64(10)
	ctx := identity.WithRequester(context.Background(), &user.SignedInUser{Namespace: "ns"})

	info, err := DashboardBuilder(func(ctx context.Context, namespace string, blob resource.BlobSupport) (resource.DocumentBuilder, error) {
		return &DashboardDocumentBuilder{
			Namespace:        namespace,
			Blob:             blob,
			Stats:            make(map[string]map[string]int64), // empty stats
			DatasourceLookup: dashboard.CreateDatasourceLookup([]*dashboard.DatasourceQueryResult{{}}),
		}, nil
	})
	require.NoError(t, err)

	// The size hint (2) is below FileThreshold and the builder callback adds
	// no documents; tests write their own documents afterwards.
	index, err := backend.BuildIndex(ctx, resource.NamespacedResource{
		Namespace: key.Namespace,
		Group:     key.Group,
		Resource:  key.Resource,
	}, 2, rv, info.Fields, func(index resource.ResourceIndex) (int64, error) { return 0, nil })
	require.NoError(t, err)

	return index
}
// debugAnalyzer prints, one per line, the terms that the named analyzer of the
// index mapping produces for text. It terminates the process via log.Fatal
// when no analyzer is registered under analyzerName. Troubleshooting helper
// only; intentionally kept although nothing calls it.
// nolint:unused
func debugAnalyzer(index bleve.Index, analyzerName string, text string) {
	named := index.Mapping().AnalyzerNamed(analyzerName)
	if named == nil {
		log.Fatal("Analyzer not found")
	}

	// Run the analysis before printing anything so the header is only
	// followed by a complete token list.
	tokens := named.Analyze([]byte(text))

	fmt.Println("Generated tokens for analyzer:", analyzerName)
	for _, tok := range tokens {
		fmt.Println(string(tok.Term))
	}
}
// debugIndexedTerms prints, one per line, every term stored in the index for
// the given field. It terminates the process via log.Fatal when the field
// dictionary cannot be opened. Troubleshooting helper only; intentionally
// kept although nothing calls it.
// nolint:unused
func debugIndexedTerms(index bleve.Index, field string) {
	fieldTerms, err := index.FieldDict(field)
	if err != nil {
		log.Fatal(err)
	}
	// The dictionary holds an index reader; release it when done.
	defer func() { _ = fieldTerms.Close() }()

	for {
		term, err := fieldTerms.Next()
		// The end of the dictionary is signalled by a nil entry with a nil
		// error, so both conditions must stop the loop; checking only the
		// error would spin forever once the terms are exhausted.
		if err != nil || term == nil {
			break
		}
		fmt.Println(term.Term)
	}
}

@ -0,0 +1,48 @@
package search
import (
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/edgengram"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unique"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace"
"github.com/blevesearch/bleve/v2/mapping"
)
// TITLE_ANALYZER is the name under which the custom title analyzer is
// registered on the index mapping; field mappings refer to it by this name.
const TITLE_ANALYZER = "title_analyzer"
// RegisterCustomAnalyzers adds the package's custom analyzers (currently only
// the title analyzer) to the given index mapping and returns the first
// registration error, if any.
func RegisterCustomAnalyzers(mapper *mapping.IndexMappingImpl) error {
	if err := registerTitleAnalyzer(mapper); err != nil {
		return err
	}
	return nil
}
// registerTitleAnalyzer registers the custom analyzer named TITLE_ANALYZER on
// the mapping. The analyzer uses the whitespace tokenizer followed by three
// token filters, in order: an edge n-gram filter (min 3, max 10, anchored to
// the front of each token), the lowercase filter and the unique filter.
// For example, the token "hello" is expanded into "hel", "hell", "hello".
func registerTitleAnalyzer(mapper *mapping.IndexMappingImpl) error {
	const edgeFilterName = "edge_ngram_filter"

	// Token filter (not a tokenizer) that emits front-anchored prefixes of
	// every token, which is what makes prefix-style title search possible.
	filterConfig := map[string]interface{}{
		"type": edgengram.Name,
		"min":  3.0,
		"max":  10.0,
		"back": edgengram.FRONT,
	}
	if err := mapper.AddCustomTokenFilter(edgeFilterName, filterConfig); err != nil {
		return err
	}

	// Custom analyzer wiring the tokenizer and the filter chain together.
	// No char filters are configured.
	analyzerConfig := map[string]interface{}{
		"type":          custom.Name,
		"tokenizer":     whitespace.Name,
		"token_filters": []string{edgeFilterName, lowercase.Name, unique.Name},
	}
	return mapper.AddCustomAnalyzer(TITLE_ANALYZER, analyzerConfig)
}

@ -8,6 +8,7 @@
"name": "aaa",
"rv": 1234,
"title": "Test title",
"title_ngram": "Test title",
"title_phrase": "test title",
"description": "test description",
"tags": [

@ -8,6 +8,7 @@
"name": "aaa",
"rv": 1234,
"title": "test-aaa",
"title_ngram": "test-aaa",
"title_phrase": "test-aaa",
"created": 1730490142000,
"createdBy": "user:1",

@ -8,6 +8,7 @@
"name": "bbb",
"rv": 1234,
"title": "test-bbb",
"title_ngram": "test-bbb",
"title_phrase": "test-bbb",
"created": 1730490142000,
"createdBy": "user:1",

@ -8,6 +8,7 @@
"name": "aaa",
"rv": 1234,
"title": "Test AAA",
"title_ngram": "Test AAA",
"title_phrase": "test aaa",
"created": 1731336353000,
"createdBy": "user:t000000001"

@ -8,6 +8,7 @@
"name": "aaa",
"rv": 1234,
"title": "Test AAA",
"title_ngram": "Test AAA",
"title_phrase": "test aaa",
"labels": {
"grafana.app/deprecatedInternalID": "123"

Loading…
Cancel
Save