mirror of https://github.com/grafana/grafana
Unified Storage: Remove wildcard queries (#101441)
* WIP adding custom analyzer so we can do substring search efficiently
* Adding unit tests for title search
* formatting
* adds more title search unit tests
* organize helpers
* fixes issue caused by having two title mappings
* Removes camelcase token filter since it prevents you from searching for a substring of chars and numbers. Adds regression test.
* adds back mapping for title_phrase
* use simple analyzer for input query string so it doesn't filter out english stop words
* ran bleve tests, table snapshots updated
* ignore linter for "unused" test functions. They are very helpful for troubleshooting search. Keeping them.
* only log total hits and query cost if result not nil
* fixes failing test - one more field because there are two title mappings now
* fix test
* fixes test - only take first item when it's the title
* Adds separate internal field for title ngram mapping. When searching with a query, results are sorted by score desc. When searching without a query, results are sorted by title desc. Adjusts ngram max to be 10. Text queries are a disjunction of an exact match, phrase match, and a match. Boosted to have priority in that order. Adds more unit tests for searching.
* linter
* fix test
* ran tests - generated new test dash json
* sort by title phrase instead of title
* fix test - not relying on /apis/dashboard/search to apply title sorting anymore
pull/101812/head
parent
da8f26a07c
commit
827da46c51
@ -0,0 +1,426 @@ |
||||
package search |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"log" |
||||
"os" |
||||
"testing" |
||||
|
||||
"github.com/blevesearch/bleve/v2" |
||||
"github.com/grafana/grafana/pkg/apimachinery/identity" |
||||
"github.com/grafana/grafana/pkg/infra/tracing" |
||||
"github.com/grafana/grafana/pkg/services/featuremgmt" |
||||
"github.com/grafana/grafana/pkg/services/store/kind/dashboard" |
||||
"github.com/grafana/grafana/pkg/services/user" |
||||
"github.com/grafana/grafana/pkg/storage/unified/resource" |
||||
"github.com/stretchr/testify/require" |
||||
) |
||||
|
||||
func TestCanSearchByTitle(t *testing.T) { |
||||
key := &resource.ResourceKey{ |
||||
Namespace: "default", |
||||
Group: "dashboard.grafana.app", |
||||
Resource: "dashboards", |
||||
} |
||||
|
||||
t.Run("when query is empty, sort documents by title instead of search score", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name1", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "bbb", |
||||
TitleNgram: "bbb", |
||||
TitlePhrase: "bbb", |
||||
}) |
||||
require.NoError(t, err) |
||||
err = index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name2", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name2", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "aaa", |
||||
TitleNgram: "aaa", |
||||
TitlePhrase: "aaa", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
// search for phrase
|
||||
query := newQuery("") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(2), res.TotalHits) |
||||
require.Equal(t, "name2", res.Results.Rows[0].Key.Name) |
||||
}) |
||||
|
||||
t.Run("will boost phrase match query over match query results", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name1", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "I want to say a hello", |
||||
TitleNgram: "I want to say a hello", |
||||
TitlePhrase: "I want to say a hello", |
||||
}) |
||||
require.NoError(t, err) |
||||
err = index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name2", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name2", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "we want hello", |
||||
TitleNgram: "we want hello", |
||||
TitlePhrase: "we want hello", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
// search for phrase
|
||||
query := newQuery("want hello") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(2), res.TotalHits) |
||||
require.Equal(t, "name2", res.Results.Rows[0].Key.Name) |
||||
}) |
||||
|
||||
t.Run("will prioritize matches", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name1", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "Asserts Dashboards", |
||||
TitleNgram: "Asserts Dashboards", |
||||
TitlePhrase: "Asserts Dashboards", |
||||
}) |
||||
require.NoError(t, err) |
||||
err = index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name2", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name2", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "New dashboard 10", |
||||
TitleNgram: "New dashboard 10", |
||||
TitlePhrase: "New dashboard 10", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
query := newQuery("New dash") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(2), res.TotalHits) |
||||
require.Equal(t, "name2", res.Results.Rows[0].Key.Name) |
||||
}) |
||||
|
||||
t.Run("will boost exact match query over match phrase query results", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name1", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "we want hello pls", |
||||
TitleNgram: "we want hello pls", |
||||
TitlePhrase: "we want hello pls", |
||||
}) |
||||
require.NoError(t, err) |
||||
err = index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name2", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name2", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "we want hello", |
||||
TitleNgram: "we want hello", |
||||
TitlePhrase: "we want hello", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
// search for exact match
|
||||
query := newQuery("we want hello") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(2), res.TotalHits) |
||||
require.Equal(t, "name2", res.Results.Rows[0].Key.Name) |
||||
}) |
||||
|
||||
t.Run("title with numbers will match document", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "aaa", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "A123456", |
||||
TitleNgram: "A123456", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
// search for prefix of title with mix of chars and numbers
|
||||
query := newQuery("A12") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// search for whole title
|
||||
query = newQuery("A123456") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
}) |
||||
|
||||
t.Run("title search will match document", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "aaa", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "I want to say a wonderfully Hello to the WORLD! Hello-world", |
||||
TitleNgram: "I want to say a wonderfully Hello to the WORLD! Hello-world", |
||||
TitlePhrase: "I want to say a wonderfully Hello to the WORLD! Hello-world", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
// search by entire phrase
|
||||
query := newQuery("I want to say a wonderfully Hello to the WORLD! Hello-world") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// search for word at start
|
||||
query = newQuery("hello") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// search for word larger than ngram max size
|
||||
query = newQuery("wonderfully") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// search for word at end
|
||||
query = newQuery("world") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// can search for word substring anchored at start of word (edge ngram)
|
||||
query = newQuery("worl") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// can search for multiple, non-consecutive words in title
|
||||
query = newQuery("hello world") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
|
||||
// can search for a term with a hyphen
|
||||
query = newQuery("hello-world") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(1), res.TotalHits) |
||||
}) |
||||
|
||||
t.Run("title search will NOT match documents", func(t *testing.T) { |
||||
index := newTestDashboardsIndex(t) |
||||
err := index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name1", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name1", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "I want to say a wonderful Hello to the WORLD! Hello-world", |
||||
TitleNgram: "I want to say a wonderful Hello to the WORLD! Hello-world", |
||||
}) |
||||
require.NoError(t, err) |
||||
err = index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name2", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name2", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "A0456", |
||||
TitleNgram: "A0456", |
||||
}) |
||||
require.NoError(t, err) |
||||
err = index.Write(&resource.IndexableDocument{ |
||||
RV: 1, |
||||
Name: "name3", |
||||
Key: &resource.ResourceKey{ |
||||
Name: "name3", |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, |
||||
Title: "mash-A02382-10", |
||||
TitleNgram: "mash-A02382-10", |
||||
TitlePhrase: "mash-A02382-10", |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
// word that doesn't exist
|
||||
query := newQuery("cats") |
||||
res, err := index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(0), res.TotalHits) |
||||
|
||||
// string shorter than 3 chars (ngam min)
|
||||
query = newQuery("ma") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(0), res.TotalHits) |
||||
|
||||
// substring that doesn't exist
|
||||
query = newQuery("A01") |
||||
res, err = index.Search(context.Background(), nil, query, nil) |
||||
require.NoError(t, err) |
||||
require.Equal(t, int64(0), res.TotalHits) |
||||
}) |
||||
} |
||||
|
||||
func newQuery(query string) *resource.ResourceSearchRequest { |
||||
return &resource.ResourceSearchRequest{ |
||||
Options: &resource.ListOptions{ |
||||
Key: &resource.ResourceKey{ |
||||
Namespace: "default", |
||||
Group: "dashboard.grafana.app", |
||||
Resource: "dashboards", |
||||
}, |
||||
}, |
||||
Limit: 100000, |
||||
Query: query, |
||||
} |
||||
} |
||||
|
||||
func newTestDashboardsIndex(t *testing.T) resource.ResourceIndex { |
||||
key := &resource.ResourceKey{ |
||||
Namespace: "default", |
||||
Group: "dashboard.grafana.app", |
||||
Resource: "dashboards", |
||||
} |
||||
tmpdir, err := os.MkdirTemp("", "grafana-bleve-test") |
||||
require.NoError(t, err) |
||||
|
||||
backend, err := NewBleveBackend(BleveOptions{ |
||||
Root: tmpdir, |
||||
FileThreshold: 9999, // use in-memory for tests
|
||||
}, tracing.NewNoopTracerService(), featuremgmt.WithFeatures(featuremgmt.FlagUnifiedStorageSearchPermissionFiltering)) |
||||
require.NoError(t, err) |
||||
|
||||
// AVOID NPE in test
|
||||
resource.NewIndexMetrics(backend.opts.Root, backend) |
||||
|
||||
rv := int64(10) |
||||
ctx := identity.WithRequester(context.Background(), &user.SignedInUser{Namespace: "ns"}) |
||||
|
||||
info, err := DashboardBuilder(func(ctx context.Context, namespace string, blob resource.BlobSupport) (resource.DocumentBuilder, error) { |
||||
return &DashboardDocumentBuilder{ |
||||
Namespace: namespace, |
||||
Blob: blob, |
||||
Stats: make(map[string]map[string]int64), // empty stats
|
||||
DatasourceLookup: dashboard.CreateDatasourceLookup([]*dashboard.DatasourceQueryResult{{}}), |
||||
}, nil |
||||
}) |
||||
require.NoError(t, err) |
||||
|
||||
index, err := backend.BuildIndex(ctx, resource.NamespacedResource{ |
||||
Namespace: key.Namespace, |
||||
Group: key.Group, |
||||
Resource: key.Resource, |
||||
}, 2, rv, info.Fields, func(index resource.ResourceIndex) (int64, error) { return 0, nil }) |
||||
require.NoError(t, err) |
||||
|
||||
return index |
||||
} |
||||
|
||||
// helper to check which tokens are generated by an analyzer
|
||||
// nolint:unused
|
||||
func debugAnalyzer(index bleve.Index, analyzerName string, text string) { |
||||
// Get the analyzer (default: "standard")
|
||||
analyzer := index.Mapping().AnalyzerNamed(analyzerName) |
||||
if analyzer == nil { |
||||
log.Fatal("Analyzer not found") |
||||
} |
||||
|
||||
// Analyze text to see generated tokens
|
||||
analysisResult := analyzer.Analyze([]byte(text)) |
||||
|
||||
// Print tokens
|
||||
fmt.Println("Generated tokens for analyzer:", analyzerName) |
||||
for _, token := range analysisResult { |
||||
fmt.Println(string(token.Term)) |
||||
} |
||||
} |
||||
|
||||
// helper to check which terms are indexed for a field
|
||||
// nolint:unused
|
||||
func debugIndexedTerms(index bleve.Index, field string) { |
||||
// Check what terms exist for the title field
|
||||
fieldTerms, err := index.FieldDict(field) |
||||
if err != nil { |
||||
log.Fatal(err) |
||||
} |
||||
|
||||
for { |
||||
term, err := fieldTerms.Next() |
||||
if err != nil { |
||||
break |
||||
} |
||||
if term != nil { |
||||
fmt.Println(term.Term) |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,48 @@ |
||||
package search |
||||
|
||||
import ( |
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom" |
||||
"github.com/blevesearch/bleve/v2/analysis/token/edgengram" |
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase" |
||||
"github.com/blevesearch/bleve/v2/analysis/token/unique" |
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace" |
||||
"github.com/blevesearch/bleve/v2/mapping" |
||||
) |
||||
|
||||
// TITLE_ANALYZER is the name under which the custom title analyzer is
// registered on the index mapping.
const TITLE_ANALYZER = "title_analyzer"
||||
|
||||
func RegisterCustomAnalyzers(mapper *mapping.IndexMappingImpl) error { |
||||
return registerTitleAnalyzer(mapper) |
||||
} |
||||
|
||||
// The registerTitleAnalyzer function defines a custom analyzer for the title field.
|
||||
// The edgeNgramTokenFilter will create n-grams anchored to the front of each token.
|
||||
// For example, the token "hello" will be tokenized into "hel", "hell", "hello".
|
||||
func registerTitleAnalyzer(mapper *mapping.IndexMappingImpl) error { |
||||
// Define an N-Gram tokenizer (for substring search)
|
||||
edgeNgramTokenFilter := map[string]interface{}{ |
||||
"type": edgengram.Name, |
||||
"min": 3.0, |
||||
"max": 10.0, |
||||
"back": edgengram.FRONT, |
||||
} |
||||
err := mapper.AddCustomTokenFilter("edge_ngram_filter", edgeNgramTokenFilter) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
//Create a custom analyzer using the N-Gram tokenizer
|
||||
ngramAnalyzer := map[string]interface{}{ |
||||
"type": custom.Name, |
||||
"tokenizer": whitespace.Name, |
||||
"token_filters": []string{"edge_ngram_filter", lowercase.Name, unique.Name}, |
||||
//"char_filters": //TODO IF NEEDED
|
||||
} |
||||
|
||||
err = mapper.AddCustomAnalyzer(TITLE_ANALYZER, ngramAnalyzer) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
return nil |
||||
} |
Loading…
Reference in new issue