CloudWatch: Refactor query batching (#78581)

Co-authored-by: Fiona Liao <fiona.y.liao@gmail.com>
pull/78867/head
Shirley 2 years ago committed by GitHub
parent 3c89e68fc1
commit 86311e3a33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 99
      pkg/tsdb/cloudwatch/get_metric_query_batches.go
  2. 35
      pkg/tsdb/cloudwatch/get_metric_query_batches_test.go

@ -13,72 +13,79 @@ var nonWordRegex = regexp.MustCompile(`\W+`)
// getMetricQueryBatches separates queries into batches if necessary. Metric Insight queries cannot run together, and math expressions must be run // getMetricQueryBatches separates queries into batches if necessary. Metric Insight queries cannot run together, and math expressions must be run
// with all the queries they reference. // with all the queries they reference.
func getMetricQueryBatches(queries []*models.CloudWatchQuery, logger log.Logger) [][]*models.CloudWatchQuery { func getMetricQueryBatches(queries []*models.CloudWatchQuery, logger log.Logger) [][]*models.CloudWatchQuery {
metricInsightIndices := []int{} if !hasMultipleMetricInsights(queries) {
mathIndices := []int{}
for i, query := range queries {
switch query.GetGetMetricDataAPIMode() {
case models.GMDApiModeSQLExpression:
metricInsightIndices = append(metricInsightIndices, i)
case models.GMDApiModeMathExpression:
mathIndices = append(mathIndices, i)
default:
}
}
// We only need multiple batches if there are multiple metrics insight queries
if len(metricInsightIndices) <= 1 {
return [][]*models.CloudWatchQuery{queries} return [][]*models.CloudWatchQuery{queries}
} }
logger.Debug("Separating queries into batches") logger.Debug("Separating queries into batches")
// Map ids to their queries
idToIndex := map[string]int{} // set up list of math expression queries since below we loop over them to get what query IDs they reference
for i, query := range queries { var mathQueries []*models.CloudWatchQuery
if query.Id != "" { for _, query := range queries {
idToIndex[query.Id] = i if query.GetGetMetricDataAPIMode() == models.GMDApiModeMathExpression {
mathQueries = append(mathQueries, query)
} }
} }
// Find and track which queries are referenced by math queries // put queries into a set in order to facilitate lookup below
queryReferences := make([][]int, len(queries)) idToQuery := make(map[string]*models.CloudWatchQuery, len(queries))
isReferenced := make([]bool, len(queries)) for _, q := range queries {
for _, idx := range mathIndices { idToQuery[q.Id] = q
tokens := nonWordRegex.Split(queries[idx].Expression, -1) }
references := []int{}
for _, token := range tokens { // gets query IDs which are referenced in math expressions
ref, found := idToIndex[token] mathQueryIdToReferences := make(map[string][]*models.CloudWatchQuery)
// we will use this set of referenced queries to determine the root queries below
referencedQueries := make(map[string]bool)
for _, mathQuery := range mathQueries {
substrings := nonWordRegex.Split(mathQuery.Expression, -1)
for _, id := range substrings {
query, found := idToQuery[id]
if found { if found {
references = append(references, ref) mathQueryIdToReferences[mathQuery.Id] = append(mathQueryIdToReferences[mathQuery.Id], query)
isReferenced[ref] = true referencedQueries[query.Id] = true
} }
} }
queryReferences[idx] = references
} }
// Create a new batch for every query not used in another query
batches := [][]*models.CloudWatchQuery{} batches := [][]*models.CloudWatchQuery{}
for i, used := range isReferenced { for _, query := range queries {
if !used { // if a query is not referenced, then it is a "root" query
batches = append(batches, getReferencedQueries(queries, queryReferences, i)) if _, ok := referencedQueries[query.Id]; !ok {
batches = append(batches, getConnectedQueries(query, mathQueryIdToReferences))
} }
} }
return batches return batches
} }
// getReferencedQueries gets all the queries referenced by startQuery and its referenced queries // getConnectedQueries does a breadth-first search to find all the query ids connected to the root id by references. The root id is also returned in the response.
func getReferencedQueries(queries []*models.CloudWatchQuery, queryReferences [][]int, startQuery int) []*models.CloudWatchQuery { func getConnectedQueries(root *models.CloudWatchQuery, queryReferences map[string][]*models.CloudWatchQuery) []*models.CloudWatchQuery {
usedQueries := make([]bool, len(queries)) visited := map[string]bool{root.Id: true}
batch := []*models.CloudWatchQuery{} queriesToReturn := []*models.CloudWatchQuery{}
queriesToAdd := []int{startQuery} queriesToVisit := []*models.CloudWatchQuery{root}
usedQueries[startQuery] = true for i := 0; i < len(queriesToVisit); i++ {
for i := 0; i < len(queriesToAdd); i++ { currentQuery := queriesToVisit[i]
batch = append(batch, queries[queriesToAdd[i]]) queriesToReturn = append(queriesToReturn, currentQuery)
for _, queryIdx := range queryReferences[queriesToAdd[i]] { for _, queryRef := range queryReferences[currentQuery.Id] {
if !usedQueries[queryIdx] { if !visited[queryRef.Id] {
usedQueries[queryIdx] = true visited[queryRef.Id] = true
queriesToAdd = append(queriesToAdd, queryIdx) queriesToVisit = append(queriesToVisit, queryRef)
} }
} }
} }
return batch return queriesToReturn
}
func hasMultipleMetricInsights(queries []*models.CloudWatchQuery) bool {
count := 0
for _, query := range queries {
if query.GetGetMetricDataAPIMode() == models.GMDApiModeSQLExpression {
count++
}
if count > 1 {
return true
}
}
return false
} }

@ -22,6 +22,7 @@ func TestGetMetricQueryBatches(t *testing.T) {
MetricQueryType: models.MetricQueryTypeQuery, MetricQueryType: models.MetricQueryTypeQuery,
Id: "i3", Id: "i3",
} }
metricStat := models.CloudWatchQuery{ metricStat := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch, MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeBuilder, MetricEditorMode: models.MetricEditorModeBuilder,
@ -33,6 +34,24 @@ func TestGetMetricQueryBatches(t *testing.T) {
Expression: "PERIOD(i1)", Expression: "PERIOD(i1)",
Id: "m1", Id: "m1",
} }
m99_ref_m98 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
Expression: "PERIOD(m98)",
Id: "m99",
}
m98_ref_m88 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
Expression: "PERIOD(m88)",
Id: "m98",
}
m88_ref_m98 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw,
Expression: "PERIOD(m98)",
Id: "m88",
}
m2_ref_i1 := models.CloudWatchQuery{ m2_ref_i1 := models.CloudWatchQuery{
MetricQueryType: models.MetricQueryTypeSearch, MetricQueryType: models.MetricQueryTypeSearch,
MetricEditorMode: models.MetricEditorModeRaw, MetricEditorMode: models.MetricEditorModeRaw,
@ -58,6 +77,22 @@ func TestGetMetricQueryBatches(t *testing.T) {
Id: "m5", Id: "m5",
} }
t.Run("m99 ref m98 which ref m88 which ref m98, with 2 insights", func(t *testing.T) {
batch := []*models.CloudWatchQuery{
&insight1,
&insight2,
&m99_ref_m98,
&m98_ref_m88,
&m88_ref_m98,
}
result := getMetricQueryBatches(batch, logger)
assert.Len(t, result, 3)
assert.ElementsMatch(t, []*models.CloudWatchQuery{&insight1}, result[0])
assert.ElementsMatch(t, []*models.CloudWatchQuery{&insight2}, result[1])
assert.ElementsMatch(t, []*models.CloudWatchQuery{&m99_ref_m98, &m98_ref_m88, &m88_ref_m98}, result[2])
})
t.Run("zero insight queries should not separate into batches", func(t *testing.T) { t.Run("zero insight queries should not separate into batches", func(t *testing.T) {
batch := []*models.CloudWatchQuery{ batch := []*models.CloudWatchQuery{
&metricStat, &metricStat,

Loading…
Cancel
Save