InfluxDB: Improve maxDataPoints error-message in Flux-mode, raise limits (#31259)

* influxdb: flux: improve error-message

* influxdb: flux: raise max-points tolerance limit

* influxdb: flux: better error-message

* influxdb: flux: different approach to nice-error-message-generation

* influxdb: flux: removed unnecessary whitespace

* influxdb: flux: better error message, new approach

* influxdb: flux: test max-data-points-exceeded situation
Author: Gábor Farkas
Commit: 8d39e6640c (parent bd0d051438)
Files changed:
  1. pkg/tsdb/influxdb/flux/builder.go (13 lines changed)
  2. pkg/tsdb/influxdb/flux/executor.go (26 lines changed)
  3. pkg/tsdb/influxdb/flux/executor_test.go (44 lines changed)
  4. pkg/tsdb/influxdb/flux/testdata/max_data_points_exceeded.csv (28 lines changed)

--- a/pkg/tsdb/influxdb/flux/builder.go
+++ b/pkg/tsdb/influxdb/flux/builder.go
@@ -183,6 +183,14 @@ func getTimeSeriesTimeColumn(columns []*query.FluxColumn) *query.FluxColumn {
 	return nil
 }
 
+type maxPointsExceededError struct {
+	Count int
+}
+
+func (e maxPointsExceededError) Error() string {
+	return fmt.Sprintf("max data points limit exceeded (count is %d)", e.Count)
+}
+
 func getTableID(record *query.FluxRecord, groupColumns []string) []interface{} {
 	result := make([]interface{}, len(groupColumns))
@@ -300,8 +308,9 @@ func (fb *frameBuilder) Append(record *query.FluxRecord) error {
 		}
 	}
 
-	if fb.active.Fields[0].Len() > fb.maxPoints {
-		return fmt.Errorf("returned too many points in a series: %d", fb.maxPoints)
+	pointsCount := fb.active.Fields[0].Len()
+	if pointsCount > fb.maxPoints {
+		return maxPointsExceededError{Count: pointsCount}
 	}
 
 	return nil
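The dedicated error type above exists so that the caller can recognize the truncation condition later via errors.As and replace the terse message with a user-facing one (see executor.go below). A minimal, self-contained sketch of that pattern — the main function and the wrapping are illustrative, not part of the commit:

package main

import (
	"errors"
	"fmt"
)

// Same shape as the error type added in builder.go.
type maxPointsExceededError struct {
	Count int
}

func (e maxPointsExceededError) Error() string {
	return fmt.Sprintf("max data points limit exceeded (count is %d)", e.Count)
}

func main() {
	// The error may arrive wrapped; errors.As still finds the concrete
	// type and exposes Count for building a friendlier message.
	err := fmt.Errorf("appending record: %w", maxPointsExceededError{Count: 21})

	var mpe maxPointsExceededError
	if errors.As(err, &mpe) {
		fmt.Printf("truncated at %d points\n", mpe.Count)
	}
}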

--- a/pkg/tsdb/influxdb/flux/executor.go
+++ b/pkg/tsdb/influxdb/flux/executor.go
@@ -2,13 +2,17 @@ package flux
 
 import (
 	"context"
+	"errors"
 	"fmt"
+	"strings"
 
 	"github.com/grafana/grafana-plugin-sdk-go/backend"
 	"github.com/grafana/grafana-plugin-sdk-go/data"
 	"github.com/influxdata/influxdb-client-go/v2/api"
 )
 
+const maxPointsEnforceFactor float64 = 10
+
 // executeQuery runs a flux query using the queryModel to interpolate the query and the runner to execute it.
 // maxSeries somehow limits the response.
 func executeQuery(ctx context.Context, query queryModel, runner queryRunner, maxSeries int) (dr backend.DataResponse) {
@@ -27,7 +31,27 @@ func executeQuery(ctx context.Context, query queryModel, runner queryRunner, maxSeries int) (dr backend.DataResponse) {
 		glog.Warn("Flux query failed", "err", err, "query", flux)
 		dr.Error = err
 	} else {
-		dr = readDataFrames(tables, int(float64(query.MaxDataPoints)*2), maxSeries)
+		// we only enforce a larger number than maxDataPoints
+		maxPointsEnforced := int(float64(query.MaxDataPoints) * maxPointsEnforceFactor)
+		dr = readDataFrames(tables, maxPointsEnforced, maxSeries)
+
+		if dr.Error != nil {
+			// we check if a too-many-data-points error happened, and if so,
+			// we improve the error message.
+			// (we have to do it in such a complicated way, because at the point
+			// where the error happens, there is not enough info to create a nice
+			// error message)
+			var maxPointError maxPointsExceededError
+			if errors.As(dr.Error, &maxPointError) {
+				text := fmt.Sprintf("A query returned too many datapoints and the results have been truncated at %d points to prevent memory issues. At the current graph size, Grafana can only draw %d.", maxPointError.Count, query.MaxDataPoints)
+				// we recommend aggregateWindow() to the user, but only if it is not already used
+				if !strings.Contains(query.RawQuery, "aggregateWindow(") {
+					text += " Try using the aggregateWindow() function in your query to reduce the number of points returned."
+				}
+				dr.Error = fmt.Errorf(text)
+			}
+		}
 	}
 
 	// Make sure there is at least one frame
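The practical effect of the new constant: the backend now reads up to maxPointsEnforceFactor × MaxDataPoints points per series before aborting, instead of the previous hard-coded 2×. A small standalone sketch making the arithmetic explicit — enforcedLimit is a hypothetical helper; executeQuery performs the same computation inline:

package main

import "fmt"

// Mirrors the constant added in executor.go.
const maxPointsEnforceFactor float64 = 10

// enforcedLimit computes the per-series ceiling from the panel's request.
func enforcedLimit(maxDataPoints int64) int {
	return int(float64(maxDataPoints) * maxPointsEnforceFactor)
}

func main() {
	fmt.Println(enforcedLimit(100)) // 1000 (the old factor of 2 gave 200)
	fmt.Println(enforcedLimit(2))   // 20, which the tests below rely on
}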

--- a/pkg/tsdb/influxdb/flux/executor_test.go
+++ b/pkg/tsdb/influxdb/flux/executor_test.go
@@ -58,18 +58,24 @@ func (r *MockRunner) runQuery(ctx context.Context, q string) (*api.QueryTableResult, error) {
 	return client.QueryAPI("x").Query(ctx, q)
 }
 
-func verifyGoldenResponse(t *testing.T, name string) *backend.DataResponse {
+func executeMockedQuery(t *testing.T, name string, query queryModel) *backend.DataResponse {
 	runner := &MockRunner{
 		testDataPath: name + ".csv",
 	}
 
-	dr := executeQuery(context.Background(), queryModel{MaxDataPoints: 100}, runner, 50)
+	dr := executeQuery(context.Background(), query, runner, 50)
+	return &dr
+}
+
+func verifyGoldenResponse(t *testing.T, name string) *backend.DataResponse {
+	dr := executeMockedQuery(t, name, queryModel{MaxDataPoints: 100})
+
 	err := experimental.CheckGoldenDataResponse(filepath.Join("testdata", fmt.Sprintf("%s.golden.txt", name)),
-		&dr, true)
+		dr, true)
 	require.NoError(t, err)
 	require.NoError(t, dr.Error)
 
-	return &dr
+	return dr
 }
 
 func TestExecuteSimple(t *testing.T) {
@@ -229,3 +235,33 @@ func TestRealQuery(t *testing.T) {
 		require.NoError(t, err)
 	})
 }
+
+func assertDataResponseDimensions(t *testing.T, dr *backend.DataResponse, rows int, columns int) {
+	require.Len(t, dr.Frames, 1)
+	fields := dr.Frames[0].Fields
+	require.Len(t, fields, rows)
+	require.Equal(t, fields[0].Len(), columns)
+	require.Equal(t, fields[1].Len(), columns)
+}
+
+func TestMaxDataPointsExceededNoAggregate(t *testing.T) {
+	// unfortunately the golden-response style tests do not support
+	// responses that contain errors, so we can only do manual checks
+	// on the DataResponse
+	dr := executeMockedQuery(t, "max_data_points_exceeded", queryModel{MaxDataPoints: 2})
+
+	// it should contain the error-message
+	require.EqualError(t, dr.Error, "A query returned too many datapoints and the results have been truncated at 21 points to prevent memory issues. At the current graph size, Grafana can only draw 2. Try using the aggregateWindow() function in your query to reduce the number of points returned.")
+	assertDataResponseDimensions(t, dr, 2, 21)
+}
+
+func TestMaxDataPointsExceededWithAggregate(t *testing.T) {
+	// unfortunately the golden-response style tests do not support
+	// responses that contain errors, so we can only do manual checks
+	// on the DataResponse
+	dr := executeMockedQuery(t, "max_data_points_exceeded", queryModel{RawQuery: "aggregateWindow()", MaxDataPoints: 2})
+
+	// it should contain the error-message
+	require.EqualError(t, dr.Error, "A query returned too many datapoints and the results have been truncated at 21 points to prevent memory issues. At the current graph size, Grafana can only draw 2.")
+	assertDataResponseDimensions(t, dr, 2, 21)
+}
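Why both tests expect exactly 21: the fixture below contains 24 data rows, and with MaxDataPoints = 2 the enforced ceiling is 2 × 10 = 20, so appending the 21st point trips the limit and the frame is left holding 21 values. Both cases can be run from the repo root with something like:

go test ./pkg/tsdb/influxdb/flux/ -run TestMaxDataPointsExceeded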

--- /dev/null
+++ b/pkg/tsdb/influxdb/flux/testdata/max_data_points_exceeded.csv
@@ -0,0 +1,28 @@
+#group,false,false,true,true,true,true,false,false
+#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,string,string,double,dateTime:RFC3339
+#default,mean,,,,,,,
+,result,table,_start,_stop,_field,_measurement,_value,_time
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,30,2021-02-22T16:03:50Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,40,2021-02-22T16:04:00Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,50,2021-02-22T16:04:10Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,60,2021-02-22T16:04:20Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,70,2021-02-22T16:04:30Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,80,2021-02-22T16:04:40Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,81,2021-02-22T16:04:50Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,82,2021-02-22T16:05:00Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,83,2021-02-22T16:05:10Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,84,2021-02-22T16:05:20Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,85,2021-02-22T16:05:30Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,86,2021-02-22T16:05:40Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,87,2021-02-22T16:05:50Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,86,2021-02-22T16:06:00Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,85,2021-02-22T16:06:10Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,84,2021-02-22T16:06:20Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,83,2021-02-22T16:06:30Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,82,2021-02-22T16:06:40Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,83,2021-02-22T16:06:50Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,84,2021-02-22T16:07:00Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,85,2021-02-22T16:07:10Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,86,2021-02-22T16:07:20Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,97,2021-02-22T16:07:30Z
+,,0,2021-02-22T16:03:35.1428118Z,2021-02-22T16:08:35.1428118Z,usage_idle,cpu,88,2021-02-22T16:07:40Z