SQL Expressions: Add cell-limit for input dataframes (#101700)

* expr: Add row limit to SQL expressions

Adds a configurable row limit to SQL expressions to prevent memory issues with large
result sets. The limit is configured via the `sql_expression_row_limit` setting in
the `[expressions]` section of grafana.ini, with a default of 100,000 rows.

The limit is enforced by checking the total number of rows across all input tables
before executing the SQL query. If the total exceeds the limit, the query fails
with an error message indicating the limit was exceeded.

* revert addition of newline

* Switch to table-driven tests

* Remove single-frame test-cases.

We only need to test for the multi frame case. Single frame is a subset
of the multi-frame case

* Add helper function

Simplify the way tests are set up and written

* Support convention, that limit: 0 is no limit

* Set the row-limit in one place only

* Update default limit to 20k rows

As per some discussion here:
https://raintank-corp.slack.com/archives/C071A5XCFST/p1741611647001369?thread_ts=1740047619.804869&cid=C071A5XCFST

* Test row-limit is applied from config

Make sure we protect this from regressions

This is perhaps a brittle test, somewhat coupled to the code here. But
it's good enough to prevent regressions at least.

* Add public documentation for the limit

* Limit total number of cells instead of rows

* Use named-return for totalRows

As @kylebrandt requested during review of #101700

* Leave DF cells as zero values during limits tests

When testing the cell limit we don't interact with the cell values at
all, so we leave them at their zero values both to speed up tests, and
to simplify and clarify that their values aren't used.

* Set SQLCmd limit at object creation - don't mutate

* Test that SQL node receives limit when built

And that it receives it from the Grafana config

* Improve TODO message for new Expression Parser

* Fix failing test by always creating config on the Service
pull/101580/head
Sam Jewell 4 months ago committed by GitHub
parent 42ae2fb026
commit 7a3415148e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 4
      docs/sources/setup-grafana/configure-grafana/_index.md
  2. 2
      pkg/expr/graph.go
  3. 2
      pkg/expr/graph_test.go
  4. 4
      pkg/expr/nodes.go
  5. 4
      pkg/expr/reader.go
  6. 65
      pkg/expr/service_test.go
  7. 38
      pkg/expr/sql_command.go
  8. 142
      pkg/expr/sql_command_test.go
  9. 4
      pkg/setting/setting.go

@ -2753,6 +2753,10 @@ Set the default start of the week, valid values are: `saturday`, `sunday`, `mond
Set this to `false` to disable expressions and hide them in the Grafana UI. Default is `true`.
#### `sql_expression_cell_limit`
Set the maximum number of cells that can be passed to a SQL expression. Default is `100000`.
### `[geomap]`
This section controls the defaults settings for **Geomap Plugin**.

@ -277,7 +277,7 @@ func (s *Service) buildGraph(req *Request) (*simple.DirectedGraph, error) {
case TypeDatasourceNode:
node, err = s.buildDSNode(dp, rn, req)
case TypeCMDNode:
node, err = buildCMDNode(rn, s.features)
node, err = buildCMDNode(rn, s.features, s.cfg.SQLExpressionCellLimit)
case TypeMLNode:
if s.features.IsEnabledGlobally(featuremgmt.FlagMlExpressions) {
node, err = s.buildMLNode(dp, rn, req)

@ -8,6 +8,7 @@ import (
"github.com/grafana/grafana/pkg/services/datasources"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/setting"
)
func TestServicebuildPipeLine(t *testing.T) {
@ -234,6 +235,7 @@ func TestServicebuildPipeLine(t *testing.T) {
}
s := Service{
features: featuremgmt.WithFeatures(featuremgmt.FlagExpressionParser),
cfg: setting.NewCfg(),
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

@ -106,7 +106,7 @@ func (gn *CMDNode) Execute(ctx context.Context, now time.Time, vars mathexp.Vars
return gn.Command.Execute(ctx, now, vars, s.tracer)
}
func buildCMDNode(rn *rawNode, toggles featuremgmt.FeatureToggles) (*CMDNode, error) {
func buildCMDNode(rn *rawNode, toggles featuremgmt.FeatureToggles, sqlExpressionCellLimit int64) (*CMDNode, error) {
commandType, err := GetExpressionCommandType(rn.Query)
if err != nil {
return nil, fmt.Errorf("invalid command type in expression '%v': %w", rn.RefID, err)
@ -163,7 +163,7 @@ func buildCMDNode(rn *rawNode, toggles featuremgmt.FeatureToggles) (*CMDNode, er
case TypeThreshold:
node.Command, err = UnmarshalThresholdCommand(rn, toggles)
case TypeSQL:
node.Command, err = UnmarshalSQLCommand(rn)
node.Command, err = UnmarshalSQLCommand(rn, sqlExpressionCellLimit)
default:
return nil, fmt.Errorf("expression command type '%v' in expression '%v' not implemented", commandType, rn.RefID)
}

@ -134,7 +134,9 @@ func (h *ExpressionQueryReader) ReadQuery(
err = iter.ReadVal(q)
if err == nil {
eq.Properties = q
eq.Command, err = NewSQLCommand(common.RefID, q.Expression)
// TODO: Cascade limit from Grafana config in this (new Expression Parser) branch of the code
cellLimit := 0 // zero means no limit
eq.Command, err = NewSQLCommand(common.RefID, q.Expression, int64(cellLimit))
}
case QueryTypeThreshold:

@ -146,6 +146,71 @@ func TestDSQueryError(t *testing.T) {
require.Equal(t, fp(42), res.Responses["C"].Frames[0].Fields[0].At(0))
}
func TestSQLExpressionCellLimitFromConfig(t *testing.T) {
tests := []struct {
name string
configCellLimit int64
expectedLimit int64
}{
{
name: "should pass default cell limit (0) to SQL command",
configCellLimit: 0,
expectedLimit: 0,
},
{
name: "should pass custom cell limit to SQL command",
configCellLimit: 5000,
expectedLimit: 5000,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a request with an SQL expression
sqlQuery := Query{
RefID: "A",
DataSource: dataSourceModel(),
JSON: json.RawMessage(`{ "datasource": { "uid": "__expr__", "type": "__expr__"}, "type": "sql", "expression": "SELECT 1 AS n" }`),
TimeRange: AbsoluteTimeRange{
From: time.Time{},
To: time.Time{},
},
}
queries := []Query{sqlQuery}
// Create service with specified cell limit
cfg := setting.NewCfg()
cfg.ExpressionsEnabled = true
cfg.SQLExpressionCellLimit = tt.configCellLimit
features := featuremgmt.WithFeatures(featuremgmt.FlagSqlExpressions)
// Create service with our configured limit
s := &Service{
cfg: cfg,
features: features,
converter: &ResultConverter{
Features: features,
},
}
req := &Request{Queries: queries, User: &user.SignedInUser{}}
// Build the pipeline
pipeline, err := s.BuildPipeline(req)
require.NoError(t, err)
node := pipeline[0]
cmdNode := node.(*CMDNode)
sqlCmd := cmdNode.Command.(*SQLCommand)
// Verify the SQL command has the correct limit
require.Equal(t, tt.expectedLimit, sqlCmd.limit, "SQL command has incorrect cell limit")
})
}
}
func fp(f float64) *float64 {
return &f
}

@ -19,10 +19,11 @@ type SQLCommand struct {
query string
varsToQuery []string
refID string
limit int64
}
// NewSQLCommand creates a new SQLCommand.
func NewSQLCommand(refID, rawSQL string) (*SQLCommand, error) {
func NewSQLCommand(refID, rawSQL string, limit int64) (*SQLCommand, error) {
if rawSQL == "" {
return nil, errutil.BadRequest("sql-missing-query",
errutil.WithPublicMessage("missing SQL query"))
@ -40,15 +41,17 @@ func NewSQLCommand(refID, rawSQL string) (*SQLCommand, error) {
if tables != nil {
logger.Debug("REF tables", "tables", tables, "sql", rawSQL)
}
return &SQLCommand{
query: rawSQL,
varsToQuery: tables,
refID: refID,
limit: limit,
}, nil
}
// UnmarshalSQLCommand creates a SQLCommand from Grafana's frontend query.
func UnmarshalSQLCommand(rn *rawNode) (*SQLCommand, error) {
func UnmarshalSQLCommand(rn *rawNode, limit int64) (*SQLCommand, error) {
if rn.TimeRange == nil {
logger.Error("time range must be specified for refID", "refID", rn.RefID)
return nil, fmt.Errorf("time range must be specified for refID %s", rn.RefID)
@ -65,7 +68,7 @@ func UnmarshalSQLCommand(rn *rawNode) (*SQLCommand, error) {
return nil, fmt.Errorf("expected sql expression to be type string, but got type %T", expressionRaw)
}
return NewSQLCommand(rn.RefID, expression)
return NewSQLCommand(rn.RefID, expression, limit)
}
// NeedsVars returns the variable names (refIds) that are dependencies
@ -91,12 +94,23 @@ func (gr *SQLCommand) Execute(ctx context.Context, now time.Time, vars mathexp.V
allFrames = append(allFrames, frames...)
}
rsp := mathexp.Results{}
db := sql.DB{}
totalCells := totalCells(allFrames)
// limit of 0 or less means no limit (following convention)
if gr.limit > 0 && totalCells > gr.limit {
return mathexp.Results{},
fmt.Errorf(
"SQL expression: total cell count across all input tables exceeds limit of %d. Total cells: %d",
gr.limit,
totalCells,
)
}
logger.Debug("Executing query", "query", gr.query, "frames", len(allFrames))
db := sql.DB{}
frame, err := db.QueryFrames(ctx, gr.refID, gr.query, allFrames)
rsp := mathexp.Results{}
if err != nil {
logger.Error("Failed to query frames", "error", err.Error())
rsp.Error = err
@ -121,3 +135,15 @@ func (gr *SQLCommand) Execute(ctx context.Context, now time.Time, vars mathexp.V
func (gr *SQLCommand) Type() string {
return TypeSQL.String()
}
func totalCells(frames []*data.Frame) (total int64) {
for _, frame := range frames {
if frame != nil {
// Calculate cells as rows × columns
rows := int64(frame.Rows())
cols := int64(len(frame.Fields))
total += rows * cols
}
}
return
}

@ -1,13 +1,21 @@
package expr
import (
"context"
"fmt"
"net/http"
"strings"
"testing"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/expr/mathexp"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/trace"
)
func TestNewCommand(t *testing.T) {
t.Skip()
cmd, err := NewSQLCommand("a", "select a from foo, bar")
cmd, err := NewSQLCommand("a", "select a from foo, bar", 0)
if err != nil && strings.Contains(err.Error(), "feature is not enabled") {
return
}
@ -25,3 +33,133 @@ func TestNewCommand(t *testing.T) {
return
}
}
// Helper function for creating test data
func createFrameWithRowsAndCols(rows int, cols int) *data.Frame {
frame := data.NewFrame("dummy")
for c := 0; c < cols; c++ {
values := make([]string, rows)
frame.Fields = append(frame.Fields, data.NewField(fmt.Sprintf("col%d", c), nil, values))
}
return frame
}
func TestSQLCommandCellLimits(t *testing.T) {
tests := []struct {
name string
limit int64
frames []*data.Frame
vars []string
expectError bool
errorContains string
}{
{
name: "single (long) frame within cell limit",
limit: 10,
frames: []*data.Frame{
createFrameWithRowsAndCols(10, 1), // 10 cells
},
vars: []string{"foo"},
},
{
name: "single (wide) frame within cell limit",
limit: 10,
frames: []*data.Frame{
createFrameWithRowsAndCols(1, 10), // 10 cells
},
vars: []string{"foo"},
},
{
name: "multiple frames within cell limit",
limit: 12,
frames: []*data.Frame{
createFrameWithRowsAndCols(2, 3), // 6 cells
createFrameWithRowsAndCols(2, 3), // 6 cells
},
vars: []string{"foo", "bar"},
},
{
name: "single (long) frame exceeds cell limit",
limit: 9,
frames: []*data.Frame{
createFrameWithRowsAndCols(10, 1), // 10 cells > 9 limit
},
vars: []string{"foo"},
expectError: true,
errorContains: "exceeds limit",
},
{
name: "single (wide) frame exceeds cell limit",
limit: 9,
frames: []*data.Frame{
createFrameWithRowsAndCols(1, 10), // 10 cells > 9 limit
},
vars: []string{"foo"},
expectError: true,
errorContains: "exceeds limit",
},
{
name: "multiple frames exceed cell limit",
limit: 11,
frames: []*data.Frame{
createFrameWithRowsAndCols(2, 3), // 6 cells
createFrameWithRowsAndCols(2, 3), // 6 cells
},
vars: []string{"foo", "bar"},
expectError: true,
errorContains: "exceeds limit",
},
{
name: "limit of 0 means no limit: allow large frame",
limit: 0,
frames: []*data.Frame{
createFrameWithRowsAndCols(200000, 1), // 200,000 cells
},
vars: []string{"foo", "bar"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cmd, err := NewSQLCommand("a", "select a from foo, bar", tt.limit)
require.NoError(t, err, "Failed to create SQL command")
vars := mathexp.Vars{}
for i, frame := range tt.frames {
vars[tt.vars[i]] = mathexp.Results{
Values: mathexp.Values{mathexp.TableData{Frame: frame}},
}
}
_, err = cmd.Execute(context.Background(), time.Now(), vars, &testTracer{})
if tt.expectError {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errorContains)
} else {
require.NoError(t, err)
}
})
}
}
type testTracer struct {
trace.Tracer
}
func (t *testTracer) Start(ctx context.Context, name string, s ...trace.SpanStartOption) (context.Context, trace.Span) {
return ctx, &testSpan{}
}
func (t *testTracer) Inject(context.Context, http.Header, trace.Span) {
}
type testSpan struct {
trace.Span
}
func (ts *testSpan) End(opt ...trace.SpanEndOption) {
}

@ -419,6 +419,9 @@ type Cfg struct {
// ExpressionsEnabled specifies whether expressions are enabled.
ExpressionsEnabled bool
// SQLExpressionCellLimit is the maximum number of cells (rows × columns, across all frames) that can be accepted by a SQL expression.
SQLExpressionCellLimit int64
ImageUploadProvider string
// LiveMaxConnections is a maximum number of WebSocket connections to
@ -780,6 +783,7 @@ func (cfg *Cfg) readAnnotationSettings() error {
func (cfg *Cfg) readExpressionsSettings() {
expressions := cfg.Raw.Section("expressions")
cfg.ExpressionsEnabled = expressions.Key("enabled").MustBool(true)
cfg.SQLExpressionCellLimit = expressions.Key("sql_expression_cell_limit").MustInt64(100000)
}
type AnnotationCleanupSettings struct {

Loading…
Cancel
Save