fix(parser): parenthesis around aggregate expression parsing

The position range of nested aggregate expression was wrong, for the
expression "(sum(foo))" the position of "sum(foo)" should be 1-9, but
the parser could not decide the end of the expression on pos 9, instead
it read ahead to pos 10 and then emitted the aggregate. But we only
kept the last closing position (10) and wrote that into the aggregate.

The reason for this is that the parser cannot know from "(sum(foo)" alone
if the aggregate is finished. It could be finished as in "(sum(foo))" but
equally it could continue with group modifier as "(sum(foo) by (bar))".

The fix is to track ")" tokens in a stack in addition to the lastClosing
position, which is overloaded with other things like offset number tracking.


Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
pull/16041/head
György Krajcsovits 2 months ago committed by Lukasz Mierzwa
parent ba690a8c4c
commit 06d0b063ea
  1. 26
      promql/parser/generated_parser.y
  2. 14
      promql/parser/generated_parser.y.go
  3. 21
      promql/parser/parse.go
  4. 43
      promql/parser/parse_test.go

@ -243,9 +243,23 @@ expr :
*/
aggregate_expr : aggregate_op aggregate_modifier function_call_body
{ $$ = yylex.(*parser).newAggregateExpr($1, $2, $3) }
{
// Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input
// like 'sum (some_metric) by test'
if len(yylex.(*parser).closingParens) > 1 {
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
$$ = yylex.(*parser).newAggregateExpr($1, $2, $3)
}
| aggregate_op function_call_body aggregate_modifier
{ $$ = yylex.(*parser).newAggregateExpr($1, $3, $2) }
{
// Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input
// like 'sum by test (some_metric)'
if len(yylex.(*parser).closingParens) > 1 {
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
$$ = yylex.(*parser).newAggregateExpr($1, $3, $2)
}
| aggregate_op function_call_body
{ $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, $2) }
| aggregate_op error
@ -399,9 +413,10 @@ function_call : IDENTIFIER function_call_body
Args: $2.(Expressions),
PosRange: posrange.PositionRange{
Start: $1.Pos,
End: yylex.(*parser).lastClosing,
End: yylex.(*parser).closingParens[0],
},
}
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
;
@ -427,7 +442,10 @@ function_call_args: function_call_args COMMA expr
*/
paren_expr : LEFT_PAREN expr RIGHT_PAREN
{ $$ = &ParenExpr{Expr: $2.(Expr), PosRange: mergeRanges(&$1, &$3)} }
{
$$ = &ParenExpr{Expr: $2.(Expr), PosRange: mergeRanges(&$1, &$3)}
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
;
/*

@ -1087,11 +1087,21 @@ yydefault:
case 21:
yyDollar = yyS[yypt-3 : yypt+1]
{
// Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input
// like 'sum (some_metric) by test'
if len(yylex.(*parser).closingParens) > 1 {
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[2].node, yyDollar[3].node)
}
case 22:
yyDollar = yyS[yypt-3 : yypt+1]
{
// Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input
// like 'sum by test (some_metric)'
if len(yylex.(*parser).closingParens) > 1 {
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[3].node, yyDollar[2].node)
}
case 23:
@ -1319,9 +1329,10 @@ yydefault:
Args: yyDollar[2].node.(Expressions),
PosRange: posrange.PositionRange{
Start: yyDollar[1].item.Pos,
End: yylex.(*parser).lastClosing,
End: yylex.(*parser).closingParens[0],
},
}
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
case 63:
yyDollar = yyS[yypt-3 : yypt+1]
@ -1353,6 +1364,7 @@ yydefault:
yyDollar = yyS[yypt-3 : yypt+1]
{
yyVAL.node = &ParenExpr{Expr: yyDollar[2].node.(Expr), PosRange: mergeRanges(&yyDollar[1].item, &yyDollar[3].item)}
yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:]
}
case 69:
yyDollar = yyS[yypt-1 : yypt+1]

@ -59,6 +59,13 @@ type parser struct {
// Everytime an Item is lexed that could be the end
// of certain expressions its end position is stored here.
lastClosing posrange.Pos
// Keep track of closing parentheses in addition, because sometimes the
// parser needs to read past a closing parenthesis to find the end of an
// expression, e.g. reading ony '(sum(foo)' cannot tell the end of the
// aggregation expression, since it could continue with either
// '(sum(foo))' or '(sum(foo) by (bar))' by which time we set lastClosing
// to the last paren.
closingParens []posrange.Pos
yyParser yyParserImpl
@ -82,6 +89,7 @@ func NewParser(input string, opts ...Opt) *parser { //nolint:revive // unexporte
p.injecting = false
p.parseErrors = nil
p.generatedParserResult = nil
p.closingParens = make([]posrange.Pos, 0)
// Clear lexer struct before reusing.
p.lex = Lexer{
@ -171,6 +179,11 @@ func EnrichParseError(err error, enrich func(parseErr *ParseErr)) {
func ParseExpr(input string) (expr Expr, err error) {
p := NewParser(input)
defer p.Close()
if len(p.closingParens) > 0 {
return nil, fmt.Errorf("internal parser error, not all closing parens consumed: %v", p.closingParens)
}
return p.ParseExpr()
}
@ -374,7 +387,10 @@ func (p *parser) Lex(lval *yySymType) int {
case EOF:
lval.item.Typ = EOF
p.InjectItem(0)
case RIGHT_BRACE, RIGHT_PAREN, RIGHT_BRACKET, DURATION, NUMBER:
case RIGHT_PAREN:
p.closingParens = append(p.closingParens, lval.item.Pos+posrange.Pos(len(lval.item.Val)))
fallthrough
case RIGHT_BRACE, RIGHT_BRACKET, DURATION, NUMBER:
p.lastClosing = lval.item.Pos + posrange.Pos(len(lval.item.Val))
}
@ -437,8 +453,9 @@ func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateE
ret.PosRange = posrange.PositionRange{
Start: op.Pos,
End: p.lastClosing,
End: p.closingParens[0],
}
p.closingParens = p.closingParens[1:]
ret.Op = op.Typ

@ -3946,6 +3946,7 @@ var testExpr = []struct {
},
},
},
// Test that nested parentheses result in the correct position range.
{
input: `foo[11s+10s-5*2^2]`,
expected: &MatrixSelector{
@ -4264,6 +4265,48 @@ var testExpr = []struct {
PosRange: posrange.PositionRange{Start: 0, End: 10},
},
},
{
input: "(sum(foo) by (bar))",
expected: &ParenExpr{
Expr: &AggregateExpr{
Op: SUM,
Grouping: []string{"bar"},
Expr: &VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"),
},
PosRange: posrange.PositionRange{
Start: 5,
End: 8,
},
},
PosRange: posrange.PositionRange{Start: 1, End: 18},
},
PosRange: posrange.PositionRange{Start: 0, End: 19},
},
},
{
input: "(sum by (bar) (foo))",
expected: &ParenExpr{
Expr: &AggregateExpr{
Op: SUM,
Grouping: []string{"bar"},
Expr: &VectorSelector{
Name: "foo",
LabelMatchers: []*labels.Matcher{
MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"),
},
PosRange: posrange.PositionRange{
Start: 15,
End: 18,
},
},
PosRange: posrange.PositionRange{Start: 1, End: 19},
},
PosRange: posrange.PositionRange{Start: 0, End: 20},
},
},
}
func makeInt64Pointer(val int64) *int64 {

Loading…
Cancel
Save