chore: Add tree printer for physical plan (#16716)

This PR adds a generic tree printer (similar to the Unix utility `tree`).

Additionally it provides an implementation to convert the DAG of the physical plan into the generic tree used by the tree printer.
This makes it possible to print plans like the ones below (note: the example plans themselves are not meaningful; they only demonstrate the visual output of the printer).

Simple DAG with single child node:
```
Limit #limit offset=0 limit=0
└── Filter #filter predicates=()
    └── SortMerge #merge column=<nil> order=UNDEFINED
        ├── DataObjScan #scan1 location= stream_ids=() projections=() predicates=() direction=0 limit=0
        └── DataObjScan #scan2 location= stream_ids=() projections=() predicates=() direction=0 limit=0
```

DAG with multiple root nodes:

```
Limit #limit1 offset=0 limit=0
└── DataObjScan #scan1 location= stream_ids=() projections=() predicates=() direction=0 limit=0

Limit #limit2 offset=0 limit=0
└── DataObjScan #scan2 location= stream_ids=() projections=() predicates=() direction=0 limit=0
```

DAG with parent nodes that share the same child node:

```
Limit #limit offset=0 limit=0
├── Limit #filter1 offset=0 limit=0
│   └── DataObjScan #scan location= stream_ids=() projections=() predicates=() direction=0 limit=0
└── Limit #filter2 offset=0 limit=0
    └── DataObjScan #scan location= stream_ids=() projections=() predicates=() direction=0 limit=0
```

--- 
Signed-off-by: Christian Haudum <christian.haudum@gmail.com>
pull/16574/head^2
Christian Haudum 2 months ago committed by GitHub
parent 5b619b6d66
commit 9bdd5d15bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      .golangci.yml
  2. 202
      pkg/dataobj/planner/internal/tree/printer.go
  3. 51
      pkg/dataobj/planner/internal/tree/printer_test.go
  4. 82
      pkg/dataobj/planner/physical/printer.go
  5. 52
      pkg/dataobj/planner/physical/printer_test.go

@ -76,6 +76,7 @@ issues:
- Error return value of .*log\.Logger\)\.Log\x60 is not checked
- Error return value of .*.Log.* is not checked
- Error return value of `` is not checked
- Error return value of `.*WriteString` is not checked
exclude-rules:
- path: pkg/scheduler/scheduler.go
text: 'SA1019: msg.GetHttpRequest is deprecated: Do not use'

@ -0,0 +1,202 @@
package tree
import (
"fmt"
"io"
)
// Drawing symbols used by the printer. Every symbol is exactly four columns
// wide: the connectors occupy four columns, so the padding and the vertical
// continuation must be just as wide for a nested level to line up underneath
// the connector of its parent.
const (
	// symPrefix pads a level that has no further siblings below it.
	symPrefix = "    "
	// symIndent continues the vertical line of a level that still has
	// siblings below it.
	symIndent = "│   "
	// symConn connects a node that is followed by further siblings.
	symConn = "├── "
	// symLastConn connects the last node of a level.
	symLastConn = "└── "
)
// Property is a single key/value annotation attached to a [Node].
//
// A property carries either one value or a list of values. A single-value
// property is rendered as `key=value`, a multi-value property as
// `key=(value1, value2, ...)`. The list form is selected by IsMultiValue, so
// that field has to be set to `true` for multi-value properties.
type Property struct {
	// Key is the property name.
	Key string
	// Values contains the value, or the values, of the property.
	Values []any
	// IsMultiValue selects the multi-value representation.
	IsMultiValue bool
}
// NewProperty returns a Property named key that holds the given values.
// Pass multi as `true` to have the property treated as a multi-value
// property, `false` to have it treated as a single-value property.
func NewProperty(key string, multi bool, values ...any) Property {
	var prop Property
	prop.Key = key
	prop.IsMultiValue = multi
	prop.Values = values
	return prop
}
// Node is one element of a tree that the [Printer] can walk and print.
// Trees are built by nesting nodes: every node may carry any number of
// properties as well as any number of children and comments.
type Node struct {
	// ID is a unique identifier for the node.
	ID string
	// Name is the display name of the node.
	Name string
	// Properties lists the key-value properties associated with the node.
	Properties []Property
	// Children holds the child nodes of the node.
	Children []*Node
	// Comments are child nodes as well, but they are indented one level
	// deeper than Children. A typical use-case are tree-style properties of
	// a node, such as the expressions of a physical plan node.
	Comments []*Node
}
// NewNode allocates a node with the given display name, unique identifier
// and properties. The returned node has neither children nor comments.
func NewNode(name, id string, properties ...Property) *Node {
	node := new(Node)
	node.Name, node.ID = name, id
	node.Properties = properties
	return node
}
// AddChild builds a node from the given name, unique identifier and
// properties, appends it to the children of n and returns the new node.
func (n *Node) AddChild(name, id string, properties []Property) *Node {
	n.Children = append(n.Children, NewNode(name, id, properties...))
	return n.Children[len(n.Children)-1]
}
// AddComment builds a node from the given name, unique identifier and
// properties, appends it to the comments of n and returns the new node.
func (n *Node) AddComment(name, id string, properties []Property) *Node {
	n.Comments = append(n.Comments, NewNode(name, id, properties...))
	return n.Comments[len(n.Comments)-1]
}
// Printer renders a tree of [Node]s as a hierarchical, textual
// representation.
type Printer struct {
	// w receives the rendered output.
	w io.StringWriter
}
// NewPrinter returns a [Printer] that writes its output to the given
// [io.StringWriter].
func NewPrinter(w io.StringWriter) *Printer {
	printer := new(Printer)
	printer.w = w
	return printer
}
// Print writes the tree rooted at root to the printer's [io.StringWriter]:
// first the root node itself, then its comments and children. A nil root
// produces no output.
//
// Example output:
//
//	SortMerge #sort order=ASC column=timestamp
//	├── Limit #limit1 limit=1000
//	│   └── DataObjScan #scan1 location=dataobj_1
//	└── Limit #limit2 limit=1000
//	    └── DataObjScan #scan2 location=dataobj_2
func (tp *Printer) Print(root *Node) {
	// Without this guard a nil root would be dereferenced below.
	if root == nil {
		return
	}
	tp.printNode(root)
	// The root is printed without any indentation, hence the empty prefix.
	tp.printChildren(root.Comments, root.Children, "")
}
// printNode writes a single line describing node: its name, followed by
// ` #<id>` when the ID is not empty, followed by the space separated
// properties. Each property is written as `key=value`; the values of a
// property are joined with ", " and wrapped in parentheses when the property
// is flagged as multi-value. The line is terminated by a newline.
func (tp *Printer) printNode(node *Node) {
	out := tp.w

	out.WriteString(node.Name)
	if id := node.ID; id != "" {
		out.WriteString(" #")
		out.WriteString(id)
	}

	for _, prop := range node.Properties {
		// Every property is preceded by a single space, which separates it
		// from the name/ID and from the previous property alike.
		out.WriteString(" ")
		out.WriteString(prop.Key)
		out.WriteString("=")

		if prop.IsMultiValue {
			out.WriteString("(")
		}
		for pos, value := range prop.Values {
			if pos > 0 {
				out.WriteString(", ")
			}
			out.WriteString(fmt.Sprint(value))
		}
		if prop.IsMultiValue {
			out.WriteString(")")
		}
	}

	out.WriteString("\n")
}
// printChildren recursively prints the given comments and children, each on
// its own line, with the indentation and connector symbols of a tree drawing.
//
// prefix is the indentation accumulated by the ancestors; it is written in
// front of every line. Comments are printed before children and sit one level
// deeper than them.
func (tp *Printer) printChildren(comments, children []*Node, prefix string) {
	// The first column of a comment line belongs to the children of the same
	// parent: it carries a vertical bar only if children follow further down,
	// otherwise it is blank. The same column is handed on to the descendants
	// of a comment, so that no bar is drawn that leads nowhere.
	lead := symPrefix
	if len(children) > 0 {
		lead = symIndent
	}

	// Comments come first.
	for i, node := range comments {
		// The last comment closes its level; all others keep it open.
		connector, continuation := symConn, symIndent
		if i == len(comments)-1 {
			connector, continuation = symLastConn, symPrefix
		}

		tp.w.WriteString(prefix)
		tp.w.WriteString(lead + connector)
		tp.printNode(node)

		tp.printChildren(node.Comments, node.Children, prefix+lead+continuation)
	}

	// Children come last.
	for i, node := range children {
		// The last child closes its level; all others keep it open.
		connector, continuation := symConn, symIndent
		if i == len(children)-1 {
			connector, continuation = symLastConn, symPrefix
		}

		tp.w.WriteString(prefix)
		tp.w.WriteString(connector)
		tp.printNode(node)

		tp.printChildren(node.Comments, node.Children, prefix+continuation)
	}
}

@ -0,0 +1,51 @@
package tree
import (
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// TestPrinter builds a small tree that mixes children and comments, prints it
// into a strings.Builder and compares the result with a literal expectation.
func TestPrinter(t *testing.T) {
// Root node without ID and without properties.
root := NewNode("Root", "")
// First level: one node with two multi-value properties.
lvl1 := root.AddChild("Merge", "foo", []Property{
{Key: "key_a", Values: []any{"value_a"}, IsMultiValue: true},
{Key: "key_b", Values: []any{"value_b", "value_c"}, IsMultiValue: true},
})
// Second level: a node that carries both comments and children.
lvl2 := lvl1.AddChild("Product", "foobar", []Property{
{Key: "relations", Values: []any{"foo", "bar"}, IsMultiValue: true},
})
// First comment of lvl2, with three children of its own.
rel := lvl2.AddComment("Relation", "foo", nil)
rel.AddChild("Shard", "0", nil)
rel.AddChild("Shard", "1", nil)
rel.AddChild("Shard", "2", nil)
// Second (and last) comment of lvl2, without children.
lvl2.AddComment("Relation", "bar", nil)
// Regular children of lvl2; IsMultiValue is left unset on their property.
lvl2.AddChild("Scan", "foo", []Property{
{Key: "selector", Values: []any{`{env="prod", region=".+"}`}},
})
lvl2.AddChild("Scan", "bar", []Property{
{Key: "selector", Values: []any{`{env="dev", region=".+"}`}},
})
// Second child of lvl1, with an empty property list.
_ = lvl1.AddChild("Scan", "baz", []Property{})
b := &strings.Builder{}
p := NewPrinter(b)
p.Print(root)
// Log the rendered tree so that it shows up in verbose test output.
t.Log("\n" + b.String())
// The expectation starts with a newline; the same newline is prepended to
// the actual output in the comparison below.
// NOTE(review): printChildren writes a connector symbol in front of every
// child and comment, yet none shows up in the literal below — the tree
// drawing prefixes look like they were lost; confirm the literal against
// the actual printer output.
expected := `
Root
Merge #foo key_a=(value_a) key_b=(value_b, value_c)
Product #foobar relations=(foo, bar)
Relation #foo
Shard #0
Shard #1
Shard #2
Relation #bar
Scan #foo selector={env="prod", region=".+"}
Scan #bar selector={env="dev", region=".+"}
Scan #baz
`
require.Equal(t, expected, "\n"+b.String())
}

@ -0,0 +1,82 @@
package physical
import (
"strings"
"github.com/grafana/loki/v3/pkg/dataobj/planner/internal/tree"
)
// BuildTree turns the physical plan node n, together with everything below
// it in plan p, into a [tree.Node] hierarchy. The result is meant for
// visualization and debugging purposes.
func BuildTree(p *Plan, n Node) *tree.Node {
	root := toTree(p, n)
	return root
}
// toTree converts n into a tree node and recursively attaches the converted
// children of n, as reported by p.Children, in the order they are returned.
func toTree(p *Plan, n Node) *tree.Node {
	current := toTreeNode(n)
	for _, next := range p.Children(n) {
		sub := toTree(p, next)
		if sub == nil {
			continue
		}
		current.Children = append(current.Children, sub)
	}
	return current
}
// toTreeNode converts a single physical plan node into a tree node, without
// looking at its children. The node type becomes the name and the node ID
// becomes the ID; the properties depend on the concrete node type. Node types
// that are not listed below end up without properties.
func toTreeNode(n Node) *tree.Node {
	out := tree.NewNode(n.Type().String(), n.ID())

	var props []tree.Property
	switch typed := n.(type) {
	case *DataObjScan:
		props = []tree.Property{
			tree.NewProperty("location", false, typed.Location),
			tree.NewProperty("stream_ids", true, toAnySlice(typed.StreamIDs)...),
			tree.NewProperty("projections", true, toAnySlice(typed.Projections)...),
			tree.NewProperty("predicates", true, toAnySlice(typed.Predicates)...),
			tree.NewProperty("direction", false, typed.Direction),
			tree.NewProperty("limit", false, typed.Limit),
		}
	case *SortMerge:
		props = []tree.Property{
			tree.NewProperty("column", false, typed.Column),
			tree.NewProperty("order", false, typed.Order),
		}
	case *Projection:
		props = []tree.Property{
			tree.NewProperty("columns", true, toAnySlice(typed.Columns)...),
		}
	case *Filter:
		props = []tree.Property{
			tree.NewProperty("predicates", true, toAnySlice(typed.Predicates)...),
		}
	case *Limit:
		props = []tree.Property{
			tree.NewProperty("offset", false, typed.Offset),
			tree.NewProperty("limit", false, typed.Limit),
		}
	}
	out.Properties = props

	return out
}
// toAnySlice copies the elements of s into a newly allocated []any of the
// same length, so that a typed slice can be passed where variadic `any`
// arguments are expected.
func toAnySlice[T any](s []T) []any {
	boxed := make([]any, len(s))
	for idx, elem := range s {
		boxed[idx] = elem
	}
	return boxed
}
// PrintAsTree converts a physical [Plan] into a human-readable tree
// representation. Every root node of the plan is converted with [BuildTree]
// and printed as a tree of its own; the printed trees are joined by a
// newline. A plan without root nodes yields an empty string.
func PrintAsTree(p *Plan) string {
	// Ask the plan for its roots only once and render all trees into a
	// single builder, instead of collecting one string per root.
	var sb strings.Builder
	printer := tree.NewPrinter(&sb)
	for i, root := range p.Roots() {
		if i > 0 {
			// Separator between two trees.
			sb.WriteString("\n")
		}
		printer.Print(BuildTree(p, root))
	}
	return sb.String()
}

@ -0,0 +1,52 @@
package physical
import "testing"
// TestPrinter renders a couple of hand-built plans with PrintAsTree and logs
// the result. It makes no assertions on the output.
func TestPrinter(t *testing.T) {
	cases := []struct {
		name  string
		build func() *Plan
	}{
		{
			name: "simple tree",
			build: func() *Plan {
				plan := &Plan{}
				limit := plan.addNode(&Limit{id: "limit"})
				filter := plan.addNode(&Filter{id: "filter"})
				merge := plan.addNode(&SortMerge{id: "merge"})
				scan1 := plan.addNode(&DataObjScan{id: "scan1"})
				scan2 := plan.addNode(&DataObjScan{id: "scan2"})

				_ = plan.addEdge(Edge{Parent: limit, Child: filter})
				_ = plan.addEdge(Edge{Parent: filter, Child: merge})
				_ = plan.addEdge(Edge{Parent: merge, Child: scan1})
				_ = plan.addEdge(Edge{Parent: merge, Child: scan2})
				return plan
			},
		},
		{
			name: "multiple root nodes",
			build: func() *Plan {
				plan := &Plan{}
				limit1 := plan.addNode(&Limit{id: "limit1"})
				scan1 := plan.addNode(&DataObjScan{id: "scan1"})
				_ = plan.addEdge(Edge{Parent: limit1, Child: scan1})

				limit2 := plan.addNode(&Limit{id: "limit2"})
				scan2 := plan.addNode(&DataObjScan{id: "scan2"})
				_ = plan.addEdge(Edge{Parent: limit2, Child: scan2})
				return plan
			},
		},
		{
			name: "multiple parents sharing the same child node",
			build: func() *Plan {
				plan := &Plan{}
				limit := plan.addNode(&Limit{id: "limit"})
				filter1 := plan.addNode(&Limit{id: "filter1"})
				filter2 := plan.addNode(&Limit{id: "filter2"})
				scan := plan.addNode(&DataObjScan{id: "scan"})

				_ = plan.addEdge(Edge{Parent: limit, Child: filter1})
				_ = plan.addEdge(Edge{Parent: limit, Child: filter2})
				_ = plan.addEdge(Edge{Parent: filter1, Child: scan})
				_ = plan.addEdge(Edge{Parent: filter2, Child: scan})
				return plan
			},
		},
	}

	for _, tc := range cases {
		build := tc.build
		t.Run(tc.name, func(t *testing.T) {
			// The leading newline keeps the tree on its own lines in the log.
			t.Log("\n" + PrintAsTree(build()))
		})
	}
}
Loading…
Cancel
Save