Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/logql/log/pattern/pattern.go

158 lines
3.4 KiB

package pattern
import (
"bytes"
"errors"
)
var (
ErrNoCapture = errors.New("at least one capture is required")
ErrCaptureNotAllowed = errors.New("named captures are not allowed")
ErrInvalidExpr = errors.New("invalid expression")
)
type Matcher struct {
e expr
captures [][]byte
names []string
}
func New(in string) (*Matcher, error) {
e, err := parseExpr(in)
if err != nil {
return nil, err
}
if err := e.validate(); err != nil {
return nil, err
}
return &Matcher{
e: e,
captures: make([][]byte, 0, e.captureCount()),
names: e.captures(),
}, nil
}
func ParseLineFilter(in []byte) (*Matcher, error) {
if len(in) == 0 {
return new(Matcher), nil
}
e, err := parseExprBytes(in)
if err != nil {
return nil, err
}
if err = e.validateNoConsecutiveCaptures(); err != nil {
return nil, err
}
if err = e.validateNoNamedCaptures(); err != nil {
return nil, err
}
return &Matcher{e: e}, nil
}
func ParseLiterals(in string) ([][]byte, error) {
e, err := parseExpr(in)
if err != nil {
return nil, err
}
lit := make([][]byte, 0, len(e))
for _, n := range e {
if l, ok := n.(literals); ok {
lit = append(lit, l)
}
}
return lit, nil
}
// Matches matches the given line with the provided pattern.
// Matches invalidates the previous returned captures array.
func (m *Matcher) Matches(in []byte) [][]byte {
if len(in) == 0 {
return nil
}
if len(m.e) == 0 {
return nil
}
captures := m.captures[:0]
expr := m.e
if ls, ok := expr[0].(literals); ok {
i := bytes.Index(in, ls)
if i != 0 {
return nil
}
in = in[len(ls):]
expr = expr[1:]
}
if len(expr) == 0 {
return nil
}
// from now we have capture - literals - capture ... (literals)?
for len(expr) != 0 {
if len(expr) == 1 { // we're ending on a capture.
if !(expr[0].(capture)).isUnnamed() {
captures = append(captures, in)
}
return captures
}
capt := expr[0].(capture)
ls := expr[1].(literals)
expr = expr[2:]
i := bytes.Index(in, ls)
if i == -1 {
// if a capture is missed we return up to the end as the capture.
if !capt.isUnnamed() {
captures = append(captures, in)
}
return captures
}
if capt.isUnnamed() {
in = in[len(ls)+i:]
continue
}
captures = append(captures, in[:i])
in = in[len(ls)+i:]
}
return captures
}
func (m *Matcher) Names() []string {
return m.names
}
func (m *Matcher) Test(in []byte) bool {
if len(in) == 0 || len(m.e) == 0 {
// An empty line can only match an empty pattern.
return len(in) == 0 && len(m.e) == 0
}
var off int
for i := 0; i < len(m.e); i++ {
lit, ok := m.e[i].(literals)
if !ok {
continue
}
j := bytes.Index(in[off:], lit)
if j == -1 {
return false
}
if i != 0 && j == 0 {
// This means we either have repetitive literals, or an empty
// capture. Either way, the line does not match the pattern.
return false
}
off += j + len(lit)
}
// If we end up on a literal, we only consider the test successful if
// the remaining input is empty. Otherwise, if we end up on a capture,
// the remainder (the captured text) must not be empty.
//
// For example, "foo bar baz" does not match "<_> bar", but it matches
// "<_> baz" and "foo <_>".
//
// Empty captures are not allowed as well: " bar " does not match
// "<_> bar <_>", but matches "<_>bar<_>".
_, reqRem := m.e[len(m.e)-1].(capture)
hasRem := off != len(in)
return reqRem == hasRem
}