package executor

import (
	"testing"

	"github.com/stretchr/testify/require"
)

// TestRegexpParser_Process checks that a parser built by newRegexpParser
// returns, for each input line, a map from named capture group to the text
// that group matched. A line that does not match the pattern (including the
// empty line) is expected to yield an empty, non-nil map and no error.
func TestRegexpParser_Process(t *testing.T) {
	tests := []struct {
		name    string
		line    string
		pattern string
		want    map[string]string
	}{
		{
			"single named capture group",
			`level=info msg=hello`,
			`level=(?P<level>\w+)`,
			map[string]string{"level": "info"},
		},
		{
			"multiple named capture groups",
			`2023-01-01 INFO this is a message`,
			`(?P<timestamp>\d{4}-\d{2}-\d{2}) (?P<level>\w+) (?P<message>.*)`,
			map[string]string{"timestamp": "2023-01-01", "level": "INFO", "message": "this is a message"},
		},
		{
			"pattern with special characters",
			`[2023-01-01] ERROR: something went wrong`,
			`\[(?P<date>[^\]]+)\] (?P<level>\w+): (?P<message>.*)`,
			map[string]string{"date": "2023-01-01", "level": "ERROR", "message": "something went wrong"},
		},
		{
			"partial match returns matched groups",
			`prefix level=warn suffix`,
			`level=(?P<level>\w+)`,
			map[string]string{"level": "warn"},
		},
		{
			"no match returns empty",
			`this line does not match`,
			`level=(?P<level>\w+)`,
			map[string]string{},
		},
		{
			"ip address and port extraction",
			`connection from 192.168.1.100:8080 established`,
			`(?P<ip>\d+\.\d+\.\d+\.\d+):(?P<port>\d+)`,
			map[string]string{"ip": "192.168.1.100", "port": "8080"},
		},
		{
			"log format with json-like structure",
			`{"time":"2023-01-01","level":"info","msg":"test"}`,
			`"level":"(?P<level>[^"]+)".*"msg":"(?P<message>[^"]+)"`,
			map[string]string{"level": "info", "message": "test"},
		},
		{
			"apache common log format",
			`127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`,
			`(?P<ip>[\d.]+) - (?P<user>\S+) \[(?P<timestamp>[^\]]+)\] "(?P<method>\w+) (?P<path>[^"]+)" (?P<status>\d+) (?P<size>\d+)`,
			map[string]string{
				"ip":        "127.0.0.1",
				"user":      "frank",
				"timestamp": "10/Oct/2000:13:55:36 -0700",
				"method":    "GET",
				"path":      "/apache_pb.gif HTTP/1.0",
				"status":    "200",
				"size":      "2326",
			},
		},
		{
			"empty capture group value",
			`key= value=test`,
			`key=(?P<key>\w*) value=(?P<value>\w+)`,
			map[string]string{"key": "", "value": "test"},
		},
		{
			"unicode in log line",
			`user=José level=信息 msg=こんにちは`,
			`user=(?P<user>\S+) level=(?P<level>\S+)`,
			map[string]string{"user": "José", "level": "信息"},
		},
		{
			"nested groups - only named captured",
			`error: [code:123] message`,
			`error: \[code:(?P<code>\d+)\]`,
			map[string]string{"code": "123"},
		},
		{
			"multiple occurrences - first match only",
			`level=info level=warn level=error`,
			`level=(?P<level>\w+)`,
			map[string]string{"level": "info"},
		},
		{
			"case insensitive matching",
			`LEVEL=INFO msg=test`,
			`(?i)level=(?P<level>\w+)`,
			map[string]string{"level": "INFO"},
		},
		{
			"multiline content in single entry",
			"first line\nsecond line\nlevel=debug",
			`level=(?P<level>\w+)`,
			map[string]string{"level": "debug"},
		},
		{
			"escaped characters in log",
			`path="/var/log/app.log" level=info`,
			`path="(?P<path>[^"]+)" level=(?P<level>\w+)`,
			map[string]string{"path": "/var/log/app.log", "level": "info"},
		},
		{
			"empty line returns empty",
			``,
			`level=(?P<level>\w+)`,
			map[string]string{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			parser, err := newRegexpParser(tt.pattern)
			require.NoError(t, err)

			result, err := parser.process(tt.line)
			require.NoError(t, err)
			require.Equal(t, tt.want, result)
		})
	}
}

// TestRegexpParser_NoNamedCaptures checks that newRegexpParser rejects
// patterns that compile but contain no named capture group, and that the
// returned error mentions the missing named capture.
func TestRegexpParser_NoNamedCaptures(t *testing.T) {
	tests := []struct {
		name    string
		pattern string
	}{
		{"no capture groups", `level=\w+`},
		{"only unnamed capture groups", `level=(\w+) msg=(\w+)`},
		{"mixed but no named", `(level)=(\w+)`},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := newRegexpParser(tt.pattern)
			require.Error(t, err)
			require.Contains(t, err.Error(), "at least one named capture must be supplied")
		})
	}
}

// TestRegexpParser_InvalidPattern checks that newRegexpParser returns an
// error for patterns that are not syntactically valid regular expressions.
func TestRegexpParser_InvalidPattern(t *testing.T) {
	// NOTE(review): the group name "level" is inferred; each pattern is
	// malformed only in the way its case name describes.
	tests := []struct {
		name    string
		pattern string
	}{
		{"unclosed group", `(?P<level>\w+`},
		{"invalid escape", `(?P<level>[\`},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := newRegexpParser(tt.pattern)
			require.Error(t, err)
		})
	}
}

// BenchmarkRegexpParser_Process measures parser.process on representative
// line/pattern pairs. The parser is constructed once per sub-benchmark,
// outside the timed loop, so only matching and extraction are measured.
func BenchmarkRegexpParser_Process(b *testing.B) {
	// NOTE(review): group names in "simple" and "many_capture_groups" are
	// inferred from the keys in each line; confirm against the intended
	// patterns.
	testCases := []struct {
		name    string
		line    string
		pattern string
	}{
		{
			"simple",
			`level=info msg=test user=admin`,
			`level=(?P<level>\w+) msg=(?P<msg>\w+)`,
		},
		{
			"complex_log_format",
			// Must stay on a single line: a newline inside this raw string
			// would stop the pattern from matching.
			`127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`,
			`(?P<ip>[\d.]+) - (?P<user>\S+) \[(?P<timestamp>[^\]]+)\] "(?P<method>\w+) (?P<path>[^"]+)" (?P<status>\d+) (?P<size>\d+)`,
		},
		{
			"many_capture_groups",
			`a=1 b=2 c=3 d=4 e=5 f=6 g=7 h=8`,
			`a=(?P<a>\d+) b=(?P<b>\d+) c=(?P<c>\d+) d=(?P<d>\d+) e=(?P<e>\d+) f=(?P<f>\d+) g=(?P<g>\d+) h=(?P<h>\d+)`,
		},
	}

	for _, tc := range testCases {
		b.Run(tc.name, func(b *testing.B) {
			parser, err := newRegexpParser(tc.pattern)
			if err != nil {
				b.Fatal(err)
			}

			for b.Loop() {
				if _, err := parser.process(tc.line); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}