Logging: Add HTTP API to change the log level at runtime (#9357)

**What this PR does / why we need it**:
Adds the ability to change the log level at runtime, particularly from
info to debug, so that debugging information can be enabled during an
incident without restarting Loki.

**Which issue(s) this PR fixes**:
Fixes #6805

**Special notes for your reviewer**:

**Checklist**
- [x] Reviewed the
[`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md)
guide (**required**)
- [x] Documentation added
- [x] Tests updated
- [x] `CHANGELOG.md` updated
- [x] Changes that require user attention or interaction to upgrade are
documented in `docs/sources/upgrading/_index.md`: No such changes

---------

Co-authored-by: J Stickler <julie.stickler@grafana.com>
pull/9502/head
indX 3 years ago committed by GitHub
parent b4d0a2cd68
commit a55404766c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      CHANGELOG.md
  2. 16
      docs/sources/reference/api.md
  3. 2
      pkg/loki/loki.go
  4. 67
      pkg/util/log/log.go
  5. 64
      pkg/util/log/log_test.go

@ -28,6 +28,7 @@
* [8732](https://github.com/grafana/loki/pull/8732) **abaguas**: azure: respect retry config before cancelling the context
* [9206](https://github.com/grafana/loki/pull/9206) **dannykopping**: Ruler: log rule evaluation detail.
* [9184](https://github.com/grafana/loki/pull/9184) **periklis**: Bump dskit to introduce IPv6 support for memberlist
* [9357](https://github.com/grafana/loki/pull/9357) **Indransh**: Add HTTP API to change the log level at runtime
* [9431](https://github.com/grafana/loki/pull/9431) **dannykopping**: Add more buckets to `loki_memcache_request_duration_seconds` metric; latencies can increase if using memcached with NVMe
##### Fixes

@ -22,6 +22,7 @@ component is different.
These endpoints are exposed by all components:
- [`GET /ready`](#identify-ready-loki-instance)
- [`GET /log_level`](#change-log-level-at-runtime)
- [`GET /metrics`](#return-exposed-prometheus-metrics)
- [`GET /config`](#list-current-configuration)
- [`GET /services`](#list-running-services)
@ -612,6 +613,21 @@ running Loki on Kubernetes, `/ready` can be used as a readiness probe.
In microservices mode, the `/ready` endpoint is exposed by all components.
## Change log level at runtime
```
GET /log_level
POST /log_level
```
For `/log_level`, a `GET` returns the current log level, and a `POST` lets you change the log level of a Loki process at runtime. This can be useful for accessing debugging information during an incident. Use caution when running at the `debug` log level, as it produces a large volume of data.
Params:
- `log_level`: A valid log level that can be passed as a URL param (`?log_level=<level>`) or as a form value in case of `POST`. Valid levels: [debug, info, warn, error]
In microservices mode, the `/log_level` endpoint is exposed by all components.
## Flush in-memory chunks to backing store
```

@ -484,6 +484,8 @@ func (t *Loki) Run(opts RunOpts) error {
}
t.Server.HTTP.Path("/ready").Methods("GET").Handler(t.readyHandler(sm, shutdownRequested))
t.Server.HTTP.Path("/log_level").Methods("GET", "POST").Handler(util_log.LevelHandler(&t.Cfg.Server.LogLevel))
grpc_health_v1.RegisterHealthServer(t.Server.GRPC, grpcutil.NewHealthCheck(sm))
// Config endpoint adds a way to see the config and the changes compared to the defaults.

@ -1,9 +1,11 @@
package log
import (
"encoding/json"
"fmt"
"io"
"math"
"net/http"
"os"
"time"
@ -22,6 +24,8 @@ var (
Logger = log.NewNopLogger()
bufferedLogger *LineBufferedLogger
plogger *prometheusLogger
)
// InitLogger initialises the global gokit logger (util_log.Logger) and overrides the
@ -53,6 +57,7 @@ func Flush() error {
// prometheusLogger exposes Prometheus counters for each of go-kit's log levels.
type prometheusLogger struct {
baseLogger log.Logger
logger log.Logger
logMessages *prometheus.CounterVec
internalLogMessages *prometheus.CounterVec
@ -62,6 +67,54 @@ type prometheusLogger struct {
useSyncLogger bool
}
// LevelHandler returns an HTTP handler for inspecting and changing the log
// level at runtime.
//
// A GET request reports the level currently held in currentLogLevel. A POST
// request reads the "log_level" value with r.FormValue (so it may be given as
// a URL query parameter or as a form field), stores it in currentLogLevel and
// applies the matching level filter to the package-level prometheus logger.
// A value rejected by currentLogLevel.Set produces 400 Bad Request, and any
// method other than GET or POST produces 405 Method Not Allowed.
//
// Every response body is a JSON object with a "message" field and, except for
// GET, a "status" field ("success" or "failed").
func LevelHandler(currentLogLevel *logging.Level) http.HandlerFunc {
	type logResponse struct {
		Status  string `json:"status,omitempty"`
		Message string `json:"message"`
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var resp logResponse
		status := http.StatusOK

		w.Header().Set("Content-Type", "application/json; charset=utf-8")

		switch r.Method {
		case http.MethodGet:
			resp = logResponse{
				Message: fmt.Sprintf("Current log level is %s", currentLogLevel.String()),
			}

		case http.MethodPost:
			logLevel := r.FormValue("log_level")

			// Update log level in config first; Set is what rejects an
			// unrecognized level, so the logger is only touched on success.
			if err := currentLogLevel.Set(logLevel); err != nil {
				status = http.StatusBadRequest
				resp = logResponse{
					Status:  "failed",
					Message: err.Error(),
				}
				break
			}

			plogger.Set(levelFilter(logLevel))

			msg := fmt.Sprintf("Log level set to %s", logLevel)
			level.Info(Logger).Log("msg", msg)
			resp = logResponse{
				Status:  "success",
				Message: msg,
			}

		default:
			// Without this branch an unsupported method would be answered
			// with 200 and an empty message whenever the handler is mounted
			// without a method restriction.
			w.Header().Set("Allow", "GET, POST")
			status = http.StatusMethodNotAllowed
			resp = logResponse{
				Status:  "failed",
				Message: fmt.Sprintf("method %s is not allowed", r.Method),
			}
		}

		w.WriteHeader(status)
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			level.Error(Logger).Log("msg", err)
		}
	}
}
// newPrometheusLogger creates a new instance of PrometheusLogger which exposes
// Prometheus counters for various log levels.
func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus.Registerer, buffered bool, sync bool) log.Logger {
@ -111,13 +164,14 @@ func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus.
writer = log.NewSyncWriter(writer)
}
logger := log.NewLogfmtLogger(writer)
baseLogger := log.NewLogfmtLogger(writer)
if format.String() == "json" {
logger = log.NewJSONLogger(writer)
baseLogger = log.NewJSONLogger(writer)
}
logger = level.NewFilter(logger, levelFilter(l.String()))
logger := level.NewFilter(baseLogger, levelFilter(l.String()))
plogger := &prometheusLogger{
plogger = &prometheusLogger{
baseLogger: baseLogger,
logger: logger,
logMessages: logMessages,
internalLogMessages: internalLogMessages,
@ -139,6 +193,11 @@ func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus.
return log.With(plogger, "ts", log.DefaultTimestampUTC)
}
// Set replaces the active level filter: it wraps the unfiltered baseLogger in
// a new filter built from option and makes that the logger used by Log.
//
// NOTE(review): the assignment below is not guarded by any lock visible here,
// while Log reads pl.logger; confirm whether callers can invoke Set and Log
// concurrently.
func (pl *prometheusLogger) Set(option level.Option) {
	filtered := level.NewFilter(pl.baseLogger, option)
	pl.logger = filtered
}
// Log increments the appropriate Prometheus counter depending on the log level.
func (pl *prometheusLogger) Log(kv ...interface{}) error {
pl.logger.Log(kv...)

@ -0,0 +1,64 @@
package log
import (
	"io"
	"net/http"
	"net/http/httptest"
	"net/url"
	"strings"
	"testing"

	"github.com/go-kit/log"
	"github.com/stretchr/testify/assert"
	"github.com/weaveworks/common/logging"
)
// TestLevelHandler drives LevelHandler in-process with recorded requests and
// checks the JSON body, the status code and the resulting log level.
//
// The cases run in order and share lvl, so the case that changes the level
// comes last.
func TestLevelHandler(t *testing.T) {
	var lvl logging.Level
	err := lvl.Set("info")
	assert.NoError(t, err)

	// LevelHandler updates the package-level plogger on a successful POST.
	// Point it at a discarding logger for the test and restore it afterwards
	// so other tests in the package are not affected.
	previous := plogger
	t.Cleanup(func() { plogger = previous })
	plogger = &prometheusLogger{
		baseLogger: log.NewLogfmtLogger(io.Discard),
	}

	handler := LevelHandler(&lvl)

	testCases := []struct {
		testName           string
		method             string
		targetLogLevel     string
		expectedResponse   string
		expectedLogLevel   string
		expectedStatusCode int
	}{
		{"GetLogLevel", http.MethodGet, "", `{"message":"Current log level is info"}`, "info", 200},
		{"PostLogLevelInvalid", http.MethodPost, "invalid", `{"message":"unrecognized log level \"invalid\"", "status":"failed"}`, "info", 400},
		{"PostLogLevelEmpty", http.MethodPost, "", `{"message":"unrecognized log level \"\"", "status":"failed"}`, "info", 400},
		{"PostLogLevelDebug", http.MethodPost, "debug", `{"status": "success", "message":"Log level set to debug"}`, "debug", 200},
	}

	for _, testCase := range testCases {
		t.Run(testCase.testName, func(t *testing.T) {
			// Only POST carries a form body; body stays a nil interface for GET.
			var body io.Reader
			if testCase.method == http.MethodPost {
				form := url.Values{"log_level": {testCase.targetLogLevel}}
				body = strings.NewReader(form.Encode())
			}

			req := httptest.NewRequest(testCase.method, "/log_level", body)
			if body != nil {
				req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
			}

			rec := httptest.NewRecorder()
			handler.ServeHTTP(rec, req)

			assert.JSONEq(t, testCase.expectedResponse, rec.Body.String())
			assert.Equal(t, testCase.expectedStatusCode, rec.Code)
			assert.Equal(t, testCase.expectedLogLevel, lvl.String())
		})
	}
}
Loading…
Cancel
Save