package queryrange

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/gogo/protobuf/proto"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"
	"github.com/weaveworks/common/httpgrpc"
	"golang.org/x/sync/errgroup"

	"github.com/grafana/dskit/tenant"

	"github.com/grafana/loki/pkg/loghttp"
	"github.com/grafana/loki/pkg/logproto"
	"github.com/grafana/loki/pkg/logqlmodel/stats"
	"github.com/grafana/loki/pkg/querier/queryrange/queryrangebase"
	"github.com/grafana/loki/pkg/storage/chunk/cache"
	"github.com/grafana/loki/pkg/util/validation"
)

// LogResultCacheMetrics is the metrics wrapper used in log result cache.
//
// CacheHit is incremented each time a request finds an entry in the cache,
// and CacheMiss each time the lookup returns no entry and the request has to
// be answered by the next handler.
type LogResultCacheMetrics struct {
	CacheHit  prometheus.Counter
	CacheMiss prometheus.Counter
}

// NewLogResultCacheMetrics creates the counters used by the log result cache
// and registers them with registerer through promauto.
//
// Both counters live in the "loki" namespace. They carry help text so that the
// exposition output is self-describing and passes metric linting.
func NewLogResultCacheMetrics(registerer prometheus.Registerer) *LogResultCacheMetrics {
	factory := promauto.With(registerer)
	return &LogResultCacheMetrics{
		CacheHit: factory.NewCounter(prometheus.CounterOpts{
			Namespace: "loki",
			Name:      "query_frontend_log_result_cache_hit_total",
			Help:      "Total number of log result cache lookups that found a cached entry.",
		}),
		CacheMiss: factory.NewCounter(prometheus.CounterOpts{
			Namespace: "loki",
			Name:      "query_frontend_log_result_cache_miss_total",
			Help:      "Total number of log result cache lookups that found no cached entry.",
		}),
	}
}

// NewLogResultCache builds the middleware that caches log query results.
// Only empty results are cached for now: they are cheap to store and can be
// reused freely, whereas non-empty results are hard to handle because of the
// limit query parameter and the size of the response.
// Caching non-empty results may be added in the future.
// see https://docs.google.com/document/d/1_mACOpxdWZ5K0cIedaja5gzMbv-m0lUVazqZd2O4mEU/edit
//
// When metrics is nil a fresh set of metrics is created with a nil registerer.
func NewLogResultCache(logger log.Logger, limits Limits, cache cache.Cache, shouldCache queryrangebase.ShouldCacheFn, metrics *LogResultCacheMetrics) queryrangebase.Middleware {
	if metrics == nil {
		metrics = NewLogResultCacheMetrics(nil)
	}

	// wrap decorates the downstream handler with the caching layer.
	wrap := func(next queryrangebase.Handler) queryrangebase.Handler {
		handler := &logResultCache{
			logger:      logger,
			limits:      limits,
			cache:       cache,
			shouldCache: shouldCache,
			metrics:     metrics,
			next:        next,
		}
		return handler
	}
	return queryrangebase.MiddlewareFunc(wrap)
}

// logResultCache is the handler produced by NewLogResultCache.
//
// next is the downstream handler used whenever the cache is bypassed, missed,
// or only partially covers a request. limits supplies the per-tenant
// MaxCacheFreshness and QuerySplitDuration settings. cache holds marshalled
// LokiRequest values describing time ranges known to have produced an empty
// result. shouldCache is an optional predicate; when it is nil every request
// is considered for caching.
type logResultCache struct {
	next        queryrangebase.Handler
	limits      Limits
	cache       cache.Cache
	shouldCache queryrangebase.ShouldCacheFn

	metrics *LogResultCacheMetrics
	logger  log.Logger
}

// Do answers req from the cache of known-empty time ranges when possible and
// otherwise forwards it, or only the uncovered parts of it, to the next handler.
//
// The request is forwarded untouched when shouldCache rejects it, when it ends
// after now minus the tenants' max cache freshness, when no split interval is
// configured, or when the cache lookup or the decoding of the cached entry
// fails. An HTTP 400 error is returned when the tenant IDs cannot be read from
// ctx, and an HTTP 500 error when a cacheable request is not a *LokiRequest.
func (l *logResultCache) Do(ctx context.Context, req queryrangebase.Request) (queryrangebase.Response, error) {
	tenantIDs, err := tenant.TenantIDs(ctx)
	if err != nil {
		// The message is passed as an argument rather than as the format string
		// so that any '%' it contains is reported verbatim.
		return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error())
	}

	if l.shouldCache != nil && !l.shouldCache(req) {
		return l.next.Do(ctx, req)
	}

	// Requests reaching into the freshness window are never served from cache.
	maxCacheFreshness := validation.MaxDurationPerTenant(tenantIDs, l.limits.MaxCacheFreshness)
	maxCacheTime := int64(model.Now().Add(-maxCacheFreshness))
	if req.GetEnd() > maxCacheTime {
		return l.next.Do(ctx, req)
	}

	lokiReq, ok := req.(*LokiRequest)
	if !ok {
		return nil, httpgrpc.Errorf(http.StatusInternalServerError, "invalid request type %T", req)
	}

	interval := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, l.limits.QuerySplitDuration)
	// skip caching if the interval is unset
	if interval == 0 {
		return l.next.Do(ctx, req)
	}

	// The first subquery might not be aligned, so round its start down to the
	// beginning of the split interval it falls into.
	startNs, intervalNs := lokiReq.GetStartTs().UnixNano(), interval.Nanoseconds()
	alignedStart := time.Unix(0, startNs-(startNs%intervalNs))
	// generate the cache key based on query, tenant and start time.
	cacheKey := fmt.Sprintf("log:%s:%s:%d:%d", tenant.JoinTenantIDs(tenantIDs), req.GetQuery(), intervalNs, alignedStart.UnixNano()/intervalNs)

	_, buff, _, err := l.cache.Fetch(ctx, []string{cache.HashKey(cacheKey)})
	if err != nil {
		level.Warn(l.logger).Log("msg", "error fetching cache", "err", err, "cacheKey", cacheKey)
		return l.next.Do(ctx, req)
	}
	// we expect only one key to be found or missing.
	if len(buff) > 1 {
		level.Warn(l.logger).Log("msg", "unexpected length of cache return values", "buff", len(buff))
		return l.next.Do(ctx, req)
	}

	if len(buff) == 0 {
		// cache miss
		return l.handleMiss(ctx, cacheKey, lokiReq)
	}

	// cache hit: the entry is the marshalled request whose result was empty.
	var cachedRequest LokiRequest
	if err := proto.Unmarshal(buff[0], &cachedRequest); err != nil {
		level.Warn(l.logger).Log("msg", "error unmarshalling request from cache", "err", err)
		return l.next.Do(ctx, req)
	}
	return l.handleHit(ctx, cacheKey, &cachedRequest, lokiReq)
}

// handleMiss runs req against the next handler and, when the response is an
// empty successful one, records req in the cache under cacheKey.
//
// Errors from the next handler, and responses that are not a *LokiResponse,
// are returned as errors. Failures to marshal or store the cache entry are
// only logged; the response is still returned to the caller.
func (l *logResultCache) handleMiss(ctx context.Context, cacheKey string, req *LokiRequest) (queryrangebase.Response, error) {
	l.metrics.CacheMiss.Inc()
	level.Debug(l.logger).Log("msg", "cache miss", "key", cacheKey)

	resp, err := l.next.Do(ctx, req)
	if err != nil {
		return nil, err
	}

	switch typed, isLoki := resp.(*LokiResponse); {
	case !isLoki:
		return nil, fmt.Errorf("unexpected response type %T", resp)
	case !isEmpty(typed):
		// At the moment we only cache empty results
		return resp, nil
	}

	payload, marshalErr := proto.Marshal(req)
	if marshalErr != nil {
		level.Warn(l.logger).Log("msg", "error marshalling request", "err", marshalErr)
		return resp, nil
	}

	// cache the result
	keys := []string{cache.HashKey(cacheKey)}
	if storeErr := l.cache.Store(ctx, keys, [][]byte{payload}); storeErr != nil {
		level.Warn(l.logger).Log("msg", "error storing cache", "err", storeErr)
	}
	return resp, nil
}

// handleHit answers lokiReq using cachedRequest, the request previously
// recorded under cacheKey as having produced an empty result.
//
// When the cached time range covers lokiReq, an empty response is returned
// without querying. Otherwise the parts of lokiReq before the cached start
// and/or after the cached end are fetched concurrently from the next handler.
// Each part that comes back empty extends the cached range, and the cache entry
// is rewritten; each non-empty part is merged into the response. A part whose
// status is not success is returned as is.
//
// Errors from the next handler, or responses that are not a *LokiResponse, are
// returned as errors. Failures to marshal or store the updated cache entry are
// only logged, consistent with handleMiss.
func (l *logResultCache) handleHit(ctx context.Context, cacheKey string, cachedRequest *LokiRequest, lokiReq *LokiRequest) (queryrangebase.Response, error) {
	l.metrics.CacheHit.Inc()
	// We start with an empty response. It is derived from the incoming request
	// rather than the cached one: the cache key does not include the limit,
	// direction or path, so those may differ between the two.
	result := emptyResponse(lokiReq)

	// if the cached range covers the whole requested time range,
	// we can just return the empty result.
	if !lokiReq.GetStartTs().Before(cachedRequest.GetStartTs()) && !lokiReq.GetEndTs().After(cachedRequest.GetEndTs()) {
		return result, nil
	}

	// we could be missing data at the start and the end.
	// so we're going to fetch what is missing.
	var (
		startRequest, endRequest *LokiRequest
		startResp, endResp       *LokiResponse
		updateCache              bool
	)
	// The group's context is kept separate from ctx: it is cancelled as soon as
	// Wait returns, and ctx is still needed afterwards to store the cache entry.
	g, gctx := errgroup.WithContext(ctx)

	// fetch runs a sub request and checks the response type. It only touches
	// its own locals, so it is safe to call from both goroutines.
	fetch := func(sub *LokiRequest) (*LokiResponse, error) {
		resp, err := l.next.Do(gctx, sub)
		if err != nil {
			return nil, err
		}
		lokiResp, ok := resp.(*LokiResponse)
		if !ok {
			return nil, fmt.Errorf("unexpected response type %T", resp)
		}
		return lokiResp, nil
	}

	// if we're missing data at the start, start fetching from the start to the cached start.
	if lokiReq.GetStartTs().Before(cachedRequest.GetStartTs()) {
		startRequest = lokiReq.WithStartEndTime(lokiReq.GetStartTs(), cachedRequest.GetStartTs())
		g.Go(func() error {
			resp, err := fetch(startRequest)
			if err != nil {
				return err
			}
			startResp = resp
			return nil
		})
	}

	// if we're missing data at the end, start fetching from the cached end to the end.
	if lokiReq.GetEndTs().After(cachedRequest.GetEndTs()) {
		endRequest = lokiReq.WithStartEndTime(cachedRequest.GetEndTs(), lokiReq.GetEndTs())
		g.Go(func() error {
			resp, err := fetch(endRequest)
			if err != nil {
				return err
			}
			endResp = resp
			return nil
		})
	}

	if err := g.Wait(); err != nil {
		return nil, err
	}

	// if we have data at the start, we need to merge it with the cached data if it's empty and update the cache.
	// If it's not empty only merge the response.
	if startResp != nil {
		if isEmpty(startResp) {
			cachedRequest = cachedRequest.WithStartEndTime(startRequest.GetStartTs(), cachedRequest.GetEndTs())
			updateCache = true
		} else {
			if startResp.Status != loghttp.QueryStatusSuccess {
				return startResp, nil
			}
			result = mergeLokiResponse(startResp, result)
		}
	}

	// if we have data at the end, we need to merge it with the cached data if it's empty and update the cache.
	// If it's not empty only merge the response.
	if endResp != nil {
		if isEmpty(endResp) {
			cachedRequest = cachedRequest.WithStartEndTime(cachedRequest.GetStartTs(), endRequest.GetEndTs())
			updateCache = true
		} else {
			if endResp.Status != loghttp.QueryStatusSuccess {
				return endResp, nil
			}
			result = mergeLokiResponse(endResp, result)
		}
	}

	// we need to update the cache since we fetched more either at the end or the start and it was empty.
	if updateCache {
		data, err := proto.Marshal(cachedRequest)
		if err != nil {
			// Updating the cache is best effort: the query itself succeeded.
			level.Warn(l.logger).Log("msg", "error marshalling request", "err", err)
			return result, nil
		}
		// cache the result
		if err := l.cache.Store(ctx, []string{cache.HashKey(cacheKey)}, [][]byte{data}); err != nil {
			level.Warn(l.logger).Log("msg", "error storing cache", "err", err)
		}
	}
	return result, nil
}

// isEmpty reports whether lokiRes is a successful response that carries no
// result entries. A response with any other status is never considered empty.
func isEmpty(lokiRes *LokiResponse) bool {
	if lokiRes.Status != loghttp.QueryStatusSuccess {
		return false
	}
	return len(lokiRes.Data.Result) == 0
}

// emptyResponse builds a successful stream response without any entries.
// Direction and limit are copied from lokiReq, and the version is derived
// from lokiReq's path.
func emptyResponse(lokiReq *LokiRequest) *LokiResponse {
	version := uint32(loghttp.GetVersion(lokiReq.Path))

	// An allocated, zero-length slice is used on purpose instead of nil.
	noStreams := LokiData{
		ResultType: loghttp.ResultTypeStream,
		Result:     []logproto.Stream{},
	}

	resp := &LokiResponse{
		Status:     loghttp.QueryStatusSuccess,
		Data:       noStreams,
		Direction:  lokiReq.Direction,
		Limit:      lokiReq.Limit,
		Version:    version,
		Statistics: stats.Result{},
	}
	return resp
}