feat(datasource/Tempo): Instrument Tempo query latency measurements (#101285)

* feat: Instrument Tempo query latency measurements

Add comprehensive latency tracking and reporting for Tempo
queries using reportInteraction:
- Add latency measurements for TraceQL metrics queries
- Add latency measurements for TraceID queries
- Add latency measurements for TraceQL search queries
- Track both streaming and non-streaming query performance
- Include success/error states and relevant metadata in reports
- Measure latency in milliseconds for more precise tracking

This instrumentation will help monitor query performance and
identify potential bottlenecks in trace queries.

Signed-off-by: Alex Bikfalvi <alex.bikfalvi@grafana.com>

* fixup! feat: Instrument Tempo query latency measurements

Signed-off-by: Alex Bikfalvi <alex.bikfalvi@grafana.com>

* prettier fix

---------

Signed-off-by: Alex Bikfalvi <alex.bikfalvi@grafana.com>
Co-authored-by: André Pereira <adrapereira@gmail.com>
pull/101940/head
Alex Bikfalvi 9 months ago committed by GitHub
parent bbab62ce39
commit 2712686a36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 216
      public/app/plugins/datasource/tempo/datasource.ts

@ -1,6 +1,6 @@
import { groupBy } from 'lodash'; import { groupBy } from 'lodash';
import { EMPTY, forkJoin, from, lastValueFrom, merge, Observable, of } from 'rxjs'; import { EMPTY, forkJoin, from, lastValueFrom, merge, Observable, of } from 'rxjs';
import { catchError, concatMap, map, mergeMap, toArray } from 'rxjs/operators'; import { catchError, concatMap, finalize, map, mergeMap, toArray } from 'rxjs/operators';
import semver from 'semver'; import semver from 'semver';
import { import {
@ -97,6 +97,16 @@ interface ServiceMapQueryResponseWithRates {
edges: DataFrame; edges: DataFrame;
} }
/**
 * Payload describing the outcome of a single Tempo query, as passed to
 * `reportTempoQueryMetrics`.
 */
interface TempoQueryMetrics {
  // --- always present ---

  /** Whether the query was successful. */
  success: boolean;
  /** Query execution time in milliseconds. */
  latencyMs: number;

  // --- optional context ---

  /** Whether streaming was used. */
  streaming?: boolean;
  /** The query string that was executed. */
  query?: string;

  // --- optional failure details ---

  /** Error message if the query failed. */
  error?: string;
  /** HTTP status code if the query failed. */
  statusCode?: number;
  /** HTTP status text if the query failed. */
  statusText?: string;
}
export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJsonData> { export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJsonData> {
tracesToLogs?: TraceToLogsOptions; tracesToLogs?: TraceToLogsOptions;
serviceMap?: { serviceMap?: {
@ -363,8 +373,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
grafana_version: config.buildInfo.version, grafana_version: config.buildInfo.version,
hasQuery: queryValue !== '' ? true : false, hasQuery: queryValue !== '' ? true : false,
}); });
subQueries.push(this.handleTraceIdQuery(options, targets.traceql, queryValue));
subQueries.push(this.handleTraceIdQuery(options, targets.traceql));
} else { } else {
if (this.isTraceQlMetricsQuery(queryValue)) { if (this.isTraceQlMetricsQuery(queryValue)) {
reportInteraction('grafana_traces_traceql_metrics_queried', { reportInteraction('grafana_traces_traceql_metrics_queried', {
@ -377,7 +386,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
if (this.isStreamingMetricsEnabled()) { if (this.isStreamingMetricsEnabled()) {
subQueries.push(this.handleMetricsStreamingQuery(options, targets.traceql, queryValue)); subQueries.push(this.handleMetricsStreamingQuery(options, targets.traceql, queryValue));
} else { } else {
subQueries.push(this.handleTraceQlMetricsQuery(options, targets.traceql)); subQueries.push(this.handleTraceQlMetricsQuery(options, targets.traceql, queryValue));
} }
} else { } else {
reportInteraction('grafana_traces_traceql_queried', { reportInteraction('grafana_traces_traceql_queried', {
@ -424,6 +433,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
if (this.isStreamingSearchEnabled()) { if (this.isStreamingSearchEnabled()) {
subQueries.push(this.handleStreamingQuery(options, traceqlSearchTargets, queryFromFilters)); subQueries.push(this.handleStreamingQuery(options, traceqlSearchTargets, queryFromFilters));
} else { } else {
const startTime = performance.now();
subQueries.push( subQueries.push(
this._request('/api/search', { this._request('/api/search', {
q: queryFromFilters, q: queryFromFilters,
@ -433,6 +443,12 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
end: options.range.to.unix(), end: options.range.to.unix(),
}).pipe( }).pipe(
map((response) => { map((response) => {
reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
success: true,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: queryFromFilters ?? '',
});
return { return {
data: formatTraceQLResponse( data: formatTraceQLResponse(
response.data.traces, response.data.traces,
@ -442,6 +458,15 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
}; };
}), }),
catchError((err) => { catchError((err) => {
reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: queryFromFilters ?? '',
error: getErrorMessage(err.message),
statusCode: err.status,
statusText: err.statusText,
});
return of({ error: { message: getErrorMessage(err.data.message) }, data: [] }); return of({ error: { message: getErrorMessage(err.data.message) }, data: [] });
}) })
) )
@ -569,7 +594,11 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
* @param targets * @param targets
* @private * @private
*/ */
handleTraceIdQuery(options: DataQueryRequest<TempoQuery>, targets: TempoQuery[]): Observable<DataQueryResponse> { handleTraceIdQuery(
options: DataQueryRequest<TempoQuery>,
targets: TempoQuery[],
query: string
): Observable<DataQueryResponse> {
const validTargets = targets const validTargets = targets
.filter((t) => t.query) .filter((t) => t.query)
.map((t): TempoQuery => ({ ...t, query: t.query?.trim(), queryType: 'traceId' })); .map((t): TempoQuery => ({ ...t, query: t.query?.trim(), queryType: 'traceId' }));
@ -577,13 +606,41 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return EMPTY; return EMPTY;
} }
const startTime = performance.now();
const request = this.makeTraceIdRequest(options, validTargets); const request = this.makeTraceIdRequest(options, validTargets);
return super.query(request).pipe( return super.query(request).pipe(
map((response) => { map((response) => {
if (response.error) { if (response.error) {
reportTempoQueryMetrics('grafana_traces_traceID_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(response.error.message),
statusCode: response.error.status,
statusText: response.error.statusText,
});
return response; return response;
} }
reportTempoQueryMetrics('grafana_traces_traceID_response', options, {
success: true,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
});
return transformTrace(response, this.instanceSettings, this.nodeGraph?.enabled); return transformTrace(response, this.instanceSettings, this.nodeGraph?.enabled);
}),
catchError((error) => {
reportTempoQueryMetrics('grafana_traces_traceID_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(error.message),
statusCode: error.status,
statusText: error.statusText,
});
throw error;
}) })
); );
} }
@ -595,6 +652,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
}, },
queryValue: string queryValue: string
): Observable<DataQueryResponse> => { ): Observable<DataQueryResponse> => {
const startTime = performance.now();
if (this.isStreamingSearchEnabled()) { if (this.isStreamingSearchEnabled()) {
return this.handleStreamingQuery(options, targets.traceql, queryValue); return this.handleStreamingQuery(options, targets.traceql, queryValue);
} else { } else {
@ -606,11 +664,26 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
end: options.range.to.unix(), end: options.range.to.unix(),
}).pipe( }).pipe(
map((response) => { map((response) => {
reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
success: true,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: queryValue ?? '',
});
return { return {
data: formatTraceQLResponse(response.data.traces, this.instanceSettings, targets.traceql[0].tableType), data: formatTraceQLResponse(response.data.traces, this.instanceSettings, targets.traceql[0].tableType),
}; };
}), }),
catchError((err) => { catchError((err) => {
reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: queryValue ?? '',
error: getErrorMessage(err.message),
statusCode: err.status,
statusText: err.statusText,
});
return of({ error: { message: getErrorMessage(err.data.message) }, data: [] }); return of({ error: { message: getErrorMessage(err.data.message) }, data: [] });
}) })
); );
@ -619,7 +692,8 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
handleTraceQlMetricsQuery( handleTraceQlMetricsQuery(
options: DataQueryRequest<TempoQuery>, options: DataQueryRequest<TempoQuery>,
targets: TempoQuery[] targets: TempoQuery[],
query: string
): Observable<DataQueryResponse> { ): Observable<DataQueryResponse> {
const validTargets = targets const validTargets = targets
.filter((t) => t.query) .filter((t) => t.query)
@ -630,12 +704,28 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return EMPTY; return EMPTY;
} }
const startTime = performance.now();
const request = { ...options, targets: validTargets }; const request = { ...options, targets: validTargets };
return super.query(request).pipe( return super.query(request).pipe(
map((response) => { map((response) => {
reportTempoQueryMetrics('grafana_traces_traceql_metrics_response', options, {
success: true,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
});
return enhanceTraceQlMetricsResponse(response, this.instanceSettings); return enhanceTraceQlMetricsResponse(response, this.instanceSettings);
}), }),
catchError((err) => { catchError((err) => {
reportTempoQueryMetrics('grafana_traces_traceql_metrics_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(err.data.message),
statusCode: err.status,
statusText: err.statusText,
});
return of({ error: { message: getErrorMessage(err.data.message) }, data: [] }); return of({ error: { message: getErrorMessage(err.data.message) }, data: [] });
}) })
); );
@ -659,6 +749,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
}); });
} }
const startTime = performance.now();
const groupBy = target.groupBy ? this.formatGroupBy(target.groupBy) : ''; const groupBy = target.groupBy ? this.formatGroupBy(target.groupBy) : '';
return this._request('/api/metrics/summary', { return this._request('/api/metrics/summary', {
q: query, q: query,
@ -668,6 +759,13 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
}).pipe( }).pipe(
map((response) => { map((response) => {
if (!response.data.summaries) { if (!response.data.summaries) {
reportTempoQueryMetrics('grafana_traces_metrics_summary_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(`No summary data for '${groupBy}'.`),
});
return { return {
error: { error: {
message: getErrorMessage(`No summary data for '${groupBy}'.`), message: getErrorMessage(`No summary data for '${groupBy}'.`),
@ -678,6 +776,13 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
// Check if any of the results have series data as older versions of Tempo placed the series data in a different structure // Check if any of the results have series data as older versions of Tempo placed the series data in a different structure
const hasSeries = response.data.summaries.some((summary: MetricsSummary) => summary.series.length > 0); const hasSeries = response.data.summaries.some((summary: MetricsSummary) => summary.series.length > 0);
if (!hasSeries) { if (!hasSeries) {
reportTempoQueryMetrics('grafana_traces_metrics_summary_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(`No series data. Ensure you are using an up to date version of Tempo`),
});
return { return {
error: { error: {
message: getErrorMessage(`No series data. Ensure you are using an up to date version of Tempo`), message: getErrorMessage(`No series data. Ensure you are using an up to date version of Tempo`),
@ -685,11 +790,26 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
data: emptyResponse, data: emptyResponse,
}; };
} }
reportTempoQueryMetrics('grafana_traces_metrics_summary_response', options, {
success: true,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
});
return { return {
data: createTableFrameFromMetricsSummaryQuery(response.data.summaries, query, this.instanceSettings), data: createTableFrameFromMetricsSummaryQuery(response.data.summaries, query, this.instanceSettings),
}; };
}), }),
catchError((error) => { catchError((error) => {
reportTempoQueryMetrics('grafana_traces_metrics_summary_response', options, {
success: false,
streaming: false,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(error.data.message),
statusCode: error.status,
statusText: error.statusText,
});
return of({ return of({
error: { message: getErrorMessage(error.data.message) }, error: { message: getErrorMessage(error.data.message) },
data: emptyResponse, data: emptyResponse,
@ -709,6 +829,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return EMPTY; return EMPTY;
} }
const startTime = performance.now();
return merge( return merge(
...targets.map((target) => ...targets.map((target) =>
doTempoSearchStreaming( doTempoSearchStreaming(
@ -718,6 +839,28 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
this.instanceSettings this.instanceSettings
) )
) )
).pipe(
catchError((error) => {
reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
success: false,
streaming: true,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(error.data.message),
statusCode: error.status,
statusText: error.statusText,
});
// Re-throw the error to maintain the error chain
throw error;
}),
finalize(() => {
reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
success: true,
streaming: true,
query: query ?? '',
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
});
})
); );
} }
@ -732,6 +875,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
return EMPTY; return EMPTY;
} }
const startTime = performance.now();
return merge( return merge(
...targets.map((target) => ...targets.map((target) =>
doTempoMetricsStreaming( doTempoMetricsStreaming(
@ -740,6 +884,28 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
options options
) )
) )
).pipe(
catchError((error) => {
reportTempoQueryMetrics('grafana_traces_traceql_metrics_response', options, {
success: false,
streaming: true,
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
query: query ?? '',
error: getErrorMessage(error.data.message),
statusCode: error.status,
statusText: error.statusText,
});
// Re-throw the error to maintain the error chain
throw error;
}),
finalize(() => {
reportTempoQueryMetrics('grafana_traces_traceql_metrics_response', options, {
success: true,
streaming: true,
query: query ?? '',
latencyMs: Math.round(performance.now() - startTime), // rounded to nearest millisecond
});
})
); );
} }
@ -1442,6 +1608,44 @@ function getServiceGraphViewDataFrames(
return df; return df;
} }
/**
 * Reports metrics for a Tempo query via `reportInteraction`.
 *
 * Every report carries `datasourceType: 'tempo'`, the calling app (an empty
 * string when `options.app` is unset) and the Grafana build version, followed
 * by all fields of `metrics`.
 *
 * @param interactionName - Name of the interaction to report, e.g.
 *   'grafana_traces_traceql_response'. Required; there is no default value.
 * @param options - The data query request options; only `options.app` is read here
 * @param metrics - Object containing metrics to report:
 *   - success: Whether the query was successful
 *   - streaming: (optional) Whether streaming was used
 *   - latencyMs: Query execution time in milliseconds
 *   - query: (optional) The query string that was executed
 *   - error: (optional) Error message if query failed
 *   - statusCode: (optional) HTTP status code if query failed
 *   - statusText: (optional) HTTP status text if query failed
 *
 * @example
 * ```typescript
 * reportTempoQueryMetrics('grafana_traces_traceql_response', options, {
 *   success: true,
 *   streaming: true,
 *   latencyMs: Math.round(performance.now() - startTime),
 *   query: 'my query'
 * });
 * ```
 */
function reportTempoQueryMetrics(
  interactionName: string,
  options: DataQueryRequest<TempoQuery>,
  metrics: TempoQueryMetrics
): void {
  reportInteraction(interactionName, {
    datasourceType: 'tempo',
    app: options.app ?? '',
    grafana_version: config.buildInfo.version,
    // Spread last: the per-query metrics become top-level properties of the report.
    ...metrics,
  });
}
export function buildExpr( export function buildExpr(
metric: { expr: string; params: string[]; topk?: number }, metric: { expr: string; params: string[]; topk?: number },
extraParams: string, extraParams: string,

Loading…
Cancel
Save