From 6cb2c701e633e1ca996548b0660379671180edba Mon Sep 17 00:00:00 2001
From: Brendan O'Handley
Date: Tue, 17 Jun 2025 22:40:40 +0200
Subject: [PATCH] Tempo: Enable native histograms for Tempo service graph (#105989)

* add native histogram dropdown to tempo config for service graph
* add docs for configuring native histograms
* add config native histogram options to types
* add native histogram metric to graph transform
* add native histogram to service map query for links
* add native histogram duration metric for duration queries
* use native histogram for duration queries
* export for tests
* add tests for native histogram links, queries and dataframes
* update tempo devenv to use native histograms
* use union for histogramType
* run prettier
* remove comment
---
 devenv/datasources.yaml | 3 +
 .../docker/blocks/tempo/docker-compose.yaml | 1 +
 devenv/docker/blocks/tempo/tempo.yaml | 1 +
 .../configuration/ServiceGraphSettings.tsx | 71 ++++++++++++-
 .../datasource/tempo/datasource.test.ts | 100 +++++++++++++++++-
 .../plugins/datasource/tempo/datasource.ts | 99 ++++++++++++-----
 .../datasource/tempo/graphTransform.ts | 5 +
 public/app/plugins/datasource/tempo/types.ts | 1 +
 8 files changed, 251 insertions(+), 30 deletions(-)

diff --git a/devenv/datasources.yaml b/devenv/datasources.yaml index 24bb67f8cc7..8eaad77a2fb 100644 --- a/devenv/datasources.yaml +++ b/devenv/datasources.yaml @@ -341,6 +341,9 @@ datasources: queries: - name: 'Metrics' query: 'sum(rate({$$__tags}[5m]))' + serviceMap: + datasourceUid: 'gdev-prometheus' + histogramType: 'both' # 'classic' or 'native' or 'both' - name: gdev-pyroscope type: grafana-pyroscope-datasource diff --git a/devenv/docker/blocks/tempo/docker-compose.yaml b/devenv/docker/blocks/tempo/docker-compose.yaml index ad2539924af..bdac352a523 100644 --- a/devenv/docker/blocks/tempo/docker-compose.yaml +++ b/devenv/docker/blocks/tempo/docker-compose.yaml @@ -90,6 +90,7 @@ - --config.file=/etc/prometheus.yaml - 
--web.enable-remote-write-receiver - --enable-feature=exemplar-storage + - --enable-feature=native-histograms volumes: - ./docker/blocks/tempo/prometheus.yaml:/etc/prometheus.yaml links: diff --git a/devenv/docker/blocks/tempo/tempo.yaml b/devenv/docker/blocks/tempo/tempo.yaml index b1130a6e27c..1fb175d54be 100644 --- a/devenv/docker/blocks/tempo/tempo.yaml +++ b/devenv/docker/blocks/tempo/tempo.yaml @@ -60,6 +60,7 @@ storage: overrides: defaults: metrics_generator: + generate_native_histograms: both # 'classic' or 'native' or 'both' processors: [local-blocks, service-graphs, span-metrics] stream_over_http_enabled: true diff --git a/public/app/plugins/datasource/tempo/configuration/ServiceGraphSettings.tsx b/public/app/plugins/datasource/tempo/configuration/ServiceGraphSettings.tsx index 31016057bd4..24bbb101458 100644 --- a/public/app/plugins/datasource/tempo/configuration/ServiceGraphSettings.tsx +++ b/public/app/plugins/datasource/tempo/configuration/ServiceGraphSettings.tsx @@ -4,7 +4,7 @@ import { updateDatasourcePluginJsonDataOption, } from '@grafana/data'; import { DataSourcePicker } from '@grafana/runtime'; -import { Button, InlineField, InlineFieldRow, useStyles2 } from '@grafana/ui'; +import { Button, InlineField, InlineFieldRow, useStyles2, Combobox } from '@grafana/ui'; import { TempoJsonData } from '../types'; @@ -15,6 +15,59 @@ interface Props extends DataSourcePluginOptionsEditorProps {} export function ServiceGraphSettings({ options, onOptionsChange }: Props) { const styles = useStyles2(getStyles); + const histogramOptions = [ + { label: 'Classic', value: 'classic' }, + { label: 'Native', value: 'native' }, + { label: 'Both', value: 'both' }, + ]; + + const nativeHistogramDocs = ( + <> + Select which type of histograms are configured in the {metricsGeneratorDocsLink()}. If native histograms are + configured, you must also configure native histograms ingestion in {prometheusNativeHistogramsDocsLink()} or{' '} + {mimirNativeHistogramsDocsLink()}. 
+ + ); + + function metricsGeneratorDocsLink() { + return ( + + Tempo metrics generator + + ); + } + + function prometheusNativeHistogramsDocsLink() { + return ( + + Prometheus + + ); + } + + function mimirNativeHistogramsDocsLink() { + return ( + + Mimir + + ); + } + return (
@@ -52,6 +105,22 @@ export function ServiceGraphSettings({ options, onOptionsChange }: Props) { ) : null} + + + + updateDatasourcePluginJsonDataOption({ onOptionsChange, options }, 'serviceMap', { + ...options.jsonData.serviceMap, + histogramType: value.value, + }) + } + /> + +
); } diff --git a/public/app/plugins/datasource/tempo/datasource.test.ts b/public/app/plugins/datasource/tempo/datasource.test.ts index d2eb0ee69ca..532e5835153 100644 --- a/public/app/plugins/datasource/tempo/datasource.test.ts +++ b/public/app/plugins/datasource/tempo/datasource.test.ts @@ -42,6 +42,8 @@ import { makeTempoLink, getFieldConfig, getEscapedSpanNames, + makeHistogramLink, + makePromServiceMapRequest, } from './datasource'; import mockJson from './test/mockJsonResponse.json'; import mockServiceGraph from './test/mockServiceGraph.json'; @@ -812,7 +814,7 @@ describe('Tempo service graph view', () => { }, { url: '', - title: 'Request histogram', + title: 'Request classic histogram', internal: { query: { expr: 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))', @@ -909,7 +911,7 @@ describe('Tempo service graph view', () => { }, { url: '', - title: 'Request histogram', + title: 'Request classic histogram', internal: { query: { expr: 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.sourceName}",client_service_namespace="${__data.fields.sourceNamespace}",server="${__data.fields.targetName}",server_service_namespace="${__data.fields.targetNamespace}"}[$__rate_interval])) by (le, client, server, server_service_namespace, client_service_namespace))', @@ -1247,6 +1249,98 @@ describe('should provide functionality for ad-hoc filters', () => { }); }); +describe('histogram type functionality', () => { + it('should create correct histogram links for classic histogram type', () => { + const datasourceUid = 'prom'; + const source = 'client="${__data.fields.source}",'; + const target = 'server="${__data.fields.target}"'; + const serverSumBy = 'server'; + + const links = makeHistogramLink(datasourceUid, source, target, serverSumBy); + expect(links).toHaveLength(1); + 
expect(links[0].title).toBe('Request classic histogram'); + expect(links[0].internal.query.expr).toBe( + 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))' + ); + }); + + it('should create correct histogram links for native histogram type', () => { + const datasourceUid = 'prom'; + const source = 'client="${__data.fields.source}",'; + const target = 'server="${__data.fields.target}"'; + const serverSumBy = 'server'; + + const links = makeHistogramLink(datasourceUid, source, target, serverSumBy, 'native'); + expect(links).toHaveLength(1); + expect(links[0].title).toBe('Request native histogram'); + expect(links[0].internal.query.expr).toBe( + 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))' + ); + }); + + it('should create correct histogram links for both histogram types', () => { + const datasourceUid = 'prom'; + const source = 'client="${__data.fields.source}",'; + const target = 'server="${__data.fields.target}"'; + const serverSumBy = 'server'; + + const links = makeHistogramLink(datasourceUid, source, target, serverSumBy, 'both'); + expect(links).toHaveLength(2); + expect(links[0].title).toBe('Request classic histogram'); + expect(links[1].title).toBe('Request native histogram'); + expect(links[0].internal.query.expr).toBe( + 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))' + ); + expect(links[1].internal.query.expr).toBe( + 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))' + ); + }); + + 
it('should include histogram type in field config', () => { + const datasourceUid = 'prom'; + const tempoDatasourceUid = 'tempo'; + const targetField = '__data.fields.target'; + const tempoField = '__data.fields.target'; + const sourceField = '__data.fields.source'; + + const fieldConfig = getFieldConfig( + datasourceUid, + tempoDatasourceUid, + targetField, + tempoField, + sourceField, + undefined, + 'native' + ); + const histogramLink = fieldConfig.links.find((link) => link.title === 'Request native histogram'); + expect(histogramLink).toBeDefined(); + expect(histogramLink?.internal?.query).toBeDefined(); + if (histogramLink?.internal?.query && 'expr' in histogramLink.internal.query) { + expect(histogramLink.internal.query.expr).toBe( + 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))' + ); + } + }); + + it('should handle histogram type in service map query', () => { + const request = makePromServiceMapRequest( + { + targets: [{ serviceMapQuery: '{service="test"}' }], + range: getDefaultTimeRange(), + } as DataQueryRequest, + 'native' + ); + + const bucketMetric = request.targets.find((t: PromQuery) => t.expr.includes('_bucket')); + expect(bucketMetric).toBeUndefined(); + + const nativeMetric = request.targets.find((t: PromQuery) => + t.expr.includes('traces_service_graph_request_server_seconds') + ); + expect(nativeMetric).toBeDefined(); + }); +}); + const prometheusMock = (): DataSourceApi => { return { query: jest.fn(() => @@ -1456,7 +1550,7 @@ const serviceGraphLinks = [ }, { url: '', - title: 'Request histogram', + title: 'Request classic histogram', internal: { query: { expr: 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{server="${__data.fields.id}"}[$__rate_interval])) by (le, client, server))', diff --git a/public/app/plugins/datasource/tempo/datasource.ts 
b/public/app/plugins/datasource/tempo/datasource.ts index 99450e64265..968b59cd26b 100644 --- a/public/app/plugins/datasource/tempo/datasource.ts +++ b/public/app/plugins/datasource/tempo/datasource.ts @@ -47,10 +47,12 @@ import { errorRateMetric, failedMetric, histogramMetric, + nativeHistogramMetric, mapPromMetricsToServiceMap, rateMetric, serviceMapMetrics, totalsMetric, + nativeHistogramDurationMetric, } from './graphTransform'; import TempoLanguageProvider from './language_provider'; import { @@ -110,6 +112,7 @@ export class TempoDatasource extends DataSourceWithBackend - rateQuery(options, result, dsId).pipe( - concatMap((result) => errorAndDurationQuery(options, result, dsId, tempoDsUid)) + rateQuery(options, result, datasourceUid).pipe( + concatMap((result) => errorAndDurationQuery(options, result, datasourceUid, tempoDsUid, histogramType)) ) ) ) @@ -943,9 +946,10 @@ function queryPrometheus(request: DataQueryRequest, datasourceUid: st function serviceMapQuery( request: DataQueryRequest, datasourceUid: string, - tempoDatasourceUid: string + tempoDatasourceUid: string, + histogramType?: string ): Observable { - const serviceMapRequest = makePromServiceMapRequest(request); + const serviceMapRequest = makePromServiceMapRequest(request, histogramType); return queryPrometheus(serviceMapRequest, datasourceUid).pipe( // Just collect all the responses first before processing into node graph data @@ -982,7 +986,8 @@ function serviceMapQuery( '__data.fields.title', // targetField '__data.fields[0]', // tempoField undefined, // sourceField - { targetNamespace: '__data.fields.subtitle' } + { targetNamespace: '__data.fields.subtitle' }, + histogramType ); edges.fields[0].config = getFieldConfig( @@ -991,21 +996,27 @@ function serviceMapQuery( '__data.fields.targetName', // targetField '__data.fields.target', // tempoField '__data.fields.sourceName', // sourceField - { targetNamespace: '__data.fields.targetNamespace', sourceNamespace: '__data.fields.sourceNamespace' } + 
{ targetNamespace: '__data.fields.targetNamespace', sourceNamespace: '__data.fields.sourceNamespace' }, + histogramType ); } else { nodes.fields[0].config = getFieldConfig( datasourceUid, tempoDatasourceUid, '__data.fields.id', - '__data.fields[0]' + '__data.fields[0]', + undefined, + undefined, + histogramType ); edges.fields[0].config = getFieldConfig( datasourceUid, tempoDatasourceUid, '__data.fields.target', '__data.fields.target', - '__data.fields.source' + '__data.fields.source', + undefined, + histogramType ); } @@ -1021,9 +1032,10 @@ function serviceMapQuery( function rateQuery( request: DataQueryRequest, serviceMapResponse: ServiceMapQueryResponse, - datasourceUid: string + datasourceUid: string, + histogramType?: string ): Observable { - const serviceMapRequest = makePromServiceMapRequest(request); + const serviceMapRequest = makePromServiceMapRequest(request, histogramType); serviceMapRequest.targets = makeServiceGraphViewRequest([buildExpr(rateMetric, defaultTableFilter, request)]); return queryPrometheus(serviceMapRequest, datasourceUid).pipe( @@ -1048,7 +1060,8 @@ function errorAndDurationQuery( request: DataQueryRequest, rateResponse: ServiceMapQueryResponseWithRates, datasourceUid: string, - tempoDatasourceUid: string + tempoDatasourceUid: string, + histogramType?: string ) { let serviceGraphViewMetrics = []; let errorRateBySpanName = ''; @@ -1074,13 +1087,14 @@ function errorAndDurationQuery( errorRateBySpanName = buildExpr(errorRateMetric, 'span_name=~"' + spanNames.join('|') + '"', request); serviceGraphViewMetrics.push(errorRateBySpanName); spanNames.map((name: string) => { - const metric = buildExpr(durationMetric, 'span_name=~"' + name + '"', request); + const checkedDurationMetric = histogramType === 'native' ? 
nativeHistogramDurationMetric : durationMetric; + const metric = buildExpr(checkedDurationMetric, 'span_name=~"' + name + '"', request); durationsBySpanName.push(metric); serviceGraphViewMetrics.push(metric); }); } - const serviceMapRequest = makePromServiceMapRequest(request); + const serviceMapRequest = makePromServiceMapRequest(request, histogramType); serviceMapRequest.targets = makeServiceGraphViewRequest(serviceGraphViewMetrics); return queryPrometheus(serviceMapRequest, datasourceUid).pipe( @@ -1099,7 +1113,8 @@ function errorAndDurationQuery( errorRateBySpanName, durationsBySpanName, datasourceUid, - tempoDatasourceUid + tempoDatasourceUid, + histogramType ); if (serviceGraphView.fields.length === 0) { @@ -1146,7 +1161,8 @@ export function getFieldConfig( targetField: string, tempoField: string, sourceField?: string, - namespaceFields?: { targetNamespace: string; sourceNamespace?: string } + namespaceFields?: { targetNamespace: string; sourceNamespace?: string }, + histogramType?: string ) { let source = sourceField ? 
`client="\${${sourceField}}",` : ''; let target = `server="\${${targetField}}"`; @@ -1172,12 +1188,7 @@ export function getFieldConfig( datasourceUid, false ), - makePromLink( - 'Request histogram', - `histogram_quantile(0.9, sum(rate(${histogramMetric}{${source}${target}}[$__rate_interval])) by (le, client, ${serverSumBy}))`, - datasourceUid, - false - ), + ...makeHistogramLink(datasourceUid, source, target, serverSumBy, histogramType), makePromLink( 'Failed request rate', `sum by (client, ${serverSumBy})(rate(${failedMetric}{${source}${target}}[$__rate_interval]))`, @@ -1194,6 +1205,34 @@ export function getFieldConfig( }; } +export function makeHistogramLink( + datasourceUid: string, + source: string, + target: string, + serverSumBy: string, + histogramType?: string +) { + const createHistogramLink = (metric: string, title: string) => + makePromLink( + title, + `histogram_quantile(0.9, sum(rate(${metric}{${source}${target}}[$__rate_interval])) by (le, client, ${serverSumBy}))`, + datasourceUid, + false + ); + + switch (histogramType) { + case 'both': + return [ + createHistogramLink(histogramMetric, 'Request classic histogram'), + createHistogramLink(nativeHistogramMetric, 'Request native histogram'), + ]; + case 'native': + return [createHistogramLink(nativeHistogramMetric, 'Request native histogram')]; + default: + return [createHistogramLink(histogramMetric, 'Request classic histogram')]; + } +} + export function makeTempoLink( title: string, serviceNamespace: string | undefined, @@ -1300,11 +1339,17 @@ function makeTempoLinkServiceMap( }; } -function makePromServiceMapRequest(options: DataQueryRequest): DataQueryRequest { +export function makePromServiceMapRequest( + options: DataQueryRequest, + histogramType?: string +): DataQueryRequest { return { ...options, targets: serviceMapMetrics .map((metric) => { + if (histogramType === 'native' && metric.includes('_bucket')) { + metric = metric.replace('_bucket', ''); + } const { serviceMapQuery, 
serviceMapIncludeNamespace: serviceMapIncludeNamespace } = options.targets[0]; const extraSumByFields = serviceMapIncludeNamespace ? ', client_service_namespace, server_service_namespace' @@ -1345,7 +1390,8 @@ function getServiceGraphViewDataFrames( errorRateBySpanName: string, durationsBySpanName: string[], datasourceUid: string, - tempoDatasourceUid: string + tempoDatasourceUid: string, + histogramType?: string ) { let df: any = { fields: [] }; @@ -1470,6 +1516,7 @@ function getServiceGraphViewDataFrames( } }); if (Object.keys(durationObj).length > 0) { + const checkedDurationMetric = histogramType === 'native' ? nativeHistogramDurationMetric : durationMetric; df.fields.push({ ...duration[0].fields[1], name: 'Duration (p90)', @@ -1478,7 +1525,7 @@ function getServiceGraphViewDataFrames( links: [ makePromLink( 'Duration', - buildLinkExpr(buildExpr(durationMetric, 'span_name="${__data.fields[0]}"', request)), + buildLinkExpr(buildExpr(checkedDurationMetric, 'span_name="${__data.fields[0]}"', request)), datasourceUid, false ), diff --git a/public/app/plugins/datasource/tempo/graphTransform.ts b/public/app/plugins/datasource/tempo/graphTransform.ts index caf5f49ead7..c191d37124f 100644 --- a/public/app/plugins/datasource/tempo/graphTransform.ts +++ b/public/app/plugins/datasource/tempo/graphTransform.ts @@ -15,6 +15,7 @@ export const secondsMetric = 'traces_service_graph_request_server_seconds_sum'; export const totalsMetric = 'traces_service_graph_request_total'; export const failedMetric = 'traces_service_graph_request_failed_total'; export const histogramMetric = 'traces_service_graph_request_server_seconds_bucket'; +export const nativeHistogramMetric = 'traces_service_graph_request_server_seconds'; export const rateMetric = { expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)', @@ -30,6 +31,10 @@ export const durationMetric = { expr: 'histogram_quantile(.9, sum(rate(traces_spanmetrics_latency_bucket{}[$__range])) by (le))', params: [], }; 
+export const nativeHistogramDurationMetric = { + expr: 'histogram_quantile(.9, sum(rate(traces_spanmetrics_latency{}[$__range])) by (le))', + params: [], +}; export const defaultTableFilter = 'span_kind="SPAN_KIND_SERVER"'; export const serviceMapMetrics = [ diff --git a/public/app/plugins/datasource/tempo/types.ts b/public/app/plugins/datasource/tempo/types.ts index 02dbe712969..346195dcb4a 100644 --- a/public/app/plugins/datasource/tempo/types.ts +++ b/public/app/plugins/datasource/tempo/types.ts @@ -7,6 +7,7 @@ export interface TempoJsonData extends DataSourceJsonData { tracesToLogs?: TraceToLogsOptions; serviceMap?: { datasourceUid?: string; + histogramType?: 'classic' | 'native' | 'both'; }; search?: { hide?: boolean;