Tempo: Enable native histograms for Tempo service graph (#105989)

* add native histogram dropdown to tempo config for service graph

* add docs for configuring native histograms

* add config native histogram options to types

* add native histogram metric to graph transform

* add native histogram to service map query for links

* add native histogram duration metric for duration queries

* use native histogram for duration queries

* export for tests

* add tests for native histogram links, queries and dataframes

* update tempo devenv to use native histograms

* use union for histogramType

* run prettier

* remove comment
pull/106755/head
Brendan O'Handley 1 month ago committed by GitHub
parent 200f3a5f51
commit 6cb2c701e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      devenv/datasources.yaml
  2. 1
      devenv/docker/blocks/tempo/docker-compose.yaml
  3. 1
      devenv/docker/blocks/tempo/tempo.yaml
  4. 71
      public/app/plugins/datasource/tempo/configuration/ServiceGraphSettings.tsx
  5. 100
      public/app/plugins/datasource/tempo/datasource.test.ts
  6. 99
      public/app/plugins/datasource/tempo/datasource.ts
  7. 5
      public/app/plugins/datasource/tempo/graphTransform.ts
  8. 1
      public/app/plugins/datasource/tempo/types.ts

@ -341,6 +341,9 @@ datasources:
queries:
- name: 'Metrics'
query: 'sum(rate({$$__tags}[5m]))'
serviceMap:
datasourceUid: 'gdev-prometheus'
histogramType: 'both' # 'classic' or 'native' or 'both'
- name: gdev-pyroscope
type: grafana-pyroscope-datasource

@ -90,6 +90,7 @@
- --config.file=/etc/prometheus.yaml
- --web.enable-remote-write-receiver
- --enable-feature=exemplar-storage
- --enable-feature=native-histograms
volumes:
- ./docker/blocks/tempo/prometheus.yaml:/etc/prometheus.yaml
links:

@ -60,6 +60,7 @@ storage:
overrides:
defaults:
metrics_generator:
generate_native_histograms: both # 'classic' or 'native' or 'both'
processors: [local-blocks, service-graphs, span-metrics]
stream_over_http_enabled: true

@ -4,7 +4,7 @@ import {
updateDatasourcePluginJsonDataOption,
} from '@grafana/data';
import { DataSourcePicker } from '@grafana/runtime';
import { Button, InlineField, InlineFieldRow, useStyles2 } from '@grafana/ui';
import { Button, InlineField, InlineFieldRow, useStyles2, Combobox } from '@grafana/ui';
import { TempoJsonData } from '../types';
@ -15,6 +15,59 @@ interface Props extends DataSourcePluginOptionsEditorProps<TempoJsonData> {}
export function ServiceGraphSettings({ options, onOptionsChange }: Props) {
const styles = useStyles2(getStyles);
const histogramOptions = [
{ label: 'Classic', value: 'classic' },
{ label: 'Native', value: 'native' },
{ label: 'Both', value: 'both' },
];
const nativeHistogramDocs = (
<>
Select which type of histograms are configured in the {metricsGeneratorDocsLink()}. If native histograms are
configured, you must also configure native histograms ingestion in {prometheusNativeHistogramsDocsLink()} or{' '}
{mimirNativeHistogramsDocsLink()}.
</>
);
/**
 * Renders an underlined anchor that opens `href` in a new tab.
 *
 * Kept as a function declaration (not a `const` arrow) because the link
 * helpers below are invoked while building the tooltip content, which appears
 * before these definitions and therefore relies on hoisting.
 */
function externalDocsLink(href: string, label: string) {
  return (
    <a style={{ textDecoration: 'underline' }} href={href} target="_blank" rel="noopener noreferrer">
      {label}
    </a>
  );
}

/** Link to the Tempo metrics generator documentation. */
function metricsGeneratorDocsLink() {
  return externalDocsLink(
    'https://grafana.com/docs/tempo/latest/setup-and-configuration/metrics-generator/',
    'Tempo metrics generator'
  );
}

/** Link to the Prometheus native histograms specification. */
function prometheusNativeHistogramsDocsLink() {
  return externalDocsLink('https://prometheus.io/docs/specs/native_histograms/#native-histograms', 'Prometheus');
}

/** Link to the Mimir guide for configuring native histogram ingestion. */
function mimirNativeHistogramsDocsLink() {
  return externalDocsLink(
    'https://grafana.com/docs/mimir/latest/configure/configure-native-histograms-ingestion/#configure-native-histograms-globally',
    'Mimir'
  );
}
return (
<div className={styles.container}>
<InlineFieldRow className={styles.row}>
@ -52,6 +105,22 @@ export function ServiceGraphSettings({ options, onOptionsChange }: Props) {
</Button>
) : null}
</InlineFieldRow>
<InlineFieldRow className={styles.row}>
<InlineField tooltip={nativeHistogramDocs} label="Histogram type" labelWidth={26} interactive={true}>
<Combobox
id="histogram-type-select"
value={options.jsonData.serviceMap?.histogramType || 'classic'}
width={40}
options={histogramOptions}
onChange={(value) =>
updateDatasourcePluginJsonDataOption({ onOptionsChange, options }, 'serviceMap', {
...options.jsonData.serviceMap,
histogramType: value.value,
})
}
/>
</InlineField>
</InlineFieldRow>
</div>
);
}

@ -42,6 +42,8 @@ import {
makeTempoLink,
getFieldConfig,
getEscapedSpanNames,
makeHistogramLink,
makePromServiceMapRequest,
} from './datasource';
import mockJson from './test/mockJsonResponse.json';
import mockServiceGraph from './test/mockServiceGraph.json';
@ -812,7 +814,7 @@ describe('Tempo service graph view', () => {
},
{
url: '',
title: 'Request histogram',
title: 'Request classic histogram',
internal: {
query: {
expr: 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))',
@ -909,7 +911,7 @@ describe('Tempo service graph view', () => {
},
{
url: '',
title: 'Request histogram',
title: 'Request classic histogram',
internal: {
query: {
expr: 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.sourceName}",client_service_namespace="${__data.fields.sourceNamespace}",server="${__data.fields.targetName}",server_service_namespace="${__data.fields.targetNamespace}"}[$__rate_interval])) by (le, client, server, server_service_namespace, client_service_namespace))',
@ -1247,6 +1249,98 @@ describe('should provide functionality for ad-hoc filters', () => {
});
});
describe('histogram type functionality', () => {
  // Inputs shared by every makeHistogramLink case below.
  const datasourceUid = 'prom';
  const source = 'client="${__data.fields.source}",';
  const target = 'server="${__data.fields.target}"';
  const serverSumBy = 'server';

  // Expected link expressions; they differ only in the `_bucket` suffix of the metric name.
  const classicExpr =
    'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))';
  const nativeExpr =
    'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds{client="${__data.fields.source}",server="${__data.fields.target}"}[$__rate_interval])) by (le, client, server))';

  it('should create correct histogram links for classic histogram type', () => {
    // Omitting the histogram type must fall back to the classic link.
    const links = makeHistogramLink(datasourceUid, source, target, serverSumBy);

    expect(links).toHaveLength(1);
    expect(links[0].title).toBe('Request classic histogram');
    expect(links[0].internal.query.expr).toBe(classicExpr);
  });

  it('should create correct histogram links for native histogram type', () => {
    const links = makeHistogramLink(datasourceUid, source, target, serverSumBy, 'native');

    expect(links).toHaveLength(1);
    expect(links[0].title).toBe('Request native histogram');
    expect(links[0].internal.query.expr).toBe(nativeExpr);
  });

  it('should create correct histogram links for both histogram types', () => {
    const links = makeHistogramLink(datasourceUid, source, target, serverSumBy, 'both');

    // Order matters: the classic link is listed before the native one.
    expect(links).toHaveLength(2);
    expect(links[0].title).toBe('Request classic histogram');
    expect(links[1].title).toBe('Request native histogram');
    expect(links[0].internal.query.expr).toBe(classicExpr);
    expect(links[1].internal.query.expr).toBe(nativeExpr);
  });

  it('should include histogram type in field config', () => {
    const tempoDatasourceUid = 'tempo';
    const targetField = '__data.fields.target';
    const tempoField = '__data.fields.target';
    const sourceField = '__data.fields.source';

    const fieldConfig = getFieldConfig(
      datasourceUid,
      tempoDatasourceUid,
      targetField,
      tempoField,
      sourceField,
      undefined,
      'native'
    );

    const histogramLink = fieldConfig.links.find((link) => link.title === 'Request native histogram');
    expect(histogramLink).toBeDefined();
    expect(histogramLink?.internal?.query).toBeDefined();
    // Asserted unconditionally: guarding this with `if ('expr' in query)` would let the
    // test pass without ever checking the expression when `expr` is missing.
    expect(histogramLink?.internal?.query).toHaveProperty('expr', nativeExpr);
  });

  it('should handle histogram type in service map query', () => {
    const request = makePromServiceMapRequest(
      {
        targets: [{ serviceMapQuery: '{service="test"}' }],
        range: getDefaultTimeRange(),
      } as DataQueryRequest<TempoQuery>,
      'native'
    );

    const bucketMetric = request.targets.find((t: PromQuery) => t.expr.includes('_bucket'));
    expect(bucketMetric).toBeUndefined();

    // The negative lookahead rejects suffixed series such as `..._seconds_sum`; a plain
    // substring check would accept those and so never prove the native series is queried.
    const nativeMetric = request.targets.find((t: PromQuery) =>
      /traces_service_graph_request_server_seconds(?!_)/.test(t.expr)
    );
    expect(nativeMetric).toBeDefined();
  });
});
const prometheusMock = (): DataSourceApi => {
return {
query: jest.fn(() =>
@ -1456,7 +1550,7 @@ const serviceGraphLinks = [
},
{
url: '',
title: 'Request histogram',
title: 'Request classic histogram',
internal: {
query: {
expr: 'histogram_quantile(0.9, sum(rate(traces_service_graph_request_server_seconds_bucket{server="${__data.fields.id}"}[$__rate_interval])) by (le, client, server))',

@ -47,10 +47,12 @@ import {
errorRateMetric,
failedMetric,
histogramMetric,
nativeHistogramMetric,
mapPromMetricsToServiceMap,
rateMetric,
serviceMapMetrics,
totalsMetric,
nativeHistogramDurationMetric,
} from './graphTransform';
import TempoLanguageProvider from './language_provider';
import {
@ -110,6 +112,7 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
tracesToLogs?: TraceToLogsOptions;
serviceMap?: {
datasourceUid?: string;
histogramType?: 'classic' | 'native' | 'both';
};
search?: {
hide?: boolean;
@ -512,13 +515,13 @@ export class TempoDatasource extends DataSourceWithBackend<TempoQuery, TempoJson
hasServiceMapQuery: targets.serviceMap[0].serviceMapQuery ? true : false,
});
const dsId = this.serviceMap.datasourceUid;
const { datasourceUid, histogramType } = this.serviceMap;
const tempoDsUid = this.uid;
subQueries.push(
serviceMapQuery(options, dsId, tempoDsUid).pipe(
serviceMapQuery(options, datasourceUid, tempoDsUid, histogramType).pipe(
concatMap((result) =>
rateQuery(options, result, dsId).pipe(
concatMap((result) => errorAndDurationQuery(options, result, dsId, tempoDsUid))
rateQuery(options, result, datasourceUid).pipe(
concatMap((result) => errorAndDurationQuery(options, result, datasourceUid, tempoDsUid, histogramType))
)
)
)
@ -943,9 +946,10 @@ function queryPrometheus(request: DataQueryRequest<PromQuery>, datasourceUid: st
function serviceMapQuery(
request: DataQueryRequest<TempoQuery>,
datasourceUid: string,
tempoDatasourceUid: string
tempoDatasourceUid: string,
histogramType?: string
): Observable<ServiceMapQueryResponse> {
const serviceMapRequest = makePromServiceMapRequest(request);
const serviceMapRequest = makePromServiceMapRequest(request, histogramType);
return queryPrometheus(serviceMapRequest, datasourceUid).pipe(
// Just collect all the responses first before processing into node graph data
@ -982,7 +986,8 @@ function serviceMapQuery(
'__data.fields.title', // targetField
'__data.fields[0]', // tempoField
undefined, // sourceField
{ targetNamespace: '__data.fields.subtitle' }
{ targetNamespace: '__data.fields.subtitle' },
histogramType
);
edges.fields[0].config = getFieldConfig(
@ -991,21 +996,27 @@ function serviceMapQuery(
'__data.fields.targetName', // targetField
'__data.fields.target', // tempoField
'__data.fields.sourceName', // sourceField
{ targetNamespace: '__data.fields.targetNamespace', sourceNamespace: '__data.fields.sourceNamespace' }
{ targetNamespace: '__data.fields.targetNamespace', sourceNamespace: '__data.fields.sourceNamespace' },
histogramType
);
} else {
nodes.fields[0].config = getFieldConfig(
datasourceUid,
tempoDatasourceUid,
'__data.fields.id',
'__data.fields[0]'
'__data.fields[0]',
undefined,
undefined,
histogramType
);
edges.fields[0].config = getFieldConfig(
datasourceUid,
tempoDatasourceUid,
'__data.fields.target',
'__data.fields.target',
'__data.fields.source'
'__data.fields.source',
undefined,
histogramType
);
}
@ -1021,9 +1032,10 @@ function serviceMapQuery(
function rateQuery(
request: DataQueryRequest<TempoQuery>,
serviceMapResponse: ServiceMapQueryResponse,
datasourceUid: string
datasourceUid: string,
histogramType?: string
): Observable<ServiceMapQueryResponseWithRates> {
const serviceMapRequest = makePromServiceMapRequest(request);
const serviceMapRequest = makePromServiceMapRequest(request, histogramType);
serviceMapRequest.targets = makeServiceGraphViewRequest([buildExpr(rateMetric, defaultTableFilter, request)]);
return queryPrometheus(serviceMapRequest, datasourceUid).pipe(
@ -1048,7 +1060,8 @@ function errorAndDurationQuery(
request: DataQueryRequest<TempoQuery>,
rateResponse: ServiceMapQueryResponseWithRates,
datasourceUid: string,
tempoDatasourceUid: string
tempoDatasourceUid: string,
histogramType?: string
) {
let serviceGraphViewMetrics = [];
let errorRateBySpanName = '';
@ -1074,13 +1087,14 @@ function errorAndDurationQuery(
errorRateBySpanName = buildExpr(errorRateMetric, 'span_name=~"' + spanNames.join('|') + '"', request);
serviceGraphViewMetrics.push(errorRateBySpanName);
spanNames.map((name: string) => {
const metric = buildExpr(durationMetric, 'span_name=~"' + name + '"', request);
const checkedDurationMetric = histogramType === 'native' ? nativeHistogramDurationMetric : durationMetric;
const metric = buildExpr(checkedDurationMetric, 'span_name=~"' + name + '"', request);
durationsBySpanName.push(metric);
serviceGraphViewMetrics.push(metric);
});
}
const serviceMapRequest = makePromServiceMapRequest(request);
const serviceMapRequest = makePromServiceMapRequest(request, histogramType);
serviceMapRequest.targets = makeServiceGraphViewRequest(serviceGraphViewMetrics);
return queryPrometheus(serviceMapRequest, datasourceUid).pipe(
@ -1099,7 +1113,8 @@ function errorAndDurationQuery(
errorRateBySpanName,
durationsBySpanName,
datasourceUid,
tempoDatasourceUid
tempoDatasourceUid,
histogramType
);
if (serviceGraphView.fields.length === 0) {
@ -1146,7 +1161,8 @@ export function getFieldConfig(
targetField: string,
tempoField: string,
sourceField?: string,
namespaceFields?: { targetNamespace: string; sourceNamespace?: string }
namespaceFields?: { targetNamespace: string; sourceNamespace?: string },
histogramType?: string
) {
let source = sourceField ? `client="\${${sourceField}}",` : '';
let target = `server="\${${targetField}}"`;
@ -1172,12 +1188,7 @@ export function getFieldConfig(
datasourceUid,
false
),
makePromLink(
'Request histogram',
`histogram_quantile(0.9, sum(rate(${histogramMetric}{${source}${target}}[$__rate_interval])) by (le, client, ${serverSumBy}))`,
datasourceUid,
false
),
...makeHistogramLink(datasourceUid, source, target, serverSumBy, histogramType),
makePromLink(
'Failed request rate',
`sum by (client, ${serverSumBy})(rate(${failedMetric}{${source}${target}}[$__rate_interval]))`,
@ -1194,6 +1205,34 @@ export function getFieldConfig(
};
}
/**
 * Builds the "Request ... histogram" Prometheus links for a service graph node or edge.
 *
 * Each link queries the 0.9 quantile of the request-duration histogram, filtered by
 * `source` + `target` and summed by `le`, `client` and `serverSumBy`.
 *
 * @param datasourceUid - UID of the Prometheus datasource the links point to.
 * @param source - Client label matcher, already formatted (may be an empty string).
 * @param target - Server label matcher, already formatted.
 * @param serverSumBy - Label list appended to the `by (le, client, ...)` clause.
 * @param histogramType - `'both'` yields the classic link followed by the native link,
 *   `'native'` yields only the native link, and any other value (including
 *   `undefined`) yields only the classic link.
 * @returns The links, in the order described above.
 */
export function makeHistogramLink(
  datasourceUid: string,
  source: string,
  target: string,
  serverSumBy: string,
  histogramType?: string
) {
  const linkFor = (title: string, metricName: string) => {
    const expr = `histogram_quantile(0.9, sum(rate(${metricName}{${source}${target}}[$__rate_interval])) by (le, client, ${serverSumBy}))`;
    return makePromLink(title, expr, datasourceUid, false);
  };
  const classicLink = () => linkFor('Request classic histogram', histogramMetric);
  const nativeLink = () => linkFor('Request native histogram', nativeHistogramMetric);

  if (histogramType === 'both') {
    return [classicLink(), nativeLink()];
  }
  if (histogramType === 'native') {
    return [nativeLink()];
  }
  // Unset or unrecognised values fall back to the classic histogram.
  return [classicLink()];
}
export function makeTempoLink(
title: string,
serviceNamespace: string | undefined,
@ -1300,11 +1339,17 @@ function makeTempoLinkServiceMap(
};
}
function makePromServiceMapRequest(options: DataQueryRequest<TempoQuery>): DataQueryRequest<PromQuery> {
export function makePromServiceMapRequest(
options: DataQueryRequest<TempoQuery>,
histogramType?: string
): DataQueryRequest<PromQuery> {
return {
...options,
targets: serviceMapMetrics
.map<PromQuery[]>((metric) => {
if (histogramType === 'native' && metric.includes('_bucket')) {
metric = metric.replace('_bucket', '');
}
const { serviceMapQuery, serviceMapIncludeNamespace: serviceMapIncludeNamespace } = options.targets[0];
const extraSumByFields = serviceMapIncludeNamespace
? ', client_service_namespace, server_service_namespace'
@ -1345,7 +1390,8 @@ function getServiceGraphViewDataFrames(
errorRateBySpanName: string,
durationsBySpanName: string[],
datasourceUid: string,
tempoDatasourceUid: string
tempoDatasourceUid: string,
histogramType?: string
) {
let df: any = { fields: [] };
@ -1470,6 +1516,7 @@ function getServiceGraphViewDataFrames(
}
});
if (Object.keys(durationObj).length > 0) {
const checkedDurationMetric = histogramType === 'native' ? nativeHistogramDurationMetric : durationMetric;
df.fields.push({
...duration[0].fields[1],
name: 'Duration (p90)',
@ -1478,7 +1525,7 @@ function getServiceGraphViewDataFrames(
links: [
makePromLink(
'Duration',
buildLinkExpr(buildExpr(durationMetric, 'span_name="${__data.fields[0]}"', request)),
buildLinkExpr(buildExpr(checkedDurationMetric, 'span_name="${__data.fields[0]}"', request)),
datasourceUid,
false
),

@ -15,6 +15,7 @@ export const secondsMetric = 'traces_service_graph_request_server_seconds_sum';
export const totalsMetric = 'traces_service_graph_request_total';
export const failedMetric = 'traces_service_graph_request_failed_total';
export const histogramMetric = 'traces_service_graph_request_server_seconds_bucket';
// Native-histogram counterpart of `histogramMetric`: the same series name without the `_bucket` suffix.
export const nativeHistogramMetric = 'traces_service_graph_request_server_seconds';
export const rateMetric = {
expr: 'sum(rate(traces_spanmetrics_calls_total{}[$__range])) by (span_name)',
@ -30,6 +31,10 @@ export const durationMetric = {
expr: 'histogram_quantile(.9, sum(rate(traces_spanmetrics_latency_bucket{}[$__range])) by (le))',
params: [],
};
/**
 * 0.9-quantile latency query over `traces_spanmetrics_latency`, i.e. the
 * `durationMetric` expression without the `_bucket` suffix on the series name.
 * The Tempo datasource uses this in place of `durationMetric` when the
 * configured histogram type is 'native'.
 * NOTE(review): the empty `{}` selector is presumably filled in by `buildExpr` — confirm.
 */
export const nativeHistogramDurationMetric = {
expr: 'histogram_quantile(.9, sum(rate(traces_spanmetrics_latency{}[$__range])) by (le))',
params: [],
};
export const defaultTableFilter = 'span_kind="SPAN_KIND_SERVER"';
export const serviceMapMetrics = [

@ -7,6 +7,7 @@ export interface TempoJsonData extends DataSourceJsonData {
tracesToLogs?: TraceToLogsOptions;
serviceMap?: {
datasourceUid?: string;
histogramType?: 'classic' | 'native' | 'both';
};
search?: {
hide?: boolean;

Loading…
Cancel
Save