@ -25,14 +25,18 @@ const backendType = "prometheus"
const (
// Fixed error messages
MimirDuplicateTimestampError = "err-mimir-sample-duplicate-timestamp"
MimirInvalidLabelError = "err-mimir-label-invalid"
// Best effort error messages
PrometheusDuplicateTimestampError = "duplicate sample for timestamp"
)
var (
ErrWriteFailure = errors . New ( "failed to write time series" )
ErrBadFrame = errors . New ( "failed to read dataframe" )
// Unexpected, 500-like write errors.
ErrUnexpectedWriteFailure = errors . New ( "failed to write time series" )
// Expected, user-level write errors like trying to write an invalid series.
ErrRejectedWrite = errors . New ( "series was rejected" )
ErrBadFrame = errors . New ( "failed to read dataframe" )
)
var DuplicateTimestampErrors = [ ... ] string {
@ -213,10 +217,12 @@ func (w PrometheusWriter) Write(ctx context.Context, name string, t time.Time, f
lvs = append ( lvs , fmt . Sprint ( res . StatusCode ) )
w . metrics . WritesTotal . WithLabelValues ( lvs ... ) . Inc ( )
if err , ignored := checkWriteError ( writeErr ) ; err != nil {
return errors . Join ( ErrWriteFailure , err )
} else if ignored {
l . Debug ( "Ignored write error" , "error" , err , "status_code" , res . StatusCode )
if writeErr != nil {
if err , ignored := checkWriteError ( writeErr ) ; err != nil {
return err
} else if ignored {
l . Debug ( "Ignored write error" , "error" , err , "status_code" , res . StatusCode )
}
}
return nil
@ -242,7 +248,12 @@ func checkWriteError(writeErr promremote.WriteError) (err error, ignored bool) {
return nil , false
}
// special case for 400 status code
// All 500-range statuses are automatically unexpected and not the fault of the data.
if writeErr . StatusCode ( ) / 100 == 5 {
return errors . Join ( ErrUnexpectedWriteFailure , writeErr ) , false
}
// Special case for 400 status code. 400s may be ignorable in the event of HA writers, or the fault of the written data.
if writeErr . StatusCode ( ) == 400 {
msg := writeErr . Error ( )
// HA may potentially write different values for the same timestamp, so we ignore this error
@ -252,7 +263,16 @@ func checkWriteError(writeErr promremote.WriteError) (err error, ignored bool) {
return nil , true
}
}
if strings . Contains ( msg , MimirInvalidLabelError ) {
return errors . Join ( ErrRejectedWrite , writeErr ) , false
}
// For now, all 400s that are not previously known are considered unexpected.
// TODO: Consider blanket-converting all 400s to be known errors. This should only be done once we are confident this is not a problem with this client.
return errors . Join ( ErrUnexpectedWriteFailure , writeErr ) , false
}
return writeErr , false
// All other errors which do not fit into the above categories are also unexpected.
return errors . Join ( ErrUnexpectedWriteFailure , writeErr ) , false
}