package io.prometheus.metrics.core.exemplars;

import io.prometheus.metrics.tracer.common.SpanContext;
import io.prometheus.metrics.model.snapshots.Exemplar;
import io.prometheus.metrics.model.snapshots.Exemplars;
import io.prometheus.metrics.model.snapshots.Labels;
import io.prometheus.metrics.core.util.Scheduler;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.LongSupplier;

/**
 * The ExemplarSampler selects Spans as exemplars.
 * <p>
 * There are two types of Exemplars: Regular exemplars are sampled implicitly if a supported tracing
 * library is detected. Custom exemplars are provided explicitly in code, for example if a developer
 * wants to make sure an Exemplar is created for a specific code path.
 * <p>
 * Spans will be marked as being an Exemplar by calling {@link SpanContext#markCurrentSpanAsExemplar()}.
 * The tracer implementation should set a Span attribute to mark the current Span as an Exemplar.
 * This attribute can be used by a trace sampling algorithm to make sure traces with Exemplars are sampled.
 * <p>
 * The ExemplarSampler is rate-limited, so only a small fraction of Spans will be marked as Exemplars in
 * an application with a large number of requests.
 * <p>
 * See {@link ExemplarSamplerConfig} for configuration options.
 */
public class ExemplarSampler {

    private final ExemplarSamplerConfig config;
    private final Exemplar[] exemplars;
    private final Exemplar[] customExemplars; // Separate from exemplars, because we don't want custom exemplars
    // to be overwritten by automatic exemplar sampling. exemplars.length == customExemplars.length
    private final AtomicBoolean acceptingNewExemplars = new AtomicBoolean(true);
    private final AtomicBoolean acceptingNewCustomExemplars = new AtomicBoolean(true);
    private final SpanContext spanContext; // may be null, in that case SpanContextSupplier.getSpanContext() is used.

    /**
     * Creates a sampler without an explicit span context; equivalent to
     * {@code new ExemplarSampler(config, null)}.
     */
    public ExemplarSampler(ExemplarSamplerConfig config) {
        this(config, null);
    }

    /**
     * Constructor with an additional {@code spanContext} argument.
     * This is useful for testing, but may also be useful in some production scenarios.
     * If {@code spanContext != null} that spanContext is used and
     * {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier SpanContextSupplier} is not used.
     * If {@code spanContext == null}
     * {@link io.prometheus.metrics.tracer.initializer.SpanContextSupplier#getSpanContext() SpanContextSupplier.getSpanContext()}
     * is called to find a span context.
     */
    public ExemplarSampler(ExemplarSamplerConfig config, SpanContext spanContext) {
        this.config = config;
        this.exemplars = new Exemplar[config.getNumberOfExemplars()];
        this.customExemplars = new Exemplar[exemplars.length];
        this.spanContext = spanContext;
    }

    /**
     * Returns the currently stored exemplars.
     * <p>
     * Custom exemplars are added first; regular exemplars are appended only while the result holds
     * fewer than {@code exemplars.length} entries. Entries older than the configured max retention
     * period are cleared from their slot and not returned.
     */
    public Exemplars collect() {
        // this may run in parallel with observe()
        long now = System.currentTimeMillis();
        List<Exemplar> result = new ArrayList<>(exemplars.length);
        for (int i = 0; i < customExemplars.length; i++) {
            Exemplar exemplar = customExemplars[i];
            if (exemplar != null) {
                if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
                    customExemplars[i] = null;
                } else {
                    result.add(exemplar);
                }
            }
        }
        for (int i = 0; i < exemplars.length && result.size() < exemplars.length; i++) {
            Exemplar exemplar = exemplars[i];
            if (exemplar != null) {
                if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
                    exemplars[i] = null;
                } else {
                    result.add(exemplar);
                }
            }
        }
        return Exemplars.of(result);
    }

    /**
     * Clears all regular and custom exemplar slots. The rate-limiting flags are left untouched.
     */
    public void reset() {
        for (int i = 0; i < exemplars.length; i++) {
            exemplars[i] = null;
            customExemplars[i] = null;
        }
    }

    /**
     * Offers {@code value} as a candidate for a regular (trace-derived) exemplar.
     * Returns immediately while the sampler is in its rate-limiting pause.
     */
    public void observe(double value) {
        if (!acceptingNewExemplars.get()) {
            return; // This is the hot path in a high-throughput application and should be as efficient as possible.
        }
        rateLimitedObserve(acceptingNewExemplars, value, exemplars, () -> doObserve(value));
    }

    /**
     * Offers {@code value} with explicitly provided {@code labels} as a candidate for a custom exemplar.
     * Returns immediately while the custom sampler is in its rate-limiting pause.
     */
    public void observeWithExemplar(double value, Labels labels) {
        if (!acceptingNewCustomExemplars.get()) {
            return; // This is the hot path in a high-throughput application and should be as efficient as possible.
        }
        rateLimitedObserve(acceptingNewCustomExemplars, value, customExemplars, () -> doObserveWithExemplar(value, labels));
    }

    // Dispatches to the strategy matching the sampler layout: a single slot, one slot per classic
    // histogram bucket, or a free-form pool of slots.
    // Returns the timestamp of the newly added Exemplar or 0 if no Exemplar was added.
    private long doObserve(double value) {
        if (exemplars.length == 1) {
            return doObserveSingleExemplar(value);
        } else if (config.getHistogramClassicUpperBounds() != null) {
            return doObserveWithUpperBounds(value);
        } else {
            return doObserveWithoutUpperBounds(value);
        }
    }

    // Single slot: replace it if it is empty or older than the min retention period.
    private long doObserveSingleExemplar(double value) {
        long now = System.currentTimeMillis();
        Exemplar current = exemplars[0];
        if (current == null || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
            return updateExemplar(0, value, now);
        }
        return 0;
    }

    // One slot per classic histogram bucket: the value goes to the first bucket whose upper bound
    // is >= value, and replaces that slot only if it is empty or older than the min retention period.
    private long doObserveWithUpperBounds(double value) {
        long now = System.currentTimeMillis();
        double[] upperBounds = config.getHistogramClassicUpperBounds();
        for (int i = 0; i < upperBounds.length; i++) {
            if (value <= upperBounds[i]) {
                Exemplar previous = exemplars[i];
                if (previous == null || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
                    return updateExemplar(i, value, now);
                } else {
                    return 0;
                }
            }
        }
        return 0; // will never happen, as upperBounds contains +Inf
    }

    // Free-form pool. Replacement priority:
    //   1. an empty slot (including slots just cleared because they exceeded the max retention period),
    //   2. the smallest exemplar, if the new value is even smaller and the slot passed min retention,
    //   3. the largest exemplar, if the new value is even larger and the slot passed min retention,
    //   4. the oldest exemplar that is neither smallest nor largest, if it passed min retention.
    private long doObserveWithoutUpperBounds(double value) {
        final long now = System.currentTimeMillis();
        Exemplar smallest = null;
        int smallestIndex = -1;
        Exemplar largest = null;
        int largestIndex = -1;
        int nullIndex = -1;
        for (int i = exemplars.length - 1; i >= 0; i--) {
            Exemplar exemplar = exemplars[i];
            if (exemplar == null) {
                nullIndex = i;
            } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
                exemplars[i] = null;
                nullIndex = i;
            } else {
                if (smallest == null || exemplar.getValue() < smallest.getValue()) {
                    smallest = exemplar;
                    smallestIndex = i;
                }
                if (largest == null || exemplar.getValue() > largest.getValue()) {
                    largest = exemplar;
                    largestIndex = i;
                }
            }
        }
        if (nullIndex >= 0) {
            return updateExemplar(nullIndex, value, now);
        }
        if (now - smallest.getTimestampMillis() > config.getMinRetentionPeriodMillis() && value < smallest.getValue()) {
            return updateExemplar(smallestIndex, value, now);
        }
        if (now - largest.getTimestampMillis() > config.getMinRetentionPeriodMillis() && value > largest.getValue()) {
            return updateExemplar(largestIndex, value, now);
        }
        long oldestTimestamp = 0;
        int oldestIndex = -1;
        for (int i = 0; i < exemplars.length; i++) {
            Exemplar exemplar = exemplars[i];
            if (exemplar != null && exemplar != smallest && exemplar != largest) {
                if (oldestTimestamp == 0 || exemplar.getTimestampMillis() < oldestTimestamp) {
                    oldestTimestamp = exemplar.getTimestampMillis();
                    oldestIndex = i;
                }
            }
        }
        if (oldestIndex != -1 && now - oldestTimestamp > config.getMinRetentionPeriodMillis()) {
            return updateExemplar(oldestIndex, value, now);
        }
        return 0;
    }

    // Returns the timestamp of the newly added Exemplar (which is System.currentTimeMillis())
    // or 0 if no Exemplar was added.
    private long doObserveWithExemplar(double amount, Labels labels) {
        if (customExemplars.length == 1) {
            return doObserveSingleExemplar(amount, labels);
        } else if (config.getHistogramClassicUpperBounds() != null) {
            return doObserveWithExemplarWithUpperBounds(amount, labels);
        } else {
            return doObserveWithExemplarWithoutUpperBounds(amount, labels);
        }
    }

    // Custom-exemplar variant of the single-slot strategy.
    private long doObserveSingleExemplar(double amount, Labels labels) {
        long now = System.currentTimeMillis();
        Exemplar current = customExemplars[0];
        if (current == null || now - current.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
            return updateCustomExemplar(0, amount, labels, now);
        }
        return 0;
    }

    // Custom-exemplar variant of the per-bucket strategy.
    private long doObserveWithExemplarWithUpperBounds(double value, Labels labels) {
        long now = System.currentTimeMillis();
        double[] upperBounds = config.getHistogramClassicUpperBounds();
        for (int i = 0; i < upperBounds.length; i++) {
            if (value <= upperBounds[i]) {
                Exemplar previous = customExemplars[i];
                if (previous == null || now - previous.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
                    return updateCustomExemplar(i, value, labels, now);
                } else {
                    return 0;
                }
            }
        }
        return 0; // will never happen, as upperBounds contains +Inf
    }

    // Custom-exemplar variant of the free-form pool: fill an empty (or expired) slot first,
    // otherwise replace the oldest entry once it has passed the min retention period.
    private long doObserveWithExemplarWithoutUpperBounds(double amount, Labels labels) {
        final long now = System.currentTimeMillis();
        int nullPos = -1;
        int oldestPos = -1;
        Exemplar oldest = null;
        for (int i = customExemplars.length - 1; i >= 0; i--) {
            Exemplar exemplar = customExemplars[i];
            if (exemplar == null) {
                nullPos = i;
            } else if (now - exemplar.getTimestampMillis() > config.getMaxRetentionPeriodMillis()) {
                customExemplars[i] = null;
                nullPos = i;
            } else {
                if (oldest == null || exemplar.getTimestampMillis() < oldest.getTimestampMillis()) {
                    oldest = exemplar;
                    oldestPos = i;
                }
            }
        }
        if (nullPos != -1) {
            return updateCustomExemplar(nullPos, amount, labels, now);
        } else if (now - oldest.getTimestampMillis() > config.getMinRetentionPeriodMillis()) {
            return updateCustomExemplar(oldestPos, amount, labels, now);
        } else {
            return 0;
        }
    }

    /**
     * Observing requires a system call to {@link System#currentTimeMillis()},
     * and it requires iterating over the existing exemplars to check if one of the existing
     * exemplars can be replaced.
     * <p>
     * To avoid performance issues, we rate limit observing exemplars to
     * {@link ExemplarSamplerConfig#getSampleIntervalMillis()} milliseconds.
     * <p>
     * {@code NaN} values are ignored. {@code accepting} is flipped to {@code false} for the duration
     * of the pause and flipped back by a scheduled task. The pause length is derived from
     * {@code exemplars}, i.e. the array that {@code observeFunc} samples into.
     *
     * @param accepting   flag guarding the array that is being sampled
     * @param value       the observed value
     * @param exemplars   the array {@code observeFunc} writes to (regular or custom exemplars)
     * @param observeFunc returns the timestamp of the added Exemplar or 0 if no Exemplar was added
     */
    private void rateLimitedObserve(AtomicBoolean accepting, double value, Exemplar[] exemplars, LongSupplier observeFunc) {
        if (Double.isNaN(value)) {
            return;
        }
        if (!accepting.compareAndSet(true, false)) {
            return;
        }
        long sleepTime = config.getSampleIntervalMillis();
        try {
            // observeFunc returns the current timestamp or 0 if no Exemplar was added.
            long now = observeFunc.getAsLong();
            if (now != 0) {
                sleepTime = durationUntilNextExemplarExpires(exemplars, now);
            }
        } finally {
            // Always re-arm, even if observeFunc failed: otherwise a single exception would leave
            // the flag false and silently disable sampling for the lifetime of this sampler.
            Scheduler.schedule(() -> accepting.compareAndSet(false, true), sleepTime, TimeUnit.MILLISECONDS);
        }
    }

    // Computes how long to pause sampling for the given array: if there is an empty slot, or the
    // oldest entry already passed the min retention period, pause for the sample interval;
    // otherwise pause until the oldest entry reaches the min retention period.
    private long durationUntilNextExemplarExpires(Exemplar[] candidates, long now) {
        long oldestTimestamp = now;
        for (Exemplar exemplar : candidates) {
            if (exemplar == null) {
                return config.getSampleIntervalMillis();
            } else if (exemplar.getTimestampMillis() < oldestTimestamp) {
                oldestTimestamp = exemplar.getTimestampMillis();
            }
        }
        long oldestAge = now - oldestTimestamp;
        if (oldestAge < config.getMinRetentionPeriodMillis()) {
            return config.getMinRetentionPeriodMillis() - oldestAge;
        }
        return config.getSampleIntervalMillis();
    }

    // Stores a custom exemplar at the given index. If the provided labels carry neither a trace id
    // nor a span id, the labels of the current span (if any) are merged in. Always returns now.
    private long updateCustomExemplar(int index, double value, Labels labels, long now) {
        if (!labels.contains(Exemplar.TRACE_ID) && !labels.contains(Exemplar.SPAN_ID)) {
            labels = labels.merge(doSampleExemplar());
        }
        customExemplars[index] = Exemplar.builder()
                .value(value)
                .labels(labels)
                .timestampMillis(now)
                .build();
        return now;
    }

    // Stores a regular exemplar at the given index if trace labels are available.
    // Returns now if the exemplar was stored, 0 otherwise.
    private long updateExemplar(int index, double value, long now) {
        Labels traceLabels = doSampleExemplar();
        if (!traceLabels.isEmpty()) {
            exemplars[index] = Exemplar.builder()
                    .value(value)
                    .labels(traceLabels)
                    .timestampMillis(now)
                    .build();
            return now;
        } else {
            return 0;
        }
    }

    // Returns trace_id / span_id labels of the current span and marks the span as exemplar,
    // or Labels.EMPTY if there is no span context, the span is not sampled, or ids are missing.
    private Labels doSampleExemplar() {
        // Using the qualified name so that Micrometer can exclude the dependency on prometheus-metrics-tracer-initializer
        // as they provide their own implementation of SpanContextSupplier.
        // If we had an import statement for SpanContextSupplier the dependency would be needed in any case.
        try {
            // The lookup must happen inside the try block: this is the call that raises
            // NoClassDefFoundError when the tracer-initializer dependency is excluded.
            SpanContext spanContext = this.spanContext != null
                    ? this.spanContext
                    : io.prometheus.metrics.tracer.initializer.SpanContextSupplier.getSpanContext();
            if (spanContext != null) {
                if (spanContext.isCurrentSpanSampled()) {
                    String spanId = spanContext.getCurrentSpanId();
                    String traceId = spanContext.getCurrentTraceId();
                    if (spanId != null && traceId != null) {
                        spanContext.markCurrentSpanAsExemplar();
                        return Labels.of(Exemplar.TRACE_ID, traceId, Exemplar.SPAN_ID, spanId);
                    }
                }
            }
        } catch (NoClassDefFoundError ignored) {
            // Tracing classes are not on the classpath: behave as if there is no current span.
        }
        return Labels.EMPTY;
    }
}