Spring Boot
Distributed Tracing
Distributed tracing tracks a request as it flows through multiple services, providing end-to-end visibility into latency, errors, and dependencies. Spring Boot 3 integrates with Micrometer Tracing, which supports both Zipkin and OpenTelemetry exporters. Trace and span IDs are propagated automatically through HTTP headers; messaging systems and async boundaries carry them once the corresponding instrumentation is enabled (see the async and messaging section). This entry covers setup, Zipkin integration, OpenTelemetry, custom spans, baggage propagation, async and messaging tracing, and sampling strategies.
Setup with Micrometer Tracing and Zipkin
Spring Boot 3 uses Micrometer Tracing as the tracing facade. Add the Zipkin reporter and the Brave bridge to send traces to a Zipkin server. Spring Boot auto-configures the tracer and injects trace and span IDs into MDC automatically so they appear in every log line without manual instrumentation.
XML
<!-- pom.xml -->
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-tracing-bridge-brave</artifactId>
</dependency>
<dependency>
<groupId>io.zipkin.reporter2</groupId>
<artifactId>zipkin-reporter-brave</artifactId>
</dependency>
<!-- HTTP instrumentation (RestTemplate, WebClient, RestClient) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
# ── application.yml ────────────────────────────────────────────────────
management:
  tracing:
    sampling:
      probability: 1.0 # 1.0 = 100% in dev; 0.1 = 10% in prod
    propagation:
      type: b3_multi # B3 multi-header (Zipkin default)
      # or w3c for OpenTelemetry W3C TraceContext
  zipkin:
    tracing:
      # Spring Boot 3 reads the Zipkin span endpoint from here; the Sleuth-era
      # spring.zipkin.base-url / spring.zipkin.enabled keys are no longer used.
      endpoint: http://localhost:9411/api/v2/spans
logging:
  pattern:
    # Inject traceId and spanId into every log line automatically
    level: "%5p [${spring.application.name:},%X{traceId:-},%X{spanId:-}]"
# ── application-prod.yml ──────────────────────────────────────────────
management:
  tracing:
    sampling:
      probability: 0.1 # sample 10% in production
# ── Docker Compose — run Zipkin locally ───────────────────────────────
# services:
# zipkin:
# image: openzipkin/zipkin:3
# ports:
# - "9411:9411"
OpenTelemetry Setup
OpenTelemetry (OTel) is the vendor-neutral standard for distributed tracing. Replace the Brave bridge with the OTel bridge and configure an OTLP exporter to send traces to any OTel-compatible backend — Jaeger, Tempo, Honeycomb, Datadog, or a collector. The same Micrometer Tracing API works unchanged regardless of the bridge.
XML
<!-- pom.xml — OpenTelemetry bridge instead of Brave ─────────────── -->
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-tracing-bridge-otel</artifactId>
</dependency>
<!-- OTLP exporter — sends to any OTel-compatible collector -->
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-exporter-otlp</artifactId>
</dependency>
<!-- Or Zipkin exporter for OTel -->
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-exporter-zipkin</artifactId>
</dependency>
# ── application.yml — OTLP exporter to Grafana Tempo ─────────────────
management:
  tracing:
    sampling:
      probability: 1.0
    propagation:
      type: w3c # W3C TraceContext + Baggage (OTel default)
  otlp:
    tracing:
      endpoint: http://localhost:4318/v1/traces
# ── application.yml — OTLP to Jaeger ─────────────────────────────────
# (alternative to the block above; the otlp keys live under management)
management:
  otlp:
    tracing:
      endpoint: http://jaeger:4318/v1/traces
# ── application.yml — OTLP to Honeycomb ──────────────────────────────
# (alternative to the blocks above)
management:
  otlp:
    tracing:
      endpoint: https://api.honeycomb.io/v1/traces
      headers:
        x-honeycomb-team: ${HONEYCOMB_API_KEY}
        x-honeycomb-dataset: my-service
# ── Docker Compose — Grafana LGTM stack (Loki, Grafana, Tempo, Mimir)
# services:
# tempo:
# image: grafana/tempo:latest
# command: [ "-config.file=/etc/tempo.yaml" ]
# ports:
# - "4318:4318" # OTLP HTTP
# - "3200:3200" # Tempo query
# grafana:
# image: grafana/grafana:latest
# ports:
# - "3000:3000"
Automatic Instrumentation
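Spring Boot auto-instruments HTTP server requests, RestTemplate, WebClient, RestClient (when built from the auto-configured builders), Spring Data repositories, and scheduled tasks; Kafka consumers join the trace once listener observation is enabled. An inbound HTTP request that carries no trace headers starts a new trace, while one that carries them continues the caller's trace; outbound HTTP calls propagate the trace context through standard headers (B3 or W3C TraceContext) so the receiving service continues the same trace.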
Java
// ── Inbound HTTP — trace started automatically ────────────────────────
// GET /api/v1/orders/42
// Spring creates: traceId=abc123, spanId=def456
// MDC populated: traceId=abc123, spanId=def456
// Logs include: [order-service,abc123,def456] INFO ...
/**
 * REST entry point for orders, mapped under {@code /api/v1/orders}.
 *
 * <p>The handler does no tracing work of its own; per the surrounding guide the
 * trace and span IDs are expected to be in the MDC already when it runs.
 */
@RestController
@RequestMapping("/api/v1/orders")
@RequiredArgsConstructor
@Slf4j
public class OrderController {

    private final OrderService orderService;

    /**
     * Returns the order with the given id wrapped in a 200 response.
     *
     * @param id order identifier taken from the request path
     * @return {@code 200 OK} carrying whatever {@code orderService.findById} returns
     */
    @GetMapping("/{id}")
    public ResponseEntity<OrderResponse> findById(@PathVariable Long id) {
        // Logged before the lookup so the line appears even if the lookup fails.
        log.info("Fetching order {}", id);
        OrderResponse found = orderService.findById(id);
        return ResponseEntity.ok(found);
    }
}
// ── Outbound HTTP — trace propagated automatically ────────────────────
// RestClient, RestTemplate, and WebClient all propagate B3 headers
/**
 * Thin HTTP client for the inventory service.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class InventoryClient {

    /** Injected client; the guide expects it to be built from the instrumented builder. */
    private final RestClient restClient;

    /**
     * Fetches the stock record for one product.
     *
     * <p>With B3 propagation the outgoing request is expected to carry
     * {@code X-B3-TraceId} (the caller's trace), a fresh {@code X-B3-SpanId}
     * for the child span, and {@code X-B3-Sampled}.
     *
     * @param productId product whose stock is requested
     * @return the response body deserialized as {@link InventoryResponse}
     */
    public InventoryResponse checkStock(Long productId) {
        var stockRequest = restClient.get()
                .uri("http://inventory-service/api/v1/stock/{id}", productId);
        return stockRequest.retrieve().body(InventoryResponse.class);
    }
}
// ── Configure RestClient with tracing ─────────────────────────────────
/**
 * Exposes a {@link RestClient} bean that targets the inventory service.
 */
@Configuration
public class RestClientConfig {

    /**
     * Builds the client from the injected builder rather than from
     * {@code RestClient.create()}, so whatever customization the container
     * applied to the builder (the guide relies on the tracing interceptor)
     * is kept.
     *
     * @param builder builder supplied by the application context
     * @return client with base URL {@code http://inventory-service}
     */
    @Bean
    public RestClient restClient(RestClient.Builder builder) {
        RestClient inventoryClient = builder.baseUrl("http://inventory-service").build();
        return inventoryClient;
    }
}
// ── WebClient with tracing ─────────────────────────────────────────────
@Configuration
public class WebClientConfig {
/**
 * Builds the {@link WebClient} for the payment service from the injected
 * builder, so any filters the container applied to that builder (the guide
 * relies on the tracing filter) are retained.
 *
 * @param builder builder supplied by the application context
 * @return client with base URL {@code http://payment-service}
 */
@Bean
public WebClient webClient(WebClient.Builder builder) {
    WebClient paymentClient = builder.baseUrl("http://payment-service").build();
    return paymentClient;
}
}
Custom Spans
Wrap significant business operations in custom spans to see them in the trace timeline. Inject the Tracer bean and use try-with-resources or a lambda to scope the span. Add tags (key-value metadata) to make spans searchable and meaningful in the tracing UI.
Java
@Service
@RequiredArgsConstructor
@Slf4j
public class OrderService {
private final OrderRepository orderRepo;
private final InventoryClient inventoryClient;
private final PaymentService paymentService;
private final Tracer tracer;
// ── Wrapping a business operation in a custom span ────────────────
/**
 * Places an order inside an {@code order.place} span: reserves inventory,
 * persists the order, then charges payment. The inventory and payment steps
 * each run in their own child span.
 *
 * <p>Failures are recorded with {@code Span.error(Throwable)} only. The
 * previous extra {@code tag("error", ex.getMessage())} is gone because
 * exception messages may be {@code null}, and a tag call failing inside the
 * catch block would replace the real exception.
 *
 * @param request order payload; {@code request.items()} is read for the item count
 * @param userId  id of the ordering user, recorded as the {@code order.userId} tag
 * @return response built from the persisted order
 */
@Transactional
public OrderResponse placeOrder(PlaceOrderRequest request, Long userId) {
    Span span = tracer.nextSpan()
            .name("order.place")
            .tag("order.userId", String.valueOf(userId))
            .tag("order.itemCount", String.valueOf(request.items().size()))
            .start();
    try (Tracer.SpanInScope ws = tracer.withSpan(span)) {
        // ── Step 1: Reserve inventory ──────────────────────────
        runInChildSpan(
                tracer.nextSpan().name("inventory.reserve"),
                () -> inventoryClient.reserve(request.items()));

        // ── Step 2: Persist order ──────────────────────────────
        Order order = orderRepo.save(Order.from(request, userId));
        span.tag("order.id", String.valueOf(order.getId()));

        // ── Step 3: Process payment ───────────────────────────
        runInChildSpan(
                tracer.nextSpan()
                        .name("payment.charge")
                        .tag("payment.amount", order.getTotal().toPlainString()),
                () -> paymentService.charge(order));

        log.info("Order {} placed successfully", order.getId());
        return OrderResponse.from(order);
    } catch (Exception ex) {
        // A step failure is recorded on the enclosing span as well as on the child span.
        span.error(ex);
        throw ex;
    } finally {
        span.end();
    }
}

/**
 * Starts the given span, runs {@code step} with it in scope, records any
 * exception on the span, and always ends the span.
 */
private void runInChildSpan(Span childSpan, Runnable step) {
    Span started = childSpan.start();
    try (Tracer.SpanInScope scope = tracer.withSpan(started)) {
        step.run();
    } catch (Exception ex) {
        started.error(ex);
        throw ex;
    } finally {
        started.end();
    }
}
// ── Simpler span using @NewSpan (AOP-based) ───────────────────────
/**
 * Loads one order by id in a span named {@code order.findById}; the id is
 * recorded as the {@code order.id} tag.
 *
 * <p>{@code @SpanTag} appears only on the parameter. The former method-level
 * copy has been removed because the annotation describes a parameter value
 * and the second copy repeated the same key.
 *
 * <p>NOTE(review): {@code @NewSpan} is AOP-based, so it only takes effect when
 * the tracing annotation aspects are enabled and the call goes through the
 * Spring proxy — confirm for this application.
 *
 * @param id order identifier
 * @return response mapped from the stored order
 * @throws OrderNotFoundException if the repository has no order with that id
 */
@NewSpan("order.findById")
@Transactional(readOnly = true)
public OrderResponse findById(@SpanTag("order.id") Long id) {
    return orderRepo.findById(id)
            .map(OrderResponse::from)
            .orElseThrow(() -> new OrderNotFoundException(id));
}
}
Baggage Propagation
Baggage is key-value data that propagates alongside the trace context through every service in the call chain. Use it to carry correlation IDs, tenant IDs, feature flags, or user IDs without adding them to every method signature. Baggage is available in any service that participates in the same trace.
Java
// ── Define baggage fields ─────────────────────────────────────────────
/**
 * Declares the baggage fields used by this application as beans so they can
 * be injected wherever baggage is written or read.
 */
@Configuration
public class TracingConfig {

    private static final String TENANT_ID = "tenant-id";
    private static final String CORRELATION_ID = "correlation-id";

    /** Baggage field named {@code tenant-id}. */
    @Bean
    public BaggageField tenantIdField() {
        return BaggageField.create(TENANT_ID);
    }

    /** Baggage field named {@code correlation-id}. */
    @Bean
    public BaggageField correlationIdField() {
        return BaggageField.create(CORRELATION_ID);
    }
}
# ── application.yml — whitelist baggage fields for propagation ────────
management:
  tracing:
    baggage:
      remote-fields: # propagate in HTTP headers
        - tenant-id
        - correlation-id
      correlation: # also inject into MDC/logs
        fields:
          - tenant-id
          - correlation-id
// ── Filter: set baggage on every inbound request ──────────────────────
/**
 * Servlet filter that copies the tenant and correlation identifiers of each
 * inbound request into baggage, and echoes the correlation id back to the
 * caller in the {@code X-Correlation-ID} response header.
 *
 * <p>Ordering matters: {@code BaggageField.updateValue(String)} writes to the
 * current trace context, so this filter has to run after the filter that
 * opens the server span. Spring Boot registers its HTTP observation filter at
 * {@code Ordered.HIGHEST_PRECEDENCE + 1}; running at
 * {@code HIGHEST_PRECEDENCE}, as before, would find no current span and the
 * updates would be dropped.
 */
@Component
@RequiredArgsConstructor
@Order(Ordered.HIGHEST_PRECEDENCE + 2)
public class BaggageFilter extends OncePerRequestFilter {

    private final BaggageField tenantIdField;
    private final BaggageField correlationIdField;

    /**
     * Populates baggage from request headers and continues the chain.
     *
     * @param request  inbound request; {@code X-Tenant-ID} and
     *                 {@code X-Correlation-ID} are read from it
     * @param response outbound response; receives {@code X-Correlation-ID}
     * @param chain    remaining filter chain
     * @throws ServletException propagated from the chain
     * @throws IOException      propagated from the chain
     */
    @Override
    protected void doFilterInternal(HttpServletRequest request,
                                    HttpServletResponse response,
                                    FilterChain chain)
            throws ServletException, IOException {
        // Set tenant-id from header (a JWT claim could be used instead)
        String tenantId = request.getHeader("X-Tenant-ID");
        if (tenantId != null) {
            tenantIdField.updateValue(tenantId);
        }

        // Reuse the caller's correlation id when one was sent; a missing or
        // blank header gets a fresh UUID. orElseGet avoids generating a UUID
        // that would be thrown away when the header is present.
        String correlationId = Optional
                .ofNullable(request.getHeader("X-Correlation-ID"))
                .filter(id -> !id.isBlank())
                .orElseGet(() -> UUID.randomUUID().toString());
        correlationIdField.updateValue(correlationId);
        response.setHeader("X-Correlation-ID", correlationId);

        chain.doFilter(request, response);
    }
}
// ── Read baggage anywhere in the call chain ───────────────────────────
/**
 * Example of reading baggage values deep in the call chain without passing
 * them through method parameters.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class TenantAwareService {

    private final BaggageField tenantIdField;
    private final BaggageField correlationIdField;
    private final ProductRepository productRepo;

    /**
     * Lists the products of the tenant named by the {@code tenant-id} baggage field.
     *
     * @return products mapped to {@link ProductResponse}
     */
    public List<ProductResponse> findAll() {
        String tenantId = tenantIdField.getValue();
        // Read only to show that the field is reachable here; the value is not used below.
        String correlationId = correlationIdField.getValue();

        log.info("Loading products for tenant={}", tenantId);

        var tenantProducts = productRepo.findByTenantId(tenantId);
        return tenantProducts.stream()
                .map(ProductResponse::from)
                .toList();
    }
}
// ── Baggage in downstream services ───────────────────────────────────
// When Service A calls Service B via HTTP:
// Spring propagates baggage as headers:
// baggage: tenant-id=acme,correlation-id=abc-123
// Service B receives and injects into its own BaggageField beans
// Logs in Service B automatically include tenant-id and correlation-id
Async and Messaging Tracing
Trace context does not propagate automatically to @Async threads or message consumers. Wrap async executors with tracing support and use Spring Kafka or RabbitMQ tracing instrumentation to propagate headers through messages. Without this, async operations appear as disconnected traces.
Java
// ── @Async with trace context propagation ────────────────────────────
@Configuration
@EnableAsync
@RequiredArgsConstructor
public class AsyncConfig {
private final Tracer tracer;
@Bean("tracingTaskExecutor")
public Executor tracingExecutor() {
ThreadPoolTaskExecutor executor =
new ThreadPoolTaskExecutor();
executor.setCorePoolSize(4);
executor.setMaxPoolSize(16);
executor.setQueueCapacity(100);
executor.setThreadNamePrefix("async-tracing-");
executor.initialize();
// Wrap with tracing — propagates trace context to async threads
return new io.micrometer.context.ContextExecutorService(
executor.getThreadPoolExecutor());
}
}
// ── Async service method ──────────────────────────────────────────────
/**
 * Sends user notifications on the {@code tracingTaskExecutor} pool.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class NotificationService {

    /**
     * Asynchronously notifies a user.
     *
     * <p>The guide expects the log line below to carry the caller's traceId
     * and spanId, which depends on the executor propagating context.
     *
     * @param userId  recipient
     * @param message notification text (not used by this placeholder body)
     * @return an already-completed future
     */
    @Async("tracingTaskExecutor")
    public CompletableFuture<Void> sendAsync(Long userId, String message) {
        log.info("Sending notification to user {}", userId);
        // ... send notification
        CompletableFuture<Void> done = CompletableFuture.completedFuture(null);
        return done;
    }
}
// ── Kafka tracing — add dependency ────────────────────────────────────
// <dependency>
// <groupId>io.micrometer</groupId>
// <artifactId>micrometer-tracing-bridge-brave</artifactId>
// </dependency>
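// Spring Kafka instruments producers and consumers through Micrometer
// Observation when spring-kafka and micrometer-tracing are both on the
// classpath AND observation is switched on (it is off by default):
//   spring.kafka.template.observation-enabled=true
//   spring.kafka.listener.observation-enabled=true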
// ── Kafka producer — trace headers injected automatically ─────────────
/**
 * Publishes order events to the {@code orders.created} topic.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class OrderEventProducer {

    private final KafkaTemplate<String, OrderEvent> kafkaTemplate;

    /**
     * Sends the event keyed by its order id and logs the publication.
     *
     * <p>The log line is written right after {@code send} returns; the result
     * of the send is not inspected here.
     *
     * @param event event to publish; {@code event.orderId()} becomes the record key
     */
    public void publish(OrderEvent event) {
        // Trace headers (b3 or traceparent) are added by the template's instrumentation
        String recordKey = String.valueOf(event.orderId());
        kafkaTemplate.send("orders.created", recordKey, event);
        log.info("Published order event for order {}", event.orderId());
    }
}
// ── Kafka consumer — trace headers extracted and continued ────────────
/**
 * Consumes {@code orders.created} events and reserves inventory for them.
 *
 * <p>The collaborator is now declared and constructor-injected; previously
 * {@code inventoryService} was referenced without being defined anywhere in
 * the class.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class OrderEventConsumer {

    // NOTE(review): type name assumed from the variable name used in the
    // listener body — confirm against the inventory module.
    private final InventoryService inventoryService;

    /**
     * Handles one order-created event.
     *
     * <p>The guide expects the listener instrumentation to extract the trace
     * headers from the record so this method continues the producer's trace.
     *
     * @param event deserialized event payload
     * @param topic topic the record was received from
     */
    @KafkaListener(topics = "orders.created",
            groupId = "inventory-service")
    public void onOrderCreated(OrderEvent event,
                               @Header(KafkaHeaders.RECEIVED_TOPIC) String topic) {
        log.info("Processing order {} from topic {}",
                event.orderId(), topic);
        inventoryService.reserve(event.items());
    }
}
// ── RabbitMQ tracing ──────────────────────────────────────────────────
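// spring-rabbit instruments RabbitTemplate and listener containers with
// micrometer-tracing on the classpath once observationEnabled is set to true
// on the template and on the listener container factory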
@Component
@Slf4j
public class PaymentEventConsumer {
/**
 * Handles one message from the {@code payment.events} queue.
 *
 * <p>With B3 propagation the trace context is expected to arrive in the
 * message headers {@code x-b3-traceid}, {@code x-b3-spanid} and
 * {@code x-b3-sampled}.
 *
 * @param event   converted payload
 * @param message raw AMQP message (not read by this method)
 */
@RabbitListener(queues = "payment.events")
public void onPaymentEvent(PaymentEvent event, Message message) {
    var orderId = event.orderId();
    log.info("Processing payment event for order {}", orderId);
}
}
Sampling Strategies
Sampling controls what fraction of traces are recorded and exported. High-traffic production systems cannot trace every request without significant overhead and storage cost. Spring Boot supports probability-based sampling, rate-limited sampling, and custom samplers that make decisions based on request attributes.
yaml
# ── Probability sampler — fixed percentage ───────────────────────────
# application.yml
management:
  tracing:
    sampling:
      probability: 0.1 # 10% of requests traced
// ── Custom sampler — always trace errors and slow requests ────────────
/**
 * Path-based sampling decision for HTTP requests.
 *
 * <ul>
 *   <li>{@code /api/v1/admin...} and {@code /api/v1/auth...} — always sampled</li>
 *   <li>{@code /actuator/health} — never sampled (too noisy)</li>
 *   <li>everything else, including requests whose path is unknown —
 *       sampled with probability {@link #BASE_RATE}</li>
 * </ul>
 *
 * <p>The decision is made from the request alone, so it cannot depend on how
 * long the request ends up taking or whether it fails. The unused
 * slow-request threshold constant has been removed for that reason.
 */
@Component
@Slf4j
public class AdaptiveSampler implements SamplerFunction<HttpRequest> {

    /** Fraction of regular traffic that is sampled. */
    private static final double BASE_RATE = 0.1;

    /**
     * Decides whether a trace should be recorded for this request.
     *
     * @param request the HTTP request; may be {@code null}, and its path may
     *                be {@code null}, in which case only the probabilistic
     *                rule applies
     * @return {@code true} to sample, {@code false} to skip
     */
    @Override
    public Boolean trySample(HttpRequest request) {
        String path = (request == null) ? null : request.path();
        if (path != null) {
            // Always trace admin and auth endpoints
            if (path.startsWith("/api/v1/admin")
                    || path.startsWith("/api/v1/auth")) {
                return true;
            }
            // Never trace health checks — too noisy
            if (path.equals("/actuator/health")) {
                return false;
            }
        }
        // Probabilistic sampling for regular traffic
        return Math.random() < BASE_RATE;
    }
}
// ── Register custom sampler ────────────────────────────────────────────
/**
 * Registers the {@link Sampler} bean used for trace sampling decisions.
 */
@Configuration
public class TracingConfig {

    /**
     * Returns the sampler that records every trace.
     *
     * <p>Suitable for development; replace it with a bounded sampler before
     * running under production load.
     *
     * @return {@code Sampler.ALWAYS_SAMPLE}
     */
    @Bean
    public Sampler customSampler() {
        Sampler everyTrace = Sampler.ALWAYS_SAMPLE; // override in prod
        return everyTrace;
    }
}
// ── Rate-limited sampler — traces per second ──────────────────────────
/**
 * Registers a sampler that caps the number of recorded traces per second.
 */
@Configuration
public class RateLimitedTracingConfig {

    /**
     * Creates a rate-limiting sampler with a budget of 10.
     *
     * <p>Per the guide, the budget is the maximum number of traces sampled
     * per second, independent of request volume.
     *
     * @return sampler created by {@code RateLimitingSampler.create(10)}
     */
    @Bean
    public Sampler rateLimitedSampler() {
        Sampler capped = RateLimitingSampler.create(10);
        return capped;
    }
}
// ── Conditional sampling — trace all errors ───────────────────────────
/**
 * Registers an export predicate keyed on span errors.
 */
@Configuration
public class ErrorTracingConfig {

    /**
     * Accepts a finished span when it has an {@code error} tag or a recorded
     * error, and rejects it otherwise.
     *
     * <p>NOTE(review): as a {@code SpanExportingPredicate} this presumably
     * filters out every non-error span rather than forcing extra spans to be
     * exported, and it can only see spans that were sampled in the first
     * place — confirm this is the intended behavior.
     *
     * @return predicate that is {@code true} only for spans carrying an error
     */
    @Bean
    public SpanExportingPredicate alwaysExportErrors() {
        return finishedSpan -> {
            if (finishedSpan.getTags().containsKey("error")) {
                return true;
            }
            return finishedSpan.getError() != null;
        };
    }
}
# ── Sampling decision summary ─────────────────────────────────────────
# probability: 0.0 → trace nothing (disable tracing)
# probability: 0.01 → trace 1% (high-traffic production)
# probability: 0.1 → trace 10% (medium-traffic production)
# probability: 1.0 → trace everything (development / staging)
#
# Never use 1.0 in production under real load —
# tracing overhead is non-trivial at high request rates.
Correlating Traces with Logs and Metrics
The full observability picture comes from correlating traces, logs, and metrics. Micrometer Tracing injects traceId and spanId into the MDC automatically, so every log line carries the trace context. Grafana's LGTM stack (Loki, Grafana, Tempo, Mimir) provides a unified view where clicking a log line jumps to the trace, and clicking a trace shows the logs.
XML
// ── Structured JSON logging with trace context ───────────────────────
<!-- pom.xml — Logstash encoder for JSON logs -->
<dependency>
<groupId>net.logstash.logback</groupId>
<artifactId>logstash-logback-encoder</artifactId>
<version>7.4</version>
</dependency>
// ── logback-spring.xml ────────────────────────────────────────────────
// <configuration>
// <appender name="JSON"
// class="ch.qos.logback.core.ConsoleAppender">
// <encoder
// class="net.logstash.logback.encoder.LogstashEncoder">
// <customFields>
// {"service":"order-service","env":"prod"}
// </customFields>
// </encoder>
// </appender>
// <root level="INFO">
// <appender-ref ref="JSON"/>
// </root>
// </configuration>
// ── JSON log output (traceId and spanId injected by Micrometer) ───────
// {
// "timestamp": "2024-03-15T10:30:00.123Z",
// "level": "INFO",
// "logger": "com.myapp.OrderService",
// "message": "Order 42 placed successfully",
// "traceId": "abc123def456abc123def456abc12345",
// "spanId": "def456abc123def4",
// "service": "order-service",
// "env": "prod"
// }
// ── Add custom tags to correlate with metrics ─────────────────────────
/**
 * Records a placed order on both the current span and a counter so traces
 * and metrics can be correlated by order size.
 *
 * <p>The customer id is attached to the span only. Using it as a meter tag
 * would create one time series per customer, which is unbounded; the counter
 * is tagged with the bounded {@code totalRange} value alone.
 */
@Component
@RequiredArgsConstructor
public class OrderMetrics {

    /** Orders strictly below this total are classed as "small". */
    private static final BigDecimal LARGE_ORDER_THRESHOLD = BigDecimal.valueOf(100);

    private final MeterRegistry meterRegistry;
    private final Tracer tracer;

    /**
     * Tags the current span (if any) and increments {@code orders.placed}.
     *
     * @param customerId customer who placed the order; recorded on the span
     *                   as {@code order.customerId}
     * @param total      order total, used to derive the {@code small}/{@code large} range
     */
    public void recordOrderPlaced(String customerId,
                                  BigDecimal total) {
        // Computed once so the span tag and the meter tag cannot drift apart.
        String totalRange =
                total.compareTo(LARGE_ORDER_THRESHOLD) < 0 ? "small" : "large";

        // Tag the current span with business attributes
        Span current = tracer.currentSpan();
        if (current != null) {
            current.tag("order.customerId", customerId)
                    .tag("order.totalRange", totalRange);
        }

        // Emit a metric with the low-cardinality tag for correlation
        meterRegistry.counter("orders.placed",
                        "totalRange", totalRange)
                .increment();
    }
}
// ── Grafana dashboard query examples ─────────────────────────────────
// Find logs for a specific trace:
// {service="order-service"} | json | traceId="abc123..."
//
// Jump from trace to logs in Grafana:
// Tempo → click span → "Logs for this span" → Loki query
//
// Find slow traces correlated with error logs:
// {level="ERROR", service="order-service"}
// → traceId extracted → Tempo shows full trace timeline
// ── application.yml — Grafana Loki log shipping ───────────────────────
# logging:
# loki:
# enabled: true
# url: http://loki:3100/loki/api/v1/push
# labels:
# service: order-service
# env: production