Adds two targeted observability improvements across all homelab services.
pkg/logger/access.go (new)
HTTP access log middleware that logs one structured line per request:
method, path, status, ms, trace_id
The trace_id comes from the OTel span already in context (created by
trace.Middleware, which runs outside this one), so each log entry in
Loki can link straight to its trace in Jaeger. Health/metrics endpoints are
excluded to avoid noise. Level is ERROR for 5xx, WARN for 4xx, INFO
otherwise.
pkg/setup/setup.go
Wire the new middleware between trace.Middleware (which creates the
span) and metrics.Middleware:
trace → AccessMiddleware → metrics → mux
Order matters: span must exist before AccessMiddleware reads it.
infrastructure/terraform/monitoring.tf
Fluent Bit was shipping all container logs to Loki with a single
static label (job=fluent-bit), making it impossible to filter logs
by service. Added a `nest/lift` filter that flattens the kubernetes
metadata block to top-level fields (kube_namespace_name,
kube_container_name, …), then promoted those as Loki label_keys.
After this change you can query:
{kube_namespace_name="finance"} |= "trace_id"
and Loki will return only the finance-api logs.
Co-authored-by: Gonçalo Rodrigues <guga@Goncalos-MacBook-Pro.local>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
69 lines
1.6 KiB
Go
69 lines
1.6 KiB
Go
package logger
|
|
|
|
import (
|
|
"log/slog"
|
|
"net/http"
|
|
"time"
|
|
|
|
"go.opentelemetry.io/otel/trace"
|
|
)
|
|
|
|
// skipAccessLog is the set of request paths for which AccessMiddleware stays
// silent. A path is skipped when its entry is true; lookups use the exact
// URL path.
var skipAccessLog = map[string]bool{
	"/metrics": true,
	"/healthz": true,
	"/readyz":  true,
}
|
|
|
|
// statusRecorder wraps an http.ResponseWriter and remembers the final status
// code of the response so it can be inspected after the handler returns.
//
// The creator may pre-set status to the value that should be reported when
// the handler never sends a header explicitly (net/http answers 200 OK in
// that case).
type statusRecorder struct {
	http.ResponseWriter
	status      int
	wroteHeader bool // true once the final (non-informational) header is out
}

// WriteHeader forwards code to the wrapped writer and records it as the
// response status.
//
// Only the first final status is kept: net/http ignores superfluous
// WriteHeader calls, so recording a later one would report a status that was
// never sent. Informational 1xx codes (other than 101 Switching Protocols)
// are forwarded but not recorded, because a final header still follows them.
func (r *statusRecorder) WriteHeader(code int) {
	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
	if !r.wroteHeader && !informational {
		r.status = code
		r.wroteHeader = true
	}
	r.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the wrapped writer. If no final header has been sent
// yet, net/http implicitly sends 200 OK on the first write, so that status is
// locked in here and a later WriteHeader call cannot change it.
func (r *statusRecorder) Write(b []byte) (int, error) {
	if !r.wroteHeader {
		r.status = http.StatusOK
		r.wroteHeader = true
	}
	return r.ResponseWriter.Write(b)
}

// Unwrap exposes the wrapped ResponseWriter so that http.ResponseController
// can reach optional capabilities (flush, hijack, deadlines) that the
// embedding would otherwise hide.
func (r *statusRecorder) Unwrap() http.ResponseWriter {
	return r.ResponseWriter
}
|
|
|
|
// AccessMiddleware logs one structured line per HTTP request.
|
|
// It must run inside trace.Middleware so the OTel span is already in context.
|
|
//
|
|
// trace.Middleware → AccessMiddleware → metrics.Middleware → mux
|
|
//
|
|
// Each line includes method, path, status, latency in ms, and trace_id when
|
|
// tracing is enabled. Status ≥ 500 is logged at ERROR, 4xx at WARN, rest INFO.
|
|
func AccessMiddleware(next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if skipAccessLog[r.URL.Path] {
|
|
next.ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
start := time.Now()
|
|
rec := &statusRecorder{ResponseWriter: w, status: http.StatusOK}
|
|
next.ServeHTTP(rec, r)
|
|
|
|
attrs := []any{
|
|
"method", r.Method,
|
|
"path", r.URL.Path,
|
|
"status", rec.status,
|
|
"ms", time.Since(start).Milliseconds(),
|
|
}
|
|
|
|
sc := trace.SpanFromContext(r.Context()).SpanContext()
|
|
if sc.IsValid() {
|
|
attrs = append(attrs, "trace_id", sc.TraceID().String())
|
|
}
|
|
|
|
level := slog.LevelInfo
|
|
switch {
|
|
case rec.status >= 500:
|
|
level = slog.LevelError
|
|
case rec.status >= 400:
|
|
level = slog.LevelWarn
|
|
}
|
|
|
|
slog.Log(r.Context(), level, "http", attrs...)
|
|
})
|
|
}
|