obs: request access log middleware + Loki label enrichment (#36)

Adds two targeted observability improvements across all homelab services.

pkg/logger/access.go (new)
  HTTP access log middleware that logs one structured line per request:
    method, path, status, ms, trace_id
  The trace_id comes from the OTel span already in context (created by
  trace.Middleware which runs outside this one), so each log entry in
  Loki has a clickable link into Jaeger. Health/metrics endpoints are
  excluded to avoid noise. Level is ERROR for 5xx, WARN for 4xx, INFO
  otherwise.

pkg/setup/setup.go
  Wire the new middleware between trace.Middleware (which creates the
  span) and metrics.Middleware:
    trace → AccessMiddleware → metrics → mux
  Order matters: span must exist before AccessMiddleware reads it.

infrastructure/terraform/monitoring.tf
  Fluent Bit was shipping all container logs to Loki with a single
  static label (job=fluent-bit), making it impossible to filter logs
  by service. Added a `nest/lift` filter that flattens the kubernetes
  metadata block to top-level fields (kube_namespace_name,
  kube_container_name, …), then promoted those as Loki label_keys.
  After this change you can query:
    {kube_namespace_name="finance"} |= "trace_id"
  and Loki will return only log lines from the finance namespace (e.g.
  finance-api) that contain "trace_id".

Co-authored-by: Gonçalo Rodrigues <guga@Goncalos-MacBook-Pro.local>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Gonçalo Rodrigues 2026-06-20 15:15:06 +01:00 committed by GitHub
parent 40c8632c7e
commit 99ed992d98
3 changed files with 97 additions and 3 deletions

View File

@ -190,8 +190,33 @@ resource "helm_release" "fluent_bit" {
config = {
service = "[SERVICE]\n Daemon Off\n Log_Level info\n Parsers_File /fluent-bit/etc/parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port 2020\n Health_Check On\n"
inputs = "[INPUT]\n Name tail\n Path /var/log/containers/*.log\n Exclude_Path /var/log/containers/fluent-bit-*.log\n multiline.parser docker,cri\n Tag kube.*\n Mem_Buf_Limit 50MB\n Skip_Long_Lines On\n"
filters = "[FILTER]\n Name kubernetes\n Match kube.*\n Annotations Off\n Labels On\n"
outputs = "[OUTPUT]\n Name loki\n Match *\n Host loki-gateway.monitoring.svc\n Port 80\n Labels job=fluent-bit\n"
filters = join("", [
"[FILTER]\n",
" Name kubernetes\n",
" Match kube.*\n",
" Annotations Off\n",
" Labels On\n",
"\n",
# Lift the nested 'kubernetes' object to the top level so label_keys
# can reference flat fields like $kube_namespace_name.
"[FILTER]\n",
" Name nest\n",
" Match kube.*\n",
" Operation lift\n",
" Nested_under kubernetes\n",
" Add_prefix kube_\n",
])
outputs = join("", [
"[OUTPUT]\n",
" Name loki\n",
" Match kube.*\n",
" Host loki-gateway.monitoring.svc\n",
" Port 80\n",
# Static label keeps backward compat; dynamic labels let you filter
# {kube_namespace_name="finance"} or {kube_container_name="api"} in Grafana/LogQL.
" Labels job=fluent-bit\n",
" label_keys $$kube_namespace_name,$$kube_container_name\n",
])
}
tolerations = [
{ operator = "Exists" }

68
pkg/logger/access.go Normal file
View File

@ -0,0 +1,68 @@
package logger
import (
"log/slog"
"net/http"
"time"
"go.opentelemetry.io/otel/trace"
)
// skipAccessLog is the set of request paths for which no access log line is
// written. A path is skipped when its entry is true; paths that are absent
// from the map are logged.
var skipAccessLog = map[string]bool{
	"/metrics": true,
	"/readyz":  true,
	"/healthz": true,
}
// statusRecorder wraps an http.ResponseWriter and remembers the status code of
// the response so it can be inspected after the handler returns.
//
// status holds the recorded code. wroteHeader becomes true once a final
// status has been committed (explicitly via WriteHeader, or implicitly by the
// first Write or Flush); from then on status no longer changes, mirroring
// net/http, which ignores superfluous WriteHeader calls.
type statusRecorder struct {
	http.ResponseWriter
	status      int
	wroteHeader bool
}

// WriteHeader records code as the response status and forwards the call to
// the wrapped writer.
//
// Only the first final status is recorded. 1xx informational codes (other
// than 101 Switching Protocols) may legitimately precede the final header, so
// they are forwarded but never recorded.
func (r *statusRecorder) WriteHeader(code int) {
	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
	if !r.wroteHeader && !informational {
		r.status = code
		r.wroteHeader = true
	}
	r.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the wrapped writer. A Write that happens before any
// WriteHeader call commits an implicit 200 OK, so that status is recorded and
// later WriteHeader calls cannot change it.
func (r *statusRecorder) Write(b []byte) (int, error) {
	if !r.wroteHeader {
		r.status = http.StatusOK
		r.wroteHeader = true
	}
	return r.ResponseWriter.Write(b)
}

// Flush forwards to the wrapped writer when it implements http.Flusher and is
// a no-op otherwise. It exists so handlers that type-assert their writer to
// http.Flusher (streaming, server-sent events) keep working behind the
// recorder. Flushing before any header was written commits an implicit 200 OK.
func (r *statusRecorder) Flush() {
	f, ok := r.ResponseWriter.(http.Flusher)
	if !ok {
		return
	}
	if !r.wroteHeader {
		r.status = http.StatusOK
		r.wroteHeader = true
	}
	f.Flush()
}

// Unwrap returns the wrapped writer so http.ResponseController can reach
// optional interfaces (deadlines, hijacking, ...) implemented by it.
func (r *statusRecorder) Unwrap() http.ResponseWriter {
	return r.ResponseWriter
}
// AccessMiddleware returns a handler that calls next and then writes a single
// structured log record (message "http") describing the request.
//
// The record carries method, path, status and ms (elapsed wall time in whole
// milliseconds). When the request context holds a span with a valid span
// context, trace_id is added as well, so this middleware has to be installed
// inside the middleware that creates the span:
//
//	trace.Middleware → AccessMiddleware → metrics.Middleware → mux
//
// The log level follows the response status: ERROR for 500 and above, WARN
// for 400–499, INFO for everything else. Requests whose path is listed in
// skipAccessLog are passed straight through and produce no record.
func AccessMiddleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if quiet := skipAccessLog[r.URL.Path]; quiet {
			next.ServeHTTP(w, r)
			return
		}

		began := time.Now()
		// Start from 200: that is what is sent when a handler never calls
		// WriteHeader.
		wrapped := &statusRecorder{ResponseWriter: w, status: http.StatusOK}
		next.ServeHTTP(wrapped, r)
		elapsed := time.Since(began)

		fields := make([]slog.Attr, 0, 5)
		fields = append(fields,
			slog.String("method", r.Method),
			slog.String("path", r.URL.Path),
			slog.Int("status", wrapped.status),
			slog.Int64("ms", elapsed.Milliseconds()),
		)
		if spanCtx := trace.SpanFromContext(r.Context()).SpanContext(); spanCtx.IsValid() {
			fields = append(fields, slog.String("trace_id", spanCtx.TraceID().String()))
		}

		severity := slog.LevelInfo
		if code := wrapped.status; code >= 500 {
			severity = slog.LevelError
		} else if code >= 400 {
			severity = slog.LevelWarn
		}

		slog.LogAttrs(r.Context(), severity, "http", fields...)
	})
}

View File

@ -11,6 +11,7 @@ import (
"syscall"
"time"
"homelab/pkg/logger"
"homelab/pkg/metrics"
"homelab/pkg/trace"
)
@ -48,7 +49,7 @@ func (s *Server) Run(ctx context.Context) error {
srv := &http.Server{
Addr: fmt.Sprintf(":%s", s.Port),
Handler: trace.Middleware(metrics.Middleware(mux)),
Handler: trace.Middleware(logger.AccessMiddleware(metrics.Middleware(mux))),
ReadTimeout: s.ReadTimeout,
WriteTimeout: s.WriteTimeout,
IdleTimeout: s.IdleTimeout,