homelab/pkg/setup/setup.go
Gonçalo Rodrigues 99ed992d98 obs: request access log middleware + Loki label enrichment (#36)
Adds two targeted observability improvements across all homelab services.

pkg/logger/access.go (new)
  HTTP access log middleware that logs one structured line per request:
    method, path, status, ms, trace_id
  The trace_id comes from the OTel span already in context (created by
  trace.Middleware which runs outside this one), so each log entry in
  Loki has a clickable link into Jaeger. Health/metrics endpoints are
  excluded to avoid noise. Level is ERROR for 5xx, WARN for 4xx, INFO
  otherwise.

pkg/setup/setup.go
  Wire the new middleware between trace.Middleware (which creates the
  span) and metrics.Middleware:
    trace → AccessMiddleware → metrics → mux
  Order matters: span must exist before AccessMiddleware reads it.

infrastructure/terraform/monitoring.tf
  Fluent Bit was shipping all container logs to Loki with a single
  static label (job=fluent-bit), making it impossible to filter logs
  by service. Added a `nest/lift` filter that flattens the kubernetes
  metadata block to top-level fields (kube_namespace_name,
  kube_container_name, …), then promoted those as Loki label_keys.
  After this change you can query:
    {kube_namespace_name="finance"} |= "trace_id"
  and Loki will return only log lines from the finance namespace (e.g. finance-api) that contain "trace_id".

Co-authored-by: Gonçalo Rodrigues <guga@Goncalos-MacBook-Pro.local>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-20 15:15:06 +01:00

87 lines
1.8 KiB
Go

package setup
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"os"
"os/signal"
"syscall"
"time"
"homelab/pkg/logger"
"homelab/pkg/metrics"
"homelab/pkg/trace"
)
// Server describes a single HTTP service: how it is named in logs, which port
// it listens on, which handler it serves, and the timeouts Run applies.
type Server struct {
	// Name is attached to the "listening" and "shutting down" log lines.
	Name string
	// Port is the TCP port to listen on, without the leading colon.
	Port string
	// Handler is mounted at "/" by Run. When nil, only the built-in
	// endpoints registered by Run are served.
	Handler http.Handler

	// ReadTimeout, WriteTimeout and IdleTimeout are copied onto the
	// underlying http.Server by Run.
	ReadTimeout, WriteTimeout, IdleTimeout time.Duration

	// ShutdownWait is the deadline Run gives http.Server.Shutdown once a
	// stop has been requested.
	ShutdownWait time.Duration
}
// Default returns a Server for the service called name that serves handler.
//
// The port is read from the PORT environment variable and falls back to 8080
// when that variable is unset or empty. Timeouts are fixed: 10s read, 30s
// write, 60s idle, and 10s for graceful shutdown.
func Default(name string, handler http.Handler) *Server {
	const (
		portVar      = "PORT"
		fallbackPort = "8080"

		readLimit     = 10 * time.Second
		writeLimit    = 30 * time.Second
		idleLimit     = 60 * time.Second
		shutdownLimit = 10 * time.Second
	)

	srv := new(Server)
	srv.Name = name
	srv.Port = env(portVar, fallbackPort)
	srv.Handler = handler
	srv.ReadTimeout = readLimit
	srv.WriteTimeout = writeLimit
	srv.IdleTimeout = idleLimit
	srv.ShutdownWait = shutdownLimit
	return srv
}
// Run serves HTTP on s.Port until ctx is cancelled or the process receives
// SIGINT or SIGTERM, then shuts the server down gracefully.
//
// The endpoints /healthz, /readyz and /metrics are always registered;
// s.Handler, when non-nil, is mounted at "/". Every request passes through
// trace.Middleware, then logger.AccessMiddleware, then metrics.Middleware
// before it reaches the mux.
//
// If the listener fails to start or stops unexpectedly, Run logs the failure
// and returns it wrapped, leaving the exit decision to the caller. Otherwise
// it returns the result of http.Server.Shutdown, which is given at most
// s.ShutdownWait to finish in-flight requests.
func (s *Server) Run(ctx context.Context) error {
	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", ok)
	mux.HandleFunc("/readyz", ok)
	mux.Handle("/metrics", metrics.Handler())
	if s.Handler != nil {
		mux.Handle("/", s.Handler)
	}

	srv := &http.Server{
		Addr: fmt.Sprintf(":%s", s.Port),
		// trace.Middleware is outermost so that it runs before
		// logger.AccessMiddleware; metrics.Middleware wraps the mux directly.
		Handler:      trace.Middleware(logger.AccessMiddleware(metrics.Middleware(mux))),
		ReadTimeout:  s.ReadTimeout,
		WriteTimeout: s.WriteTimeout,
		IdleTimeout:  s.IdleTimeout,
	}

	ctx, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	// Buffered so the serving goroutine can always hand over its error and
	// exit, even when Run has already moved on to shutdown.
	serveErr := make(chan error, 1)
	go func() {
		slog.Info("listening", "addr", srv.Addr, "service", s.Name)
		// http.ErrServerClosed is the normal result of Shutdown, not a failure.
		if err := srv.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			serveErr <- err
		}
	}()

	select {
	case err := <-serveErr:
		// Return the failure instead of calling os.Exit from library code:
		// os.Exit would skip the deferred stop() and any cleanup the caller
		// has registered. The log line is kept so the failure stays visible
		// even if a caller ignores Run's return value.
		slog.Error("server error", "err", err)
		return fmt.Errorf("serving %s on %s: %w", s.Name, srv.Addr, err)
	case <-ctx.Done():
	}

	slog.Info("shutting down", "service", s.Name)
	// A fresh context is required here: ctx is already cancelled.
	shutdown, cancel := context.WithTimeout(context.Background(), s.ShutdownWait)
	defer cancel()
	return srv.Shutdown(shutdown)
}
// ok replies to any request with status 200 and no body. Run registers it for
// the /healthz and /readyz endpoints.
func ok(rw http.ResponseWriter, _ *http.Request) {
	const status = http.StatusOK
	rw.WriteHeader(status)
}
// env returns the value of the environment variable named key, or fallback
// when that variable is unset or set to the empty string.
func env(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}