diff --git a/infrastructure/terraform/monitoring.tf b/infrastructure/terraform/monitoring.tf
index 26b44c8..8978a25 100644
--- a/infrastructure/terraform/monitoring.tf
+++ b/infrastructure/terraform/monitoring.tf
@@ -190,8 +190,33 @@ resource "helm_release" "fluent_bit" {
       config = {
         service = "[SERVICE]\n Daemon Off\n Log_Level info\n Parsers_File /fluent-bit/etc/parsers.conf\n HTTP_Server On\n HTTP_Listen 0.0.0.0\n HTTP_Port 2020\n Health_Check On\n"
         inputs = "[INPUT]\n Name tail\n Path /var/log/containers/*.log\n Exclude_Path /var/log/containers/fluent-bit-*.log\n multiline.parser docker,cri\n Tag kube.*\n Mem_Buf_Limit 50MB\n Skip_Long_Lines On\n"
-        filters = "[FILTER]\n Name kubernetes\n Match kube.*\n Annotations Off\n Labels On\n"
-        outputs = "[OUTPUT]\n Name loki\n Match *\n Host loki-gateway.monitoring.svc\n Port 80\n Labels job=fluent-bit\n"
+        filters = join("", [
+          "[FILTER]\n",
+          " Name kubernetes\n",
+          " Match kube.*\n",
+          " Annotations Off\n",
+          " Labels On\n",
+          "\n",
+          # Lift the nested 'kubernetes' object to the top level so label_keys
+          # can reference flat fields like $kube_namespace_name.
+          "[FILTER]\n",
+          " Name nest\n",
+          " Match kube.*\n",
+          " Operation lift\n",
+          " Nested_under kubernetes\n",
+          " Add_prefix kube_\n",
+        ])
+        outputs = join("", [
+          "[OUTPUT]\n",
+          " Name loki\n",
+          " Match kube.*\n",
+          " Host loki-gateway.monitoring.svc\n",
+          " Port 80\n",
+          # Static label keeps backward compat; dynamic labels let you filter
+          # {kube_namespace_name="finance"} or {kube_container_name="api"} in LogQL.
+          " Labels job=fluent-bit\n",
+          " label_keys $kube_namespace_name,$kube_container_name\n",
+        ])
       }
       tolerations = [
         { operator = "Exists" }
diff --git a/pkg/logger/access.go b/pkg/logger/access.go
new file mode 100644
index 0000000..00a77e8
--- /dev/null
+++ b/pkg/logger/access.go
@@ -0,0 +1,92 @@
+package logger
+
+import (
+	"log/slog"
+	"net/http"
+	"time"
+
+	"go.opentelemetry.io/otel/trace"
+)
+
+// skipAccessLog lists paths that should not produce access log lines.
+var skipAccessLog = map[string]bool{
+	"/healthz": true,
+	"/readyz":  true,
+	"/metrics": true,
+}
+
+// statusRecorder wraps an http.ResponseWriter and remembers the status code
+// of the response so it can be logged after the handler returns.
+type statusRecorder struct {
+	http.ResponseWriter
+	status      int
+	wroteHeader bool
+}
+
+// WriteHeader records the first final status code and forwards the call.
+// Informational 1xx codes (other than 101) are forwarded but not recorded,
+// and later calls cannot overwrite a status that was already recorded.
+func (r *statusRecorder) WriteHeader(code int) {
+	informational := code >= 100 && code < 200 && code != http.StatusSwitchingProtocols
+	if !informational && !r.wroteHeader {
+		r.status = code
+		r.wroteHeader = true
+	}
+	r.ResponseWriter.WriteHeader(code)
+}
+
+// Write marks the header as sent, because net/http implicitly sends the
+// header on the first Write; the recorded status is kept as is.
+func (r *statusRecorder) Write(b []byte) (int, error) {
+	r.wroteHeader = true
+	return r.ResponseWriter.Write(b)
+}
+
+// Unwrap returns the wrapped writer so http.ResponseController can reach
+// optional interfaces such as http.Flusher and http.Hijacker.
+func (r *statusRecorder) Unwrap() http.ResponseWriter {
+	return r.ResponseWriter
+}
+
+// AccessMiddleware logs one structured line per HTTP request.
+// It must run inside trace.Middleware so the OTel span is already in context.
+//
+//   trace.Middleware → AccessMiddleware → metrics.Middleware → mux
+//
+// Each line includes method, path, status, latency in ms, and trace_id when
+// tracing is enabled. Status ≥ 500 is logged at ERROR, 4xx at WARN, rest INFO.
+func AccessMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if skipAccessLog[r.URL.Path] {
+			next.ServeHTTP(w, r)
+			return
+		}
+
+		start := time.Now()
+		// Default to 200: the status logged when WriteHeader is never called.
+		rec := &statusRecorder{ResponseWriter: w, status: http.StatusOK}
+		next.ServeHTTP(rec, r)
+
+		attrs := []any{
+			"method", r.Method,
+			"path", r.URL.Path,
+			"status", rec.status,
+			"ms", time.Since(start).Milliseconds(),
+		}
+
+		sc := trace.SpanFromContext(r.Context()).SpanContext()
+		if sc.IsValid() {
+			attrs = append(attrs, "trace_id", sc.TraceID().String())
+		}
+
+		level := slog.LevelInfo
+		switch {
+		case rec.status >= 500:
+			level = slog.LevelError
+		case rec.status >= 400:
+			level = slog.LevelWarn
+		}
+
+		slog.Log(r.Context(), level, "http", attrs...)
+	})
+}
diff --git a/pkg/setup/setup.go b/pkg/setup/setup.go
index e63563b..4d891ed 100644
--- a/pkg/setup/setup.go
+++ b/pkg/setup/setup.go
@@ -11,6 +11,7 @@ import (
 	"syscall"
 	"time"
 
+	"homelab/pkg/logger"
 	"homelab/pkg/metrics"
 	"homelab/pkg/trace"
 )
@@ -48,7 +49,7 @@ func (s *Server) Run(ctx context.Context) error {
 
 	srv := &http.Server{
 		Addr:         fmt.Sprintf(":%s", s.Port),
-		Handler:      trace.Middleware(metrics.Middleware(mux)),
+		Handler:      trace.Middleware(logger.AccessMiddleware(metrics.Middleware(mux))),
 		ReadTimeout:  s.ReadTimeout,
 		WriteTimeout: s.WriteTimeout,
 		IdleTimeout:  s.IdleTimeout,