From 546ab66b1a025585c198384f01b64352efccc3fd Mon Sep 17 00:00:00 2001 From: mkelvers Date: Tue, 23 Jun 2026 15:08:41 +0200 Subject: [PATCH] refactor: remove prometheus metrics subsystem --- internal/observability/db.go | 51 --- internal/observability/log.go | 2 +- internal/observability/metrics.go | 434 ------------------------- internal/observability/metrics_test.go | 99 ------ 4 files changed, 1 insertion(+), 585 deletions(-) delete mode 100644 internal/observability/db.go delete mode 100644 internal/observability/metrics.go delete mode 100644 internal/observability/metrics_test.go diff --git a/internal/observability/db.go b/internal/observability/db.go deleted file mode 100644 index ff2e398..0000000 --- a/internal/observability/db.go +++ /dev/null @@ -1,51 +0,0 @@ -package observability - -import ( - "context" - "database/sql" - "time" -) - -type instrumentedDB struct { - db interface { - ExecContext(context.Context, string, ...any) (sql.Result, error) - PrepareContext(context.Context, string) (*sql.Stmt, error) - QueryContext(context.Context, string, ...any) (*sql.Rows, error) - QueryRowContext(context.Context, string, ...any) *sql.Row - } - metrics *Metrics -} - -func InstrumentDB(db interface { - ExecContext(context.Context, string, ...any) (sql.Result, error) - PrepareContext(context.Context, string) (*sql.Stmt, error) - QueryContext(context.Context, string, ...any) (*sql.Rows, error) - QueryRowContext(context.Context, string, ...any) *sql.Row -}, metrics *Metrics) *instrumentedDB { - return &instrumentedDB{db: db, metrics: metrics} -} - -func (db *instrumentedDB) ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error) { - start := time.Now() - result, err := db.db.ExecContext(ctx, query, args...) - db.metrics.ObserveDBQuery("exec", time.Since(start), err) - return result, err -} - -func (db *instrumentedDB) PrepareContext(ctx context.Context, query string) (*sql.Stmt, error) { - return db.db.PrepareContext(ctx, query) -} - -func (db *instrumentedDB) QueryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error) { - start := time.Now() - rows, err := db.db.QueryContext(ctx, query, args...) - db.metrics.ObserveDBQuery("query", time.Since(start), err) - return rows, err -} - -func (db *instrumentedDB) QueryRowContext(ctx context.Context, query string, args ...any) *sql.Row { - start := time.Now() - row := db.db.QueryRowContext(ctx, query, args...) - db.metrics.ObserveDBQuery("query_row", time.Since(start), nil) - return row -} diff --git a/internal/observability/log.go b/internal/observability/log.go index ba43d74..7cdbac7 100644 --- a/internal/observability/log.go +++ b/internal/observability/log.go @@ -1,4 +1,4 @@ -// Package observability provides logging and metrics instrumentation. +// Package observability provides structured logging helpers. package observability import ( diff --git a/internal/observability/metrics.go b/internal/observability/metrics.go deleted file mode 100644 index 0419b0c..0000000 --- a/internal/observability/metrics.go +++ /dev/null @@ -1,434 +0,0 @@ -package observability - -import ( - "fmt" - "maps" - "net/http" - "sort" - "strconv" - "strings" - "sync" - "time" -) - -var defaultDurationBuckets = []float64{ - 0.005, - 0.01, - 0.025, - 0.05, - 0.1, - 0.25, - 0.5, - 1, - 2.5, - 5, - 10, -} - -type counterSample struct { - labels map[string]string - value uint64 -} - -type histogramSample struct { - labels map[string]string - buckets []uint64 - count uint64 - sum float64 -} - -type gaugeSample struct { - labels map[string]string - value float64 -} - -type counterVec struct { - mu sync.Mutex - labelNames []string - samples map[string]*counterSample -} - -type histogramVec struct { - mu sync.Mutex - labelNames []string - bounds []float64 - samples map[string]*histogramSample -} - -type gaugeVec struct { - mu sync.Mutex - labelNames []string - samples map[string]*gaugeSample -} - -type Metrics struct { - httpRequests *counterVec - httpRequestLatency *histogramVec - jikanRequests *counterVec - jikanRequestErrors *counterVec - jikanLatency *histogramVec - dbQueryLatency *histogramVec - workerTicks *counterVec - cacheOperations *counterVec - jikanCacheRows *gaugeVec - jikanCacheOldest *gaugeVec -} - -func NewMetrics() *Metrics { - return &Metrics{ - httpRequests: newCounterVec("method", "route", "status"), - httpRequestLatency: newHistogramVec(defaultDurationBuckets, "method", "route", "status"), - jikanRequests: newCounterVec("endpoint", "status"), - jikanRequestErrors: newCounterVec("endpoint", "status"), - jikanLatency: newHistogramVec(defaultDurationBuckets, "endpoint", "status"), - dbQueryLatency: newHistogramVec(defaultDurationBuckets, "operation", "result"), - workerTicks: newCounterVec("worker", "result"), - cacheOperations: newCounterVec("cache", "result"), - jikanCacheRows: newGaugeVec("state"), - jikanCacheOldest: newGaugeVec(), - } -} - -func (m *Metrics) Handler() http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") - w.WriteHeader(http.StatusOK) - if err := m.writePrometheus(w); err != nil { - WarnContext(r.Context(), "metrics_write_failed", "observability", "", nil, err) - } - }) -} - -func (m *Metrics) ObserveHTTPRequest(method string, route string, status int, duration time.Duration) { - statusLabel := strconv.Itoa(status) - m.httpRequests.Inc(method, route, statusLabel) - m.httpRequestLatency.Observe(duration.Seconds(), method, route, statusLabel) -} - -func (m *Metrics) ObserveJikanRequest(endpoint string, status int, duration time.Duration, err error) { - statusLabel := strconv.Itoa(status) - m.jikanRequests.Inc(endpoint, statusLabel) - m.jikanLatency.Observe(duration.Seconds(), endpoint, statusLabel) - if err != nil || status >= http.StatusBadRequest { - m.jikanRequestErrors.Inc(endpoint, statusLabel) - } -} - -func (m *Metrics) ObserveDBQuery(operation string, duration time.Duration, err error) { - result := "success" - if err != nil { - result = "error" - } - m.dbQueryLatency.Observe(duration.Seconds(), operation, result) -} - -func (m *Metrics) ObserveWorkerTick(worker string, err error) { - if err != nil { - m.workerTicks.Inc(worker, "failure") - return - } - m.workerTicks.Inc(worker, "success") -} - -func (m *Metrics) ObserveCache(cache string, result string) { - m.cacheOperations.Inc(cache, result) -} - -func (m *Metrics) ObserveJikanCacheStats(totalRows int64, expiredRows int64, oldestExpiresAtSeconds int64) { - m.jikanCacheRows.Set(float64(totalRows), "total") - m.jikanCacheRows.Set(float64(expiredRows), "expired") - m.jikanCacheOldest.Set(float64(oldestExpiresAtSeconds)) -} - -func (m *Metrics) writePrometheus(w http.ResponseWriter) error { - if err := writeCounterMetric(w, "mal_http_requests_total", "Total HTTP requests by method, route, and status.", m.httpRequests.snapshot()); err != nil { - return err - } - if err := writeHistogramMetric(w, "mal_http_request_duration_seconds", "HTTP request latency in seconds.", m.httpRequestLatency.snapshot(), m.httpRequestLatency.bounds); err != nil { - return err - } - if err := writeCounterMetric(w, "mal_jikan_upstream_requests_total", "Total upstream Jikan requests by endpoint and status.", m.jikanRequests.snapshot()); err != nil { - return err - } - if err := writeCounterMetric(w, "mal_jikan_upstream_errors_total", "Total upstream Jikan errors by endpoint and status.", m.jikanRequestErrors.snapshot()); err != nil { - return err - } - if err := writeHistogramMetric(w, "mal_jikan_upstream_request_duration_seconds", "Upstream Jikan request latency in seconds.", m.jikanLatency.snapshot(), m.jikanLatency.bounds); err != nil { - return err - } - if err := writeHistogramMetric(w, "mal_db_query_duration_seconds", "Database query latency in seconds.", m.dbQueryLatency.snapshot(), m.dbQueryLatency.bounds); err != nil { - return err - } - if err := writeCounterMetric(w, "mal_worker_ticks_total", "Total background worker ticks by worker and result.", m.workerTicks.snapshot()); err != nil { - return err - } - if err := writeCounterMetric(w, "mal_cache_operations_total", "Total cache hits and misses by cache name.", m.cacheOperations.snapshot()); err != nil { - return err - } - if err := writeGaugeMetric(w, "mal_jikan_cache_rows", "Current jikan_cache row count by state.", m.jikanCacheRows.snapshot()); err != nil { - return err - } - return writeGaugeMetric(w, "mal_jikan_cache_oldest_expires_at_seconds", "Unix timestamp for the oldest jikan_cache expires_at value, or 0 when empty.", m.jikanCacheOldest.snapshot()) -} - -func newCounterVec(labelNames ...string) *counterVec { - return &counterVec{ - labelNames: append([]string(nil), labelNames...), - samples: make(map[string]*counterSample), - } -} - -func (c *counterVec) Inc(labelValues ...string) { - c.mu.Lock() - defer c.mu.Unlock() - - key, labels := buildLabelKey(c.labelNames, labelValues) - if labels == nil { - return - } - sample, ok := c.samples[key] - if !ok { - sample = &counterSample{labels: labels} - c.samples[key] = sample - } - sample.value++ -} - -func (c *counterVec) snapshot() []counterSample { - c.mu.Lock() - defer c.mu.Unlock() - - keys := sortedCounterSampleKeys(c.samples) - out := make([]counterSample, 0, len(keys)) - for _, key := range keys { - sample := c.samples[key] - out = append(out, counterSample{ - labels: copyLabels(sample.labels), - value: sample.value, - }) - } - return out -} - -func newHistogramVec(bounds []float64, labelNames ...string) *histogramVec { - return &histogramVec{ - labelNames: append([]string(nil), labelNames...), - bounds: append([]float64(nil), bounds...), - samples: make(map[string]*histogramSample), - } -} - -func (h *histogramVec) Observe(value float64, labelValues ...string) { - h.mu.Lock() - defer h.mu.Unlock() - - key, labels := buildLabelKey(h.labelNames, labelValues) - if labels == nil { - return - } - sample, ok := h.samples[key] - if !ok { - sample = &histogramSample{ - labels: labels, - buckets: make([]uint64, len(h.bounds)), - } - h.samples[key] = sample - } - - sample.count++ - sample.sum += value - for idx, bound := range h.bounds { - if value <= bound { - sample.buckets[idx]++ - } - } -} - -func (h *histogramVec) snapshot() []histogramSample { - h.mu.Lock() - defer h.mu.Unlock() - - keys := sortedHistogramSampleKeys(h.samples) - out := make([]histogramSample, 0, len(keys)) - for _, key := range keys { - sample := h.samples[key] - buckets := make([]uint64, len(sample.buckets)) - copy(buckets, sample.buckets) - out = append(out, histogramSample{ - labels: copyLabels(sample.labels), - buckets: buckets, - count: sample.count, - sum: sample.sum, - }) - } - return out -} - -func newGaugeVec(labelNames ...string) *gaugeVec { - return &gaugeVec{ - labelNames: append([]string(nil), labelNames...), - samples: make(map[string]*gaugeSample), - } -} - -func (g *gaugeVec) Set(value float64, labelValues ...string) { - g.mu.Lock() - defer g.mu.Unlock() - - key, labels := buildLabelKey(g.labelNames, labelValues) - if labels == nil { - return - } - sample, ok := g.samples[key] - if !ok { - sample = &gaugeSample{labels: labels} - g.samples[key] = sample - } - sample.value = value -} - -func (g *gaugeVec) snapshot() []gaugeSample { - g.mu.Lock() - defer g.mu.Unlock() - - keys := sortedGaugeSampleKeys(g.samples) - out := make([]gaugeSample, 0, len(keys)) - for _, key := range keys { - sample := g.samples[key] - out = append(out, gaugeSample{ - labels: copyLabels(sample.labels), - value: sample.value, - }) - } - return out -} - -func buildLabelKey(labelNames []string, labelValues []string) (string, map[string]string) { - if len(labelNames) != len(labelValues) { - return "", nil - } - - labels := make(map[string]string, len(labelNames)) - parts := make([]string, 0, len(labelNames)*2) - for idx, name := range labelNames { - value := labelValues[idx] - labels[name] = value - parts = append(parts, name, value) - } - return strings.Join(parts, "\xff"), labels -} - -func copyLabels(labels map[string]string) map[string]string { - out := make(map[string]string, len(labels)) - maps.Copy(out, labels) - return out -} - -func sortedCounterSampleKeys(samples map[string]*counterSample) []string { - keys := make([]string, 0, len(samples)) - for key := range samples { - keys = append(keys, key) - } - sort.Strings(keys) - return keys -} - -func sortedHistogramSampleKeys(samples map[string]*histogramSample) []string { - keys := make([]string, 0, len(samples)) - for key := range samples { - keys = append(keys, key) - } - sort.Strings(keys) - return keys -} - -func sortedGaugeSampleKeys(samples map[string]*gaugeSample) []string { - keys := make([]string, 0, len(samples)) - for key := range samples { - keys = append(keys, key) - } - sort.Strings(keys) - return keys -} - -func writeCounterMetric(w http.ResponseWriter, name string, help string, samples []counterSample) error { - if _, err := fmt.Fprintf(w, "# HELP %s %s\n", name, help); err != nil { - return err - } - if _, err := fmt.Fprintf(w, "# TYPE %s counter\n", name); err != nil { - return err - } - for _, sample := range samples { - if _, err := fmt.Fprintf(w, "%s%s %d\n", name, formatLabels(sample.labels), sample.value); err != nil { - return err - } - } - return nil -} - -func writeGaugeMetric(w http.ResponseWriter, name string, help string, samples []gaugeSample) error { - if _, err := fmt.Fprintf(w, "# HELP %s %s\n", name, help); err != nil { - return err - } - if _, err := fmt.Fprintf(w, "# TYPE %s gauge\n", name); err != nil { - return err - } - for _, sample := range samples { - if _, err := fmt.Fprintf(w, "%s%s %s\n", name, formatLabels(sample.labels), formatFloat(sample.value)); err != nil { - return err - } - } - return nil -} - -func writeHistogramMetric(w http.ResponseWriter, name string, help string, samples []histogramSample, bounds []float64) error { - if _, err := fmt.Fprintf(w, "# HELP %s %s\n", name, help); err != nil { - return err - } - if _, err := fmt.Fprintf(w, "# TYPE %s histogram\n", name); err != nil { - return err - } - for _, sample := range samples { - for idx, bound := range bounds { - labels := copyLabels(sample.labels) - labels["le"] = formatFloat(bound) - if _, err := fmt.Fprintf(w, "%s_bucket%s %d\n", name, formatLabels(labels), sample.buckets[idx]); err != nil { - return err - } - } - labels := copyLabels(sample.labels) - labels["le"] = "+Inf" - if _, err := fmt.Fprintf(w, "%s_bucket%s %d\n", name, formatLabels(labels), sample.count); err != nil { - return err - } - if _, err := fmt.Fprintf(w, "%s_sum%s %s\n", name, formatLabels(sample.labels), formatFloat(sample.sum)); err != nil { - return err - } - if _, err := fmt.Fprintf(w, "%s_count%s %d\n", name, formatLabels(sample.labels), sample.count); err != nil { - return err - } - } - return nil -} - -func formatLabels(labels map[string]string) string { - if len(labels) == 0 { - return "" - } - - keys := make([]string, 0, len(labels)) - for key := range labels { - keys = append(keys, key) - } - sort.Strings(keys) - - parts := make([]string, 0, len(keys)) - for _, key := range keys { - parts = append(parts, fmt.Sprintf(`%s=%q`, key, labels[key])) - } - return "{" + strings.Join(parts, ",") + "}" -} - -func formatFloat(value float64) string { - return strconv.FormatFloat(value, 'f', -1, 64) -} diff --git a/internal/observability/metrics_test.go b/internal/observability/metrics_test.go deleted file mode 100644 index 6157c64..0000000 --- a/internal/observability/metrics_test.go +++ /dev/null @@ -1,99 +0,0 @@ -package observability - -import ( - "context" - "database/sql" - "io" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" - - _ "github.com/mattn/go-sqlite3" -) - -func TestMetricsHandlerRendersPrometheusFamilies(t *testing.T) { - metrics := NewMetrics() - metrics.ObserveHTTPRequest(http.MethodGet, "/anime/:id", http.StatusOK, 125*time.Millisecond) - metrics.ObserveJikanRequest("/anime/{id}", http.StatusTooManyRequests, 800*time.Millisecond, assertErr{}) - metrics.ObserveDBQuery("query", 25*time.Millisecond, nil) - metrics.ObserveWorkerTick("episodes_availability", nil) - metrics.ObserveCache("jikan", "hit") - metrics.ObserveCache("episode_availability", "miss") - metrics.ObserveJikanCacheStats(12, 3, 1770000000) - - req := httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/metrics", nil) - rec := httptest.NewRecorder() - metrics.Handler().ServeHTTP(rec, req) - - body, err := io.ReadAll(rec.Result().Body) - if err != nil { - t.Fatalf("read body: %v", err) - } - - text := string(body) - assertContains(t, text, `mal_http_requests_total{method="GET",route="/anime/:id",status="200"} 1`) - assertContains(t, text, `mal_http_request_duration_seconds_count{method="GET",route="/anime/:id",status="200"} 1`) - assertContains(t, text, `mal_jikan_upstream_requests_total{endpoint="/anime/{id}",status="429"} 1`) - assertContains(t, text, `mal_jikan_upstream_errors_total{endpoint="/anime/{id}",status="429"} 1`) - assertContains(t, text, `mal_db_query_duration_seconds_count{operation="query",result="success"} 1`) - assertContains(t, text, `mal_worker_ticks_total{result="success",worker="episodes_availability"} 1`) - assertContains(t, text, `mal_cache_operations_total{cache="episode_availability",result="miss"} 1`) - assertContains(t, text, `mal_jikan_cache_rows{state="total"} 12`) - assertContains(t, text, `mal_jikan_cache_rows{state="expired"} 3`) - assertContains(t, text, `mal_jikan_cache_oldest_expires_at_seconds 1770000000`) -} - -func TestInstrumentDBRecordsQueryLatency(t *testing.T) { - metrics := NewMetrics() - sqlDB, err := sql.Open("sqlite3", ":memory:") - if err != nil { - t.Fatalf("open sqlite: %v", err) - } - defer func() { - if err := sqlDB.Close(); err != nil { - t.Errorf("close sqlite: %v", err) - } - }() - - instrumented := InstrumentDB(sqlDB, metrics) - if _, err := instrumented.ExecContext(context.Background(), `CREATE TABLE item (id INTEGER PRIMARY KEY)`); err != nil { - t.Fatalf("create table: %v", err) - } - if _, err := instrumented.QueryContext(context.Background(), `SELECT id FROM item`); err != nil { - t.Fatalf("query table: %v", err) - } - if _, err := instrumented.QueryContext(context.Background(), `SELECT id FROM missing_table`); err == nil { - t.Fatal("expected missing table query to fail") - } - - samples := metrics.dbQueryLatency.snapshot() - if len(samples) != 3 { - t.Fatalf("db samples = %d, want 3", len(samples)) - } - assertHistogramSample(t, samples, "exec", "success") - assertHistogramSample(t, samples, "query", "success") - assertHistogramSample(t, samples, "query", "error") -} - -type assertErr struct{} - -func (assertErr) Error() string { return "boom" } - -func assertContains(t *testing.T, text string, want string) { - t.Helper() - if !strings.Contains(text, want) { - t.Fatalf("missing metric line %q in:\n%s", want, text) - } -} - -func assertHistogramSample(t *testing.T, samples []histogramSample, operation string, result string) { - t.Helper() - for _, sample := range samples { - if sample.labels["operation"] == operation && sample.labels["result"] == result && sample.count == 1 { - return - } - } - t.Fatalf("missing db histogram sample operation=%q result=%q in %#v", operation, result, samples) -}