Skip to content

Commit 6ce422c

Browse files
Dhanrajkshirsagar, Dhanraj, and claude
authored
Fix/traefik metric type name (#198)
Co-authored-by: Dhanraj <dhanraj@Dhanrajs-MacBook-Pro-2.local>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c7a5034 commit 6ce422c

6 files changed

Lines changed: 55 additions & 55 deletions

File tree

app/services/metrics/metrics.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ func (mp *metricspusher) Push(cred credential.Credential) error {
338338
}
339339
}
340340

341-
// ── Traefik metrics (HTTP requests / response time / error rate per app) ──
341+
// ── Traefik metrics (HTTP requests / response time / error rate per entrypoint) ──
342342

343343
traefikSets, err := mp.traefikcollector.Collect(ctx)
344344
if err != nil {
@@ -348,7 +348,7 @@ func (mp *metricspusher) Push(cred credential.Credential) error {
348348
metricSets = append(metricSets, domainmetrics.MetricSet{
349349
Type: domainmetrics.MetricTypeTraefikService,
350350
Attributes: map[string]any{
351-
"service_name": ts.Attributes.ServiceName,
351+
"entrypoint_name": ts.Attributes.EntrypointName,
352352
},
353353
Metrics: ts.Metrics,
354354
})

app/services/metrics/metrics_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,12 +235,12 @@ type MockTraefikCollector struct {
235235
mock.Mock
236236
}
237237

238-
func (m *MockTraefikCollector) Collect(ctx context.Context) ([]traefikmetrics.ServiceMetricSet, error) {
238+
func (m *MockTraefikCollector) Collect(ctx context.Context) ([]traefikmetrics.EntrypointMetricSet, error) {
239239
args := m.Called(ctx)
240240
if args.Get(0) == nil {
241241
return nil, args.Error(1)
242242
}
243-
return args.Get(0).([]traefikmetrics.ServiceMetricSet), args.Error(1)
243+
return args.Get(0).([]traefikmetrics.EntrypointMetricSet), args.Error(1)
244244
}
245245

246246
type MockDockerDiscoverer struct {
@@ -313,7 +313,7 @@ func setupTestMetricsPusher() (*metricspusher, *testMocks) {
313313
mocks.containercollector.On("Collect", mock.Anything).
314314
Return([]containermetrics.ContainerMetricSet(nil), nil)
315315
mocks.traefikcollector.On("Collect", mock.Anything).
316-
Return([]traefikmetrics.ServiceMetricSet(nil), nil)
316+
Return([]traefikmetrics.EntrypointMetricSet(nil), nil)
317317

318318
return mp, mocks
319319
}

domain/metrics/metrics.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,11 @@ type ContainerMetrics struct {
163163
UptimeSeconds int64 `json:"uptime_seconds"`
164164
}
165165

166-
// TraefikServiceMetrics holds per-service HTTP metrics scraped from Traefik's
167-
// Prometheus endpoint. These are real user-traffic signals — not health probes.
166+
// TraefikEntrypointMetrics holds per-entrypoint HTTP metrics scraped from
167+
// Traefik's Prometheus endpoint. These are real user-traffic signals captured
168+
// at the entrypoint level — not health probes.
168169
// Up is false when Traefik is unreachable or metrics are not enabled.
169-
type TraefikServiceMetrics struct {
170+
type TraefikEntrypointMetrics struct {
170171
Up bool `json:"up"`
171172
RequestsTotal int64 `json:"requests_total"`
172173
RequestsPerSecond float64 `json:"requests_per_second"`
@@ -180,8 +181,8 @@ type TraefikServiceMetrics struct {
180181
P99ResponseTimeMs float64 `json:"p99_response_time_ms"`
181182
}
182183

183-
type TraefikServiceAttributes struct {
184-
ServiceName string `json:"service_name"`
184+
type TraefikEntrypointAttributes struct {
185+
EntrypointName string `json:"entrypoint_name"`
185186
}
186187

187188
type ContainerAttributes struct {

internal/containermetrics/collector_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,14 @@ func TestResolveContainerName(t *testing.T) {
1414
expected string
1515
}{
1616
{
17-
name: "prefer coolify name label",
17+
name: "prefer compose service over coolify name",
18+
id: "12345678901234567890",
19+
names: []string{"/docker-name"},
20+
labels: map[string]string{"com.docker.compose.service": "web-service", "coolify.name": "uuid-resource-id"},
21+
expected: "web-service",
22+
},
23+
{
24+
name: "fallback to coolify name when compose not set",
1825
id: "12345678901234567890",
1926
names: []string{"/docker-name"},
2027
labels: map[string]string{"coolify.name": "my-cool-app"},

internal/dockerutil/naming.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ import (
55
)
66

77
// ResolveContainerName returns a human-readable name for a container by
8-
// prioritizing Coolify labels, then Docker Compose labels, then the Docker
9-
// container name, and finally falling back to a truncated container ID.
8+
// prioritizing Docker Compose service names, then Coolify labels, then the
9+
// Docker container name, and finally falling back to a truncated container ID.
1010
func ResolveContainerName(id string, names []string, labels map[string]string) string {
11-
if coolName, ok := labels["coolify.name"]; ok && coolName != "" {
12-
return coolName
13-
}
1411
if composeService, ok := labels["com.docker.compose.service"]; ok && composeService != "" {
1512
return composeService
1613
}
14+
if coolName, ok := labels["coolify.name"]; ok && coolName != "" {
15+
return coolName
16+
}
1717
if len(names) > 0 {
1818
return strings.TrimPrefix(names[0], "/")
1919
}

internal/traefikmetrics/collector.go

Lines changed: 31 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Package traefikmetrics scrapes Traefik's Prometheus metrics endpoint and
2-
// reports per-service HTTP signals: request rate, error rate, and response
2+
// reports per-entrypoint HTTP signals: request rate, error rate, and response
33
// time percentiles. These reflect real user traffic, not synthetic probes.
44
//
55
// Prerequisites: Traefik must have metrics enabled.
@@ -24,15 +24,15 @@ import (
2424
)
2525

2626
type Collector interface {
27-
Collect(ctx context.Context) ([]ServiceMetricSet, error)
27+
Collect(ctx context.Context) ([]EntrypointMetricSet, error)
2828
}
2929

30-
type ServiceMetricSet struct {
31-
Attributes domainmetrics.TraefikServiceAttributes
32-
Metrics domainmetrics.TraefikServiceMetrics
30+
type EntrypointMetricSet struct {
31+
Attributes domainmetrics.TraefikEntrypointAttributes
32+
Metrics domainmetrics.TraefikEntrypointMetrics
3333
}
3434

35-
type lastRequests struct {
35+
type lastRequestsEntrypoint struct {
3636
total int64
3737
collectedAt time.Time
3838
}
@@ -41,7 +41,7 @@ type traefikCollector struct {
4141
endpoint string
4242
client *http.Client
4343
mu sync.Mutex
44-
lastReqs map[string]lastRequests // keyed by clean service name
44+
lastReqs map[string]lastRequestsEntrypoint // keyed by entrypoint name
4545
}
4646

4747
func New() Collector {
@@ -52,11 +52,11 @@ func NewWithEndpoint(endpoint string) Collector {
5252
return &traefikCollector{
5353
endpoint: endpoint,
5454
client: &http.Client{Timeout: 5 * time.Second},
55-
lastReqs: make(map[string]lastRequests),
55+
lastReqs: make(map[string]lastRequestsEntrypoint),
5656
}
5757
}
5858

59-
func (tc *traefikCollector) Collect(ctx context.Context) ([]ServiceMetricSet, error) {
59+
func (tc *traefikCollector) Collect(ctx context.Context) ([]EntrypointMetricSet, error) {
6060
req, err := http.NewRequestWithContext(ctx, http.MethodGet, tc.endpoint, nil)
6161
if err != nil {
6262
return nil, fmt.Errorf("build request: %w", err)
@@ -166,9 +166,9 @@ func parseLabels(s string) map[string]string {
166166
return labels
167167
}
168168

169-
// ── Per-service aggregation ───────────────────────────────────────────────────
169+
// ── Per-entrypoint aggregation ───────────────────────────────────────────────
170170

171-
type serviceAgg struct {
171+
type entrypointAgg struct {
172172
requestsTotal int64
173173
requests2xx int64
174174
requests4xx int64
@@ -179,27 +179,26 @@ type serviceAgg struct {
179179
durationCount float64 // total request count from _count samples
180180
}
181181

182-
func (tc *traefikCollector) aggregate(text string) ([]ServiceMetricSet, error) {
182+
func (tc *traefikCollector) aggregate(text string) ([]EntrypointMetricSet, error) {
183183
samples := parseSamples(text)
184184

185-
svcs := make(map[string]*serviceAgg)
186-
ensure := func(name string) *serviceAgg {
187-
if svcs[name] == nil {
188-
svcs[name] = &serviceAgg{buckets: make(map[float64]float64)}
185+
eps := make(map[string]*entrypointAgg)
186+
ensure := func(name string) *entrypointAgg {
187+
if eps[name] == nil {
188+
eps[name] = &entrypointAgg{buckets: make(map[float64]float64)}
189189
}
190-
return svcs[name]
190+
return eps[name]
191191
}
192192

193193
for _, s := range samples {
194-
svcRaw, ok := s.labels["service"]
194+
entrypoint, ok := s.labels["entrypoint"]
195195
if !ok {
196196
continue
197197
}
198-
svc := cleanName(svcRaw)
199-
agg := ensure(svc)
198+
agg := ensure(entrypoint)
200199

201200
switch s.name {
202-
case "traefik_service_requests_total":
201+
case "traefik_entrypoint_requests_total":
203202
count := int64(s.value)
204203
agg.requestsTotal += count
205204
switch {
@@ -211,7 +210,7 @@ func (tc *traefikCollector) aggregate(text string) ([]ServiceMetricSet, error) {
211210
agg.requests5xx += count
212211
}
213212

214-
case "traefik_service_request_duration_seconds_bucket":
213+
case "traefik_entrypoint_request_duration_seconds_bucket":
215214
leStr := s.labels["le"]
216215
if leStr == "+Inf" {
217216
continue // use _count instead
@@ -221,10 +220,10 @@ func (tc *traefikCollector) aggregate(text string) ([]ServiceMetricSet, error) {
221220
agg.buckets[le] += s.value
222221
}
223222

224-
case "traefik_service_request_duration_seconds_sum":
223+
case "traefik_entrypoint_request_duration_seconds_sum":
225224
agg.durationSum += s.value
226225

227-
case "traefik_service_request_duration_seconds_count":
226+
case "traefik_entrypoint_request_duration_seconds_count":
228227
agg.durationCount += s.value
229228
}
230229
}
@@ -233,9 +232,9 @@ func (tc *traefikCollector) aggregate(text string) ([]ServiceMetricSet, error) {
233232
tc.mu.Lock()
234233
defer tc.mu.Unlock()
235234

236-
var results []ServiceMetricSet
237-
for svcName, agg := range svcs {
238-
m := domainmetrics.TraefikServiceMetrics{
235+
var results []EntrypointMetricSet
236+
for epName, agg := range eps {
237+
m := domainmetrics.TraefikEntrypointMetrics{
239238
Up: true,
240239
RequestsTotal: agg.requestsTotal,
241240
Requests2xx: agg.requests2xx,
@@ -260,16 +259,16 @@ func (tc *traefikCollector) aggregate(text string) ([]ServiceMetricSet, error) {
260259
}
261260

262261
// Requests/sec via delta on cumulative counter
263-
if prev, ok := tc.lastReqs[svcName]; ok {
262+
if prev, ok := tc.lastReqs[epName]; ok {
264263
elapsed := now.Sub(prev.collectedAt).Seconds()
265264
if elapsed > 0 && agg.requestsTotal >= prev.total {
266265
m.RequestsPerSecond = float64(agg.requestsTotal-prev.total) / elapsed
267266
}
268267
}
269-
tc.lastReqs[svcName] = lastRequests{total: agg.requestsTotal, collectedAt: now}
268+
tc.lastReqs[epName] = lastRequestsEntrypoint{total: agg.requestsTotal, collectedAt: now}
270269

271-
results = append(results, ServiceMetricSet{
272-
Attributes: domainmetrics.TraefikServiceAttributes{ServiceName: svcName},
270+
results = append(results, EntrypointMetricSet{
271+
Attributes: domainmetrics.TraefikEntrypointAttributes{EntrypointName: epName},
273272
Metrics: m,
274273
})
275274
}
@@ -311,11 +310,4 @@ func pct(buckets map[float64]float64, total, p float64) float64 {
311310
return prevLe
312311
}
313312

314-
// cleanName strips the provider suffix Traefik appends to service names
315-
// e.g. "myapp@docker" → "myapp", "api@internal" → "api".
316-
func cleanName(name string) string {
317-
if idx := strings.LastIndexByte(name, '@'); idx != -1 {
318-
return name[:idx]
319-
}
320-
return name
321-
}
313+

0 commit comments

Comments
 (0)