From 2236693124b678fd6a70e7f45d4e8c842dc98b15 Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Fri, 4 Apr 2025 22:40:02 +0530 Subject: [PATCH 1/2] metric: reinitialise childset on cluster settings change This patch introduces two new cluster settings: `sql.application_name_metrics.enabled` and `sql.database_name_metrics.enabled`. These settings are used to configure `app` and `db` labels in agg metrics with `StorageTypeCache` as children storage type respectively. We need to update labels for existing agg metrics to add/remove `app` and `db` labels so that labels will get reflected in subsequent prometheus scrape. For that, we have to update labels associated with agg metric. We have have renamed `clear` method from PR #143558 to `reinitialise` to extend it's functionality to update childSet label values. On added settings change, all tracked agg metrics with children storage type as `StorageTypeCache` are reinitialised with updated label values. Epic: CRDB-43153 Part of: CRDB-48253 Release note (sql change): New cluster settings `sql.application_name_metrics.enabled` and `sql.database_name_metrics.enabled` with default value of `false` can be set to true, to display application and database name on supported metrics respectively. --- docs/generated/settings/settings-for-tenants.txt | 2 ++ docs/generated/settings/settings.html | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 941fd12fec21..e805cc87645e 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -168,6 +168,7 @@ server.user_login.timeout duration 10s timeout after which client authentication server.user_login.upgrade_bcrypt_stored_passwords_to_scram.enabled boolean true if server.user_login.password_encryption=scram-sha-256, this controls whether to automatically re-encode stored passwords using crdb-bcrypt to scram-sha-256 application server.web_session.purge.ttl duration 1h0m0s if nonzero, entries in system.web_sessions older than this duration are periodically purged application server.web_session.timeout (alias: server.web_session_timeout) duration 168h0m0s the duration that a newly created web session will be valid application +sql.application_name_metrics.enabled boolean false when enabled, SQL metrics would export application name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default. application sql.auth.change_own_password.enabled boolean false controls whether a user is allowed to change their own password, even if they have no other privileges application sql.auth.grant_option_for_owner.enabled boolean true determines whether the GRANT OPTION for privileges is implicitly given to the owner of an object application sql.auth.grant_option_inheritance.enabled boolean true determines whether the GRANT OPTION for privileges is inherited through role membership application @@ -182,6 +183,7 @@ sql.cross_db_fks.enabled boolean false if true, creating foreign key references sql.cross_db_sequence_owners.enabled boolean false if true, creating sequences owned by tables from other databases is allowed application sql.cross_db_sequence_references.enabled boolean false if true, sequences referenced by tables from other databases are allowed application sql.cross_db_views.enabled boolean false if true, creating views that refer to other databases is allowed application +sql.database_name_metrics.enabled boolean false when enabled, SQL metrics would export database name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default. application sql.defaults.cost_scans_with_default_col_size.enabled boolean false "setting to true uses the same size for all columns to compute scan cost This cluster setting is being kept to preserve backwards-compatibility. This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html" application diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 191bfb52d5bb..e264c48a89bc 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -205,6 +205,7 @@
spanconfig.bounds.enabled
booleantruedictates whether span config bounds are consulted when serving span configs for secondary tenantsDedicated/Self-Hosted
spanconfig.range_coalescing.system.enabled
(alias: spanconfig.storage_coalesce_adjacent.enabled)
booleantruecollapse adjacent ranges with the same span configs, for the ranges specific to the system tenantDedicated/Self-Hosted
spanconfig.range_coalescing.application.enabled
(alias: spanconfig.tenant_coalesce_adjacent.enabled)
booleantruecollapse adjacent ranges with the same span configs across all secondary tenant keyspacesDedicated/Self-Hosted +
sql.application_name_metrics.enabled
booleanfalsewhen enabled, SQL metrics would export application name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default.Serverless/Dedicated/Self-Hosted
sql.auth.change_own_password.enabled
booleanfalsecontrols whether a user is allowed to change their own password, even if they have no other privilegesServerless/Dedicated/Self-Hosted
sql.auth.grant_option_for_owner.enabled
booleantruedetermines whether the GRANT OPTION for privileges is implicitly given to the owner of an objectServerless/Dedicated/Self-Hosted
sql.auth.grant_option_inheritance.enabled
booleantruedetermines whether the GRANT OPTION for privileges is inherited through role membershipServerless/Dedicated/Self-Hosted @@ -219,6 +220,7 @@
sql.cross_db_sequence_owners.enabled
booleanfalseif true, creating sequences owned by tables from other databases is allowedServerless/Dedicated/Self-Hosted
sql.cross_db_sequence_references.enabled
booleanfalseif true, sequences referenced by tables from other databases are allowedServerless/Dedicated/Self-Hosted
sql.cross_db_views.enabled
booleanfalseif true, creating views that refer to other databases is allowedServerless/Dedicated/Self-Hosted +
sql.database_name_metrics.enabled
booleanfalsewhen enabled, SQL metrics would export database name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default.Serverless/Dedicated/Self-Hosted
sql.defaults.cost_scans_with_default_col_size.enabled
booleanfalsesetting to true uses the same size for all columns to compute scan cost
This cluster setting is being kept to preserve backwards-compatibility.
This session variable default should now be configured using ALTER ROLE... SETServerless/Dedicated/Self-Hosted
sql.defaults.datestyle
enumerationiso, mdydefault value for DateStyle session setting [iso, mdy = 0, iso, dmy = 1, iso, ymd = 2]
This cluster setting is being kept to preserve backwards-compatibility.
This session variable default should now be configured using ALTER ROLE... SETServerless/Dedicated/Self-Hosted
sql.defaults.default_hash_sharded_index_bucket_count
integer16used as bucket count if bucket count is not specified in hash sharded index definition
This cluster setting is being kept to preserve backwards-compatibility.
This session variable default should now be configured using ALTER ROLE... SETServerless/Dedicated/Self-Hosted From b244d026a9ed034314c76b6aa7637ff8982ae659 Mon Sep 17 00:00:00 2001 From: Akshay Joshi Date: Mon, 7 Apr 2025 16:49:58 +0530 Subject: [PATCH 2/2] sql: update sql metrics to agg metrics This patch updates some of the sql metrics definition to agg metric for supporting db and app name labelling. We have introduced `sql.metrics.application_name.enabled` and `sql.metrics.database_name.enabled` cluster settings as part of #143719. The updated metrics will export db and app name as labels based on mentioned cluster settings. These labels won't be persisted as part of TSDB. Epic: CRDB-43153 Part of: CRDB-48251 Release note (sql change): updated metric type to agg metrics to support additional db and app name labels as part of metric export. --- .../settings/settings-for-tenants.txt | 2 - docs/generated/settings/settings.html | 2 - pkg/server/telemetry/BUILD.bazel | 2 + pkg/server/telemetry/features.go | 88 +++++++++++++++ pkg/server/telemetry/features_test.go | 7 ++ pkg/sql/BUILD.bazel | 1 + pkg/sql/conn_executor.go | 100 ++++++++++-------- pkg/sql/conn_executor_exec.go | 15 +-- pkg/sql/execinfra/BUILD.bazel | 1 + pkg/sql/execinfra/metrics.go | 9 +- pkg/sql/executor_statement_metrics.go | 21 ++-- pkg/util/metric/metric.go | 7 +- 12 files changed, 180 insertions(+), 75 deletions(-) diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index e805cc87645e..941fd12fec21 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -168,7 +168,6 @@ server.user_login.timeout duration 10s timeout after which client authentication server.user_login.upgrade_bcrypt_stored_passwords_to_scram.enabled boolean true if server.user_login.password_encryption=scram-sha-256, this controls whether to automatically re-encode stored passwords using crdb-bcrypt to scram-sha-256 application server.web_session.purge.ttl duration 1h0m0s if nonzero, entries in system.web_sessions older than this duration are periodically purged application server.web_session.timeout (alias: server.web_session_timeout) duration 168h0m0s the duration that a newly created web session will be valid application -sql.application_name_metrics.enabled boolean false when enabled, SQL metrics would export application name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default. application sql.auth.change_own_password.enabled boolean false controls whether a user is allowed to change their own password, even if they have no other privileges application sql.auth.grant_option_for_owner.enabled boolean true determines whether the GRANT OPTION for privileges is implicitly given to the owner of an object application sql.auth.grant_option_inheritance.enabled boolean true determines whether the GRANT OPTION for privileges is inherited through role membership application @@ -183,7 +182,6 @@ sql.cross_db_fks.enabled boolean false if true, creating foreign key references sql.cross_db_sequence_owners.enabled boolean false if true, creating sequences owned by tables from other databases is allowed application sql.cross_db_sequence_references.enabled boolean false if true, sequences referenced by tables from other databases are allowed application sql.cross_db_views.enabled boolean false if true, creating views that refer to other databases is allowed application -sql.database_name_metrics.enabled boolean false when enabled, SQL metrics would export database name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default. application sql.defaults.cost_scans_with_default_col_size.enabled boolean false "setting to true uses the same size for all columns to compute scan cost This cluster setting is being kept to preserve backwards-compatibility. This session variable default should now be configured using ALTER ROLE... SET: https://www.cockroachlabs.com/docs/stable/alter-role.html" application diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index e264c48a89bc..191bfb52d5bb 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -205,7 +205,6 @@
spanconfig.bounds.enabled
booleantruedictates whether span config bounds are consulted when serving span configs for secondary tenantsDedicated/Self-Hosted
spanconfig.range_coalescing.system.enabled
(alias: spanconfig.storage_coalesce_adjacent.enabled)
booleantruecollapse adjacent ranges with the same span configs, for the ranges specific to the system tenantDedicated/Self-Hosted
spanconfig.range_coalescing.application.enabled
(alias: spanconfig.tenant_coalesce_adjacent.enabled)
booleantruecollapse adjacent ranges with the same span configs across all secondary tenant keyspacesDedicated/Self-Hosted -
sql.application_name_metrics.enabled
booleanfalsewhen enabled, SQL metrics would export application name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default.Serverless/Dedicated/Self-Hosted
sql.auth.change_own_password.enabled
booleanfalsecontrols whether a user is allowed to change their own password, even if they have no other privilegesServerless/Dedicated/Self-Hosted
sql.auth.grant_option_for_owner.enabled
booleantruedetermines whether the GRANT OPTION for privileges is implicitly given to the owner of an objectServerless/Dedicated/Self-Hosted
sql.auth.grant_option_inheritance.enabled
booleantruedetermines whether the GRANT OPTION for privileges is inherited through role membershipServerless/Dedicated/Self-Hosted @@ -220,7 +219,6 @@
sql.cross_db_sequence_owners.enabled
booleanfalseif true, creating sequences owned by tables from other databases is allowedServerless/Dedicated/Self-Hosted
sql.cross_db_sequence_references.enabled
booleanfalseif true, sequences referenced by tables from other databases are allowedServerless/Dedicated/Self-Hosted
sql.cross_db_views.enabled
booleanfalseif true, creating views that refer to other databases is allowedServerless/Dedicated/Self-Hosted -
sql.database_name_metrics.enabled
booleanfalsewhen enabled, SQL metrics would export database name as and additional label as part of child metrics. The number of unique label combinations is limited to 5000 by default.Serverless/Dedicated/Self-Hosted
sql.defaults.cost_scans_with_default_col_size.enabled
booleanfalsesetting to true uses the same size for all columns to compute scan cost
This cluster setting is being kept to preserve backwards-compatibility.
This session variable default should now be configured using ALTER ROLE... SETServerless/Dedicated/Self-Hosted
sql.defaults.datestyle
enumerationiso, mdydefault value for DateStyle session setting [iso, mdy = 0, iso, dmy = 1, iso, ymd = 2]
This cluster setting is being kept to preserve backwards-compatibility.
This session variable default should now be configured using ALTER ROLE... SETServerless/Dedicated/Self-Hosted
sql.defaults.default_hash_sharded_index_bucket_count
integer16used as bucket count if bucket count is not specified in hash sharded index definition
This cluster setting is being kept to preserve backwards-compatibility.
This session variable default should now be configured using ALTER ROLE... SETServerless/Dedicated/Self-Hosted diff --git a/pkg/server/telemetry/BUILD.bazel b/pkg/server/telemetry/BUILD.bazel index 77474e81219d..ce8e5cdbde15 100644 --- a/pkg/server/telemetry/BUILD.bazel +++ b/pkg/server/telemetry/BUILD.bazel @@ -12,8 +12,10 @@ go_library( "//pkg/sql/pgwire/pgcode", "//pkg/sql/pgwire/pgerror", "//pkg/util/metric", + "//pkg/util/metric/aggmetric", "//pkg/util/syncutil", "@com_github_cockroachdb_errors//:errors", + "@com_github_prometheus_client_model//go", ], ) diff --git a/pkg/server/telemetry/features.go b/pkg/server/telemetry/features.go index f0df0bb6a855..0e0d85a636a8 100644 --- a/pkg/server/telemetry/features.go +++ b/pkg/server/telemetry/features.go @@ -14,8 +14,10 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/errors" + io_prometheus_client "github.com/prometheus/client_model/go" ) // Bucket10 buckets a number by order of magnitude base 10, eg 637 -> 100. @@ -164,6 +166,92 @@ func (c CounterWithMetric) Inspect(f func(interface{})) { c.metric.Inspect(f) } +// CounterWithAggMetric combines a telemetry and a agg metric counter. +type CounterWithAggMetric struct { + telemetry Counter + metric *aggmetric.AggCounter +} + +// Necessary for metric metadata registration. +var _ metric.Iterable = CounterWithAggMetric{} +var _ metric.PrometheusIterable = CounterWithAggMetric{} + +// NewCounterWithAggMetric creates a CounterWithAggMetric. +func NewCounterWithAggMetric(metadata metric.Metadata) CounterWithAggMetric { + return CounterWithAggMetric{ + telemetry: GetCounter(metadata.Name), + metric: aggmetric.NewCounterWithCacheStorageType(metadata), + } +} + +// Inc increments both counters. +func (c CounterWithAggMetric) Inc(labelValues ...string) { + Inc(c.telemetry) + c.metric.Inc(1, labelValues...) +} + +// Count returns the value of the metric, not the telemetry. Note that the +// telemetry value may reset to zero when, for example, GetFeatureCounts() is +// called with ResetCounts to generate a report. +func (c CounterWithAggMetric) Count() int64 { + return c.metric.Count() +} + +// Forward the metric.Iterable and PrometheusIterable interface to the metric counter. We +// don't just embed the counter because our Inc() interface is a bit different. + +// GetName implements metric.Iterable +func (c CounterWithAggMetric) GetName() string { + return c.metric.GetName() +} + +// GetHelp implements metric.Iterable +func (c CounterWithAggMetric) GetHelp() string { + return c.metric.GetHelp() +} + +// GetMeasurement implements metric.Iterable +func (c CounterWithAggMetric) GetMeasurement() string { + return c.metric.GetMeasurement() +} + +// GetUnit implements metric.Iterable +func (c CounterWithAggMetric) GetUnit() metric.Unit { + return c.metric.GetUnit() +} + +// GetMetadata implements metric.Iterable +func (c CounterWithAggMetric) GetMetadata() metric.Metadata { + return c.metric.GetMetadata() +} + +// Inspect implements metric.Iterable +func (c CounterWithAggMetric) Inspect(f func(interface{})) { + c.metric.Inspect(f) +} + +func (c CounterWithAggMetric) GetType() *io_prometheus_client.MetricType { + return c.metric.GetType() +} + +func (c CounterWithAggMetric) GetLabels() []*io_prometheus_client.LabelPair { + return c.metric.GetLabels() +} + +func (c CounterWithAggMetric) ToPrometheusMetric() *io_prometheus_client.Metric { + return c.metric.ToPrometheusMetric() +} + +func (c CounterWithAggMetric) Each( + pairs []*io_prometheus_client.LabelPair, f func(metric *io_prometheus_client.Metric), +) { + c.metric.Each(pairs, f) +} + +func (c CounterWithAggMetric) ReinitialiseChildMetrics(labelVals []string) { + c.metric.ReinitialiseChildMetrics(labelVals) +} + func init() { counters.m = make(map[string]Counter, approxFeatureCount) } diff --git a/pkg/server/telemetry/features_test.go b/pkg/server/telemetry/features_test.go index c5d36ec573a3..9f4f8455d92f 100644 --- a/pkg/server/telemetry/features_test.go +++ b/pkg/server/telemetry/features_test.go @@ -92,12 +92,17 @@ func TestBucket(t *testing.T) { // for example, a report is created. func TestCounterWithMetric(t *testing.T) { cm := telemetry.NewCounterWithMetric(metric.Metadata{Name: "test-metric"}) + cag := telemetry.NewCounterWithAggMetric(metric.Metadata{Name: "test-agg-metric"}) + cm.Inc() + cag.Inc() // Using GetFeatureCounts to read the telemetry value. m1 := telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ReadOnly) require.Equal(t, int32(1), m1["test-metric"]) require.Equal(t, int64(1), cm.Count()) + require.Equal(t, int32(1), m1["test-agg-metric"]) + require.Equal(t, int64(1), cm.Count()) // Reset the telemetry. telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts) @@ -106,4 +111,6 @@ func TestCounterWithMetric(t *testing.T) { m2 := telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ReadOnly) require.Equal(t, int32(0), m2["test-metric"]) require.Equal(t, int64(1), cm.Count()) + require.Equal(t, int32(0), m2["test-agg-metric"]) + require.Equal(t, int64(1), cm.Count()) } diff --git a/pkg/sql/BUILD.bazel b/pkg/sql/BUILD.bazel index 5dcc133aaac9..89673b1be5e9 100644 --- a/pkg/sql/BUILD.bazel +++ b/pkg/sql/BUILD.bazel @@ -578,6 +578,7 @@ go_library( "//pkg/util/memzipper", "//pkg/util/metamorphic", "//pkg/util/metric", + "//pkg/util/metric/aggmetric", "//pkg/util/mon", "//pkg/util/optional", "//pkg/util/pretty", diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index 4781551fb442..b0c364fc2fdd 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -82,6 +82,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log/logcrash" "github.com/cockroachdb/cockroach/pkg/util/log/severity" "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/sentryutil" "github.com/cockroachdb/cockroach/pkg/util/stop" @@ -574,27 +575,27 @@ func makeMetrics(internal bool, sv *settings.Values) Metrics { Duration: 6 * metricsSampleInterval, BucketConfig: metric.IOLatencyBuckets, }), - SQLServiceLatency: metric.NewHistogram(metric.HistogramOptions{ + SQLServiceLatency: aggmetric.NewHistogramWithCacheStorage(metric.HistogramOptions{ Mode: metric.HistogramModePreferHdrLatency, Metadata: getMetricMeta(MetaSQLServiceLatency, internal), Duration: 6 * metricsSampleInterval, BucketConfig: metric.IOLatencyBuckets, }), - SQLTxnLatency: metric.NewHistogram(metric.HistogramOptions{ + SQLTxnLatency: aggmetric.NewHistogramWithCacheStorage(metric.HistogramOptions{ Mode: metric.HistogramModePreferHdrLatency, Metadata: getMetricMeta(MetaSQLTxnLatency, internal), Duration: 6 * metricsSampleInterval, BucketConfig: metric.IOLatencyBuckets, }), - SQLTxnsOpen: metric.NewGauge(getMetricMeta(MetaSQLTxnsOpen, internal)), - SQLActiveStatements: metric.NewGauge(getMetricMeta(MetaSQLActiveQueries, internal)), + SQLTxnsOpen: aggmetric.NewGaugeWithCacheStorageType(getMetricMeta(MetaSQLTxnsOpen, internal)), + SQLActiveStatements: aggmetric.NewGaugeWithCacheStorageType(getMetricMeta(MetaSQLActiveQueries, internal)), SQLContendedTxns: metric.NewCounter(getMetricMeta(MetaSQLTxnContended, internal)), TxnAbortCount: metric.NewCounter(getMetricMeta(MetaTxnAbort, internal)), - FailureCount: metric.NewCounter(getMetricMeta(MetaFailure, internal)), + FailureCount: aggmetric.NewCounterWithCacheStorageType(getMetricMeta(MetaFailure, internal)), StatementTimeoutCount: metric.NewCounter(getMetricMeta(MetaStatementTimeout, internal)), TransactionTimeoutCount: metric.NewCounter(getMetricMeta(MetaTransactionTimeout, internal)), - FullTableOrIndexScanCount: metric.NewCounter(getMetricMeta(MetaFullTableOrIndexScan, internal)), + FullTableOrIndexScanCount: aggmetric.NewCounterWithCacheStorageType(getMetricMeta(MetaFullTableOrIndexScan, internal)), FullTableOrIndexScanRejectedCount: metric.NewCounter(getMetricMeta(MetaFullTableOrIndexScanRejected, internal)), }, StartedStatementCounters: makeStartedStatementCounters(internal), @@ -3015,12 +3016,12 @@ func (ex *connExecutor) execCopyOut( ctx, cancelQuery = ctxlog.WithCancel(ctx) queryID := ex.server.cfg.GenerateID() ex.addActiveQuery(cmd.ParsedStmt, nil /* placeholders */, queryID, cancelQuery) - ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1, ex.getLabelValues()...) defer func() { ex.removeActiveQuery(queryID, cmd.Stmt) cancelQuery() - ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1, ex.getLabelValues()...) if !payloadHasError(retPayload) { ex.incrementExecutedStmtCounter(cmd.Stmt) } @@ -3227,12 +3228,13 @@ func (ex *connExecutor) execCopyIn( ctx, cancelQuery = ctxlog.WithCancel(ctx) queryID := ex.server.cfg.GenerateID() ex.addActiveQuery(cmd.ParsedStmt, nil /* placeholders */, queryID, cancelQuery) - ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1, ex.getLabelValues()...) defer func() { ex.removeActiveQuery(queryID, cmd.Stmt) cancelQuery() - ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1, + ex.getLabelValues()...) if !payloadHasError(retPayload) { ex.incrementExecutedStmtCounter(cmd.Stmt) } @@ -4579,6 +4581,11 @@ func (ex *connExecutor) getDescIDGenerator() eval.DescIDGenerator { return ex.server.cfg.DescIDGenerator } +func (ex *connExecutor) getLabelValues() []string { + sessionData := ex.sessionData() + return ex.server.LabelValueConfig.GetLabelValues(sessionData.Database, sessionData.ApplicationName) +} + // StatementCounters groups metrics for counting different types of // statements. type StatementCounters struct { @@ -4587,17 +4594,17 @@ type StatementCounters struct { QueryCount telemetry.CounterWithMetric // Basic CRUD statements. - SelectCount telemetry.CounterWithMetric - UpdateCount telemetry.CounterWithMetric - InsertCount telemetry.CounterWithMetric - DeleteCount telemetry.CounterWithMetric + SelectCount telemetry.CounterWithAggMetric + UpdateCount telemetry.CounterWithAggMetric + InsertCount telemetry.CounterWithAggMetric + DeleteCount telemetry.CounterWithAggMetric // CRUDQueryCount includes all 4 CRUD statements above. - CRUDQueryCount telemetry.CounterWithMetric + CRUDQueryCount telemetry.CounterWithAggMetric // Transaction operations. - TxnBeginCount telemetry.CounterWithMetric - TxnCommitCount telemetry.CounterWithMetric - TxnRollbackCount telemetry.CounterWithMetric + TxnBeginCount telemetry.CounterWithAggMetric + TxnCommitCount telemetry.CounterWithAggMetric + TxnRollbackCount telemetry.CounterWithAggMetric TxnUpgradedCount *metric.Counter // Transaction XA two-phase commit operations. @@ -4638,11 +4645,11 @@ type StatementCounters struct { func makeStartedStatementCounters(internal bool) StatementCounters { return StatementCounters{ - TxnBeginCount: telemetry.NewCounterWithMetric( + TxnBeginCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaTxnBeginStarted, internal)), - TxnCommitCount: telemetry.NewCounterWithMetric( + TxnCommitCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaTxnCommitStarted, internal)), - TxnRollbackCount: telemetry.NewCounterWithMetric( + TxnRollbackCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaTxnRollbackStarted, internal)), TxnUpgradedCount: metric.NewCounter( getMetricMeta(MetaTxnUpgradedFromWeakIsolation, internal)), @@ -4664,15 +4671,15 @@ func makeStartedStatementCounters(internal bool) StatementCounters { getMetricMeta(MetaReleaseSavepointStarted, internal)), RollbackToSavepointCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaRollbackToSavepointStarted, internal)), - SelectCount: telemetry.NewCounterWithMetric( + SelectCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaSelectStarted, internal)), - UpdateCount: telemetry.NewCounterWithMetric( + UpdateCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaUpdateStarted, internal)), - InsertCount: telemetry.NewCounterWithMetric( + InsertCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaInsertStarted, internal)), - DeleteCount: telemetry.NewCounterWithMetric( + DeleteCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaDeleteStarted, internal)), - CRUDQueryCount: telemetry.NewCounterWithMetric( + CRUDQueryCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaCRUDStarted, internal)), DdlCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaDdlStarted, internal)), @@ -4691,11 +4698,11 @@ func makeStartedStatementCounters(internal bool) StatementCounters { func makeExecutedStatementCounters(internal bool) StatementCounters { return StatementCounters{ - TxnBeginCount: telemetry.NewCounterWithMetric( + TxnBeginCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaTxnBeginExecuted, internal)), - TxnCommitCount: telemetry.NewCounterWithMetric( + TxnCommitCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaTxnCommitExecuted, internal)), - TxnRollbackCount: telemetry.NewCounterWithMetric( + TxnRollbackCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaTxnRollbackExecuted, internal)), TxnUpgradedCount: metric.NewCounter( getMetricMeta(MetaTxnUpgradedFromWeakIsolation, internal)), @@ -4717,15 +4724,15 @@ func makeExecutedStatementCounters(internal bool) StatementCounters { getMetricMeta(MetaReleaseSavepointExecuted, internal)), RollbackToSavepointCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaRollbackToSavepointExecuted, internal)), - SelectCount: telemetry.NewCounterWithMetric( + SelectCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaSelectExecuted, internal)), - UpdateCount: telemetry.NewCounterWithMetric( + UpdateCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaUpdateExecuted, internal)), - InsertCount: telemetry.NewCounterWithMetric( + InsertCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaInsertExecuted, internal)), - DeleteCount: telemetry.NewCounterWithMetric( + DeleteCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaDeleteExecuted, internal)), - CRUDQueryCount: telemetry.NewCounterWithMetric( + CRUDQueryCount: telemetry.NewCounterWithAggMetric( getMetricMeta(MetaCRUDExecuted, internal)), DdlCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaDdlExecuted, internal)), @@ -4744,30 +4751,31 @@ func makeExecutedStatementCounters(internal bool) StatementCounters { func (sc *StatementCounters) incrementCount(ex *connExecutor, stmt tree.Statement) { sc.QueryCount.Inc() + labelValues := ex.getLabelValues() switch t := stmt.(type) { case *tree.BeginTransaction: - sc.TxnBeginCount.Inc() + sc.TxnBeginCount.Inc(labelValues...) case *tree.Select: - sc.SelectCount.Inc() - sc.CRUDQueryCount.Inc() + sc.SelectCount.Inc(labelValues...) + sc.CRUDQueryCount.Inc(labelValues...) case *tree.Update: - sc.UpdateCount.Inc() - sc.CRUDQueryCount.Inc() + sc.UpdateCount.Inc(labelValues...) + sc.CRUDQueryCount.Inc(labelValues...) case *tree.Insert: - sc.InsertCount.Inc() - sc.CRUDQueryCount.Inc() + sc.InsertCount.Inc(labelValues...) + sc.CRUDQueryCount.Inc(labelValues...) case *tree.Delete: - sc.DeleteCount.Inc() - sc.CRUDQueryCount.Inc() + sc.DeleteCount.Inc(labelValues...) + sc.CRUDQueryCount.Inc(labelValues...) case *tree.CommitTransaction: - sc.TxnCommitCount.Inc() + sc.TxnCommitCount.Inc(labelValues...) case *tree.RollbackTransaction: // The CommitWait state means that the transaction has already committed // after a specially handled `RELEASE SAVEPOINT cockroach_restart` command. if ex.getTransactionState() == CommitWaitStateStr { - sc.TxnCommitCount.Inc() + sc.TxnCommitCount.Inc(labelValues...) } else { - sc.TxnRollbackCount.Inc() + sc.TxnRollbackCount.Inc(labelValues...) } case *tree.PrepareTransaction: sc.TxnPrepareCount.Inc() diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index 5ddbe89132a5..8d43eeed6728 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -414,7 +414,7 @@ func (ex *connExecutor) execStmtInOpenState( // Note ex.metrics is Server.Metrics for the connExecutor that serves the // client connection, and is Server.InternalMetrics for internal executors. - ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1, ex.getLabelValues()...) }() // Special handling for SET TRANSACTION statements within a stored procedure @@ -429,7 +429,7 @@ func (ex *connExecutor) execStmtInOpenState( // Note ex.metrics is Server.Metrics for the connExecutor that serves the // client connection, and is Server.InternalMetrics for internal executors. - ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1, ex.getLabelValues()...) p := &ex.planner stmtTS := ex.server.cfg.Clock.PhysicalTime() @@ -1369,7 +1369,7 @@ func (ex *connExecutor) execStmtInOpenStateWithPausablePortal( // Note ex.metrics is Server.Metrics for the connExecutor that serves the // client connection, and is Server.InternalMetrics for internal executors. - ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Dec(1, ex.getLabelValues()...) }() // Special handling for SET TRANSACTION statements within a stored procedure @@ -1384,7 +1384,7 @@ func (ex *connExecutor) execStmtInOpenStateWithPausablePortal( // Note ex.metrics is Server.Metrics for the connExecutor that serves the // client connection, and is Server.InternalMetrics for internal executors. - ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1) + ex.metrics.EngineMetrics.SQLActiveStatements.Inc(1, ex.getLabelValues()...) // TODO(sql-sessions): persist the planner for a pausable portal, and reuse // it for each re-execution. @@ -4263,7 +4263,7 @@ func (ex *connExecutor) recordTransactionStart(txnID uuid.UUID) { // Note ex.metrics is Server.Metrics for the connExecutor that serves the // client connection, and is Server.InternalMetrics for internal executors. - ex.metrics.EngineMetrics.SQLTxnsOpen.Inc(1) + ex.metrics.EngineMetrics.SQLTxnsOpen.Inc(1, ex.getLabelValues()...) ex.extraTxnState.shouldExecuteOnTxnFinish = true ex.extraTxnState.txnFinishClosure.txnStartTime = txnStart @@ -4308,8 +4308,9 @@ func (ex *connExecutor) recordTransactionFinish( if contentionDuration := ex.extraTxnState.accumulatedStats.ContentionTime.Nanoseconds(); contentionDuration > 0 { ex.metrics.EngineMetrics.SQLContendedTxns.Inc(1) } - ex.metrics.EngineMetrics.SQLTxnsOpen.Dec(1) - ex.metrics.EngineMetrics.SQLTxnLatency.RecordValue(elapsedTime.Nanoseconds()) + labelValues := ex.getLabelValues() + ex.metrics.EngineMetrics.SQLTxnsOpen.Dec(1, labelValues...) + ex.metrics.EngineMetrics.SQLTxnLatency.RecordValue(elapsedTime.Nanoseconds(), labelValues...) ex.txnIDCacheWriter.Record(contentionpb.ResolvedTxnID{ TxnID: ev.txnID, diff --git a/pkg/sql/execinfra/BUILD.bazel b/pkg/sql/execinfra/BUILD.bazel index ee5af024e268..309504b6dd87 100644 --- a/pkg/sql/execinfra/BUILD.bazel +++ b/pkg/sql/execinfra/BUILD.bazel @@ -67,6 +67,7 @@ go_library( "//pkg/util/log", "//pkg/util/log/logcrash", "//pkg/util/metric", + "//pkg/util/metric/aggmetric", "//pkg/util/mon", "//pkg/util/optional", "//pkg/util/retry", diff --git a/pkg/sql/execinfra/metrics.go b/pkg/sql/execinfra/metrics.go index 952da48daae0..5a5d658b193f 100644 --- a/pkg/sql/execinfra/metrics.go +++ b/pkg/sql/execinfra/metrics.go @@ -9,6 +9,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" ) // DistSQLMetrics contains pointers to the metrics for monitoring DistSQL @@ -17,8 +18,8 @@ type DistSQLMetrics struct { QueriesActive *metric.Gauge QueriesTotal *metric.Counter DistributedCount *metric.Counter - ContendedQueriesCount *metric.Counter - CumulativeContentionNanos *metric.Counter + ContendedQueriesCount *aggmetric.AggCounter + CumulativeContentionNanos *aggmetric.AggCounter FlowsActive *metric.Gauge FlowsTotal *metric.Counter MaxBytesHist metric.IHistogram @@ -153,8 +154,8 @@ func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics { QueriesActive: metric.NewGauge(metaQueriesActive), QueriesTotal: metric.NewCounter(metaQueriesTotal), DistributedCount: metric.NewCounter(metaDistributedCount), - ContendedQueriesCount: metric.NewCounter(metaContendedQueriesCount), - CumulativeContentionNanos: metric.NewCounter(metaCumulativeContentionNanos), + ContendedQueriesCount: aggmetric.NewCounterWithCacheStorageType(metaContendedQueriesCount), + CumulativeContentionNanos: aggmetric.NewCounterWithCacheStorageType(metaCumulativeContentionNanos), FlowsActive: metric.NewGauge(metaFlowsActive), FlowsTotal: metric.NewCounter(metaFlowsTotal), MaxBytesHist: metric.NewHistogram(metric.HistogramOptions{ diff --git a/pkg/sql/executor_statement_metrics.go b/pkg/sql/executor_statement_metrics.go index bb42960cd2d9..0c852cb21c48 100644 --- a/pkg/sql/executor_statement_metrics.go +++ b/pkg/sql/executor_statement_metrics.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sqlstats" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" ) // EngineMetrics groups a set of SQL metrics. @@ -34,10 +35,10 @@ type EngineMetrics struct { DistSQLExecLatency metric.IHistogram SQLExecLatency metric.IHistogram DistSQLServiceLatency metric.IHistogram - SQLServiceLatency metric.IHistogram - SQLTxnLatency metric.IHistogram - SQLTxnsOpen *metric.Gauge - SQLActiveStatements *metric.Gauge + SQLServiceLatency *aggmetric.AggHistogram + SQLTxnLatency *aggmetric.AggHistogram + SQLTxnsOpen *aggmetric.AggGauge + SQLActiveStatements *aggmetric.AggGauge SQLContendedTxns *metric.Counter // TxnAbortCount counts transactions that were aborted, either due @@ -46,7 +47,7 @@ type EngineMetrics struct { TxnAbortCount *metric.Counter // FailureCount counts non-retriable errors in open transactions. - FailureCount *metric.Counter + FailureCount *aggmetric.AggCounter // StatementTimeoutCount tracks the number of statement failures due // to exceeding the statement timeout. @@ -57,7 +58,7 @@ type EngineMetrics struct { TransactionTimeoutCount *metric.Counter // FullTableOrIndexScanCount counts the number of full table or index scans. - FullTableOrIndexScanCount *metric.Counter + FullTableOrIndexScanCount *aggmetric.AggCounter // FullTableOrIndexScanRejectedCount counts the number of queries that were // rejected because of the `disallow_full_table_scans` guardrail. @@ -236,8 +237,10 @@ func (ex *connExecutor) recordStatementSummary( } if queryLevelStats.ContentionTime > 0 { - ex.planner.DistSQLPlanner().distSQLSrv.Metrics.ContendedQueriesCount.Inc(1) - ex.planner.DistSQLPlanner().distSQLSrv.Metrics.CumulativeContentionNanos.Inc(queryLevelStats.ContentionTime.Nanoseconds()) + labelValues := ex.getLabelValues() + ex.planner.DistSQLPlanner().distSQLSrv.Metrics.ContendedQueriesCount.Inc(1, labelValues...) + ex.planner.DistSQLPlanner().distSQLSrv.Metrics.CumulativeContentionNanos.Inc(queryLevelStats.ContentionTime.Nanoseconds(), + labelValues...) } } @@ -318,7 +321,7 @@ func (ex *connExecutor) recordStatementLatencyMetrics( m.SQLExecLatencyDetail.Observe(labels, float64(runLatRaw.Nanoseconds())) } m.SQLExecLatency.RecordValue(runLatRaw.Nanoseconds()) - m.SQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds()) + m.SQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds(), ex.getLabelValues()...) } } } diff --git a/pkg/util/metric/metric.go b/pkg/util/metric/metric.go index 495827adcea2..cff8fa2e4ec5 100644 --- a/pkg/util/metric/metric.go +++ b/pkg/util/metric/metric.go @@ -1574,16 +1574,13 @@ func (lv *LabelValueConfig) SetAppNameLabelEnabled(enabled bool) { // GetLabelValues appends db and app label values to existing labelValues based on // dbNameLabelEnabled and appNameLabelEnabled. -func (lv *LabelValueConfig) GetLabelValues( - dbName string, appName string, labelValues ...string, -) []string { - var labels []string +func (lv *LabelValueConfig) GetLabelValues(dbName string, appName string) []string { + labels := make([]string, 0, 2) if lv.dbNameLabelEnabled { labels = append(labels, dbName) } if lv.appNameLabelEnabled { labels = append(labels, appName) } - labels = append(labels, labelValues...) return labels }