diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..bc946d7 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,23 @@ +version: 2 +updates: + # Python (pip) dependencies + - package-ecosystem: pip + directory: / + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - dependencies + - python + + # GitHub Actions + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + day: monday + open-pull-requests-limit: 5 + labels: + - dependencies + - github-actions diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..8fe357f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,49 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + lint: + name: Lint + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: pip install -r requirements-dev.txt + + - name: Ruff check + run: ruff check . + + - name: Ruff format check + run: ruff format --check . + + test: + name: Test + runs-on: ubuntu-24.04 + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: pip install -r requirements-dev.txt + + - name: Run tests + run: pytest tests/ -v diff --git a/checks/clickhouse_cloud.py b/checks/clickhouse_cloud.py index 236fd3f..9eecc66 100644 --- a/checks/clickhouse_cloud.py +++ b/checks/clickhouse_cloud.py @@ -9,15 +9,14 @@ import json import time -from typing import Any, Callable +from collections.abc import Callable +from typing import Any import requests +from datadog_checks.base import AgentCheck from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry -from datadog_checks.base import AgentCheck - - # --------------------------------------------------------------------------- # SQL templates # --------------------------------------------------------------------------- @@ -161,10 +160,12 @@ def __init__( self.custom_tags: list[str] = inst.get("tags", []) + # Cluster name → used as the Datadog "service" field on every log. + # Defaults to "clickhouse" so logs are attributed even without config. + self.cluster_name: str = inst.get("cluster_name", "clickhouse") + # HTTP session with automatic retries on transient failures - self.base_url: str = "https://queries.clickhouse.cloud/service/{}/run".format( - self.service_id - ) + self.base_url: str = f"https://queries.clickhouse.cloud/service/{self.service_id}/run" retry_strategy = Retry( total=2, @@ -198,11 +199,9 @@ def _validate_int( try: value = int(raw) except (TypeError, ValueError): - raise ValueError("{} must be an integer, got {!r}".format(key, raw)) + raise ValueError(f"{key} must be an integer, got {raw!r}") from None if value < lo or value > hi: - raise ValueError( - "{} must be between {} and {}, got {}".format(key, lo, hi, value) - ) + raise ValueError(f"{key} must be between {lo} and {hi}, got {value}") return value # ------------------------------------------------------------------ @@ -388,18 +387,15 @@ def _build_query_log_payload(self, row: dict[str, Any]) -> dict[str, Any]: type_label = "exception" else: duration_ms = int(row.get("query_duration_ms", 0)) - if duration_ms >= self.slow_query_threshold_ms: - level = "warning" - else: - level = "info" + level = "warning" if duration_ms >= self.slow_query_threshold_ms else "info" type_label = "finish" return { "timestamp": self._timestamp_seconds(row), "message": row.get("query", ""), - "ddsource": "clickhouse_cloud", + "ddsource": "clickhouse", "ddtags": ",".join(self.custom_tags) if self.custom_tags else "", - "service": "clickhouse", + "service": self.cluster_name, "status": level, "clickhouse.query_id": row.get("query_id", ""), "clickhouse.user": row.get("user", ""), @@ -437,9 +433,9 @@ def _build_text_log_payload(self, row: dict[str, Any]) -> dict[str, Any]: return { "timestamp": self._timestamp_seconds(row), "message": row.get("message", ""), - "ddsource": "clickhouse_cloud", + "ddsource": "clickhouse", "ddtags": ",".join(self.custom_tags) if self.custom_tags else "", - "service": "clickhouse", + "service": self.cluster_name, "status": level, "clickhouse.logger": row.get("logger_name", ""), "clickhouse.thread_id": str(row.get("thread_id", "")), diff --git a/conf.d/clickhouse_cloud.d/conf.yaml.example b/conf.d/clickhouse_cloud.d/conf.yaml.example index 318a1c0..ecb0a0f 100644 --- a/conf.d/clickhouse_cloud.d/conf.yaml.example +++ b/conf.d/clickhouse_cloud.d/conf.yaml.example @@ -5,6 +5,10 @@ instances: key_id: "" # required – Cloud API key ID key_secret: "" # required – Cloud API key secret + # Cluster / service identity + cluster_name: "" # used as the "service" field in Datadog Logs + # (default: "clickhouse") + # Log collection toggles collect_query_logs: true # default: true collect_text_logs: true # default: true @@ -19,3 +23,8 @@ instances: tags: - "env:prod" - "clickhouse_cluster:" + +logs: + - type: integration + source: clickhouse + diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..03fa0ce --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,5 @@ +# Development and CI dependencies. +# Runtime dependencies (requests, urllib3) are bundled with the Datadog Agent. +pytest>=9.0,<10.0 +requests>=2.28,<3.0 +ruff>=0.11,<1.0 diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..653ae5c --- /dev/null +++ b/ruff.toml @@ -0,0 +1,20 @@ +target-version = "py310" +line-length = 100 + +[lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "UP", # pyupgrade + "B", # flake8-bugbear + "SIM", # flake8-simplify + "RUF", # ruff-specific rules +] +ignore = [ + "E501", # line too long -- handled by formatter +] + +[lint.isort] +known-first-party = ["checks"] diff --git a/tests/conftest.py b/tests/conftest.py index a2b8235..91e80f3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,10 +9,9 @@ import json import os import sys - -import pytest from unittest.mock import MagicMock as _MagicMock +import pytest # --------------------------------------------------------------------------- # Mock AgentCheck base class diff --git a/tests/test_logs.py b/tests/test_logs.py index eba799e..4ae0b95 100644 --- a/tests/test_logs.py +++ b/tests/test_logs.py @@ -6,16 +6,15 @@ import pytest import requests +from conftest import MockAgentCheck from checks.clickhouse_cloud import ( - ClickHouseCloudCheck, - SC_QUERY_LOG_CONNECT, - SC_TEXT_LOG_CONNECT, GAUGE_QUERY_LOG_ROWS, GAUGE_TEXT_LOG_ROWS, + SC_QUERY_LOG_CONNECT, + SC_TEXT_LOG_CONNECT, + ClickHouseCloudCheck, ) -from conftest import MockAgentCheck - # --------------------------------------------------------------------------- # Helpers @@ -69,9 +68,7 @@ def test_invalid_slow_query_threshold_raises(self, default_instance): def test_invalid_backfill_minutes_raises(self, default_instance): default_instance["initial_backfill_minutes"] = 0 - with pytest.raises( - ValueError, match="initial_backfill_minutes must be between" - ): + with pytest.raises(ValueError, match="initial_backfill_minutes must be between"): _make_check(default_instance) def test_query_timeout_configurable(self, default_instance): @@ -102,7 +99,7 @@ def test_normal_query_is_info(self, default_instance, query_log_rows): assert payload["status"] == "info" assert payload["clickhouse.query_type"] == "finish" - assert payload["ddsource"] == "clickhouse_cloud" + assert payload["ddsource"] == "clickhouse" assert payload["service"] == "clickhouse" assert payload["clickhouse.query_id"] == "abc-123-def-456" assert payload["clickhouse.user"] == "default" @@ -143,6 +140,22 @@ def test_no_tags(self, default_instance, query_log_rows): assert payload["ddtags"] == "" + def test_custom_cluster_name_sets_service(self, default_instance, query_log_rows): + default_instance["cluster_name"] = "analytics-prod" + check = _make_check(default_instance) + row = query_log_rows[0] + payload = check._build_query_log_payload(row) + + assert payload["service"] == "analytics-prod" + + def test_default_cluster_name_is_clickhouse(self, default_instance, query_log_rows): + # No cluster_name in config → defaults to "clickhouse" + check = _make_check(default_instance) + row = query_log_rows[0] + payload = check._build_query_log_payload(row) + + assert payload["service"] == "clickhouse" + def test_missing_fields_use_defaults(self, default_instance): """Rows with missing optional fields should not crash.""" check = _make_check(default_instance) @@ -168,7 +181,7 @@ def test_error_level(self, default_instance, text_log_rows): payload = check._build_text_log_payload(row) assert payload["status"] == "error" - assert payload["ddsource"] == "clickhouse_cloud" + assert payload["ddsource"] == "clickhouse" assert payload["clickhouse.logger"] == "MergeTreeBackgroundExecutor" assert "Memory limit exceeded" in payload["message"] @@ -323,24 +336,28 @@ def test_empty_response(self, default_instance): def test_http_error_raises(self, default_instance): check = _make_check(default_instance) - with patch.object( - check._session, - "post", - side_effect=requests.exceptions.HTTPError("500 Server Error"), + with ( + patch.object( + check._session, + "post", + side_effect=requests.exceptions.HTTPError("500 Server Error"), + ), + pytest.raises(requests.exceptions.HTTPError), ): - with pytest.raises(requests.exceptions.HTTPError): - check._query_clickhouse("SELECT 1") + check._query_clickhouse("SELECT 1") def test_connection_error_raises(self, default_instance): check = _make_check(default_instance) - with patch.object( - check._session, - "post", - side_effect=requests.exceptions.ConnectionError("DNS failed"), + with ( + patch.object( + check._session, + "post", + side_effect=requests.exceptions.ConnectionError("DNS failed"), + ), + pytest.raises(requests.exceptions.ConnectionError), ): - with pytest.raises(requests.exceptions.ConnectionError): - check._query_clickhouse("SELECT 1") + check._query_clickhouse("SELECT 1") def test_multiline_json_each_row_parsed(self, default_instance): check = _make_check(default_instance) @@ -372,9 +389,7 @@ def test_custom_timeout_used(self, default_instance): class TestCollectQueryLogs: @patch("checks.clickhouse_cloud.ClickHouseCloudCheck._query_clickhouse") - def test_sends_logs_and_updates_cursor( - self, mock_query, default_instance, query_log_rows - ): + def test_sends_logs_and_updates_cursor(self, mock_query, default_instance, query_log_rows): check = _make_check(default_instance) mock_query.return_value = query_log_rows @@ -411,9 +426,7 @@ def test_query_failure_reports_critical(self, mock_query, default_instance): assert len(check._sent_logs) == 0 @patch("checks.clickhouse_cloud.ClickHouseCloudCheck._query_clickhouse") - def test_no_send_log_method_does_not_crash( - self, mock_query, default_instance, query_log_rows - ): + def test_no_send_log_method_does_not_crash(self, mock_query, default_instance, query_log_rows): check = _make_check(default_instance) mock_query.return_value = query_log_rows[:1] @@ -464,9 +477,7 @@ def test_missing_cursor_us_does_not_advance(self, mock_query, default_instance): assert check.log.warning.called @patch("checks.clickhouse_cloud.ClickHouseCloudCheck._query_clickhouse") - def test_gauge_reports_row_count( - self, mock_query, default_instance, query_log_rows - ): + def test_gauge_reports_row_count(self, mock_query, default_instance, query_log_rows): check = _make_check(default_instance) mock_query.return_value = query_log_rows @@ -477,9 +488,7 @@ def test_gauge_reports_row_count( class TestCollectTextLogs: @patch("checks.clickhouse_cloud.ClickHouseCloudCheck._query_clickhouse") - def test_sends_logs_and_updates_cursor( - self, mock_query, default_instance, text_log_rows - ): + def test_sends_logs_and_updates_cursor(self, mock_query, default_instance, text_log_rows): check = _make_check(default_instance) mock_query.return_value = text_log_rows @@ -539,9 +548,7 @@ def test_check_respects_disabled_collectors(self, mock_query, default_instance): mock_query.assert_not_called() @patch("checks.clickhouse_cloud.ClickHouseCloudCheck._query_clickhouse") - def test_cursor_persists_across_runs( - self, mock_query, default_instance, query_log_rows - ): + def test_cursor_persists_across_runs(self, mock_query, default_instance, query_log_rows): check = _make_check(default_instance) default_instance["collect_text_logs"] = False check.collect_text_logs = False