diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index d745e30f2..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,104 +0,0 @@ ---- -version: 2.1 - -orbs: - prometheus: prometheus/prometheus@0.17.1 - -executors: - # This must match .promu.yml. - golang: - docker: - - image: cimg/go:1.23 - -jobs: - test: - executor: golang - - steps: - - prometheus/setup_environment - - run: GOHOSTARCH=386 GOARCH=386 make test - - run: make - - prometheus/store_artifact: - file: postgres_exporter - - integration: - docker: - - image: cimg/go:1.23 - - image: << parameters.postgres_image >> - environment: - POSTGRES_DB: circle_test - POSTGRES_USER: postgres - POSTGRES_PASSWORD: test - - parameters: - postgres_image: - type: string - - environment: - DATA_SOURCE_NAME: 'postgresql://postgres:test@localhost:5432/circle_test?sslmode=disable' - GOOPTS: '-v -tags integration' - - steps: - - checkout - - setup_remote_docker - - run: docker version - - run: make build - - run: make test - -workflows: - version: 2 - postgres_exporter: - jobs: - - test: - filters: - tags: - only: /.*/ - - integration: - matrix: - parameters: - postgres_image: - - circleci/postgres:11 - - circleci/postgres:12 - - circleci/postgres:13 - - cimg/postgres:14.9 - - cimg/postgres:15.4 - - cimg/postgres:16.0 - - prometheus/build: - name: build - parallelism: 3 - promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" - filters: - tags: - ignore: /^v.*/ - branches: - ignore: /^(main|master|release-.*|.*build-all.*)$/ - - prometheus/build: - name: build_all - parallelism: 12 - filters: - branches: - only: /^(main|master|release-.*|.*build-all.*)$/ - tags: - only: /^v.*/ - - prometheus/publish_master: - context: org-context - docker_hub_organization: prometheuscommunity - quay_io_organization: prometheuscommunity - requires: - - test - - build_all - filters: - branches: - only: master - - prometheus/publish_release: - context: org-context - docker_hub_organization: prometheuscommunity - quay_io_organization: prometheuscommunity - requires: - - test - - build_all - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..096709915 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +data/ +.build/ +.tarballs/ + +!.build/linux-amd64/ +!.build/linux-arm64/ +!.build/linux-armv7/ +!.build/linux-ppc64le/ +!.build/linux-riscv64/ +!.build/linux-s390x/ diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 202ae2366..76e99715c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,11 @@ updates: directory: "/" schedule: interval: "monthly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + # Exclude configs synced from upstream prometheus/prometheus. + exclude-paths: + - .github/workflows/container_description.yml + - .github/workflows/golangci-lint.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..791389600 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,75 @@ +--- +name: CI +on: + pull_request: + push: + branches: [main, master, 'release-*'] + tags: ['v*'] + +permissions: + contents: read + +jobs: + test_go: + name: Go tests + runs-on: ubuntu-latest + container: + # Whenever the Go version is updated here, .promu.yml + # should also be updated. + image: quay.io/prometheus/golang-builder:1.26-base + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: prometheus/promci-setup@5af30ba8c199a91d6c04ebdc3c48e630e355f62d # v0.1.0 + - run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1 + + build: + name: Build + runs-on: ubuntu-latest + strategy: + matrix: + thread: [ 0, 1, 2, 3] + steps: + - uses: prometheus/promci/build@d9d4f5688814f0b77bf003d07fb8c00507390634 # v0.8.2 + with: + parallelism: 4 + thread: ${{ matrix.thread }} + + publish_main: + name: Publish main branch artifacts + runs-on: ubuntu-latest + permissions: + packages: write + needs: [test_go, build] + if: | + (github.event_name == 'push' && github.event.ref == 'refs/heads/main') + || + (github.event_name == 'push' && github.event.ref == 'refs/heads/master') + steps: + - uses: prometheus/promci/publish_main@d9d4f5688814f0b77bf003d07fb8c00507390634 # v0.8.2 + with: + docker_hub_organization: prometheuscommunity + docker_hub_password: ${{ secrets.docker_hub_password }} + ghcr_io_password: ${{ github.token }} + quay_io_organization: prometheuscommunity + quay_io_password: ${{ secrets.quay_io_password }} + + publish_release: + name: Publish release artefacts + runs-on: ubuntu-latest + permissions: + contents: write + packages: write + needs: [test_go, build] + if: | + (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) + steps: + - uses: prometheus/promci/publish_release@d9d4f5688814f0b77bf003d07fb8c00507390634 # v0.8.2 + with: + docker_hub_organization: prometheuscommunity + docker_hub_password: ${{ secrets.docker_hub_password }} + ghcr_io_password: ${{ github.token }} + quay_io_organization: prometheuscommunity + quay_io_password: ${{ secrets.quay_io_password }} + github_token: ${{ github.token }} diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index dcca16ff3..d7b879f9c 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -18,7 +18,9 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -40,7 +42,9 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 305146993..d7477caf7 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -3,6 +3,7 @@ name: golangci-lint on: push: + branches: [main, master, 'release-*'] paths: - "go.sum" - "go.mod" @@ -10,6 +11,7 @@ on: - "scripts/errcheck_excludes.txt" - ".github/workflows/golangci-lint.yml" - ".golangci.yml" + tags: ['v*'] pull_request: permissions: # added using https://github.com/step-security/secure-repo @@ -24,16 +26,21 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false - name: Install Go - uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0 + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0 with: - go-version: 1.23.x + go-version: 1.26.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' + - name: Get golangci-lint version + id: golangci-lint-version + run: echo "version=$(make print-golangci-lint-version)" >> $GITHUB_OUTPUT - name: Lint - uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 + uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0 with: args: --verbose - version: v1.61.0 + version: ${{ steps.golangci-lint-version.outputs.version }} diff --git a/.github/workflows/govulncheck.yml b/.github/workflows/govulncheck.yml new file mode 100644 index 000000000..4ca92d302 --- /dev/null +++ b/.github/workflows/govulncheck.yml @@ -0,0 +1,26 @@ +--- +name: govulncheck +on: + pull_request: + paths: + - VERSION + - .github/workflows/govulncheck.yml + push: + branches: + - main + - master + schedule: + - cron: '33 2 * * *' + +permissions: + contents: read + +jobs: + govulncheck: + runs-on: ubuntu-latest + name: Run govulncheck + steps: + - id: govulncheck + uses: golang/govulncheck-action@b625fbe08f3bccbe446d94fbf87fcc875a4f50ee # v1.0.4 + env: + GOOS: ${{ contains(github.repository, 'windows_exporter') && 'windows' || '' }} diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml new file mode 100644 index 000000000..f864cf853 --- /dev/null +++ b/.github/workflows/integration.yml @@ -0,0 +1,51 @@ +--- +name: CI +on: + pull_request: + push: + branches: [main, master, 'release-*'] + tags: ['v*'] + +permissions: + contents: read + +jobs: + integration_tests: + name: Integration tests + runs-on: ubuntu-latest + strategy: + matrix: + # Define Postgres versions to test against. + postgres_version: + - 13.22 + - 14.19 + - 15.14 + - 16.10 + - 17.6 + - 18.1 + services: + postgres: + image: postgres:${{ matrix.postgres_version }} + env: + POSTGRES_DB: circle_test + POSTGRES_USER: postgres + POSTGRES_PASSWORD: test + # options: >- + # --health-cmd="pg_isready -U postgres -d circle_test" + # --health-interval=10s + # --health-timeout=5s + # --health-retries=5 + container: + # Whenever the Go version is updated here, .promu.yml + # should also be updated. + image: quay.io/prometheus/golang-builder:1.26-base + env: + DATA_SOURCE_NAME: 'postgresql://postgres:test@postgres:5432/circle_test?sslmode=disable' + GOOPTS: '-v -tags integration' + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: prometheus/promci-setup@5af30ba8c199a91d6c04ebdc3c48e630e355f62d # v0.1.0 + - run: make build + - run: make test diff --git a/.golangci.yml b/.golangci.yml index 96487c898..4b58b08b6 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,23 +1,36 @@ ---- +version: "2" linters: enable: - - misspell - - revive - -issues: - exclude-rules: - - path: _test.go - linters: - - errcheck - -linters-settings: - errcheck: - exclude-functions: - # Never check for logger errors. - - (github.com/go-kit/log.Logger).Log - revive: + - misspell + - revive + settings: + errcheck: + exclude-functions: + - (github.com/go-kit/log.Logger).Log + revive: + rules: + - name: unused-parameter + severity: warning + disabled: true + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling rules: - # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#unused-parameter - - name: unused-parameter - severity: warning - disabled: true + - linters: + - errcheck + path: _test.go + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/.promu.yml b/.promu.yml index c2b361240..0f1be42c7 100644 --- a/.promu.yml +++ b/.promu.yml @@ -1,6 +1,6 @@ go: - # This must match .circle/config.yml. - version: 1.23 + # This must match .github/workflows/ci.yml. + version: 1.26 repository: path: github.com/prometheus-community/postgres_exporter build: diff --git a/.yamllint b/.yamllint index 1859cb624..b329f464f 100644 --- a/.yamllint +++ b/.yamllint @@ -1,7 +1,8 @@ --- extends: default ignore: | - ui/react-app/node_modules + **/node_modules + web/api/v1/testdata/openapi_*_golden.yaml rules: braces: diff --git a/CHANGELOG.md b/CHANGELOG.md index 790a109c5..8cc3b9c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,72 @@ +## main / (unreleased) + +* [CHANGE] ... +* [FEATURE] ... +* [ENHANCEMENT] ... +* [BUGFIX] ... + +## 0.19.1 / 2026-02-25 + +* [CHANGE] Filter and warn about duplicates in pg_stat_statements (#998) by @Nudin in https://github.com/prometheus-community/postgres_exporter/pull/1259 +* [ENHANCEMENT] perf/reliability: Optimize pg_stat_statements queries that uses too many temp files by @eugercek in https://github.com/prometheus-community/postgres_exporter/pull/1267 +* [BUGFIX] fix: ignore setting `google_dataplex.max_messages` by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1261 +* [BUGFIX] wal: Fix collector NULL SUM(size) by @dannotripp in https://github.com/prometheus-community/postgres_exporter/pull/1265 + +## 0.19.0 / 2026-02-03 + +* [CHANGE] Reorganize code to remove the main package by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1238 +* [FEATURE] Allow setting `limit` in `pg_stat_statements` collector by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1205 +* [FEATURE] Allow excluding databases or users names in `stat_statements` collector by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1232 +* [ENHANCEMENT] Enable pprof endpoints for profiling by @SuperQ in https://github.com/prometheus-community/postgres_exporter/pull/1212 +* [ENHANCEMENT] Document `stat_checkpointer` collector option by @fabiorueda in https://github.com/prometheus-community/postgres_exporter/pull/1226 +* [ENHANCEMENT] Ensure collection of stats succeed in a maximum duration to avoid exhausting PG connections by @pierresouchay in https://github.com/prometheus-community/postgres_exporter/pull/1229 +* [BUGFIX] Fix NULL handling on `long_running_transactions` collector by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1230 +* [BUGFIX] Do not crash on bad `pg_settings` value by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1252 + +## 0.18.1 / 2025-09-29 + +* [BUGFIX] Fix swapped `flushedLsn` and `receiveStartTli` for `wal_receiver` collector by @spantaleev in https://github.com/prometheus-community/postgres_exporter/pull/1198 +* [BUGFIX] Fix superfluous semicolon breaking query in `process_idle` by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1197 and https://github.com/prometheus-community/postgres_exporter/pull/1201 + +## 0.18.0 / 2025-09-25 + +* [FEATURE] Add `stat_progress_vacuum` collector by @ianbibby in https://github.com/prometheus-community/postgres_exporter/pull/1141 +* [FEATURE] Add `buffercache_summary` collector by @sfc-gh-pnuttall in https://github.com/prometheus-community/postgres_exporter/pull/1165 +* [FEATURE] `stat_statements`: export query itself together with `queryId` by @Delorien84 in https://github.com/prometheus-community/postgres_exporter/pull/940 +* [ENHANCEMENT] Improve error handling for `Server.Scrape` by @BoweFlex in https://github.com/prometheus-community/postgres_exporter/pull/1158 +* [ENHANCEMENT] `stat_user_tables`: record table-only size bytes in addition to the total size bytes by @Sticksman Sticksman in https://github.com/prometheus-community/postgres_exporter/pull/1149 +* [ENHANCEMENT] (chore) Fix a typo and use `slices.Contains` by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1176 +* [ENHANCEMENT] Update mixin to latest changes from `grafana/postgres_exporter` by @cristiangreco, @gaantunes, @v-zhuravlev and @mshahzeb in https://github.com/prometheus-community/postgres_exporter/pull/1179 +* [ENHANCEMENT] Exclude the metrics fetching session's data from pg_stat_activity by @kmoppel in https://github.com/prometheus-community/postgres_exporter/pull/1185 +* [BUGFIX] Ensure database connections are always closed by @cristiangreco and @dehaansa in https://github.com/prometheus-community/postgres_exporter/pull/1177 +* [BUGFIX] Fix superfluous semicolon breaking query in `process_idle` by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1197 + +## 0.17.1 / 2025-02-26 + +* [BUGFIX] Fix: Handle incoming labels with invalid UTF-8 #1131 + +## 0.17.0 / 2025-02-16 + +## What's Changed +* [ENHANCEMENT] Add Postgres 17 for CI test by @khiemdoan in https://github.com/prometheus-community/postgres_exporter/pull/1105 +* [ENHANCEMENT] Add wait/backend to pg_stat_activity by @fgalind1 in https://github.com/prometheus-community/postgres_exporter/pull/1106 +* [ENHANCEMENT] Export last replay age in replication collector by @bitfehler in https://github.com/prometheus-community/postgres_exporter/pull/1085 +* [BUGFIX] Fix pg_long_running_transactions time by @jyothikirant-sayukth in https://github.com/prometheus-community/postgres_exporter/pull/1092 +* [BUGFIX] Fix to replace dashes with underscore in the metric names by @aagarwalla-fx in https://github.com/prometheus-community/postgres_exporter/pull/1103 +* [BIGFIX] Checkpoint related columns in PG 17 have been moved from pg_stat_bgwriter to pg_stat_checkpointer by @n-rodriguez in https://github.com/prometheus-community/postgres_exporter/pull/1072 +* [BUGFIX] Fix pg_stat_statements for PG17 by @NevermindZ4 in https://github.com/prometheus-community/postgres_exporter/pull/1114 +* [BUGFIX] Handle pg_replication_slots on pg<13 by @michael-todorovic in https://github.com/prometheus-community/postgres_exporter/pull/1098 +* [BUGFIX] Fix missing dsn sanitization for logging by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1104 + +## New Contributors +* @jyothikirant-sayukth made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1092 +* @aagarwalla-fx made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1103 +* @NevermindZ4 made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1114 +* @michael-todorovic made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1098 +* @fgalind1 made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1106 + +**Full Changelog**: https://github.com/prometheus-community/postgres_exporter/compare/v0.16.0...v0.17.0 + ## 0.16.0 / 2024-11-10 BREAKING CHANGES: diff --git a/Makefile.common b/Makefile.common index cbb5d8638..ef05881be 100644 --- a/Makefile.common +++ b/Makefile.common @@ -1,4 +1,4 @@ -# Copyright 2018 The Prometheus Authors +# Copyright The Prometheus Authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -55,13 +55,14 @@ ifneq ($(shell command -v gotestsum 2> /dev/null),) endif endif -PROMU_VERSION ?= 0.17.0 +PROMU_VERSION ?= 0.18.1 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.60.2 +GOLANGCI_LINT_VERSION ?= v2.11.4 +GOLANGCI_FMT_OPTS ?= # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) @@ -81,11 +82,32 @@ endif PREFIX ?= $(shell pwd) BIN_DIR ?= $(shell pwd) DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) -DOCKERFILE_PATH ?= ./Dockerfile DOCKERBUILD_CONTEXT ?= ./ DOCKER_REPO ?= prom -DOCKER_ARCHS ?= amd64 +# Check if deprecated DOCKERFILE_PATH is set +ifdef DOCKERFILE_PATH +$(error DOCKERFILE_PATH is deprecated. Use DOCKERFILE_VARIANTS ?= $(DOCKERFILE_PATH) in the Makefile) +endif + +DOCKER_ARCHS ?= amd64 arm64 armv7 ppc64le riscv64 s390x +DOCKERFILE_VARIANTS ?= $(wildcard Dockerfile Dockerfile.*) + +# Function to extract variant from Dockerfile label. +# Returns the variant name from io.prometheus.image.variant label, or "default" if not found. +define dockerfile_variant +$(strip $(or $(shell sed -n 's/.*io\.prometheus\.image\.variant="\([^"]*\)".*/\1/p' $(1)),default)) +endef + +# Check for duplicate variant names (including default for Dockerfiles without labels). +DOCKERFILE_VARIANT_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df))) +DOCKERFILE_VARIANT_NAMES_SORTED := $(sort $(DOCKERFILE_VARIANT_NAMES)) +ifneq ($(words $(DOCKERFILE_VARIANT_NAMES)),$(words $(DOCKERFILE_VARIANT_NAMES_SORTED))) +$(error Duplicate variant names found. Each Dockerfile must have a unique io.prometheus.image.variant label, and only one can be without a label (default)) +endif + +# Build variant:dockerfile pairs for shell iteration. +DOCKERFILE_VARIANTS_WITH_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)):$(df)) BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS)) PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS)) @@ -111,7 +133,7 @@ common-all: precheck style check_license lint yamllint unused build test .PHONY: common-style common-style: @echo ">> checking code style" - @fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \ + @fmtRes=$$($(GOFMT) -d $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -name '*.go' -print)); \ if [ -n "$${fmtRes}" ]; then \ echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \ echo "Please ensure you are using $$($(GO) version) for formatting code."; \ @@ -121,13 +143,19 @@ common-style: .PHONY: common-check_license common-check_license: @echo ">> checking license header" - @licRes=$$(for file in $$(find . -type f -iname '*.go' ! -path './vendor/*') ; do \ + @licRes=$$(for file in $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -type f -iname '*.go' -print) ; do \ awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \ done); \ if [ -n "$${licRes}" ]; then \ echo "license header checking failed:"; echo "$${licRes}"; \ exit 1; \ fi + @echo ">> checking for copyright years 2026 or later" + @futureYearRes=$$(git grep -E 'Copyright (202[6-9]|20[3-9][0-9])' -- '*.go' ':!:vendor/*' || true); \ + if [ -n "$${futureYearRes}" ]; then \ + echo "Files with copyright year 2026 or later found (should use 'Copyright The Prometheus Authors'):"; echo "$${futureYearRes}"; \ + exit 1; \ + fi .PHONY: common-deps common-deps: @@ -138,7 +166,7 @@ common-deps: update-go-deps: @echo ">> updating Go dependencies" @for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \ - $(GO) get -d $$m; \ + $(GO) get $$m; \ done $(GO) mod tidy @@ -156,9 +184,13 @@ $(GOTEST_DIR): @mkdir -p $@ .PHONY: common-format -common-format: +common-format: $(GOLANGCI_LINT) @echo ">> formatting code" $(GO) fmt $(pkgs) +ifdef GOLANGCI_LINT + @echo ">> formatting code with golangci-lint" + $(GOLANGCI_LINT) fmt $(GOLANGCI_FMT_OPTS) +endif .PHONY: common-vet common-vet: @@ -215,28 +247,142 @@ common-docker-repo-name: .PHONY: common-docker $(BUILD_DOCKER_ARCHS) common-docker: $(BUILD_DOCKER_ARCHS) $(BUILD_DOCKER_ARCHS): common-docker-%: - docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ - -f $(DOCKERFILE_PATH) \ - --build-arg ARCH="$*" \ - --build-arg OS="linux" \ - $(DOCKERBUILD_CONTEXT) + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + distroless_arch="$*"; \ + if [ "$*" = "armv7" ]; then \ + distroless_arch="arm"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Building default variant ($$variant_name) for linux-$* using $$dockerfile"; \ + docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ + -f $$dockerfile \ + --build-arg ARCH="$*" \ + --build-arg OS="linux" \ + --build-arg DISTROLESS_ARCH="$$distroless_arch" \ + $(DOCKERBUILD_CONTEXT); \ + if [ "$$variant_name" != "default" ]; then \ + echo "Tagging default variant with $$variant_name suffix"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ + "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + fi; \ + else \ + echo "Building $$variant_name variant for linux-$* using $$dockerfile"; \ + docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" \ + -f $$dockerfile \ + --build-arg ARCH="$*" \ + --build-arg OS="linux" \ + --build-arg DISTROLESS_ARCH="$$distroless_arch" \ + $(DOCKERBUILD_CONTEXT); \ + fi; \ + done .PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) common-docker-publish: $(PUBLISH_DOCKER_ARCHS) $(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: - docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Pushing $$variant_name variant for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Pushing default variant ($$variant_name) for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ + fi; \ + if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Pushing $$variant_name variant version tags for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Pushing default variant version tag for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + fi; \ + done DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION))) .PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS) common-docker-tag-latest: $(TAG_DOCKER_ARCHS) $(TAG_DOCKER_ARCHS): common-docker-tag-latest-%: - docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest" - docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)" + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Tagging $$variant_name variant for linux-$* as latest"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest-$$variant_name"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Tagging default variant ($$variant_name) for linux-$* as latest"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + done .PHONY: common-docker-manifest common-docker-manifest: - DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)) - DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Creating manifest for $$variant_name variant"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping manifest for $$variant_name variant (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Creating default variant ($$variant_name) manifest"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping default variant manifest (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"; \ + fi; \ + if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Creating manifest for $$variant_name variant version tag"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping version-tag manifest for $$variant_name variant (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Creating default variant version tag manifest"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping default variant version-tag manifest (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + fi; \ + done .PHONY: promu promu: $(PROMU) @@ -248,8 +394,8 @@ $(PROMU): cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu rm -r $(PROMU_TMP) -.PHONY: proto -proto: +.PHONY: common-proto +common-proto: @echo ">> generating code from proto files" @./scripts/genproto.sh @@ -261,6 +407,10 @@ $(GOLANGCI_LINT): | sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION) endif +.PHONY: common-print-golangci-lint-version +common-print-golangci-lint-version: + @echo $(GOLANGCI_LINT_VERSION) + .PHONY: precheck precheck:: @@ -275,9 +425,3 @@ $(1)_precheck: exit 1; \ fi endef - -govulncheck: install-govulncheck - govulncheck ./... - -install-govulncheck: - command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest diff --git a/README-RDS.md b/README-RDS.md index 85bf8691f..e0135855f 100644 --- a/README-RDS.md +++ b/README-RDS.md @@ -22,9 +22,9 @@ Running postgres-exporter in a container like so: -e DATA_SOURCE_NAME="postgresql://${PGUSER}:${PGPASS}@${PGHOST}:5432/${DBNAME}?sslmode=disable" \ -e PG_EXPORTER_EXCLUDE_DATABASES=rdsadmin \ -e PG_EXPORTER_DISABLE_DEFAULT_METRICS=true \ - -e PG_EXPORTER_DISABLE_SETTINGS_METRICS=true \ -e PG_EXPORTER_EXTEND_QUERY_PATH='/var/lib/postgresql/queries.yaml' \ - quay.io/prometheuscommunity/postgres-exporter + quay.io/prometheuscommunity/postgres-exporter \ + --no-collector.settings ``` ### Expected changes to RDS: diff --git a/README.md b/README.md index 4c464e210..a3577c15f 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,14 @@ -[![Build Status](https://circleci.com/gh/prometheus-community/postgres_exporter.svg?style=svg)](https://circleci.com/gh/prometheus-community/postgres_exporter) +# PostgreSQL Server Exporter + +[![Build Status](https://github.com/prometheus-community/postgres_exporter/actions/workflows/ci.yml/badge.svg)](https://github.com/prometheus-community/postgres_exporter/actions/workflows/ci.yml) [![Coverage Status](https://coveralls.io/repos/github/prometheus-community/postgres_exporter/badge.svg?branch=master)](https://coveralls.io/github/prometheus-community/postgres_exporter?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/prometheus-community/postgres_exporter)](https://goreportcard.com/report/github.com/prometheus-community/postgres_exporter) [![Docker Pulls](https://img.shields.io/docker/pulls/prometheuscommunity/postgres-exporter.svg)](https://hub.docker.com/r/prometheuscommunity/postgres-exporter/tags) -# PostgreSQL Server Exporter Prometheus exporter for PostgreSQL server metrics. -CI Tested PostgreSQL versions: `11`, `12`, `13`, `14`, `15`, `16` +CI Tested PostgreSQL versions: `13`, `14`, `15`, `16`, `17`, `18`. ## Quick Start This package is available for Docker: @@ -135,18 +136,51 @@ This will build the docker image as `prometheuscommunity/postgres_exporter:${bra * `[no-]collector.replication_slot` Enable the `replication_slot` collector (default: enabled). +* `[no-]collector.settings` + Enable the `settings` collector (default: enabled). + +* `[no-]collector.stat_activity` + Enable the `stat_activity` collector (default: enabled). + * `[no-]collector.stat_activity_autovacuum` Enable the `stat_activity_autovacuum` collector (default: disabled). +* `[no-]collector.stat_archiver` + Enable the `stat_archiver` collector (default: enabled). + * `[no-]collector.stat_bgwriter` Enable the `stat_bgwriter` collector (default: enabled). +* `[no-]collector.stat_checkpointer` + Enable the `stat_checkpointer` collector (default: disabled). + * `[no-]collector.stat_database` Enable the `stat_database` collector (default: enabled). +* `[no-]collector.stat_progress_vacuum` + Enable the `stat_progress_vacuum` collector (default: enabled). + +* `[no-]collector.stat_replication` + Enable the `stat_replication` collector (default: enabled). + * `[no-]collector.stat_statements` Enable the `stat_statements` collector (default: disabled). +* `[no-]collector.stat_statements.include_query` + Enable selecting statement query together with queryId. (default: disabled) + +* `--collector.stat_statements.query_length` + Maximum length of the statement text. Default is 120. + +* `--collector.stat_statements.limit` + Maximum number of statements to return. Default is 100. + +* `--collector.stat_statements.exclude_databases` + Comma-separated list of database names to exclude from `pg_stat_statements` metrics. Default is none. + +* `--collector.stat_statements.exclude_users` + Comma-separated list of user names to exclude from `pg_stat_statements` metrics. Default is none. + * `[no-]collector.stat_user_tables` Enable the `stat_user_tables` collector (default: enabled). @@ -184,9 +218,6 @@ This will build the docker image as `prometheuscommunity/postgres_exporter:${bra * `disable-default-metrics` Use only metrics supplied from `queries.yaml` via `--extend.query-path`. Default is `false`. -* `disable-settings-metrics` - Use the flag if you don't want to scrape `pg_settings`. Default is `false`. - * `auto-discover-databases` (DEPRECATED) Whether to discover the databases on a server dynamically. Default is `false`. @@ -246,15 +277,20 @@ The following environment variables configure the exporter: * `DATA_SOURCE_PASS_FILE` The same as above but reads the password from a file. +* `PG_EXPORTER_COLLECTION_TIMEOUT` + Timeout duration to use when collecting the statistics, default to `1m`. + When the timeout is reached, the database connection will be dropped. + It avoids connections stacking when the database answers too slowly + (for instance if the database creates/drop a huge table and locks the tables) + and will avoid exhausting the pool of connections of the database. + Value of `0` or less than `1ms` is considered invalid and will report an error. + * `PG_EXPORTER_WEB_TELEMETRY_PATH` Path under which to expose metrics. Default is `/metrics`. * `PG_EXPORTER_DISABLE_DEFAULT_METRICS` Use only metrics supplied from `queries.yaml`. Value can be `true` or `false`. Default is `false`. -* `PG_EXPORTER_DISABLE_SETTINGS_METRICS` - Use the flag if you don't want to scrape `pg_settings`. Value can be `true` or `false`. Default is `false`. - * `PG_EXPORTER_AUTO_DISCOVER_DATABASES` (DEPRECATED) Whether to discover the databases on a server dynamically. Value can be `true` or `false`. Default is `false`. diff --git a/SECURITY.md b/SECURITY.md index fed02d85c..5e6f976db 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,4 +3,4 @@ The Prometheus security policy, including how to report vulnerabilities, can be found here: - +[https://prometheus.io/docs/operating/security/](https://prometheus.io/docs/operating/security/) diff --git a/VERSION b/VERSION index 04a373efe..41915c799 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.16.0 +0.19.1 diff --git a/cmd/postgres_exporter/main.go b/cmd/postgres_exporter/main.go index 093ddd301..93c56aec4 100644 --- a/cmd/postgres_exporter/main.go +++ b/cmd/postgres_exporter/main.go @@ -1,4 +1,4 @@ -// Copyright 2021 The Prometheus Authors +// Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,12 +16,14 @@ package main import ( "fmt" "net/http" + _ "net/http/pprof" "os" "strings" "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/postgres_exporter/collector" "github.com/prometheus-community/postgres_exporter/config" + "github.com/prometheus-community/postgres_exporter/exporter" "github.com/prometheus/client_golang/prometheus" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -33,39 +35,33 @@ import ( ) var ( - c = config.Handler{ - Config: &config.Config{}, - } - - configFile = kingpin.Flag("config.file", "Postgres exporter configuration file.").Default("postgres_exporter.yml").String() - webConfig = kingpinflag.AddFlags(kingpin.CommandLine, ":9187") - metricsPath = kingpin.Flag("web.telemetry-path", "Path under which to expose metrics.").Default("/metrics").Envar("PG_EXPORTER_WEB_TELEMETRY_PATH").String() - disableDefaultMetrics = kingpin.Flag("disable-default-metrics", "Do not include default metrics.").Default("false").Envar("PG_EXPORTER_DISABLE_DEFAULT_METRICS").Bool() - disableSettingsMetrics = kingpin.Flag("disable-settings-metrics", "Do not include pg_settings metrics.").Default("false").Envar("PG_EXPORTER_DISABLE_SETTINGS_METRICS").Bool() - autoDiscoverDatabases = kingpin.Flag("auto-discover-databases", "Whether to discover the databases on a server dynamically. (DEPRECATED)").Default("false").Envar("PG_EXPORTER_AUTO_DISCOVER_DATABASES").Bool() - queriesPath = kingpin.Flag("extend.query-path", "Path to custom queries to run. (DEPRECATED)").Default("").Envar("PG_EXPORTER_EXTEND_QUERY_PATH").String() - onlyDumpMaps = kingpin.Flag("dumpmaps", "Do not run, simply dump the maps.").Bool() - constantLabelsList = kingpin.Flag("constantLabels", "A list of label=value separated by comma(,). (DEPRECATED)").Default("").Envar("PG_EXPORTER_CONSTANT_LABELS").String() - excludeDatabases = kingpin.Flag("exclude-databases", "A list of databases to remove when autoDiscoverDatabases is enabled (DEPRECATED)").Default("").Envar("PG_EXPORTER_EXCLUDE_DATABASES").String() - includeDatabases = kingpin.Flag("include-databases", "A list of databases to include when autoDiscoverDatabases is enabled (DEPRECATED)").Default("").Envar("PG_EXPORTER_INCLUDE_DATABASES").String() - metricPrefix = kingpin.Flag("metric-prefix", "A metric prefix can be used to have non-default (not \"pg\") prefixes for each of the metrics").Default("pg").Envar("PG_EXPORTER_METRIC_PREFIX").String() - logger = promslog.NewNopLogger() + c = newConfigHandler() + + configFile = kingpin.Flag("config.file", "Postgres exporter configuration file.").Default("postgres_exporter.yml").String() + webConfig = kingpinflag.AddFlags(kingpin.CommandLine, ":9187") + metricsPath = kingpin.Flag("web.telemetry-path", "Path under which to expose metrics.").Default("/metrics").Envar("PG_EXPORTER_WEB_TELEMETRY_PATH").String() + disableDefaultMetrics = kingpin.Flag("disable-default-metrics", "Do not include default metrics.").Default("false").Envar("PG_EXPORTER_DISABLE_DEFAULT_METRICS").Bool() + autoDiscoverDatabases = kingpin.Flag("auto-discover-databases", "Whether to discover the databases on a server dynamically. (DEPRECATED)").Default("false").Envar("PG_EXPORTER_AUTO_DISCOVER_DATABASES").Bool() + queriesPath = kingpin.Flag("extend.query-path", "Path to custom queries to run. (DEPRECATED)").Default("").Envar("PG_EXPORTER_EXTEND_QUERY_PATH").String() + onlyDumpMaps = kingpin.Flag("dumpmaps", "Do not run, simply dump the maps.").Bool() + constantLabelsList = kingpin.Flag("constantLabels", "A list of label=value separated by comma(,). (DEPRECATED)").Default("").Envar("PG_EXPORTER_CONSTANT_LABELS").String() + excludeDatabases = kingpin.Flag("exclude-databases", "A list of databases to remove when autoDiscoverDatabases is enabled (DEPRECATED)").Default("").Envar("PG_EXPORTER_EXCLUDE_DATABASES").String() + includeDatabases = kingpin.Flag("include-databases", "A list of databases to include when autoDiscoverDatabases is enabled (DEPRECATED)").Default("").Envar("PG_EXPORTER_INCLUDE_DATABASES").String() + metricPrefix = kingpin.Flag("metric-prefix", "A metric prefix can be used to have non-default (not \"pg\") prefixes for each of the metrics").Default("pg").Envar("PG_EXPORTER_METRIC_PREFIX").String() + collectionTimeout = kingpin.Flag("collection-timeout", "Timeout for collecting the statistics when the database is slow").Default("1m").Envar("PG_EXPORTER_COLLECTION_TIMEOUT").String() + logger = promslog.NewNopLogger() ) -// Metric name parts. -const ( - // Namespace for all metrics. - namespace = "pg" - // Subsystems. - exporter = "exporter" - // The name of the exporter. - exporterName = "postgres_exporter" - // Metric label used for static string data thats handy to send to Prometheus - // e.g. version - staticLabelName = "static" - // Metric label used for server identification. - serverLabelName = "server" -) +// The name of the exporter. +const exporterName = "postgres_exporter" + +func newConfigHandler() *config.Handler { + handler, err := config.NewHandler(prometheus.DefaultRegisterer) + if err != nil { + panic(err) + } + return handler +} func main() { kingpin.Version(version.Print(exporterName)) @@ -76,7 +72,7 @@ func main() { logger = promslog.New(promslogConfig) if *onlyDumpMaps { - dumpMaps() + exporter.DumpMaps() return } @@ -85,7 +81,7 @@ func main() { logger.Warn("Error loading config", "err", err) } - dsns, err := getDataSources() + dsns, err := exporter.GetDataSources() if err != nil { logger.Error("Failed reading data sources", "err", err.Error()) os.Exit(1) @@ -106,19 +102,19 @@ func main() { logger.Warn("Constant labels on all metrics is DEPRECATED") } - opts := []ExporterOpt{ - DisableDefaultMetrics(*disableDefaultMetrics), - DisableSettingsMetrics(*disableSettingsMetrics), - AutoDiscoverDatabases(*autoDiscoverDatabases), - WithUserQueriesPath(*queriesPath), - WithConstantLabels(*constantLabelsList), - ExcludeDatabases(excludedDatabases), - IncludeDatabases(*includeDatabases), + opts := []exporter.ExporterOpt{ + exporter.DisableDefaultMetrics(*disableDefaultMetrics), + exporter.AutoDiscoverDatabases(*autoDiscoverDatabases), + exporter.WithUserQueriesPath(*queriesPath), + exporter.WithConstantLabels(*constantLabelsList), + exporter.ExcludeDatabases(excludedDatabases), + exporter.IncludeDatabases(*includeDatabases), + exporter.WithMetricPrefix(*metricPrefix), } - exporter := NewExporter(dsns, opts...) + exporter := exporter.NewExporter(dsns, logger, opts...) defer func() { - exporter.servers.Close() + exporter.CloseServers() }() prometheus.MustRegister(versioncollector.NewCollector(exporterName)) @@ -136,7 +132,7 @@ func main() { excludedDatabases, dsn, []string{}, - ) + collector.WithCollectionTimeout(*collectionTimeout)) if err != nil { logger.Warn("Failed to create PostgresCollector", "err", err.Error()) } else { diff --git a/cmd/postgres_exporter/pg_setting_test.go b/cmd/postgres_exporter/pg_setting_test.go deleted file mode 100644 index 0e010444d..000000000 --- a/cmd/postgres_exporter/pg_setting_test.go +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright 2021 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !integration -// +build !integration - -package main - -import ( - "github.com/prometheus/client_golang/prometheus" - dto "github.com/prometheus/client_model/go" - . "gopkg.in/check.v1" -) - -type PgSettingSuite struct{} - -var _ = Suite(&PgSettingSuite{}) - -var fixtures = []fixture{ - { - p: pgSetting{ - name: "seconds_fixture_metric", - setting: "5", - unit: "s", - shortDesc: "Foo foo foo", - vartype: "integer", - }, - n: normalised{ - val: 5, - unit: "seconds", - err: "", - }, - d: `Desc{fqName: "pg_settings_seconds_fixture_metric_seconds", help: "Server Parameter: seconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, - v: 5, - }, - { - p: pgSetting{ - name: "milliseconds_fixture_metric", - setting: "5000", - unit: "ms", - shortDesc: "Foo foo foo", - vartype: "integer", - }, - n: normalised{ - val: 5, - unit: "seconds", - err: "", - }, - d: `Desc{fqName: "pg_settings_milliseconds_fixture_metric_seconds", help: "Server Parameter: milliseconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, - v: 5, - }, - { - p: pgSetting{ - name: "eight_kb_fixture_metric", - setting: "17", - unit: "8kB", - shortDesc: "Foo foo foo", - vartype: "integer", - }, - n: normalised{ - val: 139264, - unit: "bytes", - err: "", - }, - d: `Desc{fqName: "pg_settings_eight_kb_fixture_metric_bytes", help: "Server Parameter: eight_kb_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, - v: 139264, - }, - { - p: pgSetting{ - name: "16_kb_real_fixture_metric", - setting: "3.0", - unit: "16kB", - shortDesc: "Foo foo foo", - vartype: "real", - }, - n: normalised{ - val: 49152, - unit: "bytes", - err: "", - }, - d: `Desc{fqName: "pg_settings_16_kb_real_fixture_metric_bytes", help: "Server Parameter: 16_kb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, - v: 49152, - }, - { - p: pgSetting{ - name: "16_mb_real_fixture_metric", - setting: "3.0", - unit: "16MB", - shortDesc: "Foo foo foo", - vartype: "real", - }, - n: normalised{ - val: 5.0331648e+07, - unit: "bytes", - err: "", - }, - d: `Desc{fqName: "pg_settings_16_mb_real_fixture_metric_bytes", help: "Server Parameter: 16_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, - v: 5.0331648e+07, - }, - { - p: pgSetting{ - name: "32_mb_real_fixture_metric", - setting: "3.0", - unit: "32MB", - shortDesc: "Foo foo foo", - vartype: "real", - }, - n: normalised{ - val: 1.00663296e+08, - unit: "bytes", - err: "", - }, - d: `Desc{fqName: "pg_settings_32_mb_real_fixture_metric_bytes", help: "Server Parameter: 32_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, - v: 1.00663296e+08, - }, - { - p: pgSetting{ - name: "64_mb_real_fixture_metric", - setting: "3.0", - unit: "64MB", - shortDesc: "Foo foo foo", - vartype: "real", - }, - n: normalised{ - val: 2.01326592e+08, - unit: "bytes", - err: "", - }, - d: `Desc{fqName: "pg_settings_64_mb_real_fixture_metric_bytes", help: "Server Parameter: 64_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, - v: 2.01326592e+08, - }, - { - p: pgSetting{ - name: "bool_on_fixture_metric", - setting: "on", - unit: "", - shortDesc: "Foo foo foo", - vartype: "bool", - }, - n: normalised{ - val: 1, - unit: "", - err: "", - }, - d: `Desc{fqName: "pg_settings_bool_on_fixture_metric", help: "Server Parameter: bool_on_fixture_metric", constLabels: {}, variableLabels: {}}`, - v: 1, - }, - { - p: pgSetting{ - name: "bool_off_fixture_metric", - setting: "off", - unit: "", - shortDesc: "Foo foo foo", - vartype: "bool", - }, - n: normalised{ - val: 0, - unit: "", - err: "", - }, - d: `Desc{fqName: "pg_settings_bool_off_fixture_metric", help: "Server Parameter: bool_off_fixture_metric", constLabels: {}, variableLabels: {}}`, - v: 0, - }, - { - p: pgSetting{ - name: "special_minus_one_value", - setting: "-1", - unit: "d", - shortDesc: "foo foo foo", - vartype: "integer", - }, - n: normalised{ - val: -1, - unit: "seconds", - err: "", - }, - d: `Desc{fqName: "pg_settings_special_minus_one_value_seconds", help: "Server Parameter: special_minus_one_value [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, - v: -1, - }, - { - p: pgSetting{ - name: "rds.rds_superuser_reserved_connections", - setting: "2", - unit: "", - shortDesc: "Sets the number of connection slots reserved for rds_superusers.", - vartype: "integer", - }, - n: normalised{ - val: 2, - unit: "", - err: "", - }, - d: `Desc{fqName: "pg_settings_rds_rds_superuser_reserved_connections", help: "Server Parameter: rds.rds_superuser_reserved_connections", constLabels: {}, variableLabels: {}}`, - v: 2, - }, - { - p: pgSetting{ - name: "unknown_unit", - setting: "10", - unit: "nonexistent", - shortDesc: "foo foo foo", - vartype: "integer", - }, - n: normalised{ - val: 10, - unit: "", - err: `Unknown unit for runtime variable: "nonexistent"`, - }, - }, -} - -func (s *PgSettingSuite) TestNormaliseUnit(c *C) { - for _, f := range fixtures { - switch f.p.vartype { - case "integer", "real": - val, unit, err := f.p.normaliseUnit() - - c.Check(val, Equals, f.n.val) - c.Check(unit, Equals, f.n.unit) - - if err == nil { - c.Check("", Equals, f.n.err) - } else { - c.Check(err.Error(), Equals, f.n.err) - } - } - } -} - -func (s *PgSettingSuite) TestMetric(c *C) { - defer func() { - if r := recover(); r != nil { - if r.(error).Error() != `Unknown unit for runtime variable: "nonexistent"` { - panic(r) - } - } - }() - - for _, f := range fixtures { - d := &dto.Metric{} - m := f.p.metric(prometheus.Labels{}) - m.Write(d) // nolint: errcheck - - c.Check(m.Desc().String(), Equals, f.d) - c.Check(d.GetGauge().GetValue(), Equals, f.v) - } -} - -type normalised struct { - val float64 - unit string - err string -} - -type fixture struct { - p pgSetting - n normalised - d string - v float64 -} diff --git a/cmd/postgres_exporter/probe.go b/cmd/postgres_exporter/probe.go index 2c8c7652e..043c5a191 100644 --- a/cmd/postgres_exporter/probe.go +++ b/cmd/postgres_exporter/probe.go @@ -20,6 +20,7 @@ import ( "github.com/prometheus-community/postgres_exporter/collector" "github.com/prometheus-community/postgres_exporter/config" + "github.com/prometheus-community/postgres_exporter/exporter" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -64,20 +65,20 @@ func handleProbe(logger *slog.Logger, excludeDatabases []string) http.HandlerFun registry := prometheus.NewRegistry() - opts := []ExporterOpt{ - DisableDefaultMetrics(*disableDefaultMetrics), - DisableSettingsMetrics(*disableSettingsMetrics), - AutoDiscoverDatabases(*autoDiscoverDatabases), - WithUserQueriesPath(*queriesPath), - WithConstantLabels(*constantLabelsList), - ExcludeDatabases(excludeDatabases), - IncludeDatabases(*includeDatabases), + opts := []exporter.ExporterOpt{ + exporter.DisableDefaultMetrics(*disableDefaultMetrics), + exporter.AutoDiscoverDatabases(*autoDiscoverDatabases), + exporter.WithUserQueriesPath(*queriesPath), + exporter.WithConstantLabels(*constantLabelsList), + exporter.ExcludeDatabases(excludeDatabases), + exporter.IncludeDatabases(*includeDatabases), + exporter.WithMetricPrefix(*metricPrefix), } dsns := []string{dsn.GetConnectionString()} - exporter := NewExporter(dsns, opts...) + exporter := exporter.NewExporter(dsns, logger, opts...) defer func() { - exporter.servers.Close() + exporter.CloseServers() }() registry.MustRegister(exporter) diff --git a/collector/collector.go b/collector/collector.go index f2102d5ef..130f5313b 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -15,6 +15,7 @@ package collector import ( "context" + "database/sql" "errors" "fmt" "log/slog" @@ -37,8 +38,9 @@ const ( // Namespace for all metrics. namespace = "pg" - defaultEnabled = true - defaultDisabled = false + collectorFlagPrefix = "collector." + defaultEnabled = true + defaultDisabled = false ) var ( @@ -74,7 +76,7 @@ func registerCollector(name string, isDefaultEnabled bool, createFunc func(colle } // Create flag for this collector - flagName := fmt.Sprintf("collector.%s", name) + flagName := collectorFlagPrefix + name flagHelp := fmt.Sprintf("Enable the %s collector (default: %s).", name, helpDefaultState) defaultValue := fmt.Sprintf("%v", isDefaultEnabled) @@ -90,7 +92,8 @@ type PostgresCollector struct { Collectors map[string]Collector logger *slog.Logger - instance *instance + instance *instance + CollectionTimeout time.Duration } type Option func(*PostgresCollector) error @@ -156,6 +159,20 @@ func NewPostgresCollector(logger *slog.Logger, excludeDatabases []string, dsn st return p, nil } +func WithCollectionTimeout(s string) Option { + return func(e *PostgresCollector) error { + duration, err := time.ParseDuration(s) + if err != nil { + return err + } + if duration < 1*time.Millisecond { + return errors.New("timeout must be greater than 1ms") + } + e.CollectionTimeout = duration + return nil + } +} + // Describe implements the prometheus.Collector interface. func (p PostgresCollector) Describe(ch chan<- *prometheus.Desc) { ch <- scrapeDurationDesc @@ -164,18 +181,23 @@ func (p PostgresCollector) Describe(ch chan<- *prometheus.Desc) { // Collect implements the prometheus.Collector interface. func (p PostgresCollector) Collect(ch chan<- prometheus.Metric) { - ctx := context.TODO() - // copy the instance so that concurrent scrapes have independent instances inst := p.instance.copy() // Set up the database connection for the collector. err := inst.setup() + defer inst.Close() if err != nil { p.logger.Error("Error opening connection to database", "err", err) return } - defer inst.Close() + p.collectFromConnection(inst, ch) +} + +func (p PostgresCollector) collectFromConnection(inst *instance, ch chan<- prometheus.Metric) { + // Eventually, connect this to the http scraping context + ctx, cancel := context.WithTimeout(context.Background(), p.CollectionTimeout) + defer cancel() wg := sync.WaitGroup{} wg.Add(len(p.Collectors)) @@ -188,6 +210,10 @@ func (p PostgresCollector) Collect(ch chan<- prometheus.Metric) { wg.Wait() } +func (p *PostgresCollector) Close() error { + return p.instance.Close() +} + func execute(ctx context.Context, name string, c Collector, instance *instance, ch chan<- prometheus.Metric, logger *slog.Logger) { begin := time.Now() err := c.Update(ctx, instance, ch) @@ -227,3 +253,11 @@ var ErrNoData = errors.New("collector returned no data") func IsNoDataError(err error) bool { return err == ErrNoData } + +func Int32(m sql.NullInt32) float64 { + mM := 0.0 + if m.Valid { + mM = float64(m.Int32) + } + return mM +} diff --git a/collector/collector_test.go b/collector/collector_test.go index 18101f00e..984b2c0ff 100644 --- a/collector/collector_test.go +++ b/collector/collector_test.go @@ -14,9 +14,13 @@ package collector import ( "strings" + "testing" + "time" + "github.com/DATA-DOG/go-sqlmock" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" ) type labelMap map[string]string @@ -48,15 +52,84 @@ func readMetric(m prometheus.Metric) MetricResult { func sanitizeQuery(q string) string { q = strings.Join(strings.Fields(q), " ") - q = strings.Replace(q, "(", "\\(", -1) - q = strings.Replace(q, "?", "\\?", -1) - q = strings.Replace(q, ")", "\\)", -1) - q = strings.Replace(q, "[", "\\[", -1) - q = strings.Replace(q, "]", "\\]", -1) - q = strings.Replace(q, "{", "\\{", -1) - q = strings.Replace(q, "}", "\\}", -1) - q = strings.Replace(q, "*", "\\*", -1) - q = strings.Replace(q, "^", "\\^", -1) - q = strings.Replace(q, "$", "\\$", -1) + q = strings.ReplaceAll(q, "(", "\\(") + q = strings.ReplaceAll(q, "?", "\\?") + q = strings.ReplaceAll(q, ")", "\\)") + q = strings.ReplaceAll(q, "[", "\\[") + q = strings.ReplaceAll(q, "]", "\\]") + q = strings.ReplaceAll(q, "{", "\\{") + q = strings.ReplaceAll(q, "}", "\\}") + q = strings.ReplaceAll(q, "*", "\\*") + q = strings.ReplaceAll(q, "^", "\\^") + q = strings.ReplaceAll(q, "$", "\\$") return q } + +// We ensure that when the database respond after a long time +// The collection process still occurs in a predictable manner +// Will avoid accumulation of queries on a completely frozen DB +func TestWithConnectionTimeout(t *testing.T) { + + timeoutForQuery := time.Duration(100 * time.Millisecond) + + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{"pg_roles.rolname", "pg_roles.rolconnlimit"} + rows := sqlmock.NewRows(columns).AddRow("role1", 2) + mock.ExpectQuery(pgRolesConnectionLimitsQuery). + WillDelayFor(30 * time.Second). + WillReturnRows(rows) + + log_config := promslog.Config{} + + logger := promslog.New(&log_config) + + c, err := NewPostgresCollector(logger, []string{}, "postgresql://local", []string{}, WithCollectionTimeout(timeoutForQuery.String())) + if err != nil { + t.Fatalf("error creating NewPostgresCollector: %s", err) + } + collector_config := collectorConfig{ + logger: logger, + excludeDatabases: []string{}, + } + + collector, err := NewPGRolesCollector(collector_config) + if err != nil { + t.Fatalf("error creating collector: %s", err) + } + c.Collectors["test"] = collector + c.instance = inst + + ch := make(chan prometheus.Metric) + defer close(ch) + + go func() { + for { + <-ch + time.Sleep(1 * time.Millisecond) + } + }() + + startTime := time.Now() + c.collectFromConnection(inst, ch) + elapsed := time.Since(startTime) + + if elapsed <= timeoutForQuery { + t.Errorf("elapsed time was %v, should be bigger than timeout=%v", elapsed, timeoutForQuery) + } + + // Ensure we took more than timeout, but not too much + if elapsed >= timeoutForQuery+500*time.Millisecond { + t.Errorf("elapsed time was %v, should not be much bigger than timeout=%v", elapsed, timeoutForQuery) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_buffercache_summary.go b/collector/pg_buffercache_summary.go new file mode 100644 index 000000000..8b0e0f007 --- /dev/null +++ b/collector/pg_buffercache_summary.go @@ -0,0 +1,135 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const buffercacheSummarySubsystem = "buffercache_summary" + +func init() { + registerCollector(buffercacheSummarySubsystem, defaultDisabled, NewBuffercacheSummaryCollector) +} + +// BuffercacheSummaryCollector collects stats from pg_buffercache: https://www.postgresql.org/docs/current/pgbuffercache.html. +// +// It depends on the extension being loaded with +// +// create extension pg_buffercache; +// +// It does not take locks, see the PG docs above. +type BuffercacheSummaryCollector struct { + log *slog.Logger +} + +func NewBuffercacheSummaryCollector(config collectorConfig) (Collector, error) { + return &BuffercacheSummaryCollector{ + log: config.logger, + }, nil +} + +var ( + buffersUsedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_used"), + "Number of used shared buffers", + []string{}, + prometheus.Labels{}, + ) + buffersUnusedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_unused"), + "Number of unused shared buffers", + []string{}, + prometheus.Labels{}, + ) + buffersDirtyDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_dirty"), + "Number of dirty shared buffers", + []string{}, + prometheus.Labels{}, + ) + buffersPinnedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_pinned"), + "Number of pinned shared buffers", + []string{}, + prometheus.Labels{}, + ) + usageCountAvgDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "usagecount_avg"), + "Average usage count of used shared buffers", + []string{}, + prometheus.Labels{}, + ) + + buffercacheQuery = ` + SELECT + buffers_used, + buffers_unused, + buffers_dirty, + buffers_pinned, + usagecount_avg + FROM + pg_buffercache_summary() + ` +) + +// Update implements Collector +// It is called by the Prometheus registry when collecting metrics. +func (c BuffercacheSummaryCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + // pg_buffercache_summary is only in v16, and we don't need support for earlier currently. + if !instance.version.GE(semver.MustParse("16.0.0")) { + return nil + } + db := instance.getDB() + rows, err := db.QueryContext(ctx, buffercacheQuery) + if err != nil { + return err + } + defer rows.Close() + + var used, unused, dirty, pinned sql.NullInt32 + var usagecountAvg sql.NullFloat64 + + for rows.Next() { + if err := rows.Scan( + &used, + &unused, + &dirty, + &pinned, + &usagecountAvg, + ); err != nil { + return err + } + + usagecountAvgMetric := 0.0 + if usagecountAvg.Valid { + usagecountAvgMetric = usagecountAvg.Float64 + } + ch <- prometheus.MustNewConstMetric( + usageCountAvgDesc, + prometheus.GaugeValue, + usagecountAvgMetric) + ch <- prometheus.MustNewConstMetric(buffersUsedDesc, prometheus.GaugeValue, Int32(used)) + ch <- prometheus.MustNewConstMetric(buffersUnusedDesc, prometheus.GaugeValue, Int32(unused)) + ch <- prometheus.MustNewConstMetric(buffersDirtyDesc, prometheus.GaugeValue, Int32(dirty)) + ch <- prometheus.MustNewConstMetric(buffersPinnedDesc, prometheus.GaugeValue, Int32(pinned)) + } + + return rows.Err() +} diff --git a/collector/pg_buffercache_summary_test.go b/collector/pg_buffercache_summary_test.go new file mode 100644 index 000000000..86d91751f --- /dev/null +++ b/collector/pg_buffercache_summary_test.go @@ -0,0 +1,73 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestBuffercacheSummaryCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + columns := []string{ + "buffers_used", + "buffers_unused", + "buffers_dirty", + "buffers_pinned", + "usagecount_avg"} + + rows := sqlmock.NewRows(columns).AddRow(123, 456, 789, 234, 56.6778) + + mock.ExpectQuery(sanitizeQuery(buffercacheQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := BuffercacheSummaryCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 56.6778}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 123}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 456}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 789}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 234}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_database.go b/collector/pg_database.go index 4c0972080..7f98748f4 100644 --- a/collector/pg_database.go +++ b/collector/pg_database.go @@ -17,6 +17,7 @@ import ( "context" "database/sql" "log/slog" + "slices" "github.com/prometheus/client_golang/prometheus" ) @@ -102,7 +103,7 @@ func (c PGDatabaseCollector) Update(ctx context.Context, instance *instance, ch // Ignore excluded databases // Filtering is done here instead of in the query to avoid // a complicated NOT IN query with a variable number of parameters - if sliceContains(c.excludedDatabases, database) { + if slices.Contains(c.excludedDatabases, database) { continue } @@ -138,12 +139,3 @@ func (c PGDatabaseCollector) Update(ctx context.Context, instance *instance, ch } return rows.Err() } - -func sliceContains(slice []string, s string) bool { - for _, item := range slice { - if item == s { - return true - } - } - return false -} diff --git a/collector/pg_long_running_transactions.go b/collector/pg_long_running_transactions.go index 846feaeed..ac1eaa70e 100644 --- a/collector/pg_long_running_transactions.go +++ b/collector/pg_long_running_transactions.go @@ -15,6 +15,7 @@ package collector import ( "context" + "database/sql" "log/slog" "github.com/prometheus/client_golang/prometheus" @@ -51,10 +52,13 @@ var ( longRunningTransactionsQuery = ` SELECT - COUNT(*) as transactions, - MAX(EXTRACT(EPOCH FROM clock_timestamp())) AS oldest_timestamp_seconds - FROM pg_catalog.pg_stat_activity - WHERE state is distinct from 'idle' AND query not like 'autovacuum:%' + COUNT(*) as transactions, + MAX(EXTRACT(EPOCH FROM clock_timestamp() - pg_stat_activity.xact_start)) AS oldest_timestamp_seconds +FROM pg_catalog.pg_stat_activity +WHERE state IS DISTINCT FROM 'idle' +AND query NOT LIKE 'autovacuum:%' +AND pg_stat_activity.xact_start IS NOT NULL +AND pid <> pg_backend_pid(); ` ) @@ -69,12 +73,20 @@ func (PGLongRunningTransactionsCollector) Update(ctx context.Context, instance * defer rows.Close() for rows.Next() { - var transactions, ageInSeconds float64 + var transactions float64 + var ageInSeconds sql.NullFloat64 if err := rows.Scan(&transactions, &ageInSeconds); err != nil { return err } + // If there are no long running transactions, ageInSeconds will be NULL + // so we set it to 0 + age := 0.0 + if ageInSeconds.Valid { + age = ageInSeconds.Float64 + } + ch <- prometheus.MustNewConstMetric( longRunningTransactionsCount, prometheus.GaugeValue, @@ -83,7 +95,7 @@ func (PGLongRunningTransactionsCollector) Update(ctx context.Context, instance * ch <- prometheus.MustNewConstMetric( longRunningTransactionsAgeInSeconds, prometheus.GaugeValue, - ageInSeconds, + age, ) } if err := rows.Err(); err != nil { diff --git a/collector/pg_long_running_transactions_test.go b/collector/pg_long_running_transactions_test.go index eedda7c65..ba7bc6a52 100644 --- a/collector/pg_long_running_transactions_test.go +++ b/collector/pg_long_running_transactions_test.go @@ -61,3 +61,44 @@ func TestPGLongRunningTransactionsCollector(t *testing.T) { t.Errorf("there were unfulfilled exceptions: %s", err) } } + +func TestPGLongRunningTransactionsCollectorNull(t *testing.T) { + // Test when no long running transactions are present + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + inst := &instance{db: db} + columns := []string{ + "transactions", + "age_in_seconds", + } + rows := sqlmock.NewRows(columns). + AddRow(0, nil) + + mock.ExpectQuery(sanitizeQuery(longRunningTransactionsQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGLongRunningTransactionsCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGLongRunningTransactionsCollector.Update: %s", err) + } + }() + expected := []MetricResult{ + {labels: labelMap{}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{}, value: 0, metricType: dto.MetricType_GAUGE}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_process_idle.go b/collector/pg_process_idle.go index 7f3ff6f0f..3cb08f114 100644 --- a/collector/pg_process_idle.go +++ b/collector/pg_process_idle.go @@ -56,6 +56,7 @@ func (PGProcessIdleCollector) Update(ctx context.Context, instance *instance, ch COUNT(*) AS process_idle_seconds_count FROM pg_stat_activity WHERE state ~ '^idle' + AND pid <> pg_backend_pid() GROUP BY state, application_name ), buckets AS ( @@ -72,6 +73,7 @@ func (PGProcessIdleCollector) Update(ctx context.Context, instance *instance, ch FROM pg_stat_activity, UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le + WHERE pid <> pg_backend_pid() GROUP BY state, application_name, le ORDER BY state, application_name, le ) diff --git a/collector/pg_replication.go b/collector/pg_replication.go index 6067cc9b1..7f8b2fbd7 100644 --- a/collector/pg_replication.go +++ b/collector/pg_replication.go @@ -51,6 +51,15 @@ var ( "Indicates if the server is a replica", []string{}, nil, ) + pgReplicationLastReplay = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + replicationSubsystem, + "last_replay_seconds", + ), + "Age of last replay in seconds", + []string{}, nil, + ) pgReplicationQuery = `SELECT CASE @@ -61,7 +70,8 @@ var ( CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 - END as is_replica` + END as is_replica, + GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) as last_replay` ) func (c *PGReplicationCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { @@ -72,7 +82,8 @@ func (c *PGReplicationCollector) Update(ctx context.Context, instance *instance, var lag float64 var isReplica int64 - err := row.Scan(&lag, &isReplica) + var replayAge float64 + err := row.Scan(&lag, &isReplica, &replayAge) if err != nil { return err } @@ -84,5 +95,9 @@ func (c *PGReplicationCollector) Update(ctx context.Context, instance *instance, pgReplicationIsReplica, prometheus.GaugeValue, float64(isReplica), ) + ch <- prometheus.MustNewConstMetric( + pgReplicationLastReplay, + prometheus.GaugeValue, replayAge, + ) return nil } diff --git a/collector/pg_replication_slot.go b/collector/pg_replication_slot.go index 27ccddefd..e6c9773eb 100644 --- a/collector/pg_replication_slot.go +++ b/collector/pg_replication_slot.go @@ -18,6 +18,7 @@ import ( "database/sql" "log/slog" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) @@ -81,8 +82,18 @@ var ( "availability of WAL files claimed by this slot", []string{"slot_name", "slot_type", "wal_status"}, nil, ) - pgReplicationSlotQuery = `SELECT + slot_name, + slot_type, + CASE WHEN pg_is_in_recovery() THEN + pg_last_wal_receive_lsn() - '0/0' + ELSE + pg_current_wal_lsn() - '0/0' + END AS current_wal_lsn, + COALESCE(confirmed_flush_lsn, '0/0') - '0/0' AS confirmed_flush_lsn, + active + FROM pg_replication_slots;` + pgReplicationSlotNewQuery = `SELECT slot_name, slot_type, CASE WHEN pg_is_in_recovery() THEN @@ -98,9 +109,15 @@ var ( ) func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + query := pgReplicationSlotQuery + abovePG13 := instance.version.GTE(semver.MustParse("13.0.0")) + if abovePG13 { + query = pgReplicationSlotNewQuery + } + db := instance.getDB() rows, err := db.QueryContext(ctx, - pgReplicationSlotQuery) + query) if err != nil { return err } @@ -114,7 +131,22 @@ func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance var isActive sql.NullBool var safeWalSize sql.NullInt64 var walStatus sql.NullString - if err := rows.Scan(&slotName, &slotType, &walLSN, &flushLSN, &isActive, &safeWalSize, &walStatus); err != nil { + + r := []any{ + &slotName, + &slotType, + &walLSN, + &flushLSN, + &isActive, + } + + if abovePG13 { + r = append(r, &safeWalSize) + r = append(r, &walStatus) + } + + err := rows.Scan(r...) + if err != nil { return err } diff --git a/collector/pg_replication_slot_test.go b/collector/pg_replication_slot_test.go index 174743ac3..981b5db62 100644 --- a/collector/pg_replication_slot_test.go +++ b/collector/pg_replication_slot_test.go @@ -17,6 +17,7 @@ import ( "testing" "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/smartystreets/goconvey/convey" @@ -29,12 +30,12 @@ func TestPgReplicationSlotCollectorActive(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). AddRow("test_slot", "physical", 5, 3, true, 323906992, "reserved") - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -72,12 +73,12 @@ func TestPgReplicationSlotCollectorInActive(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). AddRow("test_slot", "physical", 6, 12, false, -4000, "extended") - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -115,12 +116,12 @@ func TestPgReplicationSlotCollectorActiveNil(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). AddRow("test_slot", "physical", 6, 12, nil, nil, "lost") - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -156,12 +157,12 @@ func TestPgReplicationSlotCollectorTestNilValues(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). AddRow(nil, nil, nil, nil, true, nil, nil) - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { diff --git a/collector/pg_replication_test.go b/collector/pg_replication_test.go index b6df698e3..a48e9fd69 100644 --- a/collector/pg_replication_test.go +++ b/collector/pg_replication_test.go @@ -31,9 +31,9 @@ func TestPgReplicationCollector(t *testing.T) { inst := &instance{db: db} - columns := []string{"lag", "is_replica"} + columns := []string{"lag", "is_replica", "last_replay"} rows := sqlmock.NewRows(columns). - AddRow(1000, 1) + AddRow(1000, 1, 3) mock.ExpectQuery(sanitizeQuery(pgReplicationQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -49,6 +49,7 @@ func TestPgReplicationCollector(t *testing.T) { expected := []MetricResult{ {labels: labelMap{}, value: 1000, metricType: dto.MetricType_GAUGE}, {labels: labelMap{}, value: 1, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{}, value: 3, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { diff --git a/cmd/postgres_exporter/pg_setting.go b/collector/pg_setting.go similarity index 61% rename from cmd/postgres_exporter/pg_setting.go rename to collector/pg_setting.go index b02416a7c..73ef543fe 100644 --- a/cmd/postgres_exporter/pg_setting.go +++ b/collector/pg_setting.go @@ -1,4 +1,4 @@ -// Copyright 2021 The Prometheus Authors +// Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -11,10 +11,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package collector import ( + "context" "fmt" + "log/slog" "math" "strconv" "strings" @@ -22,55 +24,74 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +const settingsSubsystem = "settings" + +func init() { + registerCollector(settingsSubsystem, defaultEnabled, NewPGSettingsCollector) +} + +type PGSettingsCollector struct { + log *slog.Logger +} + +func NewPGSettingsCollector(config collectorConfig) (Collector, error) { + return &PGSettingsCollector{log: config.logger}, nil +} + var ( settingUnits = []string{ "ms", "s", "min", "h", "d", "B", "kB", "MB", "GB", "TB", } -) - -// Query the pg_settings view containing runtime variables -func querySettings(ch chan<- prometheus.Metric, server *Server) error { - logger.Debug("Querying pg_setting view", "server", server) // pg_settings docs: https://www.postgresql.org/docs/current/static/view-pg-settings.html // // NOTE: If you add more vartypes here, you must update the supported - // types in normaliseUnit() below - query := "SELECT name, setting, COALESCE(unit, ''), short_desc, vartype FROM pg_settings WHERE vartype IN ('bool', 'integer', 'real') AND name != 'sync_commit_cancel_wait';" + // types in normaliseUnit() below. + // + // Settings intentionally ignored due to invalid format: + // - `sync_commit_cancel_wait`, specific to Azure Postgres, see https://github.com/prometheus-community/postgres_exporter/issues/523 + // - `google_dataplex.max_messages`, specific to Google Cloud SQL, see https://github.com/prometheus-community/postgres_exporter/issues/1240 + pgSettingsQuery = "SELECT name, setting, COALESCE(unit, ''), short_desc, vartype FROM pg_settings WHERE vartype IN ('bool', 'integer', 'real') AND name NOT IN ('sync_commit_cancel_wait', 'google_dataplex.max_messages');" +) - rows, err := server.db.Query(query) +// Update implements Collector and exposes PostgreSQL runtime settings. +func (c PGSettingsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + rows, err := db.QueryContext(ctx, pgSettingsQuery) if err != nil { - return fmt.Errorf("Error running query on database %q: %s %v", server, namespace, err) + return err } - defer rows.Close() // nolint: errcheck + defer rows.Close() for rows.Next() { s := &pgSetting{} - err = rows.Scan(&s.name, &s.setting, &s.unit, &s.shortDesc, &s.vartype) + if err := rows.Scan(&s.name, &s.setting, &s.unit, &s.shortDesc, &s.vartype); err != nil { + return err + } + metric, err := s.metric() if err != nil { - return fmt.Errorf("Error retrieving rows on %q: %s %v", server, namespace, err) + c.log.Warn("Error normalising unit for setting", "setting", s.name, "value", s.setting, "unit", s.unit, "error", err) + continue } - - ch <- s.metric(server.labels) + ch <- metric } - return nil + return rows.Err() } -// pgSetting is represents a PostgreSQL runtime variable as returned by the +// pgSetting represents a PostgreSQL runtime variable as returned by the // pg_settings view. type pgSetting struct { name, setting, unit, shortDesc, vartype string } -func (s *pgSetting) metric(labels prometheus.Labels) prometheus.Metric { +func (s *pgSetting) metric() (prometheus.Metric, error) { var ( err error - name = strings.Replace(s.name, ".", "_", -1) + name = strings.ReplaceAll(strings.ReplaceAll(s.name, ".", "_"), "-", "_") unit = s.unit // nolint: ineffassign shortDesc = fmt.Sprintf("Server Parameter: %s", s.name) - subsystem = "settings" val float64 ) @@ -81,9 +102,7 @@ func (s *pgSetting) metric(labels prometheus.Labels) prometheus.Metric { } case "integer", "real": if val, unit, err = s.normaliseUnit(); err != nil { - // Panic, since we should recognise all units - // and don't want to silently exlude metrics - panic(err) + return nil, err } if len(unit) > 0 { @@ -91,12 +110,11 @@ func (s *pgSetting) metric(labels prometheus.Labels) prometheus.Metric { shortDesc = fmt.Sprintf("%s [Units converted to %s.]", shortDesc, unit) } default: - // Panic because we got a type we didn't ask for - panic(fmt.Sprintf("Unsupported vartype %q", s.vartype)) + return nil, fmt.Errorf("pgsetting: unsupported vartype %q", s.vartype) } - desc := newDesc(subsystem, name, shortDesc, labels) - return prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val) + desc := prometheus.NewDesc(prometheus.BuildFQName(namespace, settingsSubsystem, name), shortDesc, nil, nil) + return prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val), nil } // Removes units from any of the setting values. @@ -119,7 +137,7 @@ func (s *pgSetting) normaliseUnit() (val float64, unit string, err error) { val, err = strconv.ParseFloat(s.setting, 64) if err != nil { - return val, unit, fmt.Errorf("Error converting setting %q value %q to float: %s", s.name, s.setting, err) + return val, unit, fmt.Errorf("error converting setting %q value %q to float: %s", s.name, s.setting, err) } // Units defined in: https://www.postgresql.org/docs/current/static/config-setting.html @@ -131,7 +149,7 @@ func (s *pgSetting) normaliseUnit() (val float64, unit string, err error) { case "B", "kB", "MB", "GB", "TB", "1kB", "2kB", "4kB", "8kB", "16kB", "32kB", "64kB", "16MB", "32MB", "64MB": unit = "bytes" default: - err = fmt.Errorf("Unknown unit for runtime variable: %q", s.unit) + err = fmt.Errorf("unknown unit for runtime variable: %q", s.unit) return } diff --git a/collector/pg_setting_test.go b/collector/pg_setting_test.go new file mode 100644 index 000000000..ba66980cb --- /dev/null +++ b/collector/pg_setting_test.go @@ -0,0 +1,200 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" +) + +func TestPGSettingNormaliseUnit(t *testing.T) { + tests := []struct { + name string + setting pgSetting + wantValue float64 + wantUnit string + wantErr string + }{ + { + name: "seconds", + setting: pgSetting{name: "seconds_fixture_metric", setting: "5", unit: "s", vartype: "integer"}, + wantValue: 5, + wantUnit: "seconds", + }, + { + name: "milliseconds", + setting: pgSetting{name: "milliseconds_fixture_metric", setting: "5000", unit: "ms", vartype: "integer"}, + wantValue: 5, + wantUnit: "seconds", + }, + { + name: "8kB", + setting: pgSetting{name: "eight_kb_fixture_metric", setting: "17", unit: "8kB", vartype: "integer"}, + wantValue: 139264, + wantUnit: "bytes", + }, + { + name: "special minus one", + setting: pgSetting{name: "special_minus_one_value", setting: "-1", unit: "d", vartype: "integer"}, + wantValue: -1, + wantUnit: "seconds", + }, + { + name: "unknown unit", + setting: pgSetting{name: "unknown_unit", setting: "10", unit: "nonexistent", vartype: "integer"}, + wantValue: 10, + wantErr: `unknown unit for runtime variable: "nonexistent"`, + }, + { + name: "value with unit suffix", + setting: pgSetting{name: "aurora_value", setting: "16MB", unit: "MB", vartype: "integer"}, + wantValue: 16777216, + wantUnit: "bytes", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotValue, gotUnit, err := tt.setting.normaliseUnit() + if gotValue != tt.wantValue { + t.Fatalf("normaliseUnit() value = %v, want %v", gotValue, tt.wantValue) + } + if gotUnit != tt.wantUnit { + t.Fatalf("normaliseUnit() unit = %q, want %q", gotUnit, tt.wantUnit) + } + if tt.wantErr == "" && err != nil { + t.Fatalf("normaliseUnit() unexpected error: %v", err) + } + if tt.wantErr != "" { + if err == nil { + t.Fatalf("normaliseUnit() expected error %q", tt.wantErr) + } + if err.Error() != tt.wantErr { + t.Fatalf("normaliseUnit() error = %q, want %q", err.Error(), tt.wantErr) + } + } + }) + } +} + +func TestPGSettingMetric(t *testing.T) { + tests := []struct { + name string + setting pgSetting + wantDesc string + wantMetric float64 + }{ + { + name: "integer seconds", + setting: pgSetting{name: "seconds_fixture_metric", setting: "5", unit: "s", vartype: "integer"}, + wantDesc: `Desc{fqName: "pg_settings_seconds_fixture_metric_seconds", help: "Server Parameter: seconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, + wantMetric: 5, + }, + { + name: "bool on", + setting: pgSetting{name: "bool_on_fixture_metric", setting: "on", vartype: "bool"}, + wantDesc: `Desc{fqName: "pg_settings_bool_on_fixture_metric", help: "Server Parameter: bool_on_fixture_metric", constLabels: {}, variableLabels: {}}`, + wantMetric: 1, + }, + { + name: "sanitized name", + setting: pgSetting{name: "rds.rds-superuser-reserved-connections", setting: "2", vartype: "integer"}, + wantDesc: `Desc{fqName: "pg_settings_rds_rds_superuser_reserved_connections", help: "Server Parameter: rds.rds-superuser-reserved-connections", constLabels: {}, variableLabels: {}}`, + wantMetric: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric, err := tt.setting.metric() + if err != nil { + t.Fatalf("metric() unexpected error: %v", err) + } + got := &dto.Metric{} + if err := metric.Write(got); err != nil { + t.Fatalf("Write() unexpected error: %v", err) + } + if metric.Desc().String() != tt.wantDesc { + t.Fatalf("metric() desc = %q, want %q", metric.Desc().String(), tt.wantDesc) + } + if got.GetGauge().GetValue() != tt.wantMetric { + t.Fatalf("metric() value = %v, want %v", got.GetGauge().GetValue(), tt.wantMetric) + } + }) + } +} + +func TestPGSettingsCollectorUpdate(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + rows := sqlmock.NewRows([]string{"name", "setting", "unit", "short_desc", "vartype"}). + AddRow("shared_buffers", "128", "8kB", "Sets the number of shared memory buffers used by the server.", "integer"). + AddRow("track_counts", "on", "", "Collects statistics on database activity.", "bool"). + AddRow("bad_setting", "not-a-number", "", "Bad setting.", "integer") + mock.ExpectQuery(sanitizeQuery(pgSettingsQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + collector := PGSettingsCollector{log: promslog.NewNopLogger()} + if err := collector.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGSettingsCollector.Update: %s", err) + } + }() + + tests := []struct { + wantDesc string + wantValue float64 + }{ + { + wantDesc: `Desc{fqName: "pg_settings_shared_buffers_bytes", help: "Server Parameter: shared_buffers [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, + wantValue: 1048576, + }, + { + wantDesc: `Desc{fqName: "pg_settings_track_counts", help: "Server Parameter: track_counts", constLabels: {}, variableLabels: {}}`, + wantValue: 1, + }, + } + + for _, tt := range tests { + metric := <-ch + got := &dto.Metric{} + if err := metric.Write(got); err != nil { + t.Fatalf("Write() unexpected error: %v", err) + } + if metric.Desc().String() != tt.wantDesc { + t.Fatalf("metric desc = %q, want %q", metric.Desc().String(), tt.wantDesc) + } + if got.GetGauge().GetValue() != tt.wantValue { + t.Fatalf("metric value = %v, want %v", got.GetGauge().GetValue(), tt.wantValue) + } + } + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted after bad setting was skipped: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_activity.go b/collector/pg_stat_activity.go new file mode 100644 index 000000000..517fd25f2 --- /dev/null +++ b/collector/pg_stat_activity.go @@ -0,0 +1,181 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const statActivitySubsystem = "stat_activity" + +func init() { + registerCollector(statActivitySubsystem, defaultEnabled, NewPGStatActivityCollector) +} + +type PGStatActivityCollector struct{} + +func NewPGStatActivityCollector(collectorConfig) (Collector, error) { + return &PGStatActivityCollector{}, nil +} + +var ( + statActivityLabels = []string{ + "datname", + "state", + "usename", + "application_name", + "backend_type", + "wait_event_type", + "wait_event", + } + statActivityCountDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statActivitySubsystem, "count"), + "number of connections in this state", + statActivityLabels, + prometheus.Labels{}, + ) + statActivityMaxTxDurationDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statActivitySubsystem, "max_tx_duration"), + "max duration in seconds any active transaction has been running", + statActivityLabels, + prometheus.Labels{}, + ) + + statActivityQuery = ` + SELECT + pg_database.datname, + tmp.state, + tmp2.usename, + tmp2.application_name, + tmp2.backend_type, + tmp2.wait_event_type, + tmp2.wait_event, + COALESCE(count,0) as count, + COALESCE(max_tx_duration,0) as max_tx_duration + FROM + ( + VALUES ('active'), + ('idle'), + ('idle in transaction'), + ('idle in transaction (aborted)'), + ('fastpath function call'), + ('disabled') + ) AS tmp(state) CROSS JOIN pg_database + LEFT JOIN + ( + SELECT + datname, + state, + usename, + application_name, + backend_type, + wait_event_type, + wait_event, + count(*) AS count, + MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration + FROM pg_stat_activity + WHERE pid <> pg_backend_pid() + GROUP BY datname,state,usename,application_name,backend_type,wait_event_type,wait_event) AS tmp2 + ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname + ` + + statActivityQueryBefore92 = ` + SELECT + datname, + 'unknown' AS state, + usename, + application_name, + '' AS backend_type, + '' AS wait_event_type, + '' AS wait_event, + COALESCE(count(*),0) AS count, + COALESCE(MAX(EXTRACT(EPOCH FROM now() - xact_start))::float,0) AS max_tx_duration + FROM pg_stat_activity + WHERE procpid <> pg_backend_pid() + GROUP BY datname,usename,application_name + ` +) + +func (PGStatActivityCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + query := statActivityQuery + if instance.version.LT(semver.MustParse("9.2.0")) { + query = statActivityQueryBefore92 + } + + db := instance.getDB() + rows, err := db.QueryContext(ctx, query) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var datname, state, usename, applicationName, backendType, waitEventType, waitEvent sql.NullString + var count, maxTxDuration sql.NullFloat64 + if err := rows.Scan( + &datname, + &state, + &usename, + &applicationName, + &backendType, + &waitEventType, + &waitEvent, + &count, + &maxTxDuration, + ); err != nil { + return err + } + + labels := []string{ + stringValue(datname), + stringValue(state), + stringValue(usename), + stringValue(applicationName), + stringValue(backendType), + stringValue(waitEventType), + stringValue(waitEvent), + } + + if count.Valid { + ch <- prometheus.MustNewConstMetric( + statActivityCountDesc, + prometheus.GaugeValue, + count.Float64, + labels..., + ) + } + + if maxTxDuration.Valid { + ch <- prometheus.MustNewConstMetric( + statActivityMaxTxDurationDesc, + prometheus.GaugeValue, + maxTxDuration.Float64, + labels..., + ) + } + } + + return rows.Err() +} + +func stringValue(s sql.NullString) string { + if s.Valid { + return s.String + } + return "" +} diff --git a/collector/pg_stat_activity_test.go b/collector/pg_stat_activity_test.go new file mode 100644 index 000000000..0525153c4 --- /dev/null +++ b/collector/pg_stat_activity_test.go @@ -0,0 +1,174 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatActivityCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + rows := sqlmock.NewRows([]string{ + "datname", + "state", + "usename", + "application_name", + "backend_type", + "wait_event_type", + "wait_event", + "count", + "max_tx_duration", + }).AddRow("postgres", "active", "postgres", "psql", "client backend", "Lock", "relation", 3, 12.5) + mock.ExpectQuery(sanitizeQuery(statActivityQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatActivityCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatActivityCollector.Update: %s", err) + } + }() + + expectedLabels := labelMap{ + "datname": "postgres", + "state": "active", + "usename": "postgres", + "application_name": "psql", + "backend_type": "client backend", + "wait_event_type": "Lock", + "wait_event": "relation", + } + expected := []MetricResult{ + {labels: expectedLabels, value: 3, metricType: dto.MetricType_GAUGE}, + {labels: expectedLabels, value: 12.5, metricType: dto.MetricType_GAUGE}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatActivityCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + rows := sqlmock.NewRows([]string{ + "datname", + "state", + "usename", + "application_name", + "backend_type", + "wait_event_type", + "wait_event", + "count", + "max_tx_duration", + }).AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(statActivityQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatActivityCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatActivityCollector.Update: %s", err) + } + }() + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted for NULL stat_activity value: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatActivityCollectorBefore92(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("9.1.0")} + + rows := sqlmock.NewRows([]string{ + "datname", + "state", + "usename", + "application_name", + "backend_type", + "wait_event_type", + "wait_event", + "count", + "max_tx_duration", + }).AddRow("postgres", "unknown", "postgres", "psql", "", "", "", 2, 7.25) + mock.ExpectQuery(sanitizeQuery(statActivityQueryBefore92)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatActivityCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatActivityCollector.Update: %s", err) + } + }() + + expectedLabels := labelMap{ + "datname": "postgres", + "state": "unknown", + "usename": "postgres", + "application_name": "psql", + "backend_type": "", + "wait_event_type": "", + "wait_event": "", + } + expected := []MetricResult{ + {labels: expectedLabels, value: 2, metricType: dto.MetricType_GAUGE}, + {labels: expectedLabels, value: 7.25, metricType: dto.MetricType_GAUGE}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_archiver.go b/collector/pg_stat_archiver.go new file mode 100644 index 000000000..abc315741 --- /dev/null +++ b/collector/pg_stat_archiver.go @@ -0,0 +1,103 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const statArchiverSubsystem = "stat_archiver" + +func init() { + registerCollector(statArchiverSubsystem, defaultEnabled, NewPGStatArchiverCollector) +} + +type PGStatArchiverCollector struct{} + +func NewPGStatArchiverCollector(collectorConfig) (Collector, error) { + return &PGStatArchiverCollector{}, nil +} + +var ( + statArchiverArchivedCountDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statArchiverSubsystem, "archived_count"), + "Number of WAL files that have been successfully archived", + []string{}, + prometheus.Labels{}, + ) + statArchiverFailedCountDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statArchiverSubsystem, "failed_count"), + "Number of failed attempts for archiving WAL files", + []string{}, + prometheus.Labels{}, + ) + statArchiverLastArchiveAgeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statArchiverSubsystem, "last_archive_age"), + "Time in seconds since last WAL segment was successfully archived", + []string{}, + prometheus.Labels{}, + ) + + statArchiverQuery = `SELECT + archived_count, + failed_count, + extract(epoch from now() - last_archived_time) AS last_archive_age + FROM pg_stat_archiver;` +) + +func (PGStatArchiverCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + if instance.version.LT(semver.MustParse("9.4.0")) { + return nil + } + + db := instance.getDB() + row := db.QueryRowContext(ctx, statArchiverQuery) + + var archivedCount, failedCount sql.NullInt64 + var lastArchiveAge sql.NullFloat64 + + if err := row.Scan(&archivedCount, &failedCount, &lastArchiveAge); err != nil { + return err + } + + if archivedCount.Valid { + ch <- prometheus.MustNewConstMetric( + statArchiverArchivedCountDesc, + prometheus.CounterValue, + float64(archivedCount.Int64), + ) + } + + if failedCount.Valid { + ch <- prometheus.MustNewConstMetric( + statArchiverFailedCountDesc, + prometheus.CounterValue, + float64(failedCount.Int64), + ) + } + + if lastArchiveAge.Valid { + ch <- prometheus.MustNewConstMetric( + statArchiverLastArchiveAgeDesc, + prometheus.GaugeValue, + lastArchiveAge.Float64, + ) + } + + return nil +} diff --git a/collector/pg_stat_archiver_test.go b/collector/pg_stat_archiver_test.go new file mode 100644 index 000000000..f1b141d91 --- /dev/null +++ b/collector/pg_stat_archiver_test.go @@ -0,0 +1,119 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatArchiverCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + rows := sqlmock.NewRows([]string{"archived_count", "failed_count", "last_archive_age"}). + AddRow(42, 7, 13.5) + mock.ExpectQuery(sanitizeQuery(statArchiverQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatArchiverCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatArchiverCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 42}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 7}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 13.5}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatArchiverCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + rows := sqlmock.NewRows([]string{"archived_count", "failed_count", "last_archive_age"}). + AddRow(nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(statArchiverQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatArchiverCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatArchiverCollector.Update: %s", err) + } + }() + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted for NULL stat_archiver value: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatArchiverCollectorBeforePostgres94(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("9.3.0")} + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatArchiverCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatArchiverCollector.Update: %s", err) + } + }() + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted for PostgreSQL 9.3: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_bgwriter.go b/collector/pg_stat_bgwriter.go index ec446d58c..6e3bd09cb 100644 --- a/collector/pg_stat_bgwriter.go +++ b/collector/pg_stat_bgwriter.go @@ -17,6 +17,7 @@ import ( "context" "database/sql" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) @@ -101,7 +102,7 @@ var ( prometheus.Labels{}, ) - statBGWriterQuery = `SELECT + statBGWriterQueryBefore17 = `SELECT checkpoints_timed ,checkpoints_req ,checkpoint_write_time @@ -114,121 +115,177 @@ var ( ,buffers_alloc ,stats_reset FROM pg_stat_bgwriter;` + + statBGWriterQueryAfter17 = `SELECT + buffers_clean + ,maxwritten_clean + ,buffers_alloc + ,stats_reset + FROM pg_stat_bgwriter;` ) func (PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { - db := instance.getDB() - row := db.QueryRowContext(ctx, - statBGWriterQuery) + if instance.version.GE(semver.MustParse("17.0.0")) { + db := instance.getDB() + row := db.QueryRowContext(ctx, statBGWriterQueryAfter17) - var cpt, cpr, bcp, bc, mwc, bb, bbf, ba sql.NullInt64 - var cpwt, cpst sql.NullFloat64 - var sr sql.NullTime + var bc, mwc, ba sql.NullInt64 + var sr sql.NullTime - err := row.Scan(&cpt, &cpr, &cpwt, &cpst, &bcp, &bc, &mwc, &bb, &bbf, &ba, &sr) - if err != nil { - return err - } + err := row.Scan(&bc, &mwc, &ba, &sr) + if err != nil { + return err + } - cptMetric := 0.0 - if cpt.Valid { - cptMetric = float64(cpt.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsTimedDesc, - prometheus.CounterValue, - cptMetric, - ) - cprMetric := 0.0 - if cpr.Valid { - cprMetric = float64(cpr.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsReqDesc, - prometheus.CounterValue, - cprMetric, - ) - cpwtMetric := 0.0 - if cpwt.Valid { - cpwtMetric = float64(cpwt.Float64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsReqTimeDesc, - prometheus.CounterValue, - cpwtMetric, - ) - cpstMetric := 0.0 - if cpst.Valid { - cpstMetric = float64(cpst.Float64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsSyncTimeDesc, - prometheus.CounterValue, - cpstMetric, - ) - bcpMetric := 0.0 - if bcp.Valid { - bcpMetric = float64(bcp.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersCheckpointDesc, - prometheus.CounterValue, - bcpMetric, - ) - bcMetric := 0.0 - if bc.Valid { - bcMetric = float64(bc.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersCleanDesc, - prometheus.CounterValue, - bcMetric, - ) - mwcMetric := 0.0 - if mwc.Valid { - mwcMetric = float64(mwc.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterMaxwrittenCleanDesc, - prometheus.CounterValue, - mwcMetric, - ) - bbMetric := 0.0 - if bb.Valid { - bbMetric = float64(bb.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersBackendDesc, - prometheus.CounterValue, - bbMetric, - ) - bbfMetric := 0.0 - if bbf.Valid { - bbfMetric = float64(bbf.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersBackendFsyncDesc, - prometheus.CounterValue, - bbfMetric, - ) - baMetric := 0.0 - if ba.Valid { - baMetric = float64(ba.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersAllocDesc, - prometheus.CounterValue, - baMetric, - ) - srMetric := 0.0 - if sr.Valid { - srMetric = float64(sr.Time.Unix()) + bcMetric := 0.0 + if bc.Valid { + bcMetric = float64(bc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersCleanDesc, + prometheus.CounterValue, + bcMetric, + ) + mwcMetric := 0.0 + if mwc.Valid { + mwcMetric = float64(mwc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterMaxwrittenCleanDesc, + prometheus.CounterValue, + mwcMetric, + ) + baMetric := 0.0 + if ba.Valid { + baMetric = float64(ba.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersAllocDesc, + prometheus.CounterValue, + baMetric, + ) + srMetric := 0.0 + if sr.Valid { + srMetric = float64(sr.Time.Unix()) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterStatsResetDesc, + prometheus.CounterValue, + srMetric, + ) + } else { + db := instance.getDB() + row := db.QueryRowContext(ctx, statBGWriterQueryBefore17) + + var cpt, cpr, bcp, bc, mwc, bb, bbf, ba sql.NullInt64 + var cpwt, cpst sql.NullFloat64 + var sr sql.NullTime + + err := row.Scan(&cpt, &cpr, &cpwt, &cpst, &bcp, &bc, &mwc, &bb, &bbf, &ba, &sr) + if err != nil { + return err + } + + cptMetric := 0.0 + if cpt.Valid { + cptMetric = float64(cpt.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsTimedDesc, + prometheus.CounterValue, + cptMetric, + ) + cprMetric := 0.0 + if cpr.Valid { + cprMetric = float64(cpr.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsReqDesc, + prometheus.CounterValue, + cprMetric, + ) + cpwtMetric := 0.0 + if cpwt.Valid { + cpwtMetric = float64(cpwt.Float64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsReqTimeDesc, + prometheus.CounterValue, + cpwtMetric, + ) + cpstMetric := 0.0 + if cpst.Valid { + cpstMetric = float64(cpst.Float64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsSyncTimeDesc, + prometheus.CounterValue, + cpstMetric, + ) + bcpMetric := 0.0 + if bcp.Valid { + bcpMetric = float64(bcp.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersCheckpointDesc, + prometheus.CounterValue, + bcpMetric, + ) + bcMetric := 0.0 + if bc.Valid { + bcMetric = float64(bc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersCleanDesc, + prometheus.CounterValue, + bcMetric, + ) + mwcMetric := 0.0 + if mwc.Valid { + mwcMetric = float64(mwc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterMaxwrittenCleanDesc, + prometheus.CounterValue, + mwcMetric, + ) + bbMetric := 0.0 + if bb.Valid { + bbMetric = float64(bb.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersBackendDesc, + prometheus.CounterValue, + bbMetric, + ) + bbfMetric := 0.0 + if bbf.Valid { + bbfMetric = float64(bbf.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersBackendFsyncDesc, + prometheus.CounterValue, + bbfMetric, + ) + baMetric := 0.0 + if ba.Valid { + baMetric = float64(ba.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersAllocDesc, + prometheus.CounterValue, + baMetric, + ) + srMetric := 0.0 + if sr.Valid { + srMetric = float64(sr.Time.Unix()) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterStatsResetDesc, + prometheus.CounterValue, + srMetric, + ) } - ch <- prometheus.MustNewConstMetric( - statBGWriterStatsResetDesc, - prometheus.CounterValue, - srMetric, - ) return nil } diff --git a/collector/pg_stat_bgwriter_test.go b/collector/pg_stat_bgwriter_test.go index 1c2cf98de..6fde2fb6a 100644 --- a/collector/pg_stat_bgwriter_test.go +++ b/collector/pg_stat_bgwriter_test.go @@ -52,7 +52,7 @@ func TestPGStatBGWriterCollector(t *testing.T) { rows := sqlmock.NewRows(columns). AddRow(354, 4945, 289097744, 1242257, int64(3275602074), 89320867, 450139, 2034563757, 0, int64(2725688749), srT) - mock.ExpectQuery(sanitizeQuery(statBGWriterQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statBGWriterQueryBefore17)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -113,7 +113,7 @@ func TestPGStatBGWriterCollectorNullValues(t *testing.T) { rows := sqlmock.NewRows(columns). AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil) - mock.ExpectQuery(sanitizeQuery(statBGWriterQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statBGWriterQueryBefore17)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { diff --git a/collector/pg_stat_checkpointer.go b/collector/pg_stat_checkpointer.go new file mode 100644 index 000000000..31e9c5d62 --- /dev/null +++ b/collector/pg_stat_checkpointer.go @@ -0,0 +1,231 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const statCheckpointerSubsystem = "stat_checkpointer" + +func init() { + // WARNING: + // Disabled by default because this set of metrics is only available from Postgres 17 + registerCollector(statCheckpointerSubsystem, defaultDisabled, NewPGStatCheckpointerCollector) +} + +type PGStatCheckpointerCollector struct { + log *slog.Logger +} + +func NewPGStatCheckpointerCollector(config collectorConfig) (Collector, error) { + return &PGStatCheckpointerCollector{log: config.logger}, nil +} + +var ( + statCheckpointerNumTimedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "num_timed_total"), + "Number of scheduled checkpoints due to timeout", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerNumRequestedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "num_requested_total"), + "Number of requested checkpoints that have been performed", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerRestartpointsTimedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "restartpoints_timed_total"), + "Number of scheduled restartpoints due to timeout or after a failed attempt to perform it", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerRestartpointsReqDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "restartpoints_req_total"), + "Number of requested restartpoints", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerRestartpointsDoneDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "restartpoints_done_total"), + "Number of restartpoints that have been performed", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerWriteTimeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "write_time_total"), + "Total amount of time that has been spent in the portion of processing checkpoints and restartpoints where files are written to disk, in milliseconds", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerSyncTimeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "sync_time_total"), + "Total amount of time that has been spent in the portion of processing checkpoints and restartpoints where files are synchronized to disk, in milliseconds", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerBuffersWrittenDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "buffers_written_total"), + "Number of buffers written during checkpoints and restartpoints", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerStatsResetDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "stats_reset_total"), + "Time at which these statistics were last reset", + []string{}, + prometheus.Labels{}, + ) + + statCheckpointerQuery = `SELECT + num_timed + ,num_requested + ,restartpoints_timed + ,restartpoints_req + ,restartpoints_done + ,write_time + ,sync_time + ,buffers_written + ,stats_reset + FROM pg_stat_checkpointer;` +) + +func (c PGStatCheckpointerCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + + before17 := instance.version.LT(semver.MustParse("17.0.0")) + if before17 { + c.log.Warn("pg_stat_checkpointer collector is not available on PostgreSQL < 17.0.0, skipping") + return nil + } + + row := db.QueryRowContext(ctx, statCheckpointerQuery) + + // num_timed = nt = bigint + // num_requested = nr = bigint + // restartpoints_timed = rpt = bigint + // restartpoints_req = rpr = bigint + // restartpoints_done = rpd = bigint + // write_time = wt = double precision + // sync_time = st = double precision + // buffers_written = bw = bigint + // stats_reset = sr = timestamp + + var nt, nr, rpt, rpr, rpd, bw sql.NullInt64 + var wt, st sql.NullFloat64 + var sr sql.NullTime + + err := row.Scan(&nt, &nr, &rpt, &rpr, &rpd, &wt, &st, &bw, &sr) + if err != nil { + return err + } + + ntMetric := 0.0 + if nt.Valid { + ntMetric = float64(nt.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerNumTimedDesc, + prometheus.CounterValue, + ntMetric, + ) + + nrMetric := 0.0 + if nr.Valid { + nrMetric = float64(nr.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerNumRequestedDesc, + prometheus.CounterValue, + nrMetric, + ) + + rptMetric := 0.0 + if rpt.Valid { + rptMetric = float64(rpt.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerRestartpointsTimedDesc, + prometheus.CounterValue, + rptMetric, + ) + + rprMetric := 0.0 + if rpr.Valid { + rprMetric = float64(rpr.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerRestartpointsReqDesc, + prometheus.CounterValue, + rprMetric, + ) + + rpdMetric := 0.0 + if rpd.Valid { + rpdMetric = float64(rpd.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerRestartpointsDoneDesc, + prometheus.CounterValue, + rpdMetric, + ) + + wtMetric := 0.0 + if wt.Valid { + wtMetric = float64(wt.Float64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerWriteTimeDesc, + prometheus.CounterValue, + wtMetric, + ) + + stMetric := 0.0 + if st.Valid { + stMetric = float64(st.Float64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerSyncTimeDesc, + prometheus.CounterValue, + stMetric, + ) + + bwMetric := 0.0 + if bw.Valid { + bwMetric = float64(bw.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerBuffersWrittenDesc, + prometheus.CounterValue, + bwMetric, + ) + + srMetric := 0.0 + if sr.Valid { + srMetric = float64(sr.Time.Unix()) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerStatsResetDesc, + prometheus.CounterValue, + srMetric, + ) + + return nil +} diff --git a/collector/pg_stat_checkpointer_test.go b/collector/pg_stat_checkpointer_test.go new file mode 100644 index 000000000..9a8dd7f21 --- /dev/null +++ b/collector/pg_stat_checkpointer_test.go @@ -0,0 +1,144 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatCheckpointerCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{ + "num_timed", + "num_requested", + "restartpoints_timed", + "restartpoints_req", + "restartpoints_done", + "write_time", + "sync_time", + "buffers_written", + "stats_reset"} + + srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") + if err != nil { + t.Fatalf("Error parsing time: %s", err) + } + + rows := sqlmock.NewRows(columns). + AddRow(354, 4945, 289097744, 1242257, int64(3275602074), 89320867, 450139, 2034563757, srT) + mock.ExpectQuery(sanitizeQuery(statCheckpointerQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatCheckpointerCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatCheckpointerCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 354}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 4945}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 289097744}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1242257}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 3275602074}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 89320867}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 450139}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 2034563757}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatCheckpointerCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{ + "num_timed", + "num_requested", + "restartpoints_timed", + "restartpoints_req", + "restartpoints_done", + "write_time", + "sync_time", + "buffers_written", + "stats_reset"} + + rows := sqlmock.NewRows(columns). + AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(statCheckpointerQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatCheckpointerCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatCheckpointerCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_progress_vacuum.go b/collector/pg_stat_progress_vacuum.go new file mode 100644 index 000000000..f8083a49f --- /dev/null +++ b/collector/pg_stat_progress_vacuum.go @@ -0,0 +1,222 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" +) + +const progressVacuumSubsystem = "stat_progress_vacuum" + +func init() { + registerCollector(progressVacuumSubsystem, defaultEnabled, NewPGStatProgressVacuumCollector) +} + +type PGStatProgressVacuumCollector struct { + log *slog.Logger +} + +func NewPGStatProgressVacuumCollector(config collectorConfig) (Collector, error) { + return &PGStatProgressVacuumCollector{log: config.logger}, nil +} + +var vacuumPhases = []string{ + "initializing", + "scanning heap", + "vacuuming indexes", + "vacuuming heap", + "cleaning up indexes", + "truncating heap", + "performing final cleanup", +} + +var ( + statProgressVacuumPhase = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "phase"), + "Current vacuum phase (1 = active, 0 = inactive). Label 'phase' is human-readable.", + []string{"datname", "relname", "phase"}, + nil, + ) + + statProgressVacuumHeapBlksTotal = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "heap_blks"), + "Total number of heap blocks in the table being vacuumed.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumHeapBlksScanned = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "heap_blks_scanned"), + "Number of heap blocks scanned so far.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumHeapBlksVacuumed = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "heap_blks_vacuumed"), + "Number of heap blocks vacuumed so far.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumIndexVacuumCount = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "index_vacuums"), + "Number of completed index vacuum cycles.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumMaxDeadTuples = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "max_dead_tuples"), + "Maximum number of dead tuples that can be stored before cleanup is performed.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumNumDeadTuples = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "num_dead_tuples"), + "Current number of dead tuples found so far.", + []string{"datname", "relname"}, + nil, + ) + + // This is the view definition of pg_stat_progress_vacuum, albeit without the conversion + // of "phase" to a human-readable string. We will prefer the numeric representation. + statProgressVacuumQuery = `SELECT + d.datname, + s.relid::regclass::text AS relname, + s.param1 AS phase, + s.param2 AS heap_blks_total, + s.param3 AS heap_blks_scanned, + s.param4 AS heap_blks_vacuumed, + s.param5 AS index_vacuum_count, + s.param6 AS max_dead_tuples, + s.param7 AS num_dead_tuples + FROM + pg_stat_get_progress_info('VACUUM'::text) + s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) + LEFT JOIN + pg_database d ON s.datid = d.oid` +) + +func (c *PGStatProgressVacuumCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + rows, err := db.QueryContext(ctx, + statProgressVacuumQuery) + + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var ( + datname sql.NullString + relname sql.NullString + phase sql.NullInt64 + heapBlksTotal sql.NullInt64 + heapBlksScanned sql.NullInt64 + heapBlksVacuumed sql.NullInt64 + indexVacuumCount sql.NullInt64 + maxDeadTuples sql.NullInt64 + numDeadTuples sql.NullInt64 + ) + + if err := rows.Scan( + &datname, + &relname, + &phase, + &heapBlksTotal, + &heapBlksScanned, + &heapBlksVacuumed, + &indexVacuumCount, + &maxDeadTuples, + &numDeadTuples, + ); err != nil { + return err + } + + datnameLabel := "unknown" + if datname.Valid { + datnameLabel = datname.String + } + relnameLabel := "unknown" + if relname.Valid { + relnameLabel = relname.String + } + + labels := []string{datnameLabel, relnameLabel} + + var phaseMetric *float64 + if phase.Valid { + v := float64(phase.Int64) + phaseMetric = &v + } + + for i, label := range vacuumPhases { + v := 0.0 + // Only the current phase should be 1.0. + if phaseMetric != nil && float64(i) == *phaseMetric { + v = 1.0 + } + labelsCopy := append(labels, label) + ch <- prometheus.MustNewConstMetric(statProgressVacuumPhase, prometheus.GaugeValue, v, labelsCopy...) + } + + heapTotal := 0.0 + if heapBlksTotal.Valid { + heapTotal = float64(heapBlksTotal.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumHeapBlksTotal, prometheus.GaugeValue, heapTotal, labels...) + + heapScanned := 0.0 + if heapBlksScanned.Valid { + heapScanned = float64(heapBlksScanned.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumHeapBlksScanned, prometheus.GaugeValue, heapScanned, labels...) + + heapVacuumed := 0.0 + if heapBlksVacuumed.Valid { + heapVacuumed = float64(heapBlksVacuumed.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumHeapBlksVacuumed, prometheus.GaugeValue, heapVacuumed, labels...) + + indexCount := 0.0 + if indexVacuumCount.Valid { + indexCount = float64(indexVacuumCount.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumIndexVacuumCount, prometheus.GaugeValue, indexCount, labels...) + + maxDead := 0.0 + if maxDeadTuples.Valid { + maxDead = float64(maxDeadTuples.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumMaxDeadTuples, prometheus.GaugeValue, maxDead, labels...) + + numDead := 0.0 + if numDeadTuples.Valid { + numDead = float64(numDeadTuples.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumNumDeadTuples, prometheus.GaugeValue, numDead, labels...) + } + + if err := rows.Err(); err != nil { + return err + } + return nil +} diff --git a/collector/pg_stat_progress_vacuum_test.go b/collector/pg_stat_progress_vacuum_test.go new file mode 100644 index 000000000..80572feb8 --- /dev/null +++ b/collector/pg_stat_progress_vacuum_test.go @@ -0,0 +1,135 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatProgressVacuumCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{ + "datname", "relname", "phase", "heap_blks_total", "heap_blks_scanned", + "heap_blks_vacuumed", "index_vacuum_count", "max_dead_tuples", "num_dead_tuples", + } + + rows := sqlmock.NewRows(columns).AddRow( + "postgres", "a_table", 3, 3000, 400, 200, 2, 500, 123) + + mock.ExpectQuery(sanitizeQuery(statProgressVacuumQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatProgressVacuumCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatProgressVacuumCollector.Update; %+v", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "initializing"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "scanning heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "vacuuming indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "vacuuming heap"}, metricType: dto.MetricType_GAUGE, value: 1}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "cleaning up indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "truncating heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "performing final cleanup"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 3000}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 400}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 200}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 2}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 500}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 123}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(m, convey.ShouldResemble, expect) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled exceptions: %+v", err) + } +} + +func TestPGStatProgressVacuumCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{ + "datname", "relname", "phase", "heap_blks_total", "heap_blks_scanned", + "heap_blks_vacuumed", "index_vacuum_count", "max_dead_tuples", "num_dead_tuples", + } + + rows := sqlmock.NewRows(columns).AddRow( + "postgres", nil, nil, nil, nil, nil, nil, nil, nil) + + mock.ExpectQuery(sanitizeQuery(statProgressVacuumQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatProgressVacuumCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatProgressVacuumCollector.Update; %+v", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "initializing"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "scanning heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "vacuuming indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "vacuuming heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "cleaning up indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "truncating heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "performing final cleanup"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled exceptions: %+v", err) + } +} diff --git a/collector/pg_stat_replication.go b/collector/pg_stat_replication.go new file mode 100644 index 000000000..816f93f60 --- /dev/null +++ b/collector/pg_stat_replication.go @@ -0,0 +1,170 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const statReplicationSubsystem = "stat_replication" + +func init() { + registerCollector(statReplicationSubsystem, defaultEnabled, NewPGStatReplicationCollector) +} + +type PGStatReplicationCollector struct{} + +func NewPGStatReplicationCollector(collectorConfig) (Collector, error) { + return &PGStatReplicationCollector{}, nil +} + +var ( + statReplicationLabels = []string{"application_name", "client_addr", "state", "slot_name"} + + statReplicationCurrentWalLSNBytesDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statReplicationSubsystem, "pg_current_wal_lsn_bytes"), + "WAL position in bytes", + statReplicationLabels, + prometheus.Labels{}, + ) + statReplicationWalLSNDiffDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statReplicationSubsystem, "pg_wal_lsn_diff"), + "Lag in bytes between master and slave", + statReplicationLabels, + prometheus.Labels{}, + ) + statReplicationXlogLocationDiffDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statReplicationSubsystem, "pg_xlog_location_diff"), + "Lag in bytes between master and slave", + statReplicationLabels, + prometheus.Labels{}, + ) + + statReplicationQuery = ` + SELECT + application_name, + client_addr::text, + state, + slot_name, + (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_lsn('0/0'))::float else pg_wal_lsn_diff(pg_current_wal_lsn(), pg_lsn('0/0'))::float end) AS pg_current_wal_lsn_bytes, + (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), replay_lsn)::float else pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)::float end) AS pg_wal_lsn_diff + FROM pg_stat_replication + ` + + statReplicationQueryBefore10 = ` + SELECT + application_name, + client_addr::text, + state, + slot_name, + (case pg_is_in_recovery() when 't' then pg_xlog_location_diff(pg_last_xlog_receive_location(), replay_location)::float else pg_xlog_location_diff(pg_current_xlog_location(), replay_location)::float end) AS pg_xlog_location_diff + FROM pg_stat_replication + ` +) + +func (PGStatReplicationCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + switch { + case instance.version.GTE(semver.MustParse("10.0.0")): + return updateStatReplication(ctx, instance, ch) + case instance.version.GTE(semver.MustParse("9.2.0")): + return updateStatReplicationBefore10(ctx, instance, ch) + default: + return nil + } +} + +func updateStatReplication(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + rows, err := db.QueryContext(ctx, statReplicationQuery) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var applicationName, clientAddr, state, slotName sql.NullString + var currentWalLSNBytes, walLSNDiff sql.NullFloat64 + if err := rows.Scan(&applicationName, &clientAddr, &state, &slotName, ¤tWalLSNBytes, &walLSNDiff); err != nil { + return err + } + labels := statReplicationLabelValues(applicationName, clientAddr, state, slotName) + + if currentWalLSNBytes.Valid { + ch <- prometheus.MustNewConstMetric( + statReplicationCurrentWalLSNBytesDesc, + prometheus.GaugeValue, + currentWalLSNBytes.Float64, + labels..., + ) + } + if walLSNDiff.Valid { + ch <- prometheus.MustNewConstMetric( + statReplicationWalLSNDiffDesc, + prometheus.GaugeValue, + walLSNDiff.Float64, + labels..., + ) + } + } + + return rows.Err() +} + +func updateStatReplicationBefore10(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + rows, err := db.QueryContext(ctx, statReplicationQueryBefore10) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var applicationName, clientAddr, state, slotName sql.NullString + var xlogLocationDiff sql.NullFloat64 + if err := rows.Scan(&applicationName, &clientAddr, &state, &slotName, &xlogLocationDiff); err != nil { + return err + } + + if xlogLocationDiff.Valid { + ch <- prometheus.MustNewConstMetric( + statReplicationXlogLocationDiffDesc, + prometheus.GaugeValue, + xlogLocationDiff.Float64, + statReplicationLabelValues(applicationName, clientAddr, state, slotName)..., + ) + } + } + + return rows.Err() +} + +func statReplicationLabelValues(applicationName, clientAddr, state, slotName sql.NullString) []string { + return []string{ + nullStringValue(applicationName), + nullStringValue(clientAddr), + nullStringValue(state), + nullStringValue(slotName), + } +} + +func nullStringValue(s sql.NullString) string { + if s.Valid { + return s.String + } + return "" +} diff --git a/collector/pg_stat_replication_test.go b/collector/pg_stat_replication_test.go new file mode 100644 index 000000000..60917c0a9 --- /dev/null +++ b/collector/pg_stat_replication_test.go @@ -0,0 +1,221 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatReplicationCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + rows := sqlmock.NewRows([]string{ + "application_name", + "client_addr", + "state", + "slot_name", + "pg_current_wal_lsn_bytes", + "pg_wal_lsn_diff", + }).AddRow("standby", "10.0.0.1", "streaming", "slot_a", 1024.0, 64.0) + mock.ExpectQuery(sanitizeQuery(statReplicationQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatReplicationCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatReplicationCollector.Update: %s", err) + } + }() + + expectedLabels := labelMap{ + "application_name": "standby", + "client_addr": "10.0.0.1", + "state": "streaming", + "slot_name": "slot_a", + } + expected := []MetricResult{ + {labels: expectedLabels, value: 1024, metricType: dto.MetricType_GAUGE}, + {labels: expectedLabels, value: 64, metricType: dto.MetricType_GAUGE}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatReplicationCollectorBefore10(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("9.6.0")} + + rows := sqlmock.NewRows([]string{ + "application_name", + "client_addr", + "state", + "slot_name", + "pg_xlog_location_diff", + }).AddRow("standby", "10.0.0.1", "streaming", "slot_a", 32.0) + mock.ExpectQuery(sanitizeQuery(statReplicationQueryBefore10)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatReplicationCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatReplicationCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + { + labels: labelMap{ + "application_name": "standby", + "client_addr": "10.0.0.1", + "state": "streaming", + "slot_name": "slot_a", + }, + value: 32, + metricType: dto.MetricType_GAUGE, + }, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatReplicationCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + rows := sqlmock.NewRows([]string{ + "application_name", + "client_addr", + "state", + "slot_name", + "pg_current_wal_lsn_bytes", + "pg_wal_lsn_diff", + }).AddRow(nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(statReplicationQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatReplicationCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatReplicationCollector.Update: %s", err) + } + }() + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted for NULL stat_replication value: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatReplicationCollectorBefore10NullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("9.6.0")} + + rows := sqlmock.NewRows([]string{ + "application_name", + "client_addr", + "state", + "slot_name", + "pg_xlog_location_diff", + }).AddRow(nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(statReplicationQueryBefore10)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatReplicationCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatReplicationCollector.Update: %s", err) + } + }() + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted for NULL stat_replication value: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatReplicationCollectorBefore92(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("9.1.0")} + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatReplicationCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatReplicationCollector.Update: %s", err) + } + }() + + if metric, ok := <-ch; ok { + t.Fatalf("unexpected metric emitted for PostgreSQL 9.1: %s", metric.Desc()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_statements.go b/collector/pg_stat_statements.go index 7926f533e..5c5c036d1 100644 --- a/collector/pg_stat_statements.go +++ b/collector/pg_stat_statements.go @@ -16,31 +16,101 @@ package collector import ( "context" "database/sql" + "fmt" "log/slog" + "strings" + "github.com/alecthomas/kingpin/v2" "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) -const statStatementsSubsystem = "stat_statements" +const ( + statStatementsSubsystem = "stat_statements" + defaultStatementLimit = "100" +) + +var ( + includeQueryFlag *bool = nil + statementLengthFlag *uint = nil + statementLimitFlag *uint = nil + excludedDatabasesFlag *string = nil + excludedUsersFlag *string = nil +) func init() { // WARNING: // Disabled by default because this set of metrics can be quite expensive on a busy server // Every unique query will cause a new timeseries to be created registerCollector(statStatementsSubsystem, defaultDisabled, NewPGStatStatementsCollector) + + includeQueryFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".include_query"), + "Enable selecting statement query together with queryId. (default: disabled)"). + Default(fmt.Sprintf("%v", defaultDisabled)). + Bool() + statementLengthFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".query_length"), + "Maximum length of the statement text."). + Default("120"). + Uint() + statementLimitFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".limit"), + "Maximum number of statements to return."). + Default(defaultStatementLimit). + Uint() + excludedDatabasesFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".exclude_databases"), + "Comma-separated list of database names to exclude. (default: none)"). + Default(""). + String() + excludedUsersFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".exclude_users"), + "Comma-separated list of user names to exclude. (default: none)"). + Default(""). + String() } type PGStatStatementsCollector struct { - log *slog.Logger + log *slog.Logger + includeQueryStatement bool + statementLength uint + statementLimit uint + excludedDatabases []string + excludedUsers []string } func NewPGStatStatementsCollector(config collectorConfig) (Collector, error) { - return &PGStatStatementsCollector{log: config.logger}, nil + var excludedDatabases []string + if *excludedDatabasesFlag != "" { + for db := range strings.SplitSeq(*excludedDatabasesFlag, ",") { + if trimmed := strings.TrimSpace(db); trimmed != "" { + excludedDatabases = append(excludedDatabases, trimmed) + } + } + } + + var excludedUsers []string + if *excludedUsersFlag != "" { + for user := range strings.SplitSeq(*excludedUsersFlag, ",") { + if trimmed := strings.TrimSpace(user); trimmed != "" { + excludedUsers = append(excludedUsers, trimmed) + } + } + } + + return &PGStatStatementsCollector{ + log: config.logger, + includeQueryStatement: *includeQueryFlag, + statementLength: *statementLengthFlag, + statementLimit: *statementLimitFlag, + excludedDatabases: excludedDatabases, + excludedUsers: excludedUsers, + }, nil } var ( - statSTatementsCallsTotal = prometheus.NewDesc( + statStatementsCallsTotal = prometheus.NewDesc( prometheus.BuildFQName(namespace, statStatementsSubsystem, "calls_total"), "Number of times executed", []string{"user", "datname", "queryid"}, @@ -71,10 +141,24 @@ var ( prometheus.Labels{}, ) + statStatementsQuery = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statStatementsSubsystem, "query_id"), + "SQL Query to queryid mapping", + []string{"queryid", "query"}, + prometheus.Labels{}, + ) +) + +const ( + pgStatStatementQuerySelect = `LEFT(pg_stat_statements.query, %d) as query,` + pgStatStatementExcludeDatabases = `AND pg_database.datname NOT IN (%s) ` + pgStatStatementExcludeUsers = `AND pg_get_userbyid(userid) NOT IN (%s) ` + pgStatStatementsQuery = `SELECT pg_get_userbyid(userid) as user, pg_database.datname, pg_stat_statements.queryid, + %s pg_stat_statements.calls as calls_total, pg_stat_statements.total_time / 1000.0 as seconds_total, pg_stat_statements.rows as rows_total, @@ -89,50 +173,101 @@ var ( WITHIN GROUP (ORDER BY total_time) FROM pg_stat_statements ) + %s %s ORDER BY seconds_total DESC - LIMIT 100;` + LIMIT %s;` - pgStatStatementsNewQuery = `SELECT + pgStatStatementsQuery_PG13 = `SELECT pg_get_userbyid(userid) as user, pg_database.datname, pg_stat_statements.queryid, + %s pg_stat_statements.calls as calls_total, pg_stat_statements.total_exec_time / 1000.0 as seconds_total, pg_stat_statements.rows as rows_total, pg_stat_statements.blk_read_time / 1000.0 as block_read_seconds_total, pg_stat_statements.blk_write_time / 1000.0 as block_write_seconds_total - FROM pg_stat_statements + FROM pg_stat_statements(%t) JOIN pg_database ON pg_database.oid = pg_stat_statements.dbid WHERE total_exec_time > ( SELECT percentile_cont(0.1) WITHIN GROUP (ORDER BY total_exec_time) - FROM pg_stat_statements + FROM pg_stat_statements(false) + ) + %s %s + ORDER BY seconds_total DESC + LIMIT %s;` + + pgStatStatementsQuery_PG17 = `SELECT + pg_get_userbyid(userid) as user, + pg_database.datname, + pg_stat_statements.queryid, + %s + pg_stat_statements.calls as calls_total, + pg_stat_statements.total_exec_time / 1000.0 as seconds_total, + pg_stat_statements.rows as rows_total, + pg_stat_statements.shared_blk_read_time / 1000.0 as block_read_seconds_total, + pg_stat_statements.shared_blk_write_time / 1000.0 as block_write_seconds_total + FROM pg_stat_statements(%t) + JOIN pg_database + ON pg_database.oid = pg_stat_statements.dbid + WHERE + total_exec_time > ( + SELECT percentile_cont(0.1) + WITHIN GROUP (ORDER BY total_exec_time) + FROM pg_stat_statements(false) ) + %s %s ORDER BY seconds_total DESC - LIMIT 100;` + LIMIT %s;` ) -func (PGStatStatementsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { - query := pgStatStatementsQuery - if instance.version.GE(semver.MustParse("13.0.0")) { - query = pgStatStatementsNewQuery +func (c PGStatStatementsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + querySelect := "" + if c.includeQueryStatement { + querySelect = fmt.Sprintf(pgStatStatementQuerySelect, c.statementLength) + } + databaseFilter := c.buildExclusionClause(c.excludedDatabases, pgStatStatementExcludeDatabases) + userFilter := c.buildExclusionClause(c.excludedUsers, pgStatStatementExcludeUsers) + statementLimit := defaultStatementLimit + if c.statementLimit > 0 { + statementLimit = fmt.Sprintf("%d", c.statementLimit) + } + + var query string + switch { + case instance.version.GE(semver.MustParse("17.0.0")): + query = fmt.Sprintf(pgStatStatementsQuery_PG17, querySelect, c.includeQueryStatement, databaseFilter, userFilter, statementLimit) + case instance.version.GE(semver.MustParse("13.0.0")): + query = fmt.Sprintf(pgStatStatementsQuery_PG13, querySelect, c.includeQueryStatement, databaseFilter, userFilter, statementLimit) + default: + query = fmt.Sprintf(pgStatStatementsQuery, querySelect, databaseFilter, userFilter, statementLimit) } db := instance.getDB() rows, err := db.QueryContext(ctx, query) + presentQueryIds := make(map[string]struct{}) + + seen := make(map[string]struct{}) // to track duplicates by (user, datname, queryid) + if err != nil { return err } defer rows.Close() for rows.Next() { - var user, datname, queryid sql.NullString + var user, datname, queryid, statement sql.NullString var callsTotal, rowsTotal sql.NullInt64 var secondsTotal, blockReadSecondsTotal, blockWriteSecondsTotal sql.NullFloat64 - - if err := rows.Scan(&user, &datname, &queryid, &callsTotal, &secondsTotal, &rowsTotal, &blockReadSecondsTotal, &blockWriteSecondsTotal); err != nil { + var columns []any + if c.includeQueryStatement { + columns = []any{&user, &datname, &queryid, &statement, &callsTotal, &secondsTotal, &rowsTotal, &blockReadSecondsTotal, &blockWriteSecondsTotal} + } else { + columns = []any{&user, &datname, &queryid, &callsTotal, &secondsTotal, &rowsTotal, &blockReadSecondsTotal, &blockWriteSecondsTotal} + } + if err := rows.Scan(columns...); err != nil { return err } @@ -149,12 +284,20 @@ func (PGStatStatementsCollector) Update(ctx context.Context, instance *instance, queryidLabel = queryid.String } + key := fmt.Sprintf("%s|%s|%s", userLabel, datnameLabel, queryidLabel) + _, ok := seen[key] + if ok { + c.log.Warn("Duplicate found", "user", userLabel, "datname", datnameLabel, "queryid", queryidLabel) + continue + } + seen[key] = struct{}{} + callsTotalMetric := 0.0 if callsTotal.Valid { callsTotalMetric = float64(callsTotal.Int64) } ch <- prometheus.MustNewConstMetric( - statSTatementsCallsTotal, + statStatementsCallsTotal, prometheus.CounterValue, callsTotalMetric, userLabel, datnameLabel, queryidLabel, @@ -203,9 +346,41 @@ func (PGStatStatementsCollector) Update(ctx context.Context, instance *instance, blockWriteSecondsTotalMetric, userLabel, datnameLabel, queryidLabel, ) + + if c.includeQueryStatement { + _, ok := presentQueryIds[queryidLabel] + if !ok { + presentQueryIds[queryidLabel] = struct{}{} + + queryLabel := "unknown" + if statement.Valid { + queryLabel = statement.String + } + + ch <- prometheus.MustNewConstMetric( + statStatementsQuery, + prometheus.CounterValue, + 1, + queryidLabel, queryLabel, + ) + } + } } if err := rows.Err(); err != nil { return err } return nil } + +func (c PGStatStatementsCollector) buildExclusionClause(identifiers []string, clauseTemplate string) string { + if len(identifiers) == 0 { + return "" + } + + escaped := make([]string, 0, len(identifiers)) + for _, identifier := range identifiers { + escaped = append(escaped, fmt.Sprintf("'%s'", strings.ReplaceAll(identifier, "'", "''"))) + } + + return fmt.Sprintf(clauseTemplate, strings.Join(escaped, ", ")) +} diff --git a/collector/pg_stat_statements_test.go b/collector/pg_stat_statements_test.go index 08aba34c2..00171c83c 100644 --- a/collector/pg_stat_statements_test.go +++ b/collector/pg_stat_statements_test.go @@ -14,6 +14,7 @@ package collector import ( "context" + "fmt" "testing" "github.com/DATA-DOG/go-sqlmock" @@ -23,7 +24,7 @@ import ( "github.com/smartystreets/goconvey/convey" ) -func TestPGStateStatementsCollector(t *testing.T) { +func TestPGStatStatementsCollector(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { t.Fatalf("Error opening a stub db connection: %s", err) @@ -35,7 +36,7 @@ func TestPGStateStatementsCollector(t *testing.T) { columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} rows := sqlmock.NewRows(columns). AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) - mock.ExpectQuery(sanitizeQuery(pgStatStatementsQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, "", "", "", defaultStatementLimit))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -66,7 +67,95 @@ func TestPGStateStatementsCollector(t *testing.T) { } } -func TestPGStateStatementsCollectorNull(t *testing.T) { +func TestPGStatStatementsCollectorWithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 100) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, fmt.Sprintf(pgStatStatementQuerySelect, 100), "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 100} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 100) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, fmt.Sprintf(pgStatStatementQuerySelect, 100), "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 100, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorNull(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { t.Fatalf("Error opening a stub db connection: %s", err) @@ -78,7 +167,7 @@ func TestPGStateStatementsCollectorNull(t *testing.T) { columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} rows := sqlmock.NewRows(columns). AddRow(nil, nil, nil, nil, nil, nil, nil, nil) - mock.ExpectQuery(sanitizeQuery(pgStatStatementsNewQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, "", "", defaultStatementLimit))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -109,7 +198,95 @@ func TestPGStateStatementsCollectorNull(t *testing.T) { } } -func TestPGStateStatementsCollectorNewPG(t *testing.T) { +func TestPGStatStatementsCollectorNullWithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 200) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 200), true, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 200} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"queryid": "unknown", "query": "unknown"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorNullWithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 200) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 200), true, "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 200, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"queryid": "unknown", "query": "unknown"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG13(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { t.Fatalf("Error opening a stub db connection: %s", err) @@ -121,7 +298,7 @@ func TestPGStateStatementsCollectorNewPG(t *testing.T) { columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} rows := sqlmock.NewRows(columns). AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) - mock.ExpectQuery(sanitizeQuery(pgStatStatementsNewQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, "", "", defaultStatementLimit))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -151,3 +328,463 @@ func TestPGStateStatementsCollectorNewPG(t *testing.T) { t.Errorf("there were unfulfilled exceptions: %s", err) } } + +func TestPGStatStatementsCollector_PG13_WithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG13_WithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG17(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, "", false, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG17_WithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG17_WithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedDatabases(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + excludedDatabases := []string{"rdsadmin", "cloudsqladmin"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + // Expected query should include database exclusion filters + expectedFilter := " AND pg_database.datname NOT IN ('rdsadmin', 'cloudsqladmin')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, "", expectedFilter, "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedDatabases: excludedDatabases} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with excluded databases", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedDatabasesSQLEscaping(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.0.0")} + + // exclude a database name with a single quote (SQL injection test) + excludedDatabases := []string{"test'db"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedFilter := " AND pg_database.datname NOT IN ('test''db')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, expectedFilter, "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedDatabases: excludedDatabases} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with SQL escaping", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedUsers(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + excludedUsers := []string{"monitoring", "readonly"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedFilter := " AND pg_get_userbyid(userid) NOT IN ('monitoring', 'readonly')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, "", "", expectedFilter, defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedUsers: excludedUsers} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with excluded users", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedUsersSQLEscaping(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.0.0")} + + // exclude a user name with a single quote (SQL injection test) + excludedUsers := []string{"test'user"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedFilter := " AND pg_get_userbyid(userid) NOT IN ('test''user')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, "", expectedFilter, defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedUsers: excludedUsers} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with SQL escaping for users", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedDatabasesAndUsers(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + // exclude both databases and users + excludedDatabases := []string{"rdsadmin", "cloudsqladmin"} + excludedUsers := []string{"monitoring", "readonly"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedDbFilter := " AND pg_database.datname NOT IN ('rdsadmin', 'cloudsqladmin')" + expectedUserFilter := " AND pg_get_userbyid(userid) NOT IN ('monitoring', 'readonly')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, "", false, expectedDbFilter, expectedUserFilter, defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedDatabases: excludedDatabases, excludedUsers: excludedUsers} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with excluded databases and users", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_user_tables.go b/collector/pg_stat_user_tables.go index 254d76a66..ad8bcace7 100644 --- a/collector/pg_stat_user_tables.go +++ b/collector/pg_stat_user_tables.go @@ -150,9 +150,15 @@ var ( []string{"datname", "schemaname", "relname"}, prometheus.Labels{}, ) - statUserTablesTotalSize = prometheus.NewDesc( - prometheus.BuildFQName(namespace, userTableSubsystem, "size_bytes"), - "Total disk space used by this table, in bytes, including all indexes and TOAST data", + statUserIndexSize = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "index_size_bytes"), + "Total disk space used by this index, in bytes", + []string{"datname", "schemaname", "relname"}, + prometheus.Labels{}, + ) + statUserTableSize = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "table_size_bytes"), + "Total disk space used by this table, in bytes", []string{"datname", "schemaname", "relname"}, prometheus.Labels{}, ) @@ -180,7 +186,8 @@ var ( autovacuum_count, analyze_count, autoanalyze_count, - pg_total_relation_size(relid) as total_size + pg_indexes_size(relid) as indexes_size, + pg_table_size(relid) as table_size FROM pg_stat_user_tables` ) @@ -198,10 +205,10 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan for rows.Next() { var datname, schemaname, relname sql.NullString var seqScan, seqTupRead, idxScan, idxTupFetch, nTupIns, nTupUpd, nTupDel, nTupHotUpd, nLiveTup, nDeadTup, - nModSinceAnalyze, vacuumCount, autovacuumCount, analyzeCount, autoanalyzeCount, totalSize sql.NullInt64 + nModSinceAnalyze, vacuumCount, autovacuumCount, analyzeCount, autoanalyzeCount, indexSize, tableSize sql.NullInt64 var lastVacuum, lastAutovacuum, lastAnalyze, lastAutoanalyze sql.NullTime - if err := rows.Scan(&datname, &schemaname, &relname, &seqScan, &seqTupRead, &idxScan, &idxTupFetch, &nTupIns, &nTupUpd, &nTupDel, &nTupHotUpd, &nLiveTup, &nDeadTup, &nModSinceAnalyze, &lastVacuum, &lastAutovacuum, &lastAnalyze, &lastAutoanalyze, &vacuumCount, &autovacuumCount, &analyzeCount, &autoanalyzeCount, &totalSize); err != nil { + if err := rows.Scan(&datname, &schemaname, &relname, &seqScan, &seqTupRead, &idxScan, &idxTupFetch, &nTupIns, &nTupUpd, &nTupDel, &nTupHotUpd, &nLiveTup, &nDeadTup, &nModSinceAnalyze, &lastVacuum, &lastAutovacuum, &lastAnalyze, &lastAutoanalyze, &vacuumCount, &autovacuumCount, &analyzeCount, &autoanalyzeCount, &indexSize, &tableSize); err != nil { return err } @@ -427,14 +434,25 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan datnameLabel, schemanameLabel, relnameLabel, ) - totalSizeMetric := 0.0 - if totalSize.Valid { - totalSizeMetric = float64(totalSize.Int64) + indexSizeMetric := 0.0 + if indexSize.Valid { + indexSizeMetric = float64(indexSize.Int64) + } + ch <- prometheus.MustNewConstMetric( + statUserIndexSize, + prometheus.GaugeValue, + indexSizeMetric, + datnameLabel, schemanameLabel, relnameLabel, + ) + + tableSizeMetric := 0.0 + if tableSize.Valid { + tableSizeMetric = float64(tableSize.Int64) } ch <- prometheus.MustNewConstMetric( - statUserTablesTotalSize, + statUserTableSize, prometheus.GaugeValue, - totalSizeMetric, + tableSizeMetric, datnameLabel, schemanameLabel, relnameLabel, ) } diff --git a/collector/pg_stat_user_tables_test.go b/collector/pg_stat_user_tables_test.go index 5e82335c3..4649bdbc5 100644 --- a/collector/pg_stat_user_tables_test.go +++ b/collector/pg_stat_user_tables_test.go @@ -72,7 +72,8 @@ func TestPGStatUserTablesCollector(t *testing.T) { "autovacuum_count", "analyze_count", "autoanalyze_count", - "total_size"} + "index_size", + "table_size"} rows := sqlmock.NewRows(columns). AddRow("postgres", "public", @@ -96,7 +97,8 @@ func TestPGStatUserTablesCollector(t *testing.T) { 12, 13, 14, - 15) + 15, + 16) mock.ExpectQuery(sanitizeQuery(statUserTablesQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -128,6 +130,8 @@ func TestPGStatUserTablesCollector(t *testing.T) { {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 12}, {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 13}, {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 14}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 15}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 16}, } convey.Convey("Metrics comparison", t, func() { @@ -173,7 +177,8 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { "autovacuum_count", "analyze_count", "autoanalyze_count", - "total_size"} + "index_size", + "table_size"} rows := sqlmock.NewRows(columns). AddRow("postgres", nil, @@ -197,6 +202,7 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { nil, nil, nil, + nil, nil) mock.ExpectQuery(sanitizeQuery(statUserTablesQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -229,6 +235,8 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, } convey.Convey("Metrics comparison", t, func() { diff --git a/collector/pg_stat_walreceiver.go b/collector/pg_stat_walreceiver.go index ea0db4558..e6cc13652 100644 --- a/collector/pg_stat_walreceiver.go +++ b/collector/pg_stat_walreceiver.go @@ -108,7 +108,7 @@ var ( status, (receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn, %s -receive_start_tli, + receive_start_tli, received_tli, extract(epoch from last_msg_send_time) as last_msg_send_time, extract(epoch from last_msg_receipt_time) as last_msg_receipt_time, @@ -147,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64 if hasFlushedLSN { - if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil { + if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil { return err } } else { @@ -209,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta float64(receiveStartLsn.Int64), labels...) - ch <- prometheus.MustNewConstMetric( - statWalReceiverReceiveStartTli, - prometheus.GaugeValue, - float64(receiveStartTli.Int64), - labels...) - if hasFlushedLSN { ch <- prometheus.MustNewConstMetric( statWalReceiverFlushedLSN, @@ -223,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta labels...) } + ch <- prometheus.MustNewConstMetric( + statWalReceiverReceiveStartTli, + prometheus.GaugeValue, + float64(receiveStartTli.Int64), + labels...) + ch <- prometheus.MustNewConstMetric( statWalReceiverReceivedTli, prometheus.GaugeValue, diff --git a/collector/pg_stat_walreceiver_test.go b/collector/pg_stat_walreceiver_test.go index c81c9ecae..13351255d 100644 --- a/collector/pg_stat_walreceiver_test.go +++ b/collector/pg_stat_walreceiver_test.go @@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) { "slot_name", "status", "receive_start_lsn", - "receive_start_tli", "flushed_lsn", + "receive_start_tli", "received_tli", "last_msg_send_time", "last_msg_receipt_time", @@ -65,8 +65,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) { "bar", "stopping", int64(1200668684563608), - 1687321285, int64(1200668684563609), + 1687321285, 1687321280, 1687321275, 1687321276, @@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) { }() expected := []MetricResult{ {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER}, - {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER}, + {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER}, diff --git a/collector/pg_wal.go b/collector/pg_wal.go index afa8fcef6..b76b7118b 100644 --- a/collector/pg_wal.go +++ b/collector/pg_wal.go @@ -15,6 +15,7 @@ package collector import ( "context" + "database/sql" "github.com/prometheus/client_golang/prometheus" ) @@ -67,7 +68,7 @@ func (c PGWALCollector) Update(ctx context.Context, instance *instance, ch chan< ) var segments uint64 - var size uint64 + var size sql.NullInt64 err := row.Scan(&segments, &size) if err != nil { return err @@ -76,9 +77,11 @@ func (c PGWALCollector) Update(ctx context.Context, instance *instance, ch chan< pgWALSegments, prometheus.GaugeValue, float64(segments), ) - ch <- prometheus.MustNewConstMetric( - pgWALSize, - prometheus.GaugeValue, float64(size), - ) + if size.Valid { + ch <- prometheus.MustNewConstMetric( + pgWALSize, + prometheus.GaugeValue, float64(size.Int64), + ) + } return nil } diff --git a/collector/pg_wal_test.go b/collector/pg_wal_test.go index 745105a13..8109953ef 100644 --- a/collector/pg_wal_test.go +++ b/collector/pg_wal_test.go @@ -61,3 +61,42 @@ func TestPgWALCollector(t *testing.T) { t.Errorf("there were unfulfilled exceptions: %s", err) } } + +func TestPgWALCollectorZeroSegments(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{"segments", "size"} + rows := sqlmock.NewRows(columns). + AddRow(0, nil) + mock.ExpectQuery(sanitizeQuery(pgWALQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGWALCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGWALCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, value: 0, metricType: dto.MetricType_GAUGE}, + } + + convey.Convey("Metrics comparison (zero segments)", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/probe.go b/collector/probe.go index 54a06261f..e40d6fee1 100644 --- a/collector/probe.go +++ b/collector/probe.go @@ -76,11 +76,11 @@ func (pc *ProbeCollector) Describe(ch chan<- *prometheus.Desc) { func (pc *ProbeCollector) Collect(ch chan<- prometheus.Metric) { // Set up the database connection for the collector. err := pc.instance.setup() + defer pc.instance.Close() if err != nil { pc.logger.Error("Error opening connection to database", "err", err) return } - defer pc.instance.Close() wg := sync.WaitGroup{} wg.Add(len(pc.collectors)) diff --git a/config/config.go b/config/config.go index 7cdc08f7b..d2588d327 100644 --- a/config/config.go +++ b/config/config.go @@ -14,30 +14,17 @@ package config import ( + "errors" "fmt" + "io" "log/slog" "os" "sync" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "gopkg.in/yaml.v3" ) -var ( - configReloadSuccess = promauto.NewGauge(prometheus.GaugeOpts{ - Namespace: "postgres_exporter", - Name: "config_last_reload_successful", - Help: "Postgres exporter config loaded successfully.", - }) - - configReloadSeconds = promauto.NewGauge(prometheus.GaugeOpts{ - Namespace: "postgres_exporter", - Name: "config_last_reload_success_timestamp_seconds", - Help: "Timestamp of the last successful configuration reload.", - }) -) - type Config struct { AuthModules map[string]AuthModule `yaml:"auth_modules"` } @@ -57,6 +44,31 @@ type UserPass struct { type Handler struct { sync.RWMutex Config *Config + + configReloadSuccess prometheus.Gauge + configReloadSeconds prometheus.Gauge +} + +func NewHandler(registerer prometheus.Registerer) (*Handler, error) { + if registerer == nil { + return nil, errors.New("registerer is required") + } + h := &Handler{ + Config: &Config{}, + configReloadSuccess: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "postgres_exporter", + Name: "config_last_reload_successful", + Help: "Postgres exporter config loaded successfully.", + }), + configReloadSeconds: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "postgres_exporter", + Name: "config_last_reload_success_timestamp_seconds", + Help: "Timestamp of the last successful configuration reload.", + }), + } + registerer.MustRegister(h.configReloadSuccess, h.configReloadSeconds) + + return h, nil } func (ch *Handler) GetConfig() *Config { @@ -66,33 +78,63 @@ func (ch *Handler) GetConfig() *Config { } func (ch *Handler) ReloadConfig(f string, logger *slog.Logger) error { - config := &Config{} var err error defer func() { - if err != nil { - configReloadSuccess.Set(0) - } else { - configReloadSuccess.Set(1) - configReloadSeconds.SetToCurrentTime() - } + ch.observeReload(err) }() + config, err := LoadConfig(f) + if err != nil { + return err + } + + ch.SetConfig(config) + return nil +} + +func (ch *Handler) observeReload(err error) { + if ch.configReloadSuccess == nil { + return + } + if err != nil { + ch.configReloadSuccess.Set(0) + return + } + ch.configReloadSuccess.Set(1) + if ch.configReloadSeconds != nil { + ch.configReloadSeconds.SetToCurrentTime() + } +} + +func LoadConfig(f string) (*Config, error) { yamlReader, err := os.Open(f) if err != nil { - return fmt.Errorf("Error opening config file %q: %s", f, err) + return nil, fmt.Errorf("error opening config file %q: %s", f, err) } defer yamlReader.Close() - decoder := yaml.NewDecoder(yamlReader) + + config, err := DecodeConfig(yamlReader) + if err != nil { + return nil, fmt.Errorf("error parsing config file %q: %s", f, err) + } + return config, nil +} + +func DecodeConfig(r io.Reader) (*Config, error) { + config := &Config{} + decoder := yaml.NewDecoder(r) decoder.KnownFields(true) - if err = decoder.Decode(config); err != nil { - return fmt.Errorf("Error parsing config file %q: %s", f, err) + if err := decoder.Decode(config); err != nil { + return nil, err } + return config, nil +} +func (ch *Handler) SetConfig(config *Config) { ch.Lock() ch.Config = config ch.Unlock() - return nil } func (m AuthModule) ConfigureTarget(target string) (DSN, error) { diff --git a/config/config_test.go b/config/config_test.go index d5d23d3ba..00ded7e93 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -14,23 +14,64 @@ package config import ( + "strings" "testing" + + "github.com/prometheus/client_golang/prometheus" ) -func TestLoadConfig(t *testing.T) { - ch := &Handler{ - Config: &Config{}, +func TestLoadConfigFile(t *testing.T) { + config, err := LoadConfig("testdata/config-good.yaml") + if err != nil { + t.Fatalf("LoadConfig() error = %v", err) + } + if len(config.AuthModules) == 0 { + t.Fatal("LoadConfig() loaded no auth modules") + } +} + +func TestDecodeConfig(t *testing.T) { + config, err := DecodeConfig(strings.NewReader(` +auth_modules: + module: + type: userpass + userpass: + username: user + password: pass +`)) + if err != nil { + t.Fatalf("DecodeConfig() error = %v", err) + } + if got, want := config.AuthModules["module"].UserPass.Username, "user"; got != want { + t.Fatalf("username = %q, want %q", got, want) } +} - err := ch.ReloadConfig("testdata/config-good.yaml", nil) +func TestLoadConfig(t *testing.T) { + ch, err := NewHandler(prometheus.NewRegistry()) if err != nil { - t.Errorf("Error loading config: %s", err) + t.Fatalf("NewHandler() error = %v", err) + } + + if err := ch.ReloadConfig("testdata/config-good.yaml", nil); err != nil { + t.Errorf("error loading config: %s", err) + } +} + +func TestNewHandlerRequiresRegisterer(t *testing.T) { + handler, err := NewHandler(nil) + if err == nil { + t.Fatal("NewHandler() error = nil, want error") + } + if handler != nil { + t.Fatalf("NewHandler() handler = %v, want nil", handler) } } func TestLoadBadConfigs(t *testing.T) { - ch := &Handler{ - Config: &Config{}, + ch, err := NewHandler(prometheus.NewRegistry()) + if err != nil { + t.Fatalf("NewHandler() error = %v", err) } tests := []struct { @@ -39,11 +80,11 @@ func TestLoadBadConfigs(t *testing.T) { }{ { input: "testdata/config-bad-auth-module.yaml", - want: "Error parsing config file \"testdata/config-bad-auth-module.yaml\": yaml: unmarshal errors:\n line 3: field pretendauth not found in type config.AuthModule", + want: "error parsing config file \"testdata/config-bad-auth-module.yaml\": yaml: unmarshal errors:\n line 3: field pretendauth not found in type config.AuthModule", }, { input: "testdata/config-bad-extra-field.yaml", - want: "Error parsing config file \"testdata/config-bad-extra-field.yaml\": yaml: unmarshal errors:\n line 8: field doesNotExist not found in type config.AuthModule", + want: "error parsing config file \"testdata/config-bad-extra-field.yaml\": yaml: unmarshal errors:\n line 8: field doesNotExist not found in type config.AuthModule", }, } diff --git a/cmd/postgres_exporter/datasource.go b/exporter/datasource.go similarity index 86% rename from cmd/postgres_exporter/datasource.go rename to exporter/datasource.go index 7a22e177c..7e4f1bd11 100644 --- a/cmd/postgres_exporter/datasource.go +++ b/exporter/datasource.go @@ -11,13 +11,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "fmt" "net/url" "os" "regexp" + "slices" "strings" "github.com/prometheus/client_golang/prometheus" @@ -38,19 +39,19 @@ func (e *Exporter) discoverDatabaseDSNs() []string { var err error dsnURI, err = url.Parse(dsn) if err != nil { - logger.Error("Unable to parse DSN as URI", "dsn", loggableDSN(dsn), "err", err) + e.logger.Error("Unable to parse DSN as URI", "dsn", loggableDSN(dsn), "err", err) continue } } else if connstringRe.MatchString(dsn) { dsnConnstring = dsn } else { - logger.Error("Unable to parse DSN as either URI or connstring", "dsn", loggableDSN(dsn)) + e.logger.Error("Unable to parse DSN as either URI or connstring", "dsn", loggableDSN(dsn)) continue } server, err := e.servers.GetServer(dsn) if err != nil { - logger.Error("Error opening connection to database", "dsn", loggableDSN(dsn), "err", err) + e.logger.Error("Error opening connection to database", "dsn", loggableDSN(dsn), "err", err) continue } dsns[dsn] = struct{}{} @@ -60,15 +61,15 @@ func (e *Exporter) discoverDatabaseDSNs() []string { databaseNames, err := queryDatabases(server) if err != nil { - logger.Error("Error querying databases", "dsn", loggableDSN(dsn), "err", err) + e.logger.Error("Error querying databases", "dsn", loggableDSN(dsn), "err", err) continue } for _, databaseName := range databaseNames { - if contains(e.excludeDatabases, databaseName) { + if slices.Contains(e.excludeDatabases, databaseName) { continue } - if len(e.includeDatabases) != 0 && !contains(e.includeDatabases, databaseName) { + if len(e.includeDatabases) != 0 && !slices.Contains(e.includeDatabases, databaseName) { continue } @@ -108,17 +109,17 @@ func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error { // Check if map versions need to be updated if err := e.checkMapVersions(ch, server); err != nil { - logger.Warn("Proceeding with outdated query maps, as the Postgres version could not be determined", "err", err) + e.logger.Warn("Proceeding with outdated query maps, as the Postgres version could not be determined", "err", err) } - return server.Scrape(ch, e.disableSettingsMetrics) + return server.Scrape(ch) } // try to get the DataSource // DATA_SOURCE_NAME always wins so we do not break older versions // reading secrets from files wins over secrets in environment variables // DATA_SOURCE_NAME > DATA_SOURCE_{USER|PASS}_FILE > DATA_SOURCE_{USER|PASS} -func getDataSources() ([]string, error) { +func GetDataSources() ([]string, error) { var dsn = os.Getenv("DATA_SOURCE_NAME") if len(dsn) != 0 { return strings.Split(dsn, ","), nil diff --git a/cmd/postgres_exporter/namespace.go b/exporter/namespace.go similarity index 92% rename from cmd/postgres_exporter/namespace.go rename to exporter/namespace.go index ac7a23739..5f6bd8edc 100644 --- a/cmd/postgres_exporter/namespace.go +++ b/exporter/namespace.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "database/sql" @@ -129,7 +129,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_sum"))) continue } - sum, ok := dbToFloat64(columnData[idx]) + sum, ok := dbToFloat64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_sum", columnData[idx]))) continue @@ -140,7 +140,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_count"))) continue } - count, ok := dbToUint64(columnData[idx]) + count, ok := dbToUint64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_count", columnData[idx]))) continue @@ -152,7 +152,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa labels..., ) } else { - value, ok := dbToFloat64(columnData[idx]) + value, ok := dbToFloat64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx]))) continue @@ -167,7 +167,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa // Its not an error to fail here, since the values are // unexpected anyway. - value, ok := dbToFloat64(columnData[idx]) + value, ok := dbToFloat64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unparseable column type - discarding: ", namespace, columnName, err))) continue @@ -189,10 +189,10 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str scrapeStart := time.Now() for namespace, mapping := range server.metricMap { - logger.Debug("Querying namespace", "namespace", namespace) + server.logger.Debug("Querying namespace", "namespace", namespace) if mapping.master && !server.master { - logger.Debug("Query skipped...") + server.logger.Debug("Query skipped...") continue } @@ -201,7 +201,7 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str serVersion, _ := semver.Parse(server.lastMapVersion.String()) runServerRange, _ := semver.ParseRange(server.runonserver) if !runServerRange(serVersion) { - logger.Debug("Query skipped for this database version", "version", server.lastMapVersion.String(), "target_version", server.runonserver) + server.logger.Debug("Query skipped for this database version", "version", server.lastMapVersion.String(), "target_version", server.runonserver) continue } } @@ -232,12 +232,12 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str // Serious error - a namespace disappeared if err != nil { namespaceErrors[namespace] = err - logger.Info("error finding namespace", "err", err) + server.logger.Info("error finding namespace", "err", err) } // Non-serious errors - likely version or parsing problems. if len(nonFatalErrors) > 0 { for _, err := range nonFatalErrors { - logger.Info("error querying namespace", "err", err) + server.logger.Info("error querying namespace", "err", err) } } diff --git a/cmd/postgres_exporter/postgres_exporter.go b/exporter/postgres_exporter.go similarity index 67% rename from cmd/postgres_exporter/postgres_exporter.go rename to exporter/postgres_exporter.go index 90f26beb0..4c918d828 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/exporter/postgres_exporter.go @@ -11,13 +11,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "crypto/sha256" "database/sql" "errors" "fmt" + "log/slog" "math" "os" "regexp" @@ -28,6 +29,19 @@ import ( "github.com/prometheus/client_golang/prometheus" ) +// Metric name parts. +const ( + // Namespace for all metrics. + namespace = "pg" + // Subsystems. + exporter = "exporter" + // Metric label used for static string data thats handy to send to Prometheus + // e.g. version + staticLabelName = "static" + // Metric label used for server identification. + serverLabelName = "server" +) + // ColumnUsage should be one of several enum values which describe how a // queried row is to be converted to a Prometheus metric. type ColumnUsage int @@ -142,7 +156,7 @@ func (e *ErrorConnectToServer) Error() string { } // TODO: revisit this with the semver system -func dumpMaps() { +func DumpMaps() { // TODO: make this function part of the exporter for name, cmap := range builtinMetricMaps { query, ok := queryOverrides[name] @@ -175,52 +189,6 @@ var builtinMetricMaps = map[string]intermediateMetricMap{ true, 0, }, - "pg_stat_replication": { - map[string]ColumnMapping{ - "procpid": {DISCARD, "Process ID of a WAL sender process", nil, semver.MustParseRange("<9.2.0")}, - "pid": {DISCARD, "Process ID of a WAL sender process", nil, semver.MustParseRange(">=9.2.0")}, - "usesysid": {DISCARD, "OID of the user logged into this WAL sender process", nil, nil}, - "usename": {DISCARD, "Name of the user logged into this WAL sender process", nil, nil}, - "application_name": {LABEL, "Name of the application that is connected to this WAL sender", nil, nil}, - "client_addr": {LABEL, "IP address of the client connected to this WAL sender. If this field is null, it indicates that the client is connected via a Unix socket on the server machine.", nil, nil}, - "client_hostname": {DISCARD, "Host name of the connected client, as reported by a reverse DNS lookup of client_addr. This field will only be non-null for IP connections, and only when log_hostname is enabled.", nil, nil}, - "client_port": {DISCARD, "TCP port number that the client is using for communication with this WAL sender, or -1 if a Unix socket is used", nil, nil}, - "backend_start": {DISCARD, "with time zone Time when this process was started, i.e., when the client connected to this WAL sender", nil, nil}, - "backend_xmin": {DISCARD, "The current backend's xmin horizon.", nil, nil}, - "state": {LABEL, "Current WAL sender state", nil, nil}, - "sent_location": {DISCARD, "Last transaction log position sent on this connection", nil, semver.MustParseRange("<10.0.0")}, - "write_location": {DISCARD, "Last transaction log position written to disk by this standby server", nil, semver.MustParseRange("<10.0.0")}, - "flush_location": {DISCARD, "Last transaction log position flushed to disk by this standby server", nil, semver.MustParseRange("<10.0.0")}, - "replay_location": {DISCARD, "Last transaction log position replayed into the database on this standby server", nil, semver.MustParseRange("<10.0.0")}, - "sent_lsn": {DISCARD, "Last transaction log position sent on this connection", nil, semver.MustParseRange(">=10.0.0")}, - "write_lsn": {DISCARD, "Last transaction log position written to disk by this standby server", nil, semver.MustParseRange(">=10.0.0")}, - "flush_lsn": {DISCARD, "Last transaction log position flushed to disk by this standby server", nil, semver.MustParseRange(">=10.0.0")}, - "replay_lsn": {DISCARD, "Last transaction log position replayed into the database on this standby server", nil, semver.MustParseRange(">=10.0.0")}, - "sync_priority": {DISCARD, "Priority of this standby server for being chosen as the synchronous standby", nil, nil}, - "sync_state": {DISCARD, "Synchronous state of this standby server", nil, nil}, - "slot_name": {LABEL, "A unique, cluster-wide identifier for the replication slot", nil, semver.MustParseRange(">=9.2.0")}, - "plugin": {DISCARD, "The base name of the shared object containing the output plugin this logical slot is using, or null for physical slots", nil, nil}, - "slot_type": {DISCARD, "The slot type - physical or logical", nil, nil}, - "datoid": {DISCARD, "The OID of the database this slot is associated with, or null. Only logical slots have an associated database", nil, nil}, - "database": {DISCARD, "The name of the database this slot is associated with, or null. Only logical slots have an associated database", nil, nil}, - "active": {DISCARD, "True if this slot is currently actively being used", nil, nil}, - "active_pid": {DISCARD, "Process ID of a WAL sender process", nil, nil}, - "xmin": {DISCARD, "The oldest transaction that this slot needs the database to retain. VACUUM cannot remove tuples deleted by any later transaction", nil, nil}, - "catalog_xmin": {DISCARD, "The oldest transaction affecting the system catalogs that this slot needs the database to retain. VACUUM cannot remove catalog tuples deleted by any later transaction", nil, nil}, - "restart_lsn": {DISCARD, "The address (LSN) of oldest WAL which still might be required by the consumer of this slot and thus won't be automatically removed during checkpoints", nil, nil}, - "pg_current_xlog_location": {DISCARD, "pg_current_xlog_location", nil, nil}, - "pg_current_wal_lsn": {DISCARD, "pg_current_xlog_location", nil, semver.MustParseRange(">=10.0.0")}, - "pg_current_wal_lsn_bytes": {GAUGE, "WAL position in bytes", nil, semver.MustParseRange(">=10.0.0")}, - "pg_xlog_location_diff": {GAUGE, "Lag in bytes between master and slave", nil, semver.MustParseRange(">=9.2.0 <10.0.0")}, - "pg_wal_lsn_diff": {GAUGE, "Lag in bytes between master and slave", nil, semver.MustParseRange(">=10.0.0")}, - "confirmed_flush_lsn": {DISCARD, "LSN position a consumer of a slot has confirmed flushing the data received", nil, nil}, - "write_lag": {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")}, - "flush_lag": {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level remote_flush incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")}, - "replay_lag": {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")}, - }, - true, - 0, - }, "pg_replication_slots": { map[string]ColumnMapping{ "slot_name": {LABEL, "Name of the replication slot", nil, nil}, @@ -231,42 +199,16 @@ var builtinMetricMaps = map[string]intermediateMetricMap{ true, 0, }, - "pg_stat_archiver": { - map[string]ColumnMapping{ - "archived_count": {COUNTER, "Number of WAL files that have been successfully archived", nil, nil}, - "last_archived_wal": {DISCARD, "Name of the last WAL file successfully archived", nil, nil}, - "last_archived_time": {DISCARD, "Time of the last successful archive operation", nil, nil}, - "failed_count": {COUNTER, "Number of failed attempts for archiving WAL files", nil, nil}, - "last_failed_wal": {DISCARD, "Name of the WAL file of the last failed archival operation", nil, nil}, - "last_failed_time": {DISCARD, "Time of the last failed archival operation", nil, nil}, - "stats_reset": {DISCARD, "Time at which these statistics were last reset", nil, nil}, - "last_archive_age": {GAUGE, "Time in seconds since last WAL segment was successfully archived", nil, nil}, - }, - true, - 0, - }, - "pg_stat_activity": { - map[string]ColumnMapping{ - "datname": {LABEL, "Name of this database", nil, nil}, - "state": {LABEL, "connection state", nil, semver.MustParseRange(">=9.2.0")}, - "usename": {LABEL, "connection usename", nil, nil}, - "application_name": {LABEL, "connection application_name", nil, nil}, - "count": {GAUGE, "number of connections in this state", nil, nil}, - "max_tx_duration": {GAUGE, "max duration in seconds any active transaction has been running", nil, nil}, - }, - true, - 0, - }, } // Turn the MetricMap column mapping into a prometheus descriptor mapping. -func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metricMaps map[string]intermediateMetricMap) map[string]MetricMapNamespace { +func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metricMaps map[string]intermediateMetricMap, logger *slog.Logger, metricPrefix string) map[string]MetricMapNamespace { var metricMap = make(map[string]MetricMapNamespace) for namespace, intermediateMappings := range metricMaps { thisMap := make(map[string]MetricMap) - namespace = strings.Replace(namespace, "pg", *metricPrefix, 1) + namespace = strings.Replace(namespace, "pg", metricPrefix, 1) // Get the constant labels var variableLabels []string @@ -309,7 +251,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri vtype: prometheus.CounterValue, desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), conversion: func(in interface{}) (float64, bool) { - return dbToFloat64(in) + return dbToFloat64(in, logger) }, } case GAUGE: @@ -317,7 +259,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri vtype: prometheus.GaugeValue, desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), conversion: func(in interface{}) (float64, bool) { - return dbToFloat64(in) + return dbToFloat64(in, logger) }, } case HISTOGRAM: @@ -326,7 +268,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri vtype: prometheus.UntypedValue, desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), conversion: func(in interface{}) (float64, bool) { - return dbToFloat64(in) + return dbToFloat64(in, logger) }, } thisMap[columnName+"_bucket"] = MetricMap{ @@ -406,7 +348,7 @@ type Exporter struct { // only, since it just points to the global. builtinMetricMaps map[string]intermediateMetricMap - disableDefaultMetrics, disableSettingsMetrics, autoDiscoverDatabases bool + disableDefaultMetrics, autoDiscoverDatabases bool excludeDatabases []string includeDatabases []string @@ -422,6 +364,9 @@ type Exporter struct { // servers are used to allow re-using the DB connection between scrapes. // servers contains metrics map and query overrides. servers *Servers + + logger *slog.Logger + metricPrefix string } // ExporterOpt configures Exporter. @@ -434,13 +379,6 @@ func DisableDefaultMetrics(b bool) ExporterOpt { } } -// DisableSettingsMetrics configures pg_settings export. -func DisableSettingsMetrics(b bool) ExporterOpt { - return func(e *Exporter) { - e.disableSettingsMetrics = b - } -} - // AutoDiscoverDatabases allows scraping all databases on a database server. func AutoDiscoverDatabases(b bool) ExporterOpt { return func(e *Exporter) { @@ -474,11 +412,18 @@ func WithUserQueriesPath(p string) ExporterOpt { // WithConstantLabels configures constant labels. func WithConstantLabels(s string) ExporterOpt { return func(e *Exporter) { - e.constantLabels = parseConstLabels(s) + e.constantLabels = parseConstLabels(s, e.logger) + } +} + +// WithMetricPrefix configures metric prefix. +func WithMetricPrefix(prefix string) ExporterOpt { + return func(e *Exporter) { + e.metricPrefix = prefix } } -func parseConstLabels(s string) prometheus.Labels { +func parseConstLabels(s string, logger *slog.Logger) prometheus.Labels { labels := make(prometheus.Labels) s = strings.TrimSpace(s) @@ -505,10 +450,11 @@ func parseConstLabels(s string) prometheus.Labels { } // NewExporter returns a new PostgreSQL exporter for the provided DSN. -func NewExporter(dsn []string, opts ...ExporterOpt) *Exporter { +func NewExporter(dsn []string, logger *slog.Logger, opts ...ExporterOpt) *Exporter { e := &Exporter{ dsn: dsn, builtinMetricMaps: builtinMetricMaps, + logger: logger, } for _, opt := range opts { @@ -516,7 +462,7 @@ func NewExporter(dsn []string, opts ...ExporterOpt) *Exporter { } e.setupInternalMetrics() - e.servers = NewServers(ServerWithLabels(e.constantLabels)) + e.servers = NewServers(ServerWithLabels(e.constantLabels), ServerWithLogger(e.logger)) return e } @@ -573,14 +519,11 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { e.userQueriesError.Collect(ch) } -func newDesc(subsystem, name, help string, labels prometheus.Labels) *prometheus.Desc { - return prometheus.NewDesc( - prometheus.BuildFQName(namespace, subsystem, name), - help, nil, labels, - ) +func (e *Exporter) CloseServers() { + e.servers.Close() } -func checkPostgresVersion(db *sql.DB, server string) (semver.Version, string, error) { +func checkPostgresVersion(db *sql.DB, server string, logger *slog.Logger) (semver.Version, string, error) { logger.Debug("Querying PostgreSQL version", "server", server) versionRow := db.QueryRow("SELECT version();") var versionString string @@ -598,24 +541,24 @@ func checkPostgresVersion(db *sql.DB, server string) (semver.Version, string, er // Check and update the exporters query maps if the version has changed. func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server) error { - semanticVersion, versionString, err := checkPostgresVersion(server.db, server.String()) + semanticVersion, versionString, err := checkPostgresVersion(server.db, server.String(), e.logger) if err != nil { return fmt.Errorf("Error fetching version string on %q: %v", server, err) } if !e.disableDefaultMetrics && semanticVersion.LT(lowestSupportedVersion) { - logger.Warn("PostgreSQL version is lower than our lowest supported version", "server", server, "version", semanticVersion, "lowest_supported_version", lowestSupportedVersion) + e.logger.Warn("PostgreSQL version is lower than our lowest supported version", "server", server, "version", semanticVersion, "lowest_supported_version", lowestSupportedVersion) } // Check if semantic version changed and recalculate maps if needed. if semanticVersion.NE(server.lastMapVersion) || server.metricMap == nil { - logger.Info("Semantic version changed", "server", server, "from", server.lastMapVersion, "to", semanticVersion) + e.logger.Info("Semantic version changed", "server", server, "from", server.lastMapVersion, "to", semanticVersion) server.mappingMtx.Lock() // Get Default Metrics only for master database if !e.disableDefaultMetrics && server.master { - server.metricMap = makeDescMap(semanticVersion, server.labels, e.builtinMetricMaps) - server.queryOverrides = makeQueryOverrideMap(semanticVersion, queryOverrides) + server.metricMap = makeDescMap(semanticVersion, server.labels, e.builtinMetricMaps, server.logger, e.metricPrefix) + server.queryOverrides = makeQueryOverrideMap(semanticVersion, queryOverrides, server.logger) } else { server.metricMap = make(map[string]MetricMapNamespace) server.queryOverrides = make(map[string]string) @@ -630,13 +573,13 @@ func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server) // Calculate the hashsum of the useQueries userQueriesData, err := os.ReadFile(e.userQueriesPath) if err != nil { - logger.Error("Failed to reload user queries", "path", e.userQueriesPath, "err", err) + e.logger.Error("Failed to reload user queries", "path", e.userQueriesPath, "err", err) e.userQueriesError.WithLabelValues(e.userQueriesPath, "").Set(1) } else { hashsumStr := fmt.Sprintf("%x", sha256.Sum256(userQueriesData)) - if err := addQueries(userQueriesData, semanticVersion, server); err != nil { - logger.Error("Failed to reload user queries", "path", e.userQueriesPath, "err", err) + if err := addQueries(userQueriesData, semanticVersion, server, e.metricPrefix); err != nil { + e.logger.Error("Failed to reload user queries", "path", e.userQueriesPath, "err", err) e.userQueriesError.WithLabelValues(e.userQueriesPath, hashsumStr).Set(1) } else { // Mark user queries as successfully loaded @@ -678,7 +621,7 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) { if err := e.scrapeDSN(ch, dsn); err != nil { errorsCount++ - logger.Error("error scraping dsn", "err", err, "dsn", dsn) + e.logger.Error("error scraping dsn", "err", err, "dsn", loggableDSN(dsn)) if _, ok := err.(*ErrorConnectToServer); ok { connectionErrorsCount++ diff --git a/cmd/postgres_exporter/postgres_exporter_integration_test.go b/exporter/postgres_exporter_integration_test.go similarity index 92% rename from cmd/postgres_exporter/postgres_exporter_integration_test.go rename to exporter/postgres_exporter_integration_test.go index 7043c1e88..21a883bd4 100644 --- a/cmd/postgres_exporter/postgres_exporter_integration_test.go +++ b/exporter/postgres_exporter_integration_test.go @@ -15,9 +15,8 @@ // a lot of additional work to keep the external docker environment they require // working. //go:build integration -// +build integration -package main +package exporter import ( "fmt" @@ -27,6 +26,7 @@ import ( _ "github.com/lib/pq" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" . "gopkg.in/check.v1" ) @@ -43,7 +43,7 @@ func (s *IntegrationSuite) SetUpSuite(c *C) { dsn := os.Getenv("DATA_SOURCE_NAME") c.Assert(dsn, Not(Equals), "") - exporter := NewExporter(strings.Split(dsn, ",")) + exporter := NewExporter(strings.Split(dsn, ","), promslog.NewNopLogger()) c.Assert(exporter, NotNil) // Assign the exporter to the suite s.e = exporter @@ -70,12 +70,6 @@ func (s *IntegrationSuite) TestAllNamespacesReturnResults(c *C) { err = s.e.checkMapVersions(ch, server) c.Assert(err, IsNil) - err = querySettings(ch, server) - if !c.Check(err, Equals, nil) { - fmt.Println("## ERRORS FOUND") - fmt.Println(err) - } - // This should never happen in our test cases. errMap := queryNamespaceMappings(ch, server) if !c.Check(len(errMap), Equals, 0) { @@ -100,12 +94,12 @@ func (s *IntegrationSuite) TestInvalidDsnDoesntCrash(c *C) { }() // Send a bad DSN - exporter := NewExporter([]string{"invalid dsn"}) + exporter := NewExporter([]string{"invalid dsn"}, promslog.NewNopLogger()) c.Assert(exporter, NotNil) exporter.scrape(ch) // Send a DSN to a non-listening port. - exporter = NewExporter([]string{"postgresql://nothing:nothing@127.0.0.1:1/nothing"}) + exporter = NewExporter([]string{"postgresql://nothing:nothing@127.0.0.1:1/nothing"}, promslog.NewNopLogger()) c.Assert(exporter, NotNil) exporter.scrape(ch) } @@ -123,7 +117,7 @@ func (s *IntegrationSuite) TestUnknownMetricParsingDoesntCrash(c *C) { dsn := os.Getenv("DATA_SOURCE_NAME") c.Assert(dsn, Not(Equals), "") - exporter := NewExporter(strings.Split(dsn, ",")) + exporter := NewExporter(strings.Split(dsn, ","), promslog.NewNopLogger()) c.Assert(exporter, NotNil) // Convert the default maps into a list of empty maps. @@ -156,6 +150,7 @@ func (s *IntegrationSuite) TestExtendQueriesDoesntCrash(c *C) { exporter := NewExporter( strings.Split(dsn, ","), + promslog.NewNopLogger(), WithUserQueriesPath("../user_queries_test.yaml"), ) c.Assert(exporter, NotNil) @@ -169,6 +164,7 @@ func (s *IntegrationSuite) TestAutoDiscoverDatabases(c *C) { exporter := NewExporter( strings.Split(dsn, ","), + promslog.NewNopLogger(), ) c.Assert(exporter, NotNil) diff --git a/cmd/postgres_exporter/postgres_exporter_test.go b/exporter/postgres_exporter_test.go similarity index 95% rename from cmd/postgres_exporter/postgres_exporter_test.go rename to exporter/postgres_exporter_test.go index 0f36febf4..6364fc4f9 100644 --- a/cmd/postgres_exporter/postgres_exporter_test.go +++ b/exporter/postgres_exporter_test.go @@ -12,9 +12,8 @@ // limitations under the License. //go:build !integration -// +build !integration -package main +package exporter import ( "math" @@ -25,6 +24,7 @@ import ( "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" . "gopkg.in/check.v1" ) @@ -37,7 +37,6 @@ type FunctionalSuite struct { var _ = Suite(&FunctionalSuite{}) func (s *FunctionalSuite) SetUpSuite(c *C) { - } func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { @@ -54,7 +53,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { { // No metrics should be eliminated - resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap) + resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap, promslog.NewNopLogger(), "pg") c.Check( resultMap["test_namespace"].columnMappings["metric_which_stays"].discard, Equals, @@ -75,7 +74,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { testMetricMap["test_namespace"].columnMappings["metric_which_discards"] = discardableMetric // Discard metric should be discarded - resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap) + resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap, promslog.NewNopLogger(), "pg") c.Check( resultMap["test_namespace"].columnMappings["metric_which_stays"].discard, Equals, @@ -96,7 +95,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { testMetricMap["test_namespace"].columnMappings["metric_which_discards"] = discardableMetric // Discard metric should be discarded - resultMap := makeDescMap(semver.MustParse("0.0.2"), prometheus.Labels{}, testMetricMap) + resultMap := makeDescMap(semver.MustParse("0.0.2"), prometheus.Labels{}, testMetricMap, promslog.NewNopLogger(), "pg") c.Check( resultMap["test_namespace"].columnMappings["metric_which_stays"].discard, Equals, @@ -126,7 +125,7 @@ func (s *FunctionalSuite) TestEnvironmentSettingWithSecretsFiles(c *C) { var expected = "postgresql://custom_username$&+,%2F%3A;=%3F%40:custom_password$&+,%2F%3A;=%3F%40@localhost:5432/?sslmode=disable" - dsn, err := getDataSources() + dsn, err := GetDataSources() if err != nil { c.Errorf("Unexpected error reading datasources") } @@ -146,7 +145,7 @@ func (s *FunctionalSuite) TestEnvironmentSettingWithDns(c *C) { c.Assert(err, IsNil) defer UnsetEnvironment(c, "DATA_SOURCE_NAME") - dsn, err := getDataSources() + dsn, err := GetDataSources() if err != nil { c.Errorf("Unexpected error reading datasources") } @@ -174,7 +173,7 @@ func (s *FunctionalSuite) TestEnvironmentSettingWithDnsAndSecrets(c *C) { c.Assert(err, IsNil) defer UnsetEnvironment(c, "DATA_SOURCE_PASS") - dsn, err := getDataSources() + dsn, err := GetDataSources() if err != nil { c.Errorf("Unexpected error reading datasources") } @@ -290,7 +289,7 @@ func (s *FunctionalSuite) TestParseConstLabels(c *C) { } for _, cs := range cases { - labels := parseConstLabels(cs.s) + labels := parseConstLabels(cs.s, promslog.NewNopLogger()) if !reflect.DeepEqual(labels, cs.labels) { c.Fatalf("labels not equal (%v -> %v)", labels, cs.labels) } @@ -320,7 +319,6 @@ func (checker *isNaNChecker) Check(params []interface{}, names []string) (result // test boolean metric type gets converted to float func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { - type TestCase struct { input interface{} expectedString string @@ -389,7 +387,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { } for _, cs := range cases { - value, ok := dbToFloat64(cs.input) + value, ok := dbToFloat64(cs.input, promslog.NewNopLogger()) if math.IsNaN(cs.expectedValue) { c.Assert(value, IsNaN) } else { @@ -397,7 +395,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { } c.Assert(ok, Equals, cs.expectedOK) - count, ok := dbToUint64(cs.input) + count, ok := dbToUint64(cs.input, promslog.NewNopLogger()) c.Assert(count, Equals, cs.expectedCount) c.Assert(ok, Equals, cs.expectedOK) @@ -410,7 +408,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { func (s *FunctionalSuite) TestParseUserQueries(c *C) { userQueriesData, err := os.ReadFile("./tests/user_queries_ok.yaml") if err == nil { - metricMaps, newQueryOverrides, err := parseUserQueries(userQueriesData) + metricMaps, newQueryOverrides, err := parseUserQueries(userQueriesData, promslog.NewNopLogger()) c.Assert(err, Equals, nil) c.Assert(metricMaps, NotNil) c.Assert(newQueryOverrides, NotNil) diff --git a/cmd/postgres_exporter/queries.go b/exporter/queries.go similarity index 64% rename from cmd/postgres_exporter/queries.go rename to exporter/queries.go index 7090606e1..cb24f0b16 100644 --- a/cmd/postgres_exporter/queries.go +++ b/exporter/queries.go @@ -11,11 +11,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "errors" "fmt" + "log/slog" "github.com/blang/semver/v4" "gopkg.in/yaml.v2" @@ -45,36 +46,6 @@ type OverrideQuery struct { // Overriding queries for namespaces above. // TODO: validate this is a closed set in tests, and there are no overlaps var queryOverrides = map[string][]OverrideQuery{ - "pg_stat_replication": { - { - semver.MustParseRange(">=10.0.0"), - ` - SELECT *, - (case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end) AS pg_current_wal_lsn, - (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_lsn('0/0'))::float else pg_wal_lsn_diff(pg_current_wal_lsn(), pg_lsn('0/0'))::float end) AS pg_current_wal_lsn_bytes, - (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), replay_lsn)::float else pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)::float end) AS pg_wal_lsn_diff - FROM pg_stat_replication - `, - }, - { - semver.MustParseRange(">=9.2.0 <10.0.0"), - ` - SELECT *, - (case pg_is_in_recovery() when 't' then pg_last_xlog_receive_location() else pg_current_xlog_location() end) AS pg_current_xlog_location, - (case pg_is_in_recovery() when 't' then pg_xlog_location_diff(pg_last_xlog_receive_location(), replay_location)::float else pg_xlog_location_diff(pg_current_xlog_location(), replay_location)::float end) AS pg_xlog_location_diff - FROM pg_stat_replication - `, - }, - { - semver.MustParseRange("<9.2.0"), - ` - SELECT *, - (case pg_is_in_recovery() when 't' then pg_last_xlog_receive_location() else pg_current_xlog_location() end) AS pg_current_xlog_location - FROM pg_stat_replication - `, - }, - }, - "pg_replication_slots": { { semver.MustParseRange(">=9.4.0 <10.0.0"), @@ -93,71 +64,11 @@ var queryOverrides = map[string][]OverrideQuery{ `, }, }, - - "pg_stat_archiver": { - { - semver.MustParseRange(">=9.4.0"), - ` - SELECT *, - extract(epoch from now() - last_archived_time) AS last_archive_age - FROM pg_stat_archiver - `, - }, - }, - - "pg_stat_activity": { - // This query only works - { - semver.MustParseRange(">=9.2.0"), - ` - SELECT - pg_database.datname, - tmp.state, - tmp2.usename, - tmp2.application_name, - COALESCE(count,0) as count, - COALESCE(max_tx_duration,0) as max_tx_duration - FROM - ( - VALUES ('active'), - ('idle'), - ('idle in transaction'), - ('idle in transaction (aborted)'), - ('fastpath function call'), - ('disabled') - ) AS tmp(state) CROSS JOIN pg_database - LEFT JOIN - ( - SELECT - datname, - state, - usename, - application_name, - count(*) AS count, - MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration - FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2 - ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname - `, - }, - { - semver.MustParseRange("<9.2.0"), - ` - SELECT - datname, - 'unknown' AS state, - usename, - application_name, - COALESCE(count(*),0) AS count, - COALESCE(MAX(EXTRACT(EPOCH FROM now() - xact_start))::float,0) AS max_tx_duration - FROM pg_stat_activity GROUP BY datname,usename,application_name - `, - }, - }, } // Convert the query override file to the version-specific query override file // for the exporter. -func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][]OverrideQuery) map[string]string { +func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][]OverrideQuery, logger *slog.Logger) map[string]string { resultMap := make(map[string]string) for name, overrideDef := range queryOverrides { // Find a matching semver. We make it an error to have overlapping @@ -179,7 +90,7 @@ func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][] return resultMap } -func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[string]string, error) { +func parseUserQueries(content []byte, logger *slog.Logger) (map[string]intermediateMetricMap, map[string]string, error) { var userQueries UserQueries err := yaml.Unmarshal(content, &userQueries) @@ -232,21 +143,21 @@ func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[str // queries. // TODO: test code for all cu. // TODO: the YAML this supports is "non-standard" - we should move away from it. -func addQueries(content []byte, pgVersion semver.Version, server *Server) error { - metricMaps, newQueryOverrides, err := parseUserQueries(content) +func addQueries(content []byte, pgVersion semver.Version, server *Server, metricPrefix string) error { + metricMaps, newQueryOverrides, err := parseUserQueries(content, server.logger) if err != nil { return err } // Convert the loaded metric map into exporter representation - partialExporterMap := makeDescMap(pgVersion, server.labels, metricMaps) + partialExporterMap := makeDescMap(pgVersion, server.labels, metricMaps, server.logger, metricPrefix) // Merge the two maps (which are now quite flatteend) for k, v := range partialExporterMap { _, found := server.metricMap[k] if found { - logger.Debug("Overriding metric from user YAML file", "metric", k) + server.logger.Debug("Overriding metric from user YAML file", "metric", k) } else { - logger.Debug("Adding new metric from user YAML file", "metric", k) + server.logger.Debug("Adding new metric from user YAML file", "metric", k) } server.metricMap[k] = v } @@ -255,9 +166,9 @@ func addQueries(content []byte, pgVersion semver.Version, server *Server) error for k, v := range newQueryOverrides { _, found := server.queryOverrides[k] if found { - logger.Debug("Overriding query override from user YAML file", "query_override", k) + server.logger.Debug("Overriding query override from user YAML file", "query_override", k) } else { - logger.Debug("Adding new query override from user YAML file", "query_override", k) + server.logger.Debug("Adding new query override from user YAML file", "query_override", k) } server.queryOverrides[k] = v } diff --git a/cmd/postgres_exporter/server.go b/exporter/server.go similarity index 83% rename from cmd/postgres_exporter/server.go rename to exporter/server.go index bd4e76e10..25d8b3185 100644 --- a/cmd/postgres_exporter/server.go +++ b/exporter/server.go @@ -11,16 +11,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "database/sql" "fmt" + "log/slog" "sync" "time" "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" ) // Server describes a connection to Postgres. @@ -42,6 +44,7 @@ type Server struct { // Currently cached metrics metricCache map[string]cachedMetrics cacheMtx sync.Mutex + logger *slog.Logger } // ServerOpt configures a server. @@ -56,6 +59,12 @@ func ServerWithLabels(labels prometheus.Labels) ServerOpt { } } +func ServerWithLogger(logger *slog.Logger) ServerOpt { + return func(s *Server) { + s.logger = logger + } +} + // NewServer establishes a new connection using DSN. func NewServer(dsn string, opts ...ServerOpt) (*Server, error) { fingerprint, err := parseFingerprint(dsn) @@ -70,8 +79,6 @@ func NewServer(dsn string, opts ...ServerOpt) (*Server, error) { db.SetMaxOpenConns(1) db.SetMaxIdleConns(1) - logger.Info("Established new database connection", "fingerprint", fingerprint) - s := &Server{ db: db, master: false, @@ -79,12 +86,15 @@ func NewServer(dsn string, opts ...ServerOpt) (*Server, error) { serverLabelName: fingerprint, }, metricCache: make(map[string]cachedMetrics), + logger: promslog.NewNopLogger(), } for _, opt := range opts { opt(s) } + s.logger.Info("Established new database connection", "fingerprint", fingerprint) + return s, nil } @@ -97,7 +107,7 @@ func (s *Server) Close() error { func (s *Server) Ping() error { if err := s.db.Ping(); err != nil { if cerr := s.Close(); cerr != nil { - logger.Error("Error while closing non-pinging DB connection", "server", s, "err", cerr) + s.logger.Error("Error while closing non-pinging DB connection", "server", s, "err", cerr) } return err } @@ -110,21 +120,19 @@ func (s *Server) String() string { } // Scrape loads metrics. -func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool) error { +func (s *Server) Scrape(ch chan<- prometheus.Metric) error { s.mappingMtx.RLock() defer s.mappingMtx.RUnlock() var err error - if !disableSettingsMetrics && s.master { - if err = querySettings(ch, s); err != nil { - err = fmt.Errorf("error retrieving settings: %s", err) - } - } - errMap := queryNamespaceMappings(ch, s) - if len(errMap) > 0 { - err = fmt.Errorf("queryNamespaceMappings returned %d errors", len(errMap)) + if len(errMap) == 0 { + return nil + } + err = fmt.Errorf("queryNamespaceMappings errors encountered") + for namespace, errStr := range errMap { + err = fmt.Errorf("%s, namespace: %s error: %s", err, namespace, errStr) } return err @@ -168,6 +176,7 @@ func (s *Servers) GetServer(dsn string) (*Server, error) { s.servers[dsn] = server } if err = server.Ping(); err != nil { + server.Close() delete(s.servers, dsn) time.Sleep(time.Duration(errCount) * time.Second) continue @@ -183,7 +192,7 @@ func (s *Servers) Close() { defer s.m.Unlock() for _, server := range s.servers { if err := server.Close(); err != nil { - logger.Error("Failed to close connection", "server", server, "err", err) + server.logger.Error("Failed to close connection", "server", server, "err", err) } } } diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile b/exporter/tests/docker-postgres-replication/Dockerfile similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile rename to exporter/tests/docker-postgres-replication/Dockerfile diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile.p2 b/exporter/tests/docker-postgres-replication/Dockerfile.p2 similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile.p2 rename to exporter/tests/docker-postgres-replication/Dockerfile.p2 diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/README.md b/exporter/tests/docker-postgres-replication/README.md similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/README.md rename to exporter/tests/docker-postgres-replication/README.md diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/docker-compose.yml b/exporter/tests/docker-postgres-replication/docker-compose.yml similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/docker-compose.yml rename to exporter/tests/docker-postgres-replication/docker-compose.yml diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/docker-entrypoint.sh b/exporter/tests/docker-postgres-replication/docker-entrypoint.sh similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/docker-entrypoint.sh rename to exporter/tests/docker-postgres-replication/docker-entrypoint.sh diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/setup-replication.sh b/exporter/tests/docker-postgres-replication/setup-replication.sh similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/setup-replication.sh rename to exporter/tests/docker-postgres-replication/setup-replication.sh diff --git a/cmd/postgres_exporter/tests/test-smoke b/exporter/tests/test-smoke similarity index 100% rename from cmd/postgres_exporter/tests/test-smoke rename to exporter/tests/test-smoke diff --git a/cmd/postgres_exporter/tests/user_queries_ok.yaml b/exporter/tests/user_queries_ok.yaml similarity index 100% rename from cmd/postgres_exporter/tests/user_queries_ok.yaml rename to exporter/tests/user_queries_ok.yaml diff --git a/cmd/postgres_exporter/tests/user_queries_test.yaml b/exporter/tests/user_queries_test.yaml similarity index 100% rename from cmd/postgres_exporter/tests/user_queries_test.yaml rename to exporter/tests/user_queries_test.yaml diff --git a/cmd/postgres_exporter/tests/username_file b/exporter/tests/username_file similarity index 100% rename from cmd/postgres_exporter/tests/username_file rename to exporter/tests/username_file diff --git a/cmd/postgres_exporter/tests/userpass_file b/exporter/tests/userpass_file similarity index 100% rename from cmd/postgres_exporter/tests/userpass_file rename to exporter/tests/userpass_file diff --git a/cmd/postgres_exporter/util.go b/exporter/util.go similarity index 94% rename from cmd/postgres_exporter/util.go rename to exporter/util.go index 3baa6f4be..895c2d07e 100644 --- a/cmd/postgres_exporter/util.go +++ b/exporter/util.go @@ -11,10 +11,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "fmt" + "log/slog" "math" "net/url" "strconv" @@ -24,15 +25,6 @@ import ( "github.com/lib/pq" ) -func contains(a []string, x string) bool { - for _, n := range a { - if x == n { - return true - } - } - return false -} - // convert a string to the corresponding ColumnUsage func stringToColumnUsage(s string) (ColumnUsage, error) { var u ColumnUsage @@ -68,7 +60,7 @@ func stringToColumnUsage(s string) (ColumnUsage, error) { // Convert database.sql types to float64s for Prometheus consumption. Null types are mapped to NaN. string and []byte // types are mapped as NaN and !ok -func dbToFloat64(t interface{}) (float64, bool) { +func dbToFloat64(t interface{}, logger *slog.Logger) (float64, bool) { switch v := t.(type) { case int64: return float64(v), true @@ -106,7 +98,7 @@ func dbToFloat64(t interface{}) (float64, bool) { // Convert database.sql types to uint64 for Prometheus consumption. Null types are mapped to 0. string and []byte // types are mapped as 0 and !ok -func dbToUint64(t interface{}) (uint64, bool) { +func dbToUint64(t interface{}, logger *slog.Logger) (uint64, bool) { switch v := t.(type) { case uint64: return v, true @@ -159,7 +151,7 @@ func dbToString(t interface{}) (string, bool) { // Try and convert to string return string(v), true case string: - return v, true + return strings.ToValidUTF8(v, "�"), true case bool: if v { return "true", true diff --git a/go.mod b/go.mod index bc854c3aa..f50fda431 100644 --- a/go.mod +++ b/go.mod @@ -1,16 +1,16 @@ module github.com/prometheus-community/postgres_exporter -go 1.23.0 +go 1.25.0 require ( github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/alecthomas/kingpin/v2 v2.4.0 github.com/blang/semver/v4 v4.0.0 github.com/lib/pq v1.10.9 - github.com/prometheus/client_golang v1.20.5 - github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.60.1 - github.com/prometheus/exporter-toolkit v0.13.1 + github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/client_model v0.6.2 + github.com/prometheus/common v0.67.5 + github.com/prometheus/exporter-toolkit v0.16.0 github.com/smartystreets/goconvey v1.8.1 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/yaml.v2 v2.4.0 @@ -18,29 +18,32 @@ require ( ) require ( - github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect + github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/coreos/go-systemd/v22 v22.7.0 // indirect + github.com/golang-jwt/jwt/v5 v5.3.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gopherjs/gopherjs v1.17.2 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/jtolds/gls v4.20.0+incompatible // indirect - github.com/klauspost/compress v1.17.9 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mdlayher/socket v0.4.1 // indirect github.com/mdlayher/vsock v1.2.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect - github.com/prometheus/procfs v0.15.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/smarty/assertions v1.15.0 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.29.0 // indirect - golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + go.yaml.in/yaml/v2 v2.4.4 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/net v0.51.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sync v0.20.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + golang.org/x/time v0.15.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index 044805aa2..deea6eedf 100644 --- a/go.sum +++ b/go.sum @@ -2,23 +2,26 @@ github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7Oputl github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= -github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= -github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/coreos/go-systemd/v22 v22.7.0 h1:LAEzFkke61DFROc7zNLX/WA2i5J8gYqe0rSj9KI28KA= +github.com/coreos/go-systemd/v22 v22.7.0/go.mod h1:xNUYtjHu2EDXbsxz1i41wouACIwT7Ybq9o0BQhMwD0w= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= @@ -26,8 +29,8 @@ github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -50,16 +53,16 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRW github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= -github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= -github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= -github.com/prometheus/exporter-toolkit v0.13.1 h1:Evsh0gWQo2bdOHlnz9+0Nm7/OFfIwhE2Ws4A2jIlR04= -github.com/prometheus/exporter-toolkit v0.13.1/go.mod h1:ujdv2YIOxtdFxxqtloLpbqmxd5J0Le6IITUvIRSWjj0= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/exporter-toolkit v0.16.0 h1:xT/j7L2XKF+VJd6B4fpUw6xWabHrSmsUf6mYmFqyu0s= +github.com/prometheus/exporter-toolkit v0.16.0/go.mod h1:d1EL8Z9674xQe/iWhwP2wDyCEoBPbXVeqDbqAUsgJWY= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= @@ -68,30 +71,42 @@ github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+ github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY= github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= -golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= -golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ= +go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo= +golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= +golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/postgres_mixin/.lint b/postgres_mixin/.lint new file mode 100644 index 000000000..65d621f69 --- /dev/null +++ b/postgres_mixin/.lint @@ -0,0 +1,15 @@ +--- +exclusions: + panel-units-rule: + reason: Ignoring so far, need to address this in future + panel-title-description-rule: + reason: Ignoring so far, need to address this in future + panel-datasource-rule: + reason: "Loki datasource variable is being named as loki_datasource now while linter expects 'datasource'" + template-datasource-rule: + reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" + alert-name-camelcase: + reason: QPS is a common acronym (Queries Per Second) and should be allowed + entries: + - alert: PostgreSQLQPS + \ No newline at end of file diff --git a/postgres_mixin/alerts/postgres.libsonnet b/postgres_mixin/alerts/postgres.libsonnet index 4b0275df1..e3c8ee278 100644 --- a/postgres_mixin/alerts/postgres.libsonnet +++ b/postgres_mixin/alerts/postgres.libsonnet @@ -7,16 +7,16 @@ { alert: 'PostgreSQLMaxConnectionsReached', annotations: { - description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy.', - summary: '{{ $labels.instance }} has maxed out Postgres connections.', + description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).', + summary: 'Postgres connections count is over the maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) >= - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) - ||| % $._config, + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '1m', labels: { severity: 'warning', @@ -26,17 +26,17 @@ alert: 'PostgreSQLHighConnections', annotations: { description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is mostly likely).', - summary: '{{ $labels.instance }} is over 80% of max Postgres connections.', + summary: 'Postgres connections count is over 80% of maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) > ( - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) ) * 0.8 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '10m', labels: { severity: 'warning', @@ -46,7 +46,7 @@ alert: 'PostgreSQLDown', annotations: { description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. User services should not be effected provided at least 1 node is still alive.', - summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}', + summary: 'PostgreSQL is not processing queries.', }, expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config, 'for': '1m', @@ -58,15 +58,15 @@ alert: 'PostgreSQLSlowQueries', annotations: { description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }} ', - summary: 'PostgreSQL high number of slow on {{ $labels.cluster }} for database {{ $labels.datname }} ', + summary: 'PostgreSQL high number of slow queries.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( rate ( - pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m] + pg_stat_activity_max_tx_duration{%(dbNameFilter)s, %(postgresExporterSelector)s}[2m] ) ) > 2 * 60 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '2m', labels: { severity: 'warning', @@ -76,19 +76,19 @@ alert: 'PostgreSQLQPS', annotations: { description: 'PostgreSQL high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL high number of queries per second {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL high number of queries per second.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( irate( - pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_commit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + irate( - pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_rollback{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) > 10000 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '5m', labels: { severity: 'warning', @@ -98,28 +98,165 @@ alert: 'PostgreSQLCacheHitRatio', annotations: { description: 'PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL low cache hit rate.', }, expr: ||| - avg by (datname) ( - rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]) + avg by (datname, %(agg)s) ( + rate(pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]) / ( rate( - pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + rate( - pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_read{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) ) < 0.98 + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasTooManyRollbacks', + annotations: { + description: 'PostgreSQL has too many rollbacks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has too many rollbacks.', + }, + expr: ||| + avg without(pod, instance) + (rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]) / + (rate(pg_stat_database_xact_commit{%(dbNameFilter)s}[5m]) + rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]))) > 0.10 + ||| % $._config, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasHighDeadLocks', + annotations: { + description: 'PostgreSQL has too high deadlocks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of deadlocks.', + }, + expr: ||| + max without(pod, instance) (rate(pg_stat_database_deadlocks{%(dbNameFilter)s}[5m]) * 60) > 5 ||| % $._config, 'for': '5m', labels: { severity: 'warning', }, }, + { + alert: 'PostgresAcquiredTooManyLocks', + annotations: { + description: 'PostgreSQL has acquired too many locks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of acquired locks.', + }, + expr: ||| + max by(datname, %(agg)s) ( + (pg_locks_count{%(dbNameFilter)s}) + / + on(%(aggWithoutServer)s) group_left(server) ( + pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{} + ) + ) > 0.20 + ||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), aggWithoutServer: std.join(',', std.filter(function(x) x != "server", $._config.groupLabels + $._config.instanceLabels)) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresReplicationLaggingMore1Hour', + annotations: { + description: '{{ $labels.instance }} replication lag exceeds 1 hour. Check for network issues or load imbalances.', + summary: 'PostgreSQL replication lagging more than 1 hour.', + }, + expr: ||| + (pg_replication_lag{} > 3600) and on (%(agg)s) (pg_replication_is_replica{} == 1) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasReplicationSlotUsed', + annotations: { + description: '{{ $labels.instance }} has replication slots that are not used, which might lead to replication lag or data inconsistency.', + summary: 'PostgreSQL has unused replication slots.', + }, + expr: 'pg_replication_slots_active{} == 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresReplicationRoleChanged', + annotations: { + description: '{{ $labels.instance }} replication role has changed. Verify if this is expected or if it indicates a failover.', + summary: 'PostgreSQL replication role change detected.', + }, + expr: 'pg_replication_is_replica{} and changes(pg_replication_is_replica{}[1m]) > 0', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasExporterErrors', + annotations: { + description: '{{ $labels.instance }} exporter is experiencing errors. Verify exporter health and configuration.', + summary: 'PostgreSQL exporter errors detected.', + }, + expr: 'pg_exporter_last_scrape_error{} > 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTablesNotVaccumed', + annotations: { + description: '{{ $labels.instance }} tables have not been vacuumed recently within the last hour, which may lead to performance degradation.', + summary: 'PostgreSQL tables not vacuumed.', + }, + expr: ||| + group without(pod, instance)( + timestamp( + pg_stat_user_tables_n_dead_tup{} > + pg_stat_user_tables_n_live_tup{} + * on(%(agg)s) group_left pg_settings_autovacuum_vacuum_scale_factor{} + + on(%(agg)s) group_left pg_settings_autovacuum_vacuum_threshold{} + ) + < time() - 36000 + ) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTooManyCheckpointsRequested', + annotations: { + description: '{{ $labels.instance }} is requesting too many checkpoints, which may lead to performance degradation.', + summary: 'PostgreSQL too many checkpoints requested.', + }, + expr: ||| + rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) / + (rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) + rate(pg_stat_bgwriter_checkpoints_req_total{}[5m])) + < 0.5 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, ], }, ], diff --git a/postgres_mixin/config.libsonnet b/postgres_mixin/config.libsonnet index d7bd7ac1b..4ea3b7e52 100644 --- a/postgres_mixin/config.libsonnet +++ b/postgres_mixin/config.libsonnet @@ -1,5 +1,9 @@ { _config+:: { + dbNameFilter: 'datname!~"template.*"', postgresExporterSelector: '', + groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'], + instanceLabels: ['instance', 'server'], + enableMultiCluster: false, }, } diff --git a/postgres_mixin/dashboards/postgres-overview.json b/postgres_mixin/dashboards/postgres-overview.json index 9bf41be6a..8baf6fbb3 100644 --- a/postgres_mixin/dashboards/postgres-overview.json +++ b/postgres_mixin/dashboards/postgres-overview.json @@ -3,81 +3,119 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Performance metrics for Postgres", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 455, "graphTooltip": 0, - "id": 1, - "iteration": 1603191461722, + "id": 38, "links": [], + "liveNow": false, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 2, + "w": 8, + "x": 0, + "y": 0 + }, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "span": 4, + "title": "PostgreSQL overview", + "titleSize": "h6", + "type": "row" + }, + { + + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 20, + "w": 4, "x": 0, "y": 0 }, - "hiddenSeries": false, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "id": 11, "links": [], - "nullPointMode": "connected", + "maxDataPoints": 100, "options": { - "alertThreshold": true + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -94,7 +132,6 @@ } ], "intervalFactor": 2, - "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", "refId": "A", @@ -103,7 +140,7 @@ [ { "params": [ - "tup_fetched" + "xact_commit" ], "type": "field" }, @@ -119,7 +156,7 @@ } ] ], - "step": 120, + "step": 1800, "tags": [ { "key": "instance", @@ -127,11 +164,101 @@ "value": "/^$instance$/" } ] + } + ], + "title": "QPS", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 0 + }, + "id": 1, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.3", + "targets": [ { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -148,10 +275,10 @@ } ], "intervalFactor": 2, - "legendFormat": "returned", + "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", - "refId": "B", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -184,8 +311,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -202,10 +332,10 @@ } ], "intervalFactor": 2, - "legendFormat": "inserted", + "legendFormat": "returned", "measurement": "postgresql", "policy": "default", - "refId": "C", + "refId": "B", "resultFormat": "time_series", "select": [ [ @@ -238,8 +368,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -256,10 +389,10 @@ } ], "intervalFactor": 2, - "legendFormat": "updated", + "legendFormat": "inserted", "measurement": "postgresql", "policy": "default", - "refId": "D", + "refId": "C", "resultFormat": "time_series", "select": [ [ @@ -292,8 +425,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -310,10 +446,10 @@ } ], "intervalFactor": 2, - "legendFormat": "deleted", + "legendFormat": "updated", "measurement": "postgresql", "policy": "default", - "refId": "E", + "refId": "D", "resultFormat": "time_series", "select": [ [ @@ -343,124 +479,14 @@ "value": "/^$instance$/" } ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Rows", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 20, - "y": 0 - }, - "height": "55px", - "id": 11, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { + "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -477,15 +503,16 @@ } ], "intervalFactor": 2, + "legendFormat": "deleted", "measurement": "postgresql", "policy": "default", - "refId": "A", + "refId": "E", "resultFormat": "time_series", "select": [ [ { "params": [ - "xact_commit" + "tup_fetched" ], "type": "field" }, @@ -501,7 +528,7 @@ } ] ], - "step": 1800, + "step": 120, "tags": [ { "key": "instance", @@ -511,80 +538,100 @@ ] } ], - "thresholds": "", - "title": "QPS", - "transparent": true, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "title": "Rows", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": 1, - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, - "hiddenSeries": false, "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_alloc{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_alloc_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -635,8 +682,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -687,8 +737,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -739,8 +792,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_clean{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_clean_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -791,8 +847,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_checkpoint{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_checkpoint_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -842,104 +901,113 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Buffers", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "deadlocks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 7 }, - "hiddenSeries": false, "id": 3, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "conflicts", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -959,6 +1027,7 @@ "legendFormat": "deadlocks", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "A", "resultFormat": "time_series", "select": [ @@ -990,8 +1059,12 @@ }, { "alias": "deadlocks", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -1011,6 +1084,7 @@ "legendFormat": "conflicts", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "B", "resultFormat": "time_series", "select": [ @@ -1041,204 +1115,187 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Conflicts/Deadlocks", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, - "hiddenSeries": false, "id": 12, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": true, - "pluginVersion": "7.2.1", - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "expr": "sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))", + "datasource": { + "uid": "$datasource" + }, + "expr": "round(sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))*100,0.001)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{datname}} - cache hit rate", "refId": "A", "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cache hit ratio", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 }, - "hiddenSeries": false, "id": 13, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "pg_stat_database_numbackends{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, @@ -1247,51 +1304,13 @@ "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Number of active connections", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "refresh": false, - "schemaVersion": 26, + "revision": 1, + "schemaVersion": 38, "style": "dark", "tags": [ "postgres" @@ -1310,66 +1329,96 @@ "regex": "", "skipUrlSync": false, "type": "datasource" - }, + }, { "allValue": ".+", - "datasource": "$datasource", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, "definition": "label_values(pg_up, job)", "hide": 0, "includeAll": true, - "label": "job", + "label": "Job", "multi": true, "name": "job", "options": [], - "query": "label_values(pg_up, job)", - "refresh": 0, + "query": { + "query": "label_values(pg_up, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_up{job=~\"$job\"},instance)", "hide": 0, "includeAll": true, - "label": "instance", + "label": "Instance", "multi": true, "name": "instance", "options": [], - "query": "label_values(up{job=~\"$job\"},instance)", - "refresh": 1, + "query": { + "query": "label_values(pg_up{job=~\"$job\"},instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", "hide": 0, "includeAll": true, - "label": "db", + "label": "Database", "multi": false, "name": "db", "options": [], - "query": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", - "refresh": 1, + "query": { + "query": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -1408,5 +1457,6 @@ "timezone": "browser", "title": "Postgres Overview", "uid": "wGgaPlciz", - "version": 5 -} + "version": 39, + "weekStart": "" +} \ No newline at end of file diff --git a/queries.yaml b/queries.yaml index 189ce0866..9e9ee167c 100644 --- a/queries.yaml +++ b/queries.yaml @@ -1,2 +1,2 @@ # Adding queries to this file is deprecated -# Example queries have been transformed into collectors. \ No newline at end of file +# Example queries have been transformed into collectors.