ceph-telemetry/db_create_cluster.sql at master · ceph/ceph-telemetry · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
-- In case we need to drop the schema, make sure to use CASCADE.
-- DROP SCHEMA IF EXISTS grafana CASCADE;

/*
  We don't use 'IF NOT EXISTS' in CREATE SCHEMA
  so that an error will be thrown in case the schema
  does exist.
*/
CREATE SCHEMA grafana;

GRANT usage ON SCHEMA grafana TO grafana;
GRANT ALL ON ALL TABLES IN SCHEMA grafana TO grafana;
GRANT ALL ON ALL SEQUENCES IN SCHEMA grafana TO grafana;


CREATE TABLE grafana.ts_cluster (
    report_id           INTEGER REFERENCES public.report(id) PRIMARY KEY,
    ts                  TIMESTAMP,
    cluster_id          VARCHAR(50),
    created             TIMESTAMP, /* cluster creation date */
    channel_basic       BOOLEAN,
    channel_crash       BOOLEAN,
    channel_device      BOOLEAN,
    channel_ident       BOOLEAN,
    total_bytes         BIGINT,
    total_used_bytes    BIGINT,
    osd_count           INTEGER,
    mon_count           INTEGER,
    ipv4_addr_mons      INTEGER,
    ipv6_addr_mons      INTEGER,
    v1_addr_mons        INTEGER,
    v2_addr_mons        INTEGER,
    rbd_num_pools       INTEGER,
    fs_count            INTEGER,
    hosts_num           INTEGER,
    pools_num           INTEGER,
    pg_num              INTEGER
);

CREATE INDEX ON grafana.ts_cluster (ts);

/*
  The json report's metadata section:

    "metadata": {
      "osd": {
        "cpu": {
          "Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz": 90
        },
      }
    }

  translates to:

    INSERT INTO metadata (report_id, ts, entity, attr, value, total)
      VALUES ($report_id, $ts, "osd", "cpu", "Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz", 90);

  example query - number of different ceph versions per cluster:

    SELECT COUNT(DISTINCT(value)) FROM metadata WHERE report_id=$report_id AND attr='ceph_version';
*/

CREATE TABLE grafana.metadata (
    report_id       INTEGER REFERENCES public.report(id) ON DELETE CASCADE,
    ts              TIMESTAMP,
    entity          VARCHAR(16),
    attr            VARCHAR(32),
    value           VARCHAR(128),
    total           INTEGER
);

CREATE TABLE grafana.rbd_pool (
    report_id       INTEGER REFERENCES public.report(id) ON DELETE CASCADE,
    ts              TIMESTAMP,
    pool_idx        INTEGER, /* needs to derive this from the element position in the array */
    num_images      INTEGER,
    mirroring       BOOLEAN
);

CREATE TABLE grafana.pool (
    report_id                INTEGER REFERENCES public.report(id) ON DELETE CASCADE,
    ts                       TIMESTAMP,
    pool_idx                 INTEGER,
    pgp_num                  INTEGER,
    pg_num                   INTEGER,
    size                     BIGINT,
    min_size                 BIGINT,
    cache_mode               VARCHAR(32),
    target_max_objects       BIGINT,
    target_max_bytes         BIGINT,
    pg_autoscale_mode        VARCHAR(32),
    type                     VARCHAR(32),

    /* erasure_code_profile */
    ec_k                    SMALLINT,
    ec_m                    SMALLINT,
    ec_crush_failure_domain VARCHAR(32),
    ec_plugin               VARCHAR(32),
    ec_technique            VARCHAR(32)
);

-- maps major versions ("14", "15") to their name ("Nautilus", "Octopus").
-- 'version' is of type VARCHAR because there is version "Dev".
CREATE TABLE grafana.version_to_name (
    version     VARCHAR(32),
    name        VARCHAR(32)
);

INSERT INTO grafana.version_to_name(version, name)
    VALUES
        ('12', 'Luminous'),
        ('13', 'Mimic'),
        ('14', 'Nautilus'),
        ('15', 'Octopus'),
        ('16', 'Pacific'),
        ('17', 'Quincy'),
        ('18', 'Reef'),
        ('Dev', NULL);

-- When grafana draws a graph with a resolution of a day, it expects the DB
-- query to return a data point per day. A cluster may report less frequently
-- than once a day, thus the DB query will return a spike on the day that
-- the cluster reports, and a drop on days it doesn't.
-- To normalize this, we create a materialized view that for each day holds
-- a list of all reports that occurred on the previous week - only the most
-- recent report for each cluster within that previous week.
-- We use a materialized view because it's much faster than a regular view.
-- We update the materialized view in import_clusters.py.
CREATE MATERIALIZED VIEW grafana.weekly_reports_sliding AS
    SELECT
        DISTINCT ON(daily_window, cluster_id)
        daily_window,
        report_id
        /*
        GENERATE_SERIES generates a table with a single column 'daily_window'
        which holds all the days (a row per day) between the first report
        and today. Day format is 'YYYY-MM-DD 00:00:00'.
        */
    FROM
        grafana.ts_cluster c,
        GENERATE_SERIES('2019-03-01', now()::date, interval '1' day) daily_window
    WHERE
        c.ts BETWEEN daily_window - interval '7' day AND daily_window + interval '1' day
    -- Include only clusters that reported more than once
    AND (c.cluster_id IN (
            SELECT _c.cluster_id
            FROM grafana.ts_cluster _c
            GROUP BY _c.cluster_id
            HAVING count(*) > 1)
        )
    ORDER BY
        daily_window,
        cluster_id,
        c.ts DESC;

CREATE INDEX ON grafana.weekly_reports_sliding (daily_window);

ALTER MATERIALIZED VIEW grafana.weekly_reports_sliding OWNER TO grafana;