-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathqueries.yaml
339 lines (325 loc) · 14 KB
/
queries.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
pg_is_in:
query: "SELECT CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 END AS recovery"
master: true
metrics:
- recovery:
usage: "GAUGE"
description: "0 for master, 1 for slaves"
pg_xlog:
query: "SELECT CASE WHEN pg_is_in_recovery() THEN pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '0/0') ELSE pg_wal_lsn_diff(pg_current_wal_flush_lsn(), '0/0') END AS position"
master: true
metrics:
- position:
usage: "COUNTER"
description: "Position in the WAL"
pg_replication:
query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN -1 WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 ELSE EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()) * 1000 END AS replag"
master: true
metrics:
- replag:
usage: "GAUGE"
description: "Replication lag behind master in milliseconds"
pg_replication_replay:
query: "SELECT client_addr, coalesce(pg_wal_lsn_diff(pg_current_wal_flush_lsn(), replay_lsn), 0) AS lag FROM pg_stat_replication WHERE state <> 'backup'"
master: true
metrics:
- lag:
usage: "GAUGE"
description: "Replication lag behind master in bytes"
- client_addr:
usage: "LABEL"
description: "Address of client"
pg_postmaster:
query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
master: true
metrics:
- start_time_seconds:
usage: "GAUGE"
description: "Time at which postmaster started"
pg_wals:
query: |
WITH ls AS (
SELECT './pg_wal/' || pg_ls_dir AS filename, (pg_stat_file('./pg_wal/' || pg_ls_dir)).* FROM pg_ls_dir('./pg_wal/')
)
SELECT count(*) AS count, sum(size) AS size FROM ls WHERE NOT isdir;
master: true
metrics:
- count:
usage: "GAUGE"
description: "Number of wal segments"
- size:
usage: "GAUGE"
description: "Disk usage of wal segments"
pg_archive_wals:
query: |
WITH ls AS (
select './pg_wal/archive_status/' || pg_ls_dir AS filename, (pg_stat_file('./pg_wal/archive_status/' || pg_ls_dir)).* from pg_ls_dir('./pg_wal/archive_status/')
)
SELECT (SELECT count(*) FROM ls WHERE filename LIKE '%.done') AS done, (SELECT count(*) FROM ls WHERE filename LIKE '%.ready') AS ready;
master: true
metrics:
- done:
usage: "GAUGE"
description: "Number of wal segments archived"
- ready:
usage: "GAUGE"
description: "Number of wal segments ready for archival"
pg_locksage:
query: |
SELECT datname, relation::regclass AS relname, pl.pid, mode, regexp_replace(substring(query FOR 100), '[ \t\n]+', ' ', 'g') AS query, EXTRACT (EPOCH FROM xact_start) AS start
FROM pg_locks pl
LEFT JOIN pg_stat_activity psa ON pl.pid = psa.pid
WHERE datname = current_database()
AND granted = true
AND relation IS NOT NULL
AND mode != 'AccessShareLock'
metrics:
- start:
usage: "GAUGE"
description: "Start time of lock"
- datname:
usage: "LABEL"
description: "Name of database"
- relname:
usage: "LABEL"
description: "Name of the table"
- pid:
usage: "LABEL"
description: "Process Identifier"
- mode:
usage: "LABEL"
description: "Lock mode"
- query:
usage: "LABEL"
description: "SQL Query"
pg_stat_user_tables:
query: "SELECT current_database() datname, schemaname, relname, COALESCE(seq_scan, 0) AS seq_scan, COALESCE(seq_tup_read, 0) AS seq_tup_read, COALESCE(idx_scan, 0) AS idx_scan, COALESCE(idx_tup_fetch, 0) AS idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z'), COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
metrics:
- datname:
usage: "LABEL"
description: "Name of current database"
- schemaname:
usage: "LABEL"
description: "Name of the schema that this table is in"
- relname:
usage: "LABEL"
description: "Name of this table"
- seq_scan:
usage: "COUNTER"
description: "Number of sequential scans initiated on this table"
- seq_tup_read:
usage: "COUNTER"
description: "Number of live rows fetched by sequential scans"
- idx_scan:
usage: "COUNTER"
description: "Number of index scans initiated on this table"
- idx_tup_fetch:
usage: "COUNTER"
description: "Number of live rows fetched by index scans"
- n_tup_ins:
usage: "COUNTER"
description: "Number of rows inserted"
- n_tup_upd:
usage: "COUNTER"
description: "Number of rows updated"
- n_tup_del:
usage: "COUNTER"
description: "Number of rows deleted"
- n_tup_hot_upd:
usage: "COUNTER"
description: "Number of rows HOT updated (i.e., with no separate index update required)"
- n_live_tup:
usage: "GAUGE"
description: "Estimated number of live rows"
- n_dead_tup:
usage: "GAUGE"
description: "Estimated number of dead rows"
- n_mod_since_analyze:
usage: "GAUGE"
description: "Estimated number of rows changed since last analyze"
- last_vacuum:
usage: "GAUGE"
description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
- last_autovacuum:
usage: "GAUGE"
description: "Last time at which this table was vacuumed by the autovacuum daemon"
- last_analyze:
usage: "GAUGE"
description: "Last time at which this table was manually analyzed"
- last_autoanalyze:
usage: "GAUGE"
description: "Last time at which this table was analyzed by the autovacuum daemon"
- vacuum_count:
usage: "COUNTER"
description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
- autovacuum_count:
usage: "COUNTER"
description: "Number of times this table has been vacuumed by the autovacuum daemon"
- analyze_count:
usage: "COUNTER"
description: "Number of times this table has been manually analyzed"
- autoanalyze_count:
usage: "COUNTER"
description: "Number of times this table has been analyzed by the autovacuum daemon"
pg_statio_user_tables:
query: "SELECT current_database() datname, schemaname, relname, COALESCE(heap_blks_read, 0) AS heap_blks_read, COALESCE(heap_blks_hit, 0) AS heap_blks_hit, COALESCE(idx_blks_read, 0) AS idx_blks_read, COALESCE(idx_blks_hit, 0) AS idx_blks_hit, COALESCE(toast_blks_read, 0) AS toast_blks_read, COALESCE(toast_blks_hit, 0) AS toast_blks_hit, COALESCE(tidx_blks_read, 0) AS tidx_blks_read, COALESCE(tidx_blks_hit, 0) AS tidx_blks_hit FROM pg_statio_user_tables"
metrics:
- datname:
usage: "LABEL"
description: "Name of current database"
- schemaname:
usage: "LABEL"
description: "Name of the schema that this table is in"
- relname:
usage: "LABEL"
description: "Name of this table"
- heap_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from this table"
- heap_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in this table"
- idx_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from all indexes on this table"
- idx_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in all indexes on this table"
- toast_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from this table's TOAST table (if any)"
- toast_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in this table's TOAST table (if any)"
- tidx_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
- tidx_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in this table's TOAST table indexes (if any)"
pg_table_size:
query: "SELECT current_database() AS datname, *, total_bytes - index_bytes - toast_bytes AS table_bytes FROM (SELECT c.oid,nspname AS schemaname, relname, c.reltuples AS row_estimate, pg_total_relation_size(c.oid) AS total_bytes, pg_indexes_size(c.oid) AS index_bytes, COALESCE(pg_total_relation_size(reltoastrelid), 0) AS toast_bytes FROM pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace WHERE relkind = 'r') a"
metrics:
- datname:
usage: "LABEL"
description: "Name of current database"
- schemaname:
usage: "LABEL"
description: "Name of the relation schema"
- relname:
usage: "LABEL"
description: "Name of the relation"
- total_bytes:
usage: "GAUGE"
description: "Total size of relation data"
- table_bytes:
usage: "GAUGE"
description: "Size of relation data not TOASTed"
- index_bytes:
usage: "GAUGE"
description: "Size of relation indexes"
- toast_bytes:
usage: "GAUGE"
description: "Size of TOASTed data"
pg_client_connections:
master: true
query: "SELECT count(datname) as count, datname, usename, application_name as appname, client_addr FROM pg_stat_activity WHERE backend_type = 'client backend' GROUP BY datname, usename, client_addr, application_name"
metrics:
- datname:
usage: "LABEL"
description: "Name of the database"
- usename:
usage: "LABEL"
description: "Username connected as"
- appname:
usage: "LABEL"
description: "Application Name connection string"
- client_addr:
usage: "LABEL"
description: "Client connection address"
- count:
usage: "GAUGE"
description: "Clients connected"
pg_database:
query: "SELECT datname, pg_database_size(datname) as size_bytes FROM pg_database WHERE datname NOT IN ('template0', 'template1')"
master: true
metrics:
- datname:
usage: "LABEL"
description: "Name of the database"
- size_bytes:
usage: "GAUGE"
description: "Disk space used by the database"
pg_stat_statements:
query: "SELECT t2.rolname, t3.datname, queryid, regexp_replace(substring(query FOR 100), '[ \t\n]+', ' ', 'g') AS shrunk_query_text, calls, (total_exec_time + total_plan_time) / 1000 as total_time_seconds, (min_exec_time + min_plan_time) / 1000 as min_time_seconds, (max_exec_time + max_plan_time) / 1000 as max_time_seconds, (mean_exec_time + mean_plan_time) / 1000 as mean_time_seconds, (stddev_exec_time + stddev_plan_time)/ 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t3.datname = current_database() AND t2.rolname != 'rdsadmin' ORDER BY calls DESC LIMIT 100"
master: true
metrics:
- rolname:
usage: "LABEL"
description: "Name of user"
- datname:
usage: "LABEL"
description: "Name of database"
- queryid:
usage: "LABEL"
description: "Query ID"
- shrunk_query_text:
usage: "LABEL"
description: "First 100 characters of the query"
- calls:
usage: "COUNTER"
description: "Number of times executed"
- total_time_seconds:
usage: "COUNTER"
description: "Total time spent in the statement, in milliseconds"
- min_time_seconds:
usage: "GAUGE"
description: "Minimum time spent in the statement, in milliseconds"
- max_time_seconds:
usage: "GAUGE"
description: "Maximum time spent in the statement, in milliseconds"
- mean_time_seconds:
usage: "GAUGE"
description: "Mean time spent in the statement, in milliseconds"
- stddev_time_seconds:
usage: "GAUGE"
description: "Population standard deviation of time spent in the statement, in milliseconds"
- rows:
usage: "COUNTER"
description: "Total number of rows retrieved or affected by the statement"
- shared_blks_hit:
usage: "COUNTER"
description: "Total number of shared block cache hits by the statement"
- shared_blks_read:
usage: "COUNTER"
description: "Total number of shared blocks read by the statement"
- shared_blks_dirtied:
usage: "COUNTER"
description: "Total number of shared blocks dirtied by the statement"
- shared_blks_written:
usage: "COUNTER"
description: "Total number of shared blocks written by the statement"
- local_blks_hit:
usage: "COUNTER"
description: "Total number of local block cache hits by the statement"
- local_blks_read:
usage: "COUNTER"
description: "Total number of local blocks read by the statement"
- local_blks_dirtied:
usage: "COUNTER"
description: "Total number of local blocks dirtied by the statement"
- local_blks_written:
usage: "COUNTER"
description: "Total number of local blocks written by the statement"
- temp_blks_read:
usage: "COUNTER"
description: "Total number of temp blocks read by the statement"
- temp_blks_written:
usage: "COUNTER"
description: "Total number of temp blocks written by the statement"
- blk_read_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
- blk_write_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"