kube-cockroachdb
Overview
The Jsonnet source code is available at github.com/metalmatze/kube-cockroachdb.
Alerts
A complete list of pregenerated alerts is available here.
cockroachdb
CockroachInstanceFlapping
# Warning alert: fires when resets() of cockroachdb_sys_uptime over a
# 10-minute window exceeds 5 for an instance, and stays that way for 1 minute.
alert: CockroachInstanceFlapping
annotations:
  # Folded scalar; renders as a single line with no trailing newline.
  description: >-
    {{ $labels.instance }} for cluster {{ $labels.cluster }} restarted
    {{ $value }} time(s) in 10m.
  summary: CockroachDB instances have restarted in the last 10 minutes.
expr: |
  resets(cockroachdb_sys_uptime{job="cockroachdb-public"}[10m]) > 5
for: 1m
labels:
  severity: warning
CockroachLivenessMismatch
# Warning alert: fires when an instance's cockroachdb_liveness_livenodes value
# differs from the count of targets with up == 1 for the same cluster and job,
# sustained for 5 minutes.
alert: CockroachLivenessMismatch
annotations:
  description: Liveness mismatch for {{ $labels.instance }}
  summary: CockroachDB has liveness mismatches.
# ignoring(instance) group_left() matches every per-instance series on the
# left against the single per-(cluster, job) count on the right.
expr: |
  (cockroachdb_liveness_livenodes{job="cockroachdb-public"})
  !=
  ignoring(instance) group_left() (count by(cluster, job) (up{job="cockroachdb-public"} == 1))
for: 5m
labels:
  severity: warning
CockroachVersionMismatch
# Warning alert: fires when a cluster shows more than one distinct
# (tag, build timestamp) combination for a full hour.
alert: CockroachVersionMismatch
annotations:
  description: >-
    Cluster {{ $labels.cluster }} running {{ $value }} different versions
  summary: CockroachDB cluster is running different versions.
# count_values emits one series per distinct value of
# cockroachdb_build_timestamp per (tag, cluster); the outer count tallies
# those series per cluster.
expr: |
  count by(cluster) (count_values by(tag, cluster) ("version", cockroachdb_build_timestamp{job="cockroachdb-public"})) > 1
for: 1h
labels:
  severity: warning
CockroachStoreDiskLow
# Critical alert: fires when the recorded series
# :cockroachdb_capacity_available:ratio stays below 0.15 for 30 minutes.
alert: CockroachStoreDiskLow
annotations:
  # Folded scalar; renders as a single line with no trailing newline.
  description: >-
    Store {{ $labels.store }} on node {{ $labels.instance }} at
    {{ $value }} available disk fraction
  summary: CockroachDB is at low disk capacity.
expr: |
  :cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.15
for: 30m
labels:
  severity: critical
CockroachClusterDiskLow
# Critical alert: fires when the recorded series
# cluster:cockroachdb_capacity_available:ratio stays below 0.2 for 30 minutes.
alert: CockroachClusterDiskLow
annotations:
  description: >-
    Cluster {{ $labels.cluster }} at {{ $value }} available disk fraction
  summary: CockroachDB cluster is at critically low disk capacity.
expr: |
  cluster:cockroachdb_capacity_available:ratio{job="cockroachdb-public"} < 0.2
for: 30m
labels:
  severity: critical
CockroachUnavailableRanges
# Critical alert: fires when the per-(instance, cluster) sum of
# cockroachdb_ranges_unavailable is above zero for 10 minutes.
alert: CockroachUnavailableRanges
annotations:
  description: >-
    Instance {{ $labels.instance }} has {{ $value }} unavailable ranges
  summary: CockroachDB has unavailable ranges.
expr: |
  (sum by(instance, cluster) (cockroachdb_ranges_unavailable{job="cockroachdb-public"})) > 0
for: 10m
labels:
  severity: critical
CockroachNoLeaseRanges
# Warning alert: fires when the per-(instance, cluster) sum of
# cockroachdb_replicas_leaders_not_leaseholders is above zero for 10 minutes.
alert: CockroachNoLeaseRanges
annotations:
  description: >-
    Instance {{ $labels.instance }} has {{ $value }} ranges without leases
  summary: CockroachDB has ranges without leases.
expr: |
  (sum by(instance, cluster) (cockroachdb_replicas_leaders_not_leaseholders{job="cockroachdb-public"})) > 0
for: 10m
labels:
  severity: warning
CockroachHighOpenFDCount
# Warning alert: fires when cockroachdb_sys_fd_open divided by
# cockroachdb_sys_fd_softlimit stays above 0.8 for 10 minutes.
alert: CockroachHighOpenFDCount
annotations:
  # Folded block scalar: the ": " inside the text needs no quoting here, and
  # the value renders as a single line with no trailing newline.
  description: >-
    Too many open file descriptors on {{ $labels.instance }}:
    {{ $value }} fraction used
  summary: CockroachDB has too many open file descriptors.
expr: |
  cockroachdb_sys_fd_open{job="cockroachdb-public"} / cockroachdb_sys_fd_softlimit{job="cockroachdb-public"} > 0.8
for: 10m
labels:
  severity: warning
Recording rules
A complete list of pregenerated recording rules is available here.
cockroachdb.rules
node:cockroachdb_capacity:sum
# Sums cockroachdb_capacity with the store label aggregated away; all other
# labels are preserved.
expr: |
  sum without(store) (cockroachdb_capacity{job="cockroachdb-public"})
record: node:cockroachdb_capacity:sum
cluster:cockroachdb_capacity:sum
# Sums the recorded node:cockroachdb_capacity:sum series with the instance
# label aggregated away.
expr: |
  sum without(instance) (node:cockroachdb_capacity:sum{job="cockroachdb-public"})
record: cluster:cockroachdb_capacity:sum
node:cockroachdb_capacity_available:sum
# Sums cockroachdb_capacity_available with the store label aggregated away;
# all other labels are preserved.
expr: |
  sum without(store) (cockroachdb_capacity_available{job="cockroachdb-public"})
record: node:cockroachdb_capacity_available:sum
cluster:cockroachdb_capacity_available:sum
# Sums the recorded node:cockroachdb_capacity_available:sum series with the
# instance label aggregated away.
expr: |
  sum without(instance) (node:cockroachdb_capacity_available:sum{job="cockroachdb-public"})
record: cluster:cockroachdb_capacity_available:sum
:cockroachdb_capacity_available:ratio
# Fraction of capacity still available, computed directly from the raw
# cockroachdb_capacity_available and cockroachdb_capacity series.
expr: |
  cockroachdb_capacity_available{job="cockroachdb-public"} / cockroachdb_capacity{job="cockroachdb-public"}
# Quoted because the name begins with the ":" indicator character; the
# parsed string is unchanged.
record: ':cockroachdb_capacity_available:ratio'
node:cockroachdb_capacity_available:ratio
# Fraction of capacity still available, computed from the recorded
# node:cockroachdb_capacity_available:sum and node:cockroachdb_capacity:sum.
expr: |
  node:cockroachdb_capacity_available:sum{job="cockroachdb-public"} / node:cockroachdb_capacity:sum{job="cockroachdb-public"}
record: node:cockroachdb_capacity_available:ratio
cluster:cockroachdb_capacity_available:ratio
# Fraction of capacity still available, computed from the recorded
# cluster:cockroachdb_capacity_available:sum and
# cluster:cockroachdb_capacity:sum.
expr: |
  cluster:cockroachdb_capacity_available:sum{job="cockroachdb-public"} / cluster:cockroachdb_capacity:sum{job="cockroachdb-public"}
record: cluster:cockroachdb_capacity_available:ratio