apache-cassandra
Overview
The Jsonnet source code is available at github.com/grafana/jsonnet-libs
Alerts
The complete list of pregenerated alerts is available here.
ApacheCassandraAlerts
HighReadLatency
# Alert rule: per-instance mean read latency above 200 ms.
alert: HighReadLatency
# Divides the summed `_sum` series by the summed `_count` series, both grouped
# by instance. The `* 1000` factor lines up with the millisecond wording of the
# description (the metric names carry a `_seconds` suffix).
# NOTE(review): the expression has no rate()/increase() window, so the
# "last 5 minutes" in the description seems to come from `for: 5m` only -
# confirm against how the exporter publishes these series.
expr: |
  sum(cassandra_table_readlatency_seconds_sum) by (instance) / sum(cassandra_table_readlatency_seconds_count) by (instance) * 1000 > 200
# The condition has to hold continuously for this long before the alert fires.
for: 5m
labels:
  severity: critical
annotations:
  summary: There is a high level of read latency within the node.
  description: 'An average of {{ printf "%.0f" $value }}ms of read latency
    has occurred over the last 5 minutes on {{$labels.instance}}, which is
    above the threshold of 200ms. '
HighWriteLatency
# Alert rule: per-instance mean write latency above 200 ms.
alert: HighWriteLatency
# Divides the summed `_sum` series by the summed `_count` series, both grouped
# by instance, then scales by 1000 to match the millisecond wording of the
# description.
# NOTE(review): this rule reads the keyspace-level write-latency series while
# HighReadLatency reads the table-level read-latency series - confirm the
# difference is intentional.
expr: |
  sum(cassandra_keyspace_writelatency_seconds_sum) by (instance) / sum(cassandra_keyspace_writelatency_seconds_count) by (instance) * 1000 > 200
# The condition has to hold continuously for this long before the alert fires.
for: 5m
labels:
  severity: critical
annotations:
  summary: There is a high level of write latency within the node.
  description: 'An average of {{ printf "%.0f" $value }}ms of write latency
    has occurred over the last 5 minutes on {{$labels.instance}}, which is
    above the threshold of 200ms. '
HighPendingCompactionTasks
# Alert rule: more than 30 pending compaction tasks, sustained for 15 minutes.
alert: HighPendingCompactionTasks
# Compares the raw pending-task series against the threshold; no aggregation
# is applied, so every label on the series (including `instance`, which the
# description uses) is kept on the resulting alert.
expr: |
  cassandra_compaction_pendingtasks > 30
# The condition has to hold continuously for this long before the alert fires.
for: 15m
labels:
  severity: warning
annotations:
  summary: Compaction task queue is filling up.
  description: '{{ printf "%.0f" $value }} compaction tasks have been
    pending over the last 15 minutes on {{$labels.instance}}, which is above
    the threshold of 30. '
BlockedCompactionTasksFound
# Alert rule: more than one currently-blocked task in the CompactionExecutor
# thread pool, sustained for 5 minutes.
alert: BlockedCompactionTasksFound
# Only series with threadpools="CompactionExecutor" and path="internal" are
# selected. The comparison is strict, so a value of exactly 1 does not match.
expr: |
  cassandra_threadpools_currentlyblockedtasks_count{threadpools="CompactionExecutor", path="internal"} > 1
# The condition has to hold continuously for this long before the alert fires.
for: 5m
labels:
  severity: critical
annotations:
  summary: Compaction task queue is full.
  description: '{{ printf "%.0f" $value }} compaction tasks have been
    blocked over the last 5 minutes on {{$labels.instance}}, which is above
    the threshold of 1. '
HintsStoredOnNode
# Alert rule: more than one hint written to a node within a 5-minute window.
alert: HintsStoredOnNode
# `$value` is the increase of the total-hints series over a 5-minute range, so
# the description reports a 5-minute figure. `for: 1m` only controls how long
# the condition must hold before the alert fires; it is not the measurement
# window.
expr: |
  increase(cassandra_storage_totalhints_count[5m]) > 1
for: 1m
labels:
  severity: warning
annotations:
  summary: Hints have been recently written to this node.
  description: '{{ printf "%.0f" $value }} hints have been written to the
    node over the last 5 minutes on {{$labels.instance}}, which is above the
    threshold of 1. '
UnavailableWriteRequestsFound
# Alert rule: more than one unavailable write request reported for a cluster.
alert: UnavailableWriteRequestsFound
# The series is summed per `cassandra_cluster`, so that is the only label left
# on the result; the description therefore names the cluster rather than an
# instance (an `instance` label no longer exists after this aggregation).
# NOTE(review): the expression compares the raw `_count` value with no
# rate()/increase() window. If the exporter publishes this as an ever-growing
# counter, the alert would not clear once the total passes 1 - confirm, and
# consider wrapping the selector in increase(...[5m]).
expr: |
  sum(cassandra_clientrequest_unavailables_count{clientrequest="Write"}) by (cassandra_cluster) > 1
# The condition has to hold continuously for this long before the alert fires.
for: 5m
labels:
  severity: critical
annotations:
  summary: Unavailable exceptions have been encountered while performing
    writes in this cluster.
  description: '{{ printf "%.0f" $value }} unavailable write requests have
    been found over the last 5 minutes in cluster
    {{$labels.cassandra_cluster}}, which is above the threshold of 1. '
HighCpuUsage
# Alert rule: JVM process CPU load above 80 percent, sustained for 5 minutes.
alert: HighCpuUsage
# The load series is multiplied by 100 before the comparison so that `$value`
# reads as a percentage in the description. No aggregation is applied, so the
# `instance` label used by the description is kept.
expr: |
  jvm_process_cpu_load{job=~"integrations/apache-cassandra"} * 100 > 80
# The condition has to hold continuously for this long before the alert fires.
for: 5m
labels:
  severity: critical
annotations:
  summary: A node has a CPU usage higher than the configured threshold.
  description: 'Cpu usage is at {{ printf "%.0f" $value }} percent over the
    last 5 minutes on {{$labels.instance}}, which is above the threshold of
    80. '
HighMemoryUsage
# Alert rule: a node's JVM heap usage above 80 percent of its physical memory
# size, sustained for 5 minutes.
alert: HighMemoryUsage
# Both sides of the ratio are grouped by instance. Without the `by (instance)`
# clause the sums collapse every node into a single series that carries no
# labels, which leaves `{{$labels.instance}}` empty in the description and
# turns the rule into a cluster-wide total instead of the per-node check the
# summary describes.
expr: |
  sum(jvm_memory_usage_used_bytes{job=~"integrations/apache-cassandra", area="Heap"}) by (instance)
  /
  sum(jvm_physical_memory_size{job=~"integrations/apache-cassandra"}) by (instance)
  * 100 > 80
# The condition has to hold continuously for this long before the alert fires.
for: 5m
labels:
  severity: critical
annotations:
  summary: A node has a higher memory utilization than the configured threshold.
  description: 'Memory usage is at {{ printf "%.0f" $value }} percent over
    the last 5 minutes on {{$labels.instance}}, which is above the threshold
    of 80. '
Dashboards
The following dashboards are generated from mixins and hosted on GitHub: