Set up comprehensive monitoring for Elasticsearch 8 using Prometheus metrics collection and Grafana visualization. This tutorial covers exporter installation, metric configuration, dashboard setup, and alerting rules for production environments.
Prerequisites
- Elasticsearch 8 installed and running
- Prometheus server configured
- Grafana installed with admin access
- Root or sudo access
What this solves
Elasticsearch monitoring is critical for production deployments to track cluster health, performance metrics, and resource utilization. This tutorial integrates Elasticsearch 8 with Prometheus for metrics collection and Grafana for visualization, providing real-time monitoring and alerting capabilities for your search infrastructure.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you have the latest package information.
sudo apt update && sudo apt upgrade -y
Install Elasticsearch exporter
Download and install the Prometheus Elasticsearch exporter to collect metrics from your cluster.
cd /tmp
wget https://github.com/prometheus-community/elasticsearch_exporter/releases/download/v1.7.0/elasticsearch_exporter-1.7.0.linux-amd64.tar.gz
tar -xzf elasticsearch_exporter-1.7.0.linux-amd64.tar.gz
sudo mv elasticsearch_exporter-1.7.0.linux-amd64/elasticsearch_exporter /usr/local/bin/
sudo chmod +x /usr/local/bin/elasticsearch_exporter
Create exporter user and directories
Create a dedicated system user for the exporter service with minimal privileges.
sudo useradd --no-create-home --shell /bin/false elasticsearch_exporter
sudo mkdir -p /var/lib/elasticsearch_exporter
sudo chown elasticsearch_exporter:elasticsearch_exporter /var/lib/elasticsearch_exporter
Configure Elasticsearch exporter
Create a configuration file recording the Elasticsearch connection details. Note: recent prometheus-community exporter releases are configured via command-line flags (e.g. --es.uri, --es.timeout) rather than a config file — check your exporter version's documentation.
elasticsearch:
uri: "http://localhost:9200"
username: ""
password: ""
timeout: 30s
ssl_skip_verify: false
log:
level: info
format: json
metrics:
cluster_info: true
cluster_stats: true
indices_stats: true
shards_stats: true
snapshots_stats: false
Create systemd service file
Configure the Elasticsearch exporter as a systemd service for automatic startup and management.
[Unit]
Description=Elasticsearch Exporter
Wants=network-online.target
After=network-online.target
[Service]
Type=simple
User=elasticsearch_exporter
Group=elasticsearch_exporter
ExecStart=/usr/local/bin/elasticsearch_exporter --es.uri=http://localhost:9200 --web.listen-address=:9114
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
Set proper permissions and start exporter
Save the YAML configuration shown above to /etc/elasticsearch_exporter/config.yml, apply correct ownership and permissions, then enable the service.
sudo mkdir -p /etc/elasticsearch_exporter
sudo chown -R elasticsearch_exporter:elasticsearch_exporter /etc/elasticsearch_exporter
sudo chmod 755 /etc/elasticsearch_exporter
sudo chmod 644 /etc/elasticsearch_exporter/config.yml
sudo systemctl daemon-reload
sudo systemctl enable --now elasticsearch_exporter
Configure Prometheus scraping
Add the Elasticsearch exporter as a scraping target in your Prometheus configuration.
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- "elasticsearch_rules.yml"
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'elasticsearch'
static_configs:
- targets: ['localhost:9114']
scrape_interval: 30s
metrics_path: /metrics
params:
timeout: ['30s']
Create Elasticsearch alerting rules
Define Prometheus alerting rules to monitor critical Elasticsearch metrics and trigger notifications.
groups:
- name: elasticsearch
rules:
- alert: ElasticsearchClusterRed
expr: elasticsearch_cluster_health_status{color="red"} == 1
for: 5m
labels:
severity: critical
annotations:
summary: "Elasticsearch cluster status is RED"
description: "Cluster {{ $labels.cluster }} health is RED"
- alert: ElasticsearchClusterYellow
expr: elasticsearch_cluster_health_status{color="yellow"} == 1
for: 10m
labels:
severity: warning
annotations:
summary: "Elasticsearch cluster status is YELLOW"
description: "Cluster {{ $labels.cluster }} health is YELLOW for more than 10 minutes"
- alert: ElasticsearchHighJVMMemory
expr: elasticsearch_jvm_memory_used_bytes / elasticsearch_jvm_memory_max_bytes > 0.85
for: 5m
labels:
severity: warning
annotations:
summary: "Elasticsearch JVM memory usage is high"
description: "JVM memory usage is above 85% on node {{ $labels.name }}"
- alert: ElasticsearchDiskSpaceLow
expr: elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes < 0.1
for: 5m
labels:
severity: critical
annotations:
summary: "Elasticsearch disk space is low"
description: "Less than 10% disk space available on node {{ $labels.name }}"
- alert: ElasticsearchNodeDown
expr: up{job="elasticsearch"} == 0
for: 2m
labels:
severity: critical
annotations:
summary: "Elasticsearch exporter is down"
description: "Elasticsearch exporter has been down for more than 2 minutes"
Restart Prometheus
Reload Prometheus configuration to apply the new scraping targets and alerting rules.
sudo systemctl restart prometheus
sudo systemctl status prometheus
Import Grafana dashboard
Add a comprehensive Elasticsearch dashboard to Grafana for visualizing cluster metrics and performance data.
curl -X POST \
http://admin:admin@localhost:3000/api/dashboards/db \
-H 'Content-Type: application/json' \
-d '{
"dashboard": {
"id": null,
"title": "Elasticsearch Cluster Monitoring",
"tags": ["elasticsearch", "monitoring"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Cluster Health Status",
"type": "stat",
"targets": [
{
"expr": "elasticsearch_cluster_health_status",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"mappings": [
{
"options": {
"0": {
"text": "GREEN",
"color": "green"
},
"1": {
"text": "YELLOW",
"color": "yellow"
},
"2": {
"text": "RED",
"color": "red"
}
},
"type": "value"
}
]
}
}
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"refresh": "30s"
}
}'
Configure Grafana data source
Add Prometheus as a data source in Grafana to enable dashboard functionality.
curl -X POST \
http://admin:admin@localhost:3000/api/datasources \
-H 'Content-Type: application/json' \
-d '{
"name": "Prometheus",
"type": "prometheus",
"url": "http://localhost:9090",
"access": "proxy",
"isDefault": true
}'
Configure authentication for secure environments
Set up Elasticsearch authentication
If your Elasticsearch cluster uses authentication, update the exporter configuration with credentials.
elasticsearch:
uri: "https://localhost:9200"
username: "monitoring_user"
password: "secure_password"
timeout: 30s
ssl_skip_verify: false
ca_cert: "/etc/elasticsearch/certs/ca.crt"
client_cert: "/etc/elasticsearch/certs/client.crt"
client_key: "/etc/elasticsearch/certs/client.key"
Create monitoring user in Elasticsearch
Create a dedicated user with minimal privileges for metrics collection.
curl -X POST "localhost:9200/_security/user/monitoring_user" \
-H 'Content-Type: application/json' \
-d '{
"password" : "secure_password",
"roles" : [ "monitoring_user" ],
"full_name" : "Monitoring User",
"email" : "monitoring@example.com"
}'
Advanced dashboard configuration
Create comprehensive monitoring panels
Set up detailed Grafana panels for monitoring various Elasticsearch metrics including performance and resource utilization.
# Cluster Nodes Count
sum(elasticsearch_cluster_health_number_of_nodes)
Index Operations Rate
rate(elasticsearch_indices_indexing_index_total[5m])
Search Operations Rate
rate(elasticsearch_indices_search_query_total[5m])
JVM Memory Usage
elasticsearch_jvm_memory_used_bytes{area="heap"} / elasticsearch_jvm_memory_max_bytes{area="heap"} * 100
Disk Usage per Node
(elasticsearch_filesystem_data_size_bytes - elasticsearch_filesystem_data_available_bytes) / elasticsearch_filesystem_data_size_bytes * 100
Thread Pool Queue Size
elasticsearch_thread_pool_queue_count
GC Collection Time
rate(elasticsearch_jvm_gc_collection_seconds_sum[5m])
Verify your setup
Check that all components are running correctly and collecting metrics.
# Check Elasticsearch exporter status
sudo systemctl status elasticsearch_exporter
Verify metrics are being collected
curl http://localhost:9114/metrics | grep elasticsearch_cluster_health
Check Prometheus targets
curl http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | select(.job == "elasticsearch")'
Test Elasticsearch connectivity
curl -X GET "localhost:9200/_cluster/health?pretty"
Verify alerting rules are loaded
curl http://localhost:9090/api/v1/rules | jq '.data.groups[] | select(.name == "elasticsearch")'
Check Grafana data source
curl -u admin:admin http://localhost:3000/api/datasources
Performance optimization
| Metric Type | Recommended Interval | Resource Impact |
|---|---|---|
| Cluster health | 15-30 seconds | Low |
| Node statistics | 30-60 seconds | Medium |
| Index statistics | 60-300 seconds | High |
| Shard statistics | 300-600 seconds | Very High |
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Exporter fails to start | Invalid configuration file | Check YAML syntax with yamllint /etc/elasticsearch_exporter/config.yml |
| Connection refused errors | Elasticsearch not accessible | Verify Elasticsearch is running on specified URI |
| Authentication failures | Invalid credentials or permissions | Check username/password and user roles in Elasticsearch |
| No metrics in Prometheus | Scraping configuration incorrect | Verify target configuration in prometheus.yml |
| SSL certificate errors | Invalid or expired certificates | Update certificate paths or disable SSL verification for testing |
| High memory usage | Too many metrics being collected | Disable unnecessary metrics in exporter configuration |
Next steps
- Set up centralized log aggregation with ELK Stack for comprehensive logging alongside metrics
- Configure SSL/TLS encryption for Elasticsearch to secure your monitoring setup
- Set up Thanos for long-term metrics storage to retain historical Elasticsearch performance data
- Implement automated Elasticsearch backup strategies for disaster recovery
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
set -euo pipefail

# ANSI color codes for log output (escape sequences are interpreted
# by the logging helpers below).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Default configuration. The target Elasticsearch URL may be overridden
# by the first positional argument; version and port are fixed constants.
ELASTICSEARCH_URL="${1:-http://localhost:9200}"
readonly EXPORTER_VERSION="1.7.0"
readonly EXPORTER_PORT="9114"

# Print usage information to stdout and exit with a non-zero status.
usage() {
  echo "Usage: $0 [elasticsearch_url]"
  echo "Example: $0 http://localhost:9200"
  exit 1
}
# --- Logging helpers ---------------------------------------------------
# Each prints a colored severity tag followed by the message to stdout.
# %b interprets the backslash escapes stored in the color variables (and
# in the message, matching `echo -e` semantics).
log_info() {
  printf '%b[INFO]%b %b\n' "${GREEN}" "${NC}" "$1"
}

log_warn() {
  printf '%b[WARN]%b %b\n' "${YELLOW}" "${NC}" "$1"
}

log_error() {
  printf '%b[ERROR]%b %b\n' "${RED}" "${NC}" "$1"
}
# Roll back a partially completed installation. Installed via the ERR
# trap below, so every command is best-effort (|| true) to avoid failing
# inside the failure handler itself.
cleanup() {
  log_error "Installation failed. Cleaning up..."
  local action
  for action in stop disable; do
    systemctl "$action" elasticsearch_exporter 2>/dev/null || true
  done
  userdel elasticsearch_exporter 2>/dev/null || true
  rm -rf /etc/elasticsearch_exporter /var/lib/elasticsearch_exporter
  rm -f /usr/local/bin/elasticsearch_exporter /etc/systemd/system/elasticsearch_exporter.service
  exit 1
}
trap cleanup ERR
# Check prerequisites
# Verify the script runs as root and that every external tool used by
# later steps is installed: wget/tar for install_exporter, curl for
# verify_installation (the original script used curl without checking it).
check_prerequisites() {
  echo "[1/10] Checking prerequisites..."
  if [[ $EUID -ne 0 ]]; then
    log_error "This script must be run as root"
    exit 1
  fi
  local tool
  for tool in wget tar curl; do
    if ! command -v "$tool" &> /dev/null; then
      log_error "$tool is required but not installed"
      exit 1
    fi
  done
  log_info "Prerequisites check passed"
}
# Detect distribution
# Source /etc/os-release and map the distro ID to the package-manager
# commands consumed later (update_system evals PKG_UPDATE).
# Sets globals: PKG_MGR, PKG_UPDATE, PKG_INSTALL.
detect_distro() {
  echo "[2/10] Detecting distribution..."
  # Guard clause: without os-release we cannot identify the distro.
  if [ ! -f /etc/os-release ]; then
    log_error "Cannot detect distribution"
    exit 1
  fi
  . /etc/os-release
  case "$ID" in
    ubuntu|debian)
      PKG_MGR="apt"
      PKG_UPDATE="apt update && apt upgrade -y"
      PKG_INSTALL="apt install -y"
      ;;
    almalinux|rocky|centos|rhel|ol|fedora)
      PKG_MGR="dnf"
      PKG_UPDATE="dnf update -y"
      PKG_INSTALL="dnf install -y"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_UPDATE="yum update -y"
      PKG_INSTALL="yum install -y"
      ;;
    *)
      log_error "Unsupported distribution: $ID"
      exit 1
      ;;
  esac
  log_info "Detected distribution: $PRETTY_NAME"
}
# Update system packages
# Run the distro-specific update command prepared by detect_distro.
# The command string may contain '&&' (apt path), hence eval; quote the
# expansion so eval receives the string as-is instead of a word-split list.
update_system() {
  echo "[3/10] Updating system packages..."
  eval "$PKG_UPDATE"
  log_info "System packages updated"
}
# Download and install Elasticsearch exporter
# Fetch the pinned release tarball from GitHub, unpack it, move the
# binary into /usr/local/bin, and remove the temporary files.
install_exporter() {
  echo "[4/10] Installing Elasticsearch exporter..."
  local release_dir="elasticsearch_exporter-${EXPORTER_VERSION}.linux-amd64"
  local tarball="${release_dir}.tar.gz"
  local url="https://github.com/prometheus-community/elasticsearch_exporter/releases/download/v${EXPORTER_VERSION}/${tarball}"
  cd /tmp
  wget -q "$url"
  tar -xzf "$tarball"
  mv "${release_dir}/elasticsearch_exporter" /usr/local/bin/
  chmod 755 /usr/local/bin/elasticsearch_exporter
  # Remove both the tarball and the extracted directory.
  rm -rf /tmp/elasticsearch_exporter-*
  log_info "Elasticsearch exporter installed"
}
# Create exporter user and directories
# Provision the unprivileged system account (idempotent: useradd failure
# on re-run is ignored) plus its state and configuration directories.
create_user_and_dirs() {
  echo "[5/10] Creating exporter user and directories..."
  useradd --no-create-home --shell /bin/false --system elasticsearch_exporter 2>/dev/null || true
  local dir
  for dir in /var/lib/elasticsearch_exporter /etc/elasticsearch_exporter; do
    mkdir -p "$dir"
    chown elasticsearch_exporter:elasticsearch_exporter "$dir"
    chmod 755 "$dir"
  done
  log_info "User and directories created"
}
# Create configuration file
# Write the exporter settings to /etc/elasticsearch_exporter/config.yml.
# The heredoc delimiter is unquoted, so ${ELASTICSEARCH_URL} is expanded
# at write time; all other values are literal.
# NOTE(review): the prometheus-community elasticsearch_exporter is
# configured via command-line flags (--es.uri, --es.timeout, ...) and, as
# far as visible here, does not read a YAML config file — confirm this
# file is actually consumed by the binary referenced in the unit file.
create_config() {
  echo "[6/10] Creating configuration file..."
  cat > /etc/elasticsearch_exporter/config.yml << EOF
elasticsearch:
  uri: "${ELASTICSEARCH_URL}"
  username: ""
  password: ""
  timeout: 30s
  ssl_skip_verify: false
log:
  level: info
  format: json
metrics:
  cluster_info: true
  cluster_stats: true
  indices_stats: true
  shards_stats: true
  snapshots_stats: false
EOF
  # Readable by all, writable only by the service account's owner.
  chown elasticsearch_exporter:elasticsearch_exporter /etc/elasticsearch_exporter/config.yml
  chmod 644 /etc/elasticsearch_exporter/config.yml
  log_info "Configuration file created"
}
# Create systemd service
# Install a systemd unit for the exporter. The heredoc delimiter is
# unquoted so ${ELASTICSEARCH_URL} and ${EXPORTER_PORT} expand when the
# unit is written.
create_systemd_service() {
  echo "[7/10] Creating systemd service..."
  cat > /etc/systemd/system/elasticsearch_exporter.service << EOF
[Unit]
Description=Elasticsearch Exporter
Wants=network-online.target
After=network-online.target

[Service]
Type=simple
User=elasticsearch_exporter
Group=elasticsearch_exporter
# elasticsearch_exporter is configured via flags; it has no --config.file
# option, so pass the Elasticsearch URI directly.
ExecStart=/usr/local/bin/elasticsearch_exporter --es.uri=${ELASTICSEARCH_URL} --web.listen-address=:${EXPORTER_PORT}
Restart=always
RestartSec=5
NoNewPrivileges=yes
PrivateTmp=yes

[Install]
WantedBy=multi-user.target
EOF
  chmod 644 /etc/systemd/system/elasticsearch_exporter.service
  systemctl daemon-reload
  log_info "Systemd service created"
}
# Configure firewall
# Open the exporter port in whichever host firewall matches the distro
# family: UFW on apt systems, firewalld on dnf/yum systems. Silently
# does nothing when no supported firewall is active.
configure_firewall() {
  echo "[8/10] Configuring firewall..."
  case "$PKG_MGR" in
    apt)
      if command -v ufw &> /dev/null && ufw status | grep -q "Status: active"; then
        ufw allow "${EXPORTER_PORT}/tcp"
        log_info "UFW firewall rule added"
      fi
      ;;
    dnf|yum)
      if systemctl is-active --quiet firewalld; then
        firewall-cmd --permanent --add-port="${EXPORTER_PORT}/tcp"
        firewall-cmd --reload
        log_info "Firewalld rule added"
      fi
      ;;
  esac
}
# Start and enable service
# Enable the exporter at boot, start it now, and pause briefly so the
# daemon can bind its port before verification runs.
start_service() {
  echo "[9/10] Starting and enabling service..."
  local action
  for action in enable start; do
    systemctl "$action" elasticsearch_exporter
  done
  sleep 3
  log_info "Service started and enabled"
}
# Verify installation
# Confirm the systemd unit is active and the metrics endpoint responds;
# on failure, dump the service journal (first check only) and abort.
verify_installation() {
  echo "[10/10] Verifying installation..."
  systemctl is-active --quiet elasticsearch_exporter || {
    log_error "Elasticsearch exporter service is not running"
    journalctl -u elasticsearch_exporter --no-pager -l
    exit 1
  }
  curl -s "localhost:${EXPORTER_PORT}/metrics" > /dev/null || {
    log_error "Elasticsearch exporter metrics endpoint is not responding"
    exit 1
  }
  log_info "Installation verification passed"
}
# Main execution
# Run each installation phase in order, then print a summary plus
# follow-up instructions for Prometheus/Grafana integration.
main() {
  log_info "Starting Elasticsearch exporter installation..."

  local step
  for step in check_prerequisites detect_distro update_system \
              install_exporter create_user_and_dirs create_config \
              create_systemd_service configure_firewall start_service \
              verify_installation; do
    "$step"
  done

  echo ""
  log_info "=== Installation Complete ==="
  log_info "Elasticsearch exporter is running on port ${EXPORTER_PORT}"
  log_info "Metrics URL: http://localhost:${EXPORTER_PORT}/metrics"
  log_info "Configuration: /etc/elasticsearch_exporter/config.yml"
  log_info "Service status: systemctl status elasticsearch_exporter"
  echo ""
  log_warn "Next steps:"
  echo "1. Configure Prometheus to scrape http://localhost:${EXPORTER_PORT}/metrics"
  echo "2. Import Elasticsearch dashboards in Grafana"
  echo "3. Set up alerting rules for cluster health monitoring"
}
# Run the installer with the ERR cleanup trap still armed, so any failure
# inside main triggers cleanup. (Previously `trap - ERR` ran BEFORE main,
# disarming cleanup before installation even started.)
main "$@"

# Disarm the cleanup trap only after successful completion.
trap - ERR
Review the script before running. Execute with: bash install.sh