Set up a production-ready InfluxDB Enterprise cluster with automatic data replication, failover mechanisms, and comprehensive monitoring using Grafana dashboards for time-series workloads.
Prerequisites
- 3 or more servers with 4GB+ RAM each
- Root or sudo access on all nodes
- Network connectivity between cluster nodes on ports 8086, 8088, 8091
- Basic understanding of time-series databases
What this solves
InfluxDB clustering provides high availability and horizontal scaling for time-series data workloads. This tutorial sets up InfluxDB Enterprise 2.7 with automated data replication across multiple nodes, ensuring your time-series database remains available during node failures. You'll configure load balancing, automated failover, and monitoring to create a production-ready cluster that can handle enterprise-scale metrics and IoT data streams.
Step-by-step installation
Install InfluxDB Enterprise on all nodes
Start by installing InfluxDB Enterprise on each cluster node. This creates the foundation for your high-availability setup.
# Fetch the InfluxData signing key, verify its SHA-256 (64 hex digits), register the repo, then install.
wget -q https://repos.influxdata.com/influxdata-archive_compat.key
echo '393e8779c89ac8d958f81f942f9ad7fb82a25e133faddaf92e15b16e6ac9ce4c  influxdata-archive_compat.key' | sha256sum -c && gpg --dearmor < influxdata-archive_compat.key | sudo tee /etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg > /dev/null
echo 'deb [signed-by=/etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg] https://repos.influxdata.com/debian stable main' | sudo tee /etc/apt/sources.list.d/influxdata.list
sudo apt update
sudo apt install -y influxdb2-enterprise chronograf kapacitor
Configure data nodes
Configure the first data node with clustering enabled. This node will store and replicate time-series data across the cluster.
[meta]
dir = "/var/lib/influxdb/meta"
hostname = "data-node-1"
bind-address = ":8088"
http-bind-address = ":8091"
retention-autocreate = true
election-timeout = "1s"
heartbeat-timeout = "1s"
leader-lease-timeout = "500ms"
commit-timeout = "50ms"
cluster-tracing = false
raft-promotion-enabled = true
logging-enabled = true
[data]
dir = "/var/lib/influxdb/data"
wal-dir = "/var/lib/influxdb/wal"
series-id-set-cache-size = 100
query-log-enabled = true
cache-max-memory-size = "1g"
cache-snapshot-memory-size = "25m"
cache-snapshot-write-cold-duration = "10m"
compact-full-write-cold-duration = "4h"
max-concurrent-compactions = 0
compact-throughput = "48m"
compact-throughput-burst = "48m"
max-index-log-file-size = "1m"
max-series-per-database = 1000000
max-values-per-tag = 100000
[cluster]
shard-writer-timeout = "5s"
shard-mapper-timeout = "5s"
write-timeout = "10s"
max-remote-write-connections = 3
pool-max-idle-streams = 100
pool-max-idle-time = "1m"
max-concurrent-queries = 0
query-timeout = "0s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
[retention]
enabled = true
check-interval = "30m"
[shard-precreation]
enabled = true
check-interval = "10m"
advance-period = "30m"
[monitor]
store-enabled = true
store-database = "_internal"
store-interval = "10s"
[http]
enabled = true
bind-address = ":8086"
auth-enabled = true
log-enabled = true
write-tracing = false
pprof-enabled = true
pprof-auth-enabled = true
debug-pprof-enabled = false
ping-auth-enabled = false
https-enabled = true
https-certificate = "/etc/ssl/certs/influxdb.crt"
https-private-key = "/etc/ssl/private/influxdb.key"
max-row-limit = 0
max-connection-limit = 0
shared-secret = "your-cluster-shared-secret-change-this"
realm = "InfluxDB"
[logging]
format = "auto"
level = "info"
suppress-logo = false
Generate SSL certificates for secure communication
Create SSL certificates for encrypted communication between cluster nodes and clients.
sudo mkdir -p /etc/ssl/certs /etc/ssl/private
sudo openssl req -x509 -newkey rsa:4096 -keyout /etc/ssl/private/influxdb.key -out /etc/ssl/certs/influxdb.crt -days 365 -nodes -subj "/C=US/ST=State/L=City/O=Organization/CN=example.com"
sudo chmod 600 /etc/ssl/private/influxdb.key
sudo chmod 644 /etc/ssl/certs/influxdb.crt
sudo chown influxdb:influxdb /etc/ssl/private/influxdb.key /etc/ssl/certs/influxdb.crt
Configure additional data nodes
Set up the second and third data nodes with similar configuration but different hostnames.
[meta]
dir = "/var/lib/influxdb/meta"
hostname = "data-node-2"
bind-address = ":8088"
http-bind-address = ":8091"
retention-autocreate = true
election-timeout = "1s"
heartbeat-timeout = "1s"
leader-lease-timeout = "500ms"
commit-timeout = "50ms"
cluster-tracing = false
raft-promotion-enabled = true
logging-enabled = true
[data]
dir = "/var/lib/influxdb/data"
wal-dir = "/var/lib/influxdb/wal"
series-id-set-cache-size = 100
query-log-enabled = true
cache-max-memory-size = "1g"
cache-snapshot-memory-size = "25m"
cache-snapshot-write-cold-duration = "10m"
compact-full-write-cold-duration = "4h"
max-concurrent-compactions = 0
compact-throughput = "48m"
compact-throughput-burst = "48m"
max-index-log-file-size = "1m"
max-series-per-database = 1000000
max-values-per-tag = 100000
[cluster]
shard-writer-timeout = "5s"
shard-mapper-timeout = "5s"
write-timeout = "10s"
max-remote-write-connections = 3
pool-max-idle-streams = 100
pool-max-idle-time = "1m"
max-concurrent-queries = 0
query-timeout = "0s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
[http]
enabled = true
bind-address = ":8086"
auth-enabled = true
log-enabled = true
write-tracing = false
pprof-enabled = true
pprof-auth-enabled = true
debug-pprof-enabled = false
ping-auth-enabled = false
https-enabled = true
https-certificate = "/etc/ssl/certs/influxdb.crt"
https-private-key = "/etc/ssl/private/influxdb.key"
max-row-limit = 0
max-connection-limit = 0
shared-secret = "your-cluster-shared-secret-change-this"
realm = "InfluxDB"
Initialize the cluster
Start InfluxDB on the first node and initialize the cluster with the first meta node.
sudo systemctl enable influxdb
sudo systemctl start influxdb
sudo systemctl status influxdb
Join additional nodes to the cluster
Add the remaining nodes to form a complete cluster with data replication.
sudo systemctl enable influxdb
sudo systemctl start influxdb
influx -host data-node-1:8086 -execute "CREATE USER admin WITH PASSWORD 'secure-password' WITH ALL PRIVILEGES"
influx -host data-node-1:8086 -username admin -password 'secure-password' -execute "SHOW SERVERS"
Configure HAProxy for load balancing
Set up HAProxy to distribute client connections across cluster nodes with health checks.
sudo apt install -y haproxy
global
daemon
chroot /var/lib/haproxy
stats socket /run/haproxy/admin.sock mode 660 level admin
stats timeout 30s
user haproxy
group haproxy
log stdout local0
defaults
mode http
timeout connect 5000
timeout client 50000
timeout server 50000
errorfile 400 /etc/haproxy/errors/400.http
errorfile 403 /etc/haproxy/errors/403.http
errorfile 408 /etc/haproxy/errors/408.http
errorfile 500 /etc/haproxy/errors/500.http
errorfile 502 /etc/haproxy/errors/502.http
errorfile 503 /etc/haproxy/errors/503.http
errorfile 504 /etc/haproxy/errors/504.http
frontend influxdb_frontend
bind *:8086
default_backend influxdb_backend
backend influxdb_backend
balance roundrobin
option httpchk GET /ping
server data-node-1 data-node-1:8086 check
server data-node-2 data-node-2:8086 check
server data-node-3 data-node-3:8086 check
frontend influxdb_stats
bind *:8404
stats enable
stats uri /stats
stats refresh 30s
stats admin if TRUE
Enable HAProxy and test load balancing
Start HAProxy and verify it properly distributes connections across your InfluxDB cluster nodes.
sudo systemctl enable haproxy
sudo systemctl start haproxy
sudo systemctl status haproxy
curl -k https://localhost:8086/ping
Configure data replication and retention policies
Set up automatic data replication across cluster nodes with appropriate retention policies for different data types.
influx -host localhost:8086 -username admin -password 'secure-password' -execute "CREATE DATABASE metrics"
influx -host localhost:8086 -username admin -password 'secure-password' -execute "CREATE RETENTION POLICY \"one_hour\" ON \"metrics\" DURATION 1h REPLICATION 3 DEFAULT"
influx -host localhost:8086 -username admin -password 'secure-password' -execute "CREATE RETENTION POLICY \"one_day\" ON \"metrics\" DURATION 24h REPLICATION 3"
influx -host localhost:8086 -username admin -password 'secure-password' -execute "CREATE RETENTION POLICY \"one_week\" ON \"metrics\" DURATION 168h REPLICATION 2"
influx -host localhost:8086 -username admin -password 'secure-password' -execute "SHOW RETENTION POLICIES ON metrics"
Install and configure Telegraf for metrics collection
Set up Telegraf to collect system and InfluxDB cluster metrics for monitoring.
sudo apt install -y telegraf
[global_tags]
[agent]
interval = "10s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = ""
hostname = ""
omit_hostname = false
[[outputs.influxdb]]
urls = ["https://localhost:8086"]
database = "telegraf"
username = "admin"
password = "secure-password"
skip_database_creation = false
insecure_skip_verify = true
[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = false
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
[[inputs.diskio]]
[[inputs.kernel]]
[[inputs.mem]]
[[inputs.processes]]
[[inputs.swap]]
[[inputs.system]]
[[inputs.influxdb]]
urls = [
"https://data-node-1:8086/debug/vars",
"https://data-node-2:8086/debug/vars",
"https://data-node-3:8086/debug/vars"
]
username = "admin"
password = "secure-password"
insecure_skip_verify = true
Setup automated failover with Kapacitor
Configure Kapacitor for automated alerting and failover actions when cluster nodes become unavailable.
hostname = "kapacitor-server"
data_dir = "/var/lib/kapacitor"
[http]
bind-address = ":9092"
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = false
https-enabled = false
shutdown-timeout = "10s"
[logging]
file = "STDOUT"
level = "INFO"
[replay]
dir = "/var/lib/kapacitor/replay"
[storage]
boltdb = "/var/lib/kapacitor/kapacitor.db"
[task]
dir = "/var/lib/kapacitor/tasks"
snapshot-interval = "1m0s"
[[influxdb]]
enabled = true
name = "influxdb-cluster"
default = true
urls = ["https://localhost:8086"]
username = "admin"
password = "secure-password"
ssl-ca = ""
ssl-cert = ""
ssl-key = ""
insecure-skip-verify = true
timeout = "0s"
disable-subscriptions = false
subscription-protocol = "http"
kapacitor-hostname = ""
http-port = 0
udp-bind = ""
udp-buffer = 1000
udp-read-buffer = 0
startup-timeout = "5m0s"
subscriptions-sync-interval = "1m0s"
[smtp]
enabled = true
host = "localhost"
port = 587
username = ""
password = ""
no-verify = false
global = false
state-changes-only = false
from = "kapacitor@example.com"
idle-timeout = "30s"
Create failover alerting script
Set up a TICKscript for monitoring node health and triggering alerts when nodes fail.
// Alerts when a host's mean 'requests_per_sec' over a 1-minute window (evaluated every 30s)
// drops below 1.0 (warning) or 0.1 (critical).
stream
    |from()
        .measurement('influxdb_httpd')
        .groupBy('host')
    |window()
        .period(1m)
        .every(30s)
    |mean('requests_per_sec')
    |alert()
        .id('influxdb-node-health')
        // The computed value lives in .Fields; .Level is only the alert severity.
        .message('InfluxDB node {{ index .Tags "host" }} may be down - requests per second: {{ index .Fields "mean" }}')
        .warn(lambda: "mean" < 1.0)
        .crit(lambda: "mean" < 0.1)
        .post('http://localhost:9093/api/v1/alerts')
        .email()
        .to('admin@example.com')
        .exec('/usr/local/bin/influxdb-failover.sh', '{{ index .Tags "host" }}')
Start cluster services
Enable and start all services required for the InfluxDB cluster with monitoring.
sudo systemctl enable telegraf kapacitor
sudo systemctl start telegraf kapacitor
sudo systemctl status telegraf kapacitor
# The TICKscript declares no dbrp, so the task type and database/retention policy are passed here.
# "telegraf.autogen" is the database Telegraf writes to above with its default retention policy.
kapacitor define node_health -type stream -dbrp telegraf.autogen -tick /var/lib/kapacitor/node_health.tick
kapacitor enable node_health
Install and configure Grafana for monitoring
Set up Grafana to visualize cluster health and performance metrics with automated dashboards.
# Store the Grafana signing key in a dedicated keyring (apt-key is deprecated) and pin the repo to it.
sudo mkdir -p /etc/apt/keyrings
wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/grafana.gpg > /dev/null
# Plain 'tee' (no -a) so re-running the step does not append a duplicate source line.
echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | sudo tee /etc/apt/sources.list.d/grafana.list
sudo apt update
sudo apt install -y grafana
sudo systemctl enable grafana-server
sudo systemctl start grafana-server
sudo systemctl status grafana-server
Verify your setup
Test your InfluxDB cluster configuration with these verification commands.
# Check cluster status
influx -host localhost:8086 -username admin -password 'secure-password' -execute "SHOW SERVERS"
Test data replication
influx -host localhost:8086 -username admin -password 'secure-password' -execute "USE metrics; INSERT cpu,host=server01 value=0.85"
influx -host data-node-2:8086 -username admin -password 'secure-password' -execute "USE metrics; SELECT * FROM cpu LIMIT 5"
Check HAProxy status
curl http://localhost:8404/stats
Verify Telegraf is collecting metrics
influx -host localhost:8086 -username admin -password 'secure-password' -execute "USE telegraf; SHOW MEASUREMENTS"
Check Kapacitor tasks
kapacitor list tasks
Test Grafana connectivity
curl http://localhost:3000
Configure monitoring dashboards
This tutorial integrates with our existing monitoring setup. For comprehensive cluster monitoring, you can enhance your Grafana dashboards by following our advanced Grafana configuration guide and combine it with Telegraf custom plugins for deeper InfluxDB metrics collection.
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Nodes can't join cluster | Network connectivity or shared secret mismatch | Check firewall rules on ports 8086, 8088, 8091 and verify shared-secret matches |
| SSL certificate errors | Self-signed certificate or hostname mismatch | Use insecure_skip_verify = true for testing or generate proper certificates |
| Data not replicating | Replication factor higher than available nodes | Adjust replication factor to match or be less than node count |
| High memory usage | Cache settings too high for available RAM | Reduce cache-max-memory-size in influxdb.conf |
| Query timeout errors | Heavy queries overwhelming cluster | Increase query-timeout or optimize queries with better indexing |
| HAProxy health checks failing | Authentication required for /ping endpoint | Set ping-auth-enabled = false or configure HAProxy basic auth |
| Kapacitor alerts not firing | Wrong measurement name or field in TICKscript | Check measurement names with SHOW MEASUREMENTS and verify field names |
Next steps
- Set up automated backup encryption for your cluster data
- Configure automated data retention policies for time-series optimization
- Implement continuous queries for data downsampling and aggregation
- Configure enterprise security with LDAP authentication and RBAC
- Set up cross-datacenter replication for disaster recovery
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Strict mode: abort on command failure, on unset variables, and on any failing pipeline stage.
set -o errexit -o nounset -o pipefail

# ANSI colour escapes consumed by the log_* helpers; NC resets the colour.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Script-wide state. The empty values and DATA_DIR are filled in from the command line.
SCRIPT_NAME="$(basename "$0")"
declare -a CLUSTER_NODES=()
NODE_ID=''
LICENSE_KEY=''
DATA_DIR='/var/lib/influxdb'
CONFIG_DIR='/etc/influxdb'
# Print the command-line help (synopsis, options, one example) to stdout.
# Globals: SCRIPT_NAME (read)
usage() {
  local -r prog=$SCRIPT_NAME
  printf '%s\n' \
    "Usage: $prog [OPTIONS]" \
    'Configure InfluxDB 2.7 Enterprise clustering for high availability' \
    'OPTIONS:' \
    '-n, --nodes Comma-separated list of cluster node IPs' \
    '-i, --node-id Unique node ID (1-based integer)' \
    '-l, --license InfluxDB Enterprise license key' \
    '-d, --data-dir Data directory (default: /var/lib/influxdb)' \
    '-h, --help Show this help message' \
    'EXAMPLE:' \
    "$prog --nodes \"10.0.1.10,10.0.1.11,10.0.1.12\" --node-id 1 --license \"your-license-key\""
}
# Logging helpers. Each prints "<colour>[LEVEL]<reset> <message>" on one line.
# Arguments: $1 - message text, printed literally (backslashes in it are not interpreted).
# Outputs:   info/success go to stdout; warning/error go to stderr so they stay out of captured output.
# Globals:   BLUE, GREEN, YELLOW, RED, NC (read)
log_info() { printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "${1-}"; }
log_success() { printf '%b[SUCCESS]%b %s\n' "${GREEN}" "${NC}" "${1-}"; }
log_warning() { printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "${1-}" >&2; }
log_error() { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}" >&2; }
# Rollback handler for the ERR trap.
# Captures the status of the command that failed; when it is non-zero, logs the failure and
# stops, then disables, the influxdb, chronograf and kapacitor units (each step best-effort),
# and finally exits with that same status.
cleanup() {
  local exit_code=$?
  local svc
  if (( exit_code != 0 )); then
    log_error "Installation failed. Cleaning up..."
    for svc in influxdb chronograf kapacitor; do
      # Best-effort: the unit may not be installed yet when the failure happens early.
      systemctl stop "$svc" || true
    done
    for svc in influxdb chronograf kapacitor; do
      systemctl disable "$svc" || true
    done
  fi
  exit "$exit_code"
}
# Without errtrace an ERR trap is not inherited by shell functions, and every installation
# step in this script runs inside a function - so the rollback would never be triggered.
set -o errtrace
trap cleanup ERR
# Parse command line arguments
parse_args() {
while [[ $# -gt 0 ]]; do
case $1 in
-n|--nodes)
IFS=',' read -ra CLUSTER_NODES <<< "$2"
shift 2
;;
-i|--node-id)
NODE_ID="$2"
shift 2
;;
-l|--license)
LICENSE_KEY="$2"
shift 2
;;
-d|--data-dir)
DATA_DIR="$2"
shift 2
;;
-h|--help)
usage
exit 0
;;
*)
log_error "Unknown option: $1"
usage
exit 1
;;
esac
done
if [[ ${#CLUSTER_NODES[@]} -eq 0 || -z "$NODE_ID" || -z "$LICENSE_KEY" ]]; then
log_error "Missing required arguments"
usage
exit 1
fi
}
# Verify the script runs as root and that curl and gpg are on PATH.
# Exits 1 with an error message on the first unmet requirement.
check_prerequisites() {
  local tool

  log_info "[1/8] Checking prerequisites..."

  (( EUID == 0 )) || {
    log_error "This script must be run as root"
    exit 1
  }

  for tool in curl gpg; do
    command -v "$tool" > /dev/null 2>&1 && continue
    log_error "$tool is required but not installed"
    exit 1
  done

  log_success "Prerequisites check passed"
}
# Detect the Linux distribution and select the matching package/firewall tooling.
# Sources the os-release file and maps its ID field to a package manager.
# Globals:   OS_RELEASE_FILE (read, optional; defaults to /etc/os-release)
#            PKG_MGR, PKG_UPDATE, PKG_INSTALL, FIREWALL_CMD (written)
# Exits:     1 when the file is missing, carries no ID, or names an unsupported distribution.
detect_distro() {
  local os_release="${OS_RELEASE_FILE:-/etc/os-release}"
  # Local and pre-initialised so a file lacking either field cannot trip 'set -u'
  # and no stale value from the environment is picked up.
  local ID="" PRETTY_NAME=""

  log_info "[2/8] Detecting Linux distribution..."

  if [[ ! -f "$os_release" ]]; then
    log_error "Cannot detect Linux distribution"
    exit 1
  fi

  # shellcheck source=/dev/null
  . "$os_release"

  case "$ID" in
    ubuntu|debian)
      PKG_MGR="apt"
      PKG_UPDATE="apt update"
      PKG_INSTALL="apt install -y"
      FIREWALL_CMD="ufw"
      ;;
    almalinux|rocky|centos|rhel|ol|fedora)
      PKG_MGR="dnf"
      PKG_UPDATE="dnf makecache"
      PKG_INSTALL="dnf install -y"
      FIREWALL_CMD="firewall-cmd"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_UPDATE="yum makecache"
      PKG_INSTALL="yum install -y"
      FIREWALL_CMD="firewall-cmd"
      ;;
    "")
      log_error "Cannot detect Linux distribution"
      exit 1
      ;;
    *)
      log_error "Unsupported distribution: $ID"
      exit 1
      ;;
  esac

  # PRETTY_NAME is optional in os-release; fall back to the ID.
  log_success "Detected distribution: ${PRETTY_NAME:-$ID} (Package manager: $PKG_MGR)"
}
# Register the InfluxData package repository for the detected package manager.
# apt:     downloads the signing key with curl (the tool check_prerequisites verifies),
#          checks its SHA-256, converts it to a binary keyring and writes the sources entry.
# dnf/yum: writes /etc/yum.repos.d/influxdata.repo.
# Globals: PKG_MGR (read)
# Exits:   1 when the key cannot be downloaded or its checksum does not match.
setup_repository() {
  local -r key_url="https://repos.influxdata.com/influxdata-archive_compat.key"
  # SHA-256 digests are exactly 64 hex digits.
  local -r key_sha256="393e8779c89ac8d958f81f942f9ad7fb82a25e133faddaf92e15b16e6ac9ce4c"
  local -r keyring="/etc/apt/trusted.gpg.d/influxdata-archive_compat.gpg"
  local key_file

  log_info "[3/8] Setting up InfluxData repository..."

  case "$PKG_MGR" in
    apt)
      # Unpredictable temp name instead of a fixed path under /tmp.
      key_file=$(mktemp)
      if ! curl -fsSL "$key_url" -o "$key_file"; then
        rm -f -- "$key_file"
        log_error "Failed to download the InfluxData signing key"
        exit 1
      fi
      if ! printf '%s  %s\n' "$key_sha256" "$key_file" | sha256sum -c -; then
        rm -f -- "$key_file"
        log_error "InfluxData signing key checksum mismatch"
        exit 1
      fi
      gpg --dearmor < "$key_file" > "$keyring"
      rm -f -- "$key_file"
      chmod 644 "$keyring"
      echo "deb [signed-by=$keyring] https://repos.influxdata.com/debian stable main" > /etc/apt/sources.list.d/influxdata.list
      chmod 644 /etc/apt/sources.list.d/influxdata.list
      ;;
    dnf|yum)
      # Quoted delimiter: the body is written verbatim, so $releasever/$basearch reach the
      # repo file unexpanded and must NOT be backslash-escaped here.
      cat > /etc/yum.repos.d/influxdata.repo << 'EOF'
[influxdata]
name = InfluxData Repository
baseurl = https://repos.influxdata.com/rhel/$releasever/$basearch/stable/
enabled = 1
gpgcheck = 1
gpgkey = https://repos.influxdata.com/influxdata-archive_compat.key
EOF
      chmod 644 /etc/yum.repos.d/influxdata.repo
      ;;
  esac

  log_success "Repository setup completed"
}
# Refresh the package index, then install the InfluxDB Enterprise, Chronograf and Kapacitor packages.
# Globals: PKG_UPDATE, PKG_INSTALL (read) - whitespace-separated command lines.
install_influxdb_enterprise() {
  local -a refresh_cmd install_cmd

  log_info "[4/8] Installing InfluxDB Enterprise components..."

  # Split the stored command lines into words explicitly.
  read -ra refresh_cmd <<< "$PKG_UPDATE"
  read -ra install_cmd <<< "$PKG_INSTALL"

  "${refresh_cmd[@]}"
  "${install_cmd[@]}" influxdb2-enterprise chronograf kapacitor

  log_success "InfluxDB Enterprise components installed"
}
# Create the config/data directories and write $CONFIG_DIR/influxdb.conf.
# The file embeds the license key, so it is created with mode 640 (owner influxdb) rather than
# world-readable, and the key is escaped so quotes or backslashes cannot break the TOML string.
# Globals: CONFIG_DIR, DATA_DIR, LICENSE_KEY (read)
configure_clustering() {
  local config_file="$CONFIG_DIR/influxdb.conf"
  local license_escaped

  log_info "[5/8] Configuring InfluxDB clustering..."

  mkdir -p "$CONFIG_DIR" "$DATA_DIR"
  chown influxdb:influxdb "$DATA_DIR"
  chmod 755 "$DATA_DIR"

  # Escape backslashes first, then double quotes, for a TOML basic string.
  license_escaped=${LICENSE_KEY//\\/\\\\}
  license_escaped=${license_escaped//\"/\\\"}

  # Restrict permissions before any secret is written into the file.
  : > "$config_file"
  chmod 640 "$config_file"

  # Generate cluster configuration
  cat > "$config_file" << EOF
# InfluxDB Enterprise cluster configuration
[enterprise]
license-key = "$license_escaped"
[meta]
dir = "$DATA_DIR/meta"
bind-address = ":8089"
http-bind-address = ":8091"
[data]
dir = "$DATA_DIR/data"
wal-dir = "$DATA_DIR/wal"
[cluster]
shard-writer-timeout = "10s"
write-timeout = "10s"
max-concurrent-queries = 0
query-timeout = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0
[retention]
enabled = true
check-interval = "30m0s"
[http]
enabled = true
bind-address = ":8086"
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = true
https-enabled = false
max-row-limit = 0
max-connection-limit = 0
shared-secret = ""
realm = "InfluxDB"
[logging]
level = "info"
suppress-logo = false
EOF

  chown influxdb:influxdb "$config_file"
  chmod 640 "$config_file"

  log_success "Clustering configuration completed"
}
# Open the cluster's TCP ports in the host firewall, when that firewall is active.
# Covers 8088 as well: start_services registers data nodes on that port, so it has to be
# reachable between nodes.
# Globals: FIREWALL_CMD (read) - "ufw" or "firewall-cmd"; any other value opens nothing.
configure_firewall() {
  # "port:description" pairs; the description is used as the ufw rule comment.
  local -a rules=(
    "8086:InfluxDB HTTP API"
    "8088:InfluxDB Data RPC"
    "8089:InfluxDB Meta"
    "8091:InfluxDB Meta HTTP"
    "8888:Chronograf"
    "9092:Kapacitor"
  )
  local rule

  log_info "[6/8] Configuring firewall..."

  case "$FIREWALL_CMD" in
    ufw)
      # Capture the status instead of piping into 'grep -q', which can make the pipeline
      # fail under pipefail when grep exits before ufw has finished writing.
      if command -v ufw &> /dev/null && [[ "$(ufw status)" == *"Status: active"* ]]; then
        for rule in "${rules[@]}"; do
          ufw allow "${rule%%:*}/tcp" comment "${rule#*:}"
        done
      fi
      ;;
    firewall-cmd)
      if systemctl is-active --quiet firewalld; then
        for rule in "${rules[@]}"; do
          firewall-cmd --permanent --add-port="${rule%%:*}/tcp"
        done
        firewall-cmd --reload
      fi
      ;;
  esac

  log_success "Firewall configuration completed"
}
# Enable all three services, start InfluxDB, and - on node 1 only - register this host and
# every other entry of CLUSTER_NODES as meta (port 8091) and data (port 8088) nodes.
# Chronograf and Kapacitor are started last.
# Globals: NODE_ID, CLUSTER_NODES (read)
start_services() {
  local -r ping_url="http://localhost:8086/ping"
  local -r max_wait=30
  local attempt idx node_ip self

  log_info "[7/8] Starting and enabling InfluxDB services..."
  systemctl daemon-reload
  systemctl enable influxdb chronograf kapacitor
  systemctl start influxdb

  # Poll the HTTP API (up to max_wait seconds) instead of sleeping a fixed time: proceeds as
  # soon as the server answers and tolerates a slow start.
  for (( attempt = 0; attempt < max_wait; attempt++ )); do
    if curl -fsS -o /dev/null "$ping_url" 2> /dev/null; then
      break
    fi
    sleep 1
  done
  if (( attempt >= max_wait )); then
    log_warning "InfluxDB did not answer on $ping_url within ${max_wait}s; continuing anyway"
  fi

  # Initialize cluster if this is node 1
  if [[ "$NODE_ID" == "1" ]]; then
    log_info "Initializing InfluxDB cluster..."
    self=$(hostname)
    influxd-ctl add-meta "${self}:8091"
    influxd-ctl add-data "${self}:8088"

    # Add the other nodes; list position (1-based) equal to NODE_ID is this node itself.
    for idx in "${!CLUSTER_NODES[@]}"; do
      if (( idx + 1 == NODE_ID )); then
        continue
      fi
      node_ip="${CLUSTER_NODES[$idx]}"
      # A node that cannot be joined is reported but does not abort the installation.
      influxd-ctl add-meta "$node_ip:8091" || log_warning "Failed to add meta node $node_ip"
      influxd-ctl add-data "$node_ip:8088" || log_warning "Failed to add data node $node_ip"
    done
  fi

  systemctl start chronograf kapacitor
  log_success "Services started and enabled"
}
# Verify the installation: all three services are active, the HTTP API answers /ping with a
# success status, and (on node 1) 'influxd-ctl show' succeeds.
# Globals: NODE_ID (read)
# Exits:   1 when a service is not running or the API check fails; a failing cluster status
#          check only produces a warning.
verify_installation() {
  local service

  log_info "[8/8] Verifying installation..."

  # Check service status
  for service in influxdb chronograf kapacitor; do
    if systemctl is-active --quiet "$service"; then
      log_success "$service is running"
    else
      log_error "$service is not running"
      exit 1
    fi
  done

  # Test InfluxDB connection. -f makes curl fail on HTTP error responses; without it any
  # reply, including an error status, would count as "responding".
  if curl -fsS -o /dev/null http://localhost:8086/ping 2> /dev/null; then
    log_success "InfluxDB HTTP API is responding"
  else
    log_error "InfluxDB HTTP API is not responding"
    exit 1
  fi

  # Check cluster status (if node 1)
  if [[ "$NODE_ID" == "1" ]]; then
    if influxd-ctl show > /dev/null 2>&1; then
      log_success "Cluster status check passed"
    else
      log_warning "Cluster status check failed - may need manual configuration"
    fi
  fi

  log_success "Installation verification completed"
}
# Entry point: parse the command line, run the eight installation steps in order, then print
# a summary with the Chronograf/InfluxDB URLs and the data/config locations.
# Arguments: the script's command line ("$@").
main() {
  local step primary_ip

  parse_args "$@"

  for step in \
    check_prerequisites \
    detect_distro \
    setup_repository \
    install_influxdb_enterprise \
    configure_clustering \
    configure_firewall \
    start_services \
    verify_installation; do
    "$step"
  done

  log_success "InfluxDB Enterprise cluster installation completed!"

  # First address reported by 'hostname -I'. '|| true' keeps a failed lookup non-fatal.
  primary_ip=$(hostname -I | awk '{print $1}') || true

  log_info "Access Chronograf at: http://${primary_ip}:8888"
  log_info "InfluxDB API endpoint: http://${primary_ip}:8086"
  log_info "Data directory: $DATA_DIR"
  log_info "Config file: $CONFIG_DIR/influxdb.conf"
}
main "$@"
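Review the script before running. Save it as install.sh and run it as root on every node, passing the required arguments, for example: sudo bash install.sh --nodes "10.0.1.10,10.0.1.11,10.0.1.12" --node-id 1 --license "your-license-key"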