Set up comprehensive container monitoring with cAdvisor, Prometheus, and Grafana to collect detailed metrics on CPU, memory, network, and disk usage. This tutorial covers installation, configuration, and alerting for production-ready container performance monitoring.
Prerequisites
- Docker installed and running
- Minimum 4GB RAM available
- Firewall access to ports 3000, 8080, 9090
What this solves
Container environments require detailed monitoring to track resource usage, identify performance bottlenecks, and ensure optimal application performance. cAdvisor (Container Advisor) provides comprehensive container metrics collection, while Prometheus stores and queries these metrics for analysis. Combined with Grafana dashboards and alerting rules, this monitoring stack gives you complete visibility into container performance across CPU, memory, network, and disk usage patterns.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you get the latest versions of Docker and monitoring tools.
sudo apt update && sudo apt upgrade -y
sudo apt install -y curl wget git
Install Docker and Docker Compose
Install Docker to run containers and Docker Compose to manage the monitoring stack deployment.
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo usermod -aG docker $USER
sudo apt install -y docker-compose-plugin
Log out and back in for group changes to take effect, or run:
newgrp docker
sudo systemctl enable --now docker
Create monitoring directory structure
Set up the directory structure for configuration files and persistent data storage.
mkdir -p ~/container-monitoring/{prometheus,grafana,configs}
cd ~/container-monitoring
Configure Prometheus
Create the Prometheus configuration file at configs/prometheus.yml (the path the Docker Compose file mounts into the container) with cAdvisor as a scrape target and container discovery settings.
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- "/etc/prometheus/alerts.yml"
alerting:
alertmanagers:
- static_configs:
- targets:
- alertmanager:9093
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'cadvisor'
static_configs:
- targets: ['cadvisor:8080']
scrape_interval: 5s
metrics_path: /metrics
- job_name: 'docker-containers'
docker_sd_configs:
- host: unix:///var/run/docker.sock
refresh_interval: 5s
relabel_configs:
- source_labels: [__meta_docker_container_name]
target_label: container_name
- source_labels: [__meta_docker_container_id]
target_label: container_id
Create container alerting rules
Set up alerting rules in configs/alerts.yml (mounted as /etc/prometheus/alerts.yml) for container performance monitoring, including high CPU usage, high memory usage, and container failures.
# Alerting rules evaluated by Prometheus against the metrics scraped from cAdvisor.
groups:
  - name: container.rules
    rules:
      # Sustained CPU consumption above 80% of one core.
      - alert: ContainerHighCpuUsage
        expr: rate(container_cpu_usage_seconds_total{name!=""}[1m]) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for more than 5 minutes."
      # The "> 0" filter drops containers without a memory limit: their limit is
      # reported as 0, and dividing by it yields +Inf, which would always alert.
      - alert: ContainerHighMemoryUsage
        expr: (container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)) * 100 > 90
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
          description: "Container {{ $labels.name }} memory usage is above 90% for more than 5 minutes."
      # Fires when the cAdvisor scrape target itself is unreachable.
      - alert: ContainerDown
        expr: up{job="cadvisor"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "cAdvisor is down"
          description: "cAdvisor has been down for more than 1 minute."
      # container_start_time_seconds is a gauge (a timestamp), so count how often
      # its value changed instead of applying the counter-only increase().
      - alert: ContainerRestarting
        expr: changes(container_start_time_seconds{name!=""}[10m]) > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} restarting"
          description: "Container {{ $labels.name }} has restarted in the last 10 minutes."
Create Grafana provisioning configuration
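Set up Grafana datasource and dashboard provisioning for automatic configuration. After creating the directories below, save the datasource definition as a YAML file in configs/grafana/provisioning/datasources/ and the dashboard provider definition as a YAML file in configs/grafana/provisioning/dashboards/.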
mkdir -p configs/grafana/{provisioning/datasources,provisioning/dashboards}
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
access: proxy
isDefault: true
editable: true
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards
Create Docker Compose configuration
Set up the complete monitoring stack with cAdvisor, Prometheus, Grafana, and Alertmanager using Docker Compose. Save the following as docker-compose.yml in ~/container-monitoring.
version: '3.8'
services:
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.49.1
container_name: cadvisor
ports:
- "8080:8080"
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
- /dev/disk/:/dev/disk:ro
privileged: true
devices:
- /dev/kmsg
restart: unless-stopped
command:
- '--housekeeping_interval=10s'
- '--docker_only=false'
- '--store_container_labels=false'
- '--whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace'
prometheus:
image: prom/prometheus:v2.48.1
container_name: prometheus
ports:
- "9090:9090"
volumes:
- ./configs/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./configs/alerts.yml:/etc/prometheus/alerts.yml:ro
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--storage.tsdb.retention.time=200h'
- '--web.enable-lifecycle'
restart: unless-stopped
depends_on:
- cadvisor
grafana:
image: grafana/grafana:10.2.2
container_name: grafana
ports:
- "3000:3000"
volumes:
- grafana_data:/var/lib/grafana
- ./configs/grafana/provisioning:/etc/grafana/provisioning:ro
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=secure_password_123
- GF_USERS_ALLOW_SIGN_UP=false
- GF_SECURITY_DISABLE_GRAVATAR=true
- GF_SECURITY_COOKIE_SECURE=true
- GF_SECURITY_STRICT_TRANSPORT_SECURITY=true
restart: unless-stopped
depends_on:
- prometheus
alertmanager:
image: prom/alertmanager:v0.26.0
container_name: alertmanager
ports:
- "9093:9093"
volumes:
- ./configs/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
- alertmanager_data:/alertmanager
command:
- '--config.file=/etc/alertmanager/alertmanager.yml'
- '--storage.path=/alertmanager'
- '--web.external-url=http://localhost:9093'
restart: unless-stopped
volumes:
prometheus_data:
grafana_data:
alertmanager_data:
networks:
default:
name: monitoring
Configure Alertmanager
Set up the Alertmanager configuration in configs/alertmanager.yml (mounted as /etc/alertmanager/alertmanager.yml) for email notifications when container alerts trigger.
global:
smtp_smarthost: 'localhost:587'
smtp_from: 'alerts@example.com'
smtp_auth_username: 'alerts@example.com'
smtp_auth_password: 'your_email_password'
route:
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'web.hook'
receivers:
- name: 'web.hook'
email_configs:
- to: 'admin@example.com'
subject: 'Container Alert: {{ .GroupLabels.alertname }}'
body: |
{{ range .Alerts }}
Alert: {{ .Annotations.summary }}
Description: {{ .Annotations.description }}
Labels:
{{ range .Labels.SortedPairs }}
{{ .Name }}: {{ .Value }}
{{ end }}
{{ end }}
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
Deploy the monitoring stack
Start all monitoring services with Docker Compose and verify they are running correctly.
docker compose up -d
docker compose ps
Create container performance dashboard
Set up a comprehensive Grafana dashboard for container monitoring with key performance metrics.
mkdir -p configs/grafana/dashboards
{
"dashboard": {
"id": null,
"title": "Container Performance Monitor",
"tags": ["docker", "containers"],
"style": "dark",
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Container CPU Usage",
"type": "graph",
"targets": [
{
"expr": "rate(container_cpu_usage_seconds_total{name!=\"\"}[1m]) * 100",
"legendFormat": "{{name}}"
}
],
"yAxes": [
{
"label": "CPU %",
"max": 100
}
],
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
},
{
"id": 2,
"title": "Container Memory Usage",
"type": "graph",
"targets": [
{
"expr": "container_memory_usage_bytes{name!=\"\"}",
"legendFormat": "{{name}}"
}
],
"yAxes": [
{
"label": "Bytes"
}
],
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"refresh": "5s"
}
}
Configure firewall access
Open the necessary ports for accessing the monitoring services securely.
sudo ufw allow 3000/tcp comment "Grafana"
sudo ufw allow 9090/tcp comment "Prometheus"
sudo ufw allow 8080/tcp comment "cAdvisor"
sudo ufw reload
Set up container resource limits
Configure resource limits for the monitoring stack to prevent resource exhaustion.
docker compose down
sed -i '/restart: unless-stopped/a\ deploy:\n resources:\n limits:\n cpus: "0.5"\n memory: 512M' docker-compose.yml
docker compose up -d
Configure advanced monitoring features
Enable container log collection
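Configure Prometheus to scrape log-related metrics that complement container metrics, similar to the approach used in centralized log aggregation with ELK Stack. Note that Prometheus stores metrics, not log lines: the job below scrapes the textfile and systemd collectors of an exporter listening on port 9100, so that exporter must be running and reachable from the Prometheus container at the configured address.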
# Add to existing scrape_configs:
- job_name: 'container-logs'
static_configs:
- targets: ['localhost:9100']
metrics_path: /metrics
params:
collect[]:
- textfile
- systemd
Create custom metric collection
Set up custom application metrics collection for containers running web applications.
# Recording rules: precompute frequently used container expressions every 30s.
groups:
  - name: application.rules
    interval: 30s
    rules:
      # Per-container CPU seconds consumed per second, averaged over 5 minutes.
      - record: container_cpu_usage_rate
        expr: rate(container_cpu_usage_seconds_total{name!=""}[5m])
      # Memory usage as a fraction of the limit. The "> 0" filter skips containers
      # without a limit (reported as 0), which would otherwise record +Inf.
      - record: container_memory_usage_ratio
        expr: container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)
      # Combined receive + transmit throughput in bytes per second.
      - record: container_network_io_rate
        expr: rate(container_network_receive_bytes_total{name!=""}[5m]) + rate(container_network_transmit_bytes_total{name!=""}[5m])
Verify your setup
Check that all monitoring services are running and collecting metrics properly.
# Show the state of every service in the stack.
docker compose ps
# cAdvisor should expose per-container CPU counters.
curl -s http://localhost:8080/metrics | grep container_cpu_usage_seconds_total | head -5
# The URL is quoted so the shell never treats "?" as a glob character.
# jq must be installed for this command.
curl -s 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result[].metric.job'
# Recent log output of the two core services.
docker logs cadvisor --tail 20
docker logs prometheus --tail 20
Access the web interfaces to verify functionality:
- cAdvisor: http://your-server-ip:8080 - Container metrics interface
- Prometheus: http://your-server-ip:9090 - Metrics database and query interface
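- Grafana: http://your-server-ip:3000 - Dashboard and visualization (admin/secure_password_123). Because the Compose file sets GF_SECURITY_COOKIE_SECURE=true, the login cookie is only sent over HTTPS; access Grafana through a TLS-terminating reverse proxy, or set that option to false while testing over plain HTTP.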
Common issues
| Symptom | Cause | Fix |
|---|---|---|
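| cAdvisor shows no containers | cAdvisor cannot read the Docker socket or /var/lib/docker | Confirm the /var/run and /var/lib/docker volume mounts in docker-compose.yml, check which user cAdvisor runs as with docker exec -it cadvisor id, and review docker logs cadvisor |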
| Prometheus can't scrape cAdvisor | Network connectivity issues | Check Docker network: docker network ls and restart services |
| Grafana shows "No data" panels | Prometheus datasource not configured | Check datasource config: docker logs grafana | grep datasource |
| High memory usage on monitoring host | Too many metrics collected | Increase the scrape interval (scrape less often) in prometheus.yml and restart |
| Container metrics missing labels | Label whitelist too restrictive | Update cAdvisor command args to include required labels |
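| Alerts not firing | Alert rule or Alertmanager configuration error | Validate the Prometheus config and rule files: docker exec prometheus promtool check config /etc/prometheus/prometheus.yml; validate Alertmanager: docker exec alertmanager amtool check-config /etc/alertmanager/alertmanager.yml |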
Next steps
- Set up Prometheus and Grafana monitoring stack with Docker compose - Expand monitoring to system metrics
- Monitor system resources with Netdata real-time performance dashboard - Add complementary real-time monitoring
- Configure Prometheus Alertmanager with email notifications and webhook integration - Enhanced alerting setup
- Implement container security monitoring with Falco runtime detection - Security monitoring for containers
- Set up Kubernetes monitoring with Prometheus Operator and custom metrics - Kubernetes-specific monitoring
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Strict mode: stop on a failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ANSI colour sequences for status messages. They are stored with a literal
# backslash and expanded where the script prints them with `echo -e`.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Script-wide settings.
COMPOSE_VERSION="v2.24.0"
INSTALL_DIR="$HOME/container-monitoring"
#######################################
# EXIT trap handler: runs on every exit path and acts only after a failure.
# It undoes only what this run created: the stack is stopped and INSTALL_DIR
# is removed solely when INSTALL_DIR_CREATED is 1. A directory that is not
# marked that way is left untouched, so a failed run (or a mistyped option)
# can never delete an existing installation or an arbitrary directory passed
# with --dir.
# Globals:   INSTALL_DIR (read), INSTALL_DIR_CREATED (read, optional),
#            RED, NC (read)
# Arguments: none
# Outputs:   diagnostics on stderr
# Returns:   0; the script's original exit status is preserved by the shell
#######################################
cleanup() {
  # Must be the first statement: any later command overwrites $?.
  local status=$?
  if (( status == 0 )); then
    return 0
  fi

  echo -e "${RED}[ERROR] Installation failed. Cleaning up...${NC}" >&2

  if [[ -z "${INSTALL_DIR:-}" || ! -d "$INSTALL_DIR" ]]; then
    return 0
  fi

  if [[ "${INSTALL_DIR_CREATED:-0}" != "1" ]]; then
    echo -e "${RED}[ERROR] Leaving ${INSTALL_DIR} untouched (not created by this run).${NC}" >&2
    return 0
  fi

  # Best effort: the stack may never have been started. The subshell keeps the
  # caller's working directory unchanged.
  if [[ -f "$INSTALL_DIR/docker-compose.yml" ]] && command -v docker > /dev/null 2>&1; then
    (cd "$INSTALL_DIR" && docker compose down) > /dev/null 2>&1 || true
  fi

  # ":?" aborts the expansion instead of running "rm -rf" on an empty path.
  rm -rf -- "${INSTALL_DIR:?}"
  return 0
}
trap cleanup EXIT
#######################################
# Print the command-line help and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 1).
#                 Pass 0 when the user explicitly asked for help.
# Outputs:   help text on stdout when the status is 0, otherwise on stderr
# Returns:   never returns; exits with the given status
#######################################
usage() {
  local status=${1:-1}
  local fd=1
  # Help shown because of an error belongs on stderr.
  (( status == 0 )) || fd=2
  {
    echo "Usage: $0 [OPTIONS]"
    echo "Options:"
    echo " -h, --help Show this help message"
    echo " -d, --dir DIR Installation directory (default: ~/container-monitoring)"
  } >&"$fd"
  exit "$status"
}
#######################################
# Parse the command-line options.
# Globals:   INSTALL_DIR (written), RED, NC (read)
# Arguments: the script's positional parameters
# Outputs:   error messages on stderr
# Returns:   0 on success; terminates through usage on invalid input
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        usage 0
        ;;
      -d | --dir)
        # Check the value before touching $2: under "set -u" a missing value
        # would otherwise abort with an "unbound variable" error.
        if (( $# < 2 )) || [[ -z "$2" ]]; then
          echo -e "${RED}Option $1 requires a directory argument${NC}" >&2
          usage
        fi
        # Store an absolute path: later steps change the working directory and
        # then refer to INSTALL_DIR again, which breaks for relative paths.
        if [[ "$2" == /* ]]; then
          INSTALL_DIR=$2
        else
          INSTALL_DIR="$PWD/$2"
        fi
        shift
        ;;
      *)
        echo -e "${RED}Unknown option: $1${NC}" >&2
        usage
        ;;
    esac
    shift
  done
}
parse_args "$@"
#######################################
# Detect the Linux distribution and select the package-manager commands.
# Globals:   PKG_MGR, PKG_UPDATE, PKG_INSTALL (written); RED, NC (read)
# Arguments: $1 - os-release file to read (optional, default /etc/os-release);
#                 give a path containing a slash so it is not looked up in PATH
# Outputs:   error messages on stderr
# Returns:   0 on success; exits 1 when the file is missing or the
#            distribution is not supported
#######################################
detect_distro() {
  local os_release=${1:-/etc/os-release}
  local distro_id

  if [[ ! -f "$os_release" ]]; then
    echo -e "${RED}Cannot detect Linux distribution${NC}" >&2
    exit 1
  fi

  # Source the file in a subshell so that only ID is taken from it and its
  # other variables do not leak into the script. A missing ID becomes an empty
  # string instead of tripping "set -u".
  distro_id=$(. "$os_release" > /dev/null 2>&1 && printf '%s' "${ID:-}") || distro_id=''

  case "$distro_id" in
    ubuntu | debian)
      PKG_MGR="apt"
      PKG_UPDATE="apt update && apt upgrade -y"
      PKG_INSTALL="apt install -y"
      ;;
    almalinux | rocky | centos | rhel | ol | fedora)
      # Older releases of this family ship only yum; use dnf when it exists.
      if command -v dnf > /dev/null 2>&1; then
        PKG_MGR="dnf"
      else
        PKG_MGR="yum"
      fi
      PKG_UPDATE="${PKG_MGR} update -y"
      PKG_INSTALL="${PKG_MGR} install -y"
      ;;
    amzn)
      PKG_MGR="yum"
      PKG_UPDATE="yum update -y"
      PKG_INSTALL="yum install -y"
      ;;
    *)
      echo -e "${RED}Unsupported distribution: ${distro_id:-unknown}${NC}" >&2
      exit 1
      ;;
  esac
}
#######################################
# Step 1/8: refuse to run as root, require sudo, then detect the distribution.
# Globals:   EUID, BLUE, RED, GREEN, NC (read)
# Outputs:   progress and error messages on stdout
# Returns:   0 on success; exits 1 when a prerequisite is not met
#######################################
check_prerequisites() {
  local problem=''

  echo -e "${BLUE}[1/8] Checking prerequisites...${NC}"

  # Work out the first unmet requirement, if any, and report it in one place.
  if (( EUID == 0 )); then
    problem="Please don't run this script as root"
  elif ! command -v sudo > /dev/null 2>&1; then
    problem="sudo is required but not installed"
  fi

  if [[ -n "$problem" ]]; then
    echo -e "${RED}${problem}${NC}"
    exit 1
  fi

  detect_distro
  echo -e "${GREEN}Prerequisites check passed${NC}"
}
#######################################
# Step 2/8: refresh the package index, upgrade the system, and install the
# basic tools (curl, wget, git).
# Globals:   PKG_UPDATE, PKG_INSTALL, BLUE, GREEN, NC (read)
# Outputs:   progress messages on stdout
#######################################
update_system() {
  local -a install_cmd

  echo -e "${BLUE}[2/8] Updating system packages...${NC}"

  # PKG_UPDATE may chain commands with "&&", so it needs a shell to run it.
  sudo bash -c "$PKG_UPDATE"

  # PKG_INSTALL holds a command plus its flags; split it into separate words.
  read -r -a install_cmd <<< "$PKG_INSTALL"
  sudo "${install_cmd[@]}" curl wget git

  echo -e "${GREEN}System packages updated${NC}"
}
#######################################
# Step 3/8: make sure Docker and the Docker Compose plugin are available.
# When Docker is already installed only the Compose plugin is checked (and
# installed if missing), because later steps rely on "docker compose".
# Otherwise Docker is installed with the get.docker.com convenience script,
# the current user is added to the docker group, and the service is enabled.
# Globals:   PKG_INSTALL, USER, BLUE, GREEN, YELLOW, RED, NC (read)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, non-zero when an installation step fails
#######################################
install_docker() {
  local installer target_user
  local rc=0
  local -a pkg_install

  echo -e "${BLUE}[3/8] Installing Docker...${NC}"

  # PKG_INSTALL holds a command plus its flags; split it into separate words.
  read -r -a pkg_install <<< "$PKG_INSTALL"

  if command -v docker > /dev/null 2>&1; then
    echo -e "${YELLOW}Docker already installed${NC}"
    if docker compose version > /dev/null 2>&1; then
      return 0
    fi
    echo -e "${YELLOW}Docker Compose plugin not found; installing docker-compose-plugin...${NC}"
    if ! sudo "${pkg_install[@]}" docker-compose-plugin; then
      echo -e "${RED}Could not install docker-compose-plugin; install Docker Compose v2 manually and re-run${NC}" >&2
      return 1
    fi
    return 0
  fi

  # Download to a private temporary file rather than a predictable name in the
  # current directory, and remove it whether or not the installer succeeds.
  installer=$(mktemp) || return 1
  curl -fsSL https://get.docker.com -o "$installer" && sudo sh "$installer" || rc=$?
  rm -f -- "$installer"
  if (( rc != 0 )); then
    echo -e "${RED}Docker installation script failed${NC}" >&2
    return "$rc"
  fi

  # USER is not guaranteed to be set; fall back to the effective user name.
  target_user=${USER:-$(id -un)}
  sudo usermod -aG docker "$target_user"

  # Install docker-compose plugin
  sudo "${pkg_install[@]}" docker-compose-plugin
  sudo systemctl enable --now docker

  echo -e "${GREEN}Docker installed successfully${NC}"
  echo -e "${YELLOW}Please log out and back in, or run 'newgrp docker' for group changes to take effect${NC}"
}
#######################################
# Step 4/8: create the installation directory tree.
# Sets INSTALL_DIR_CREATED=1 when INSTALL_DIR did not exist before this call,
# so that later steps can tell a directory created by this run from one that
# was already there. The variable is left untouched for an existing directory.
# Globals:   INSTALL_DIR (read), INSTALL_DIR_CREATED (written),
#            BLUE, GREEN, NC (read)
# Outputs:   progress messages on stdout
#######################################
create_directories() {
  echo -e "${BLUE}[4/8] Creating directory structure...${NC}"

  # Record this before mkdir so a partially created tree is still marked.
  if [[ ! -e "$INSTALL_DIR" ]]; then
    INSTALL_DIR_CREATED=1
  fi

  mkdir -p -- "$INSTALL_DIR"/{prometheus,grafana,configs}
  mkdir -p -- "$INSTALL_DIR"/grafana/{dashboards,provisioning/{dashboards,datasources}}

  echo -e "${GREEN}Directory structure created${NC}"
}
#######################################
# Step 5/8: write the Prometheus configuration, the alert rules, and the
# Grafana datasource provisioning file below INSTALL_DIR.
# The heredocs use a quoted delimiter so "$labels" and "{{ }}" reach the
# files unchanged.
# Globals:   INSTALL_DIR, BLUE, GREEN, NC (read)
# Outputs:   progress messages on stdout
# Returns:   0 on success, non-zero when a file cannot be written
#######################################
create_configs() {
  local prom_dir="$INSTALL_DIR/prometheus"
  local grafana_dir="$INSTALL_DIR/grafana"

  echo -e "${BLUE}[5/8] Creating configuration files...${NC}"

  # Make sure the target directories exist even when this step runs alone.
  mkdir -p -- "$prom_dir" "$grafana_dir/provisioning/datasources"

  # Prometheus configuration
  cat > "$prom_dir/prometheus.yml" << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
rule_files:
  - "/etc/prometheus/alerts.yml"
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'cadvisor'
    static_configs:
      - targets: ['cadvisor:8080']
    scrape_interval: 5s
    metrics_path: /metrics
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
EOF

  # Prometheus alerts. The "> 0" filter in the memory rule skips containers
  # without a memory limit; their limit is 0 and the division would yield +Inf.
  cat > "$prom_dir/alerts.yml" << 'EOF'
groups:
  - name: container.rules
    rules:
      - alert: ContainerHighCpuUsage
        expr: rate(container_cpu_usage_seconds_total{name!=""}[1m]) * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container {{ $labels.name }} high CPU usage"
          description: "Container {{ $labels.name }} CPU usage is above 80% for more than 5 minutes."
      - alert: ContainerHighMemoryUsage
        expr: (container_memory_usage_bytes{name!=""} / (container_spec_memory_limit_bytes{name!=""} > 0)) * 100 > 90
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Container {{ $labels.name }} high memory usage"
          description: "Container {{ $labels.name }} memory usage is above 90% for more than 5 minutes."
      - alert: ContainerDown
        expr: up{job="cadvisor"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "cAdvisor is down"
          description: "cAdvisor has been down for more than 1 minute."
EOF

  # Grafana datasource
  cat > "$grafana_dir/provisioning/datasources/prometheus.yml" << 'EOF'
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
EOF

  # Make the configuration readable by the container users. Only the two
  # directories this function populates are touched, so unrelated files in
  # INSTALL_DIR keep their permissions.
  find "$prom_dir" "$grafana_dir" -type d -exec chmod 755 {} +
  find "$prom_dir" "$grafana_dir" -type f -exec chmod 644 {} +

  echo -e "${GREEN}Configuration files created${NC}"
}
#######################################
# Step 6/8: write docker-compose.yml for Prometheus, cAdvisor, Node Exporter,
# and Grafana into INSTALL_DIR.
# The heredoc delimiter is quoted, so "$$" and "${...}" are written literally
# and interpreted by Docker Compose: "$$" is its escape for a single "$", and
# GRAFANA_ADMIN_PASSWORD may be set in the environment (or an .env file) when
# the stack is started to override the default Grafana password "admin".
# Image tags are pinned so repeated installs produce the same stack.
# Globals:   INSTALL_DIR, BLUE, GREEN, NC (read)
# Outputs:   progress messages on stdout
# Returns:   0 on success, non-zero when the file cannot be written
#######################################
create_compose() {
  local compose_file="$INSTALL_DIR/docker-compose.yml"

  echo -e "${BLUE}[6/8] Creating Docker Compose configuration...${NC}"

  mkdir -p -- "$INSTALL_DIR"

  cat > "$compose_file" << 'EOF'
services:
  prometheus:
    image: prom/prometheus:v2.48.1
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    container_name: cadvisor
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
  grafana:
    image: grafana/grafana:10.2.2
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
volumes:
  prometheus_data:
  grafana_data:
EOF

  chmod 644 "$compose_file"
  echo -e "${GREEN}Docker Compose configuration created${NC}"
}
#######################################
# Step 7/8: start the monitoring stack from INSTALL_DIR.
# When the docker group is not active in the current session (for example
# right after the user was added to it), the command is run through
# "sg docker" so it executes with that group.
# Globals:   INSTALL_DIR, BLUE, GREEN, YELLOW, NC (read)
# Outputs:   progress messages on stdout
# Returns:   0 on success, non-zero when the stack cannot be started
# Side effects: changes the working directory to INSTALL_DIR
#######################################
start_services() {
  echo -e "${BLUE}[7/8] Starting monitoring services...${NC}"
  cd "$INSTALL_DIR" || return 1

  # Ensure user can run docker. Compare whole group names: a substring match
  # would also accept groups such as "dockerroot".
  if [[ " $(id -nG) " == *" docker "* ]]; then
    docker compose up -d
  else
    echo -e "${YELLOW}Adding user to docker group temporarily...${NC}"
    sg docker -c "docker compose up -d"
  fi

  echo -e "${GREEN}Services started successfully${NC}"
}
#######################################
# Run "docker compose" with the given arguments, going through "sg docker"
# when the docker group is not active in the current session.
# Arguments: arguments for "docker compose" (simple words without spaces)
#######################################
_verify_compose() {
  if [[ " $(id -nG) " == *" docker "* ]]; then
    docker compose "$@"
  else
    sg docker -c "docker compose $*"
  fi
}

#######################################
# Step 8/8: verify that every service defined in the compose file is running
# and probe the web endpoints.
# Globals:   INSTALL_DIR, BLUE, GREEN, YELLOW, RED, NC (read)
# Outputs:   status report on stdout
# Returns:   0 when all services run; 1 when the service list cannot be read
#            or at least one service is not running. Endpoints that do not
#            answer yet only produce a warning.
# Side effects: changes the working directory to INSTALL_DIR
#######################################
verify_installation() {
  local expected running svc endpoint
  local -a not_running=()
  local -a endpoints=("http://localhost:9090" "http://localhost:8080" "http://localhost:3000" "http://localhost:9100")

  echo -e "${BLUE}[8/8] Verifying installation...${NC}"
  cd "$INSTALL_DIR" || return 1

  # Wait for services to start
  sleep 10

  # Compare the services defined in the compose file with those actually
  # running, so that a single healthy container cannot mask failed ones.
  if ! expected=$(_verify_compose config --services) || [[ -z "$expected" ]]; then
    echo -e "${RED}✗ Some containers failed to start${NC}"
    return 1
  fi
  running=$(_verify_compose ps --services --status running) || running=''

  while IFS= read -r svc; do
    [[ -n "$svc" ]] || continue
    grep -Fxq -- "$svc" <<< "$running" || not_running+=("$svc")
  done <<< "$expected"

  if (( ${#not_running[@]} > 0 )); then
    echo -e "${RED}✗ Some containers failed to start${NC}"
    echo -e "${RED}  Not running: ${not_running[*]}${NC}"
    return 1
  fi
  echo -e "${GREEN}✓ Docker containers are running${NC}"

  # Check endpoints; the timeout keeps a hanging service from blocking the run.
  for endpoint in "${endpoints[@]}"; do
    if curl -s --max-time 5 "$endpoint" > /dev/null; then
      echo -e "${GREEN}✓ $endpoint is accessible${NC}"
    else
      echo -e "${YELLOW}⚠ $endpoint is not yet ready${NC}"
    fi
  done

  echo -e "${GREEN}Installation completed successfully!${NC}"
  echo -e "${BLUE}Access points:${NC}"
  echo -e " Prometheus: http://localhost:9090"
  echo -e " cAdvisor: http://localhost:8080"
  echo -e " Grafana: http://localhost:3000 (admin/admin)"
  echo -e " Node Exporter: http://localhost:9100"
}
#######################################
# Entry point: run the eight installation steps in order. Under the script's
# "set -e" a failing step ends the run before the next one starts.
# Arguments: the script's positional parameters (not used by the steps)
#######################################
main() {
  local step
  for step in \
    check_prerequisites \
    update_system \
    install_docker \
    create_directories \
    create_configs \
    create_compose \
    start_services \
    verify_installation; do
    "$step"
  done
}

main "$@"
Review the script before running. Execute with: bash install.sh