Set up VPA to automatically adjust CPU and memory requests for your Kubernetes workloads. Reduce resource waste and optimize costs by letting VPA analyze actual usage patterns and rightsizing containers.
Prerequisites
- Running Kubernetes cluster with admin access
- kubectl configured and working
- Basic understanding of Kubernetes resources
What this solves
Kubernetes Vertical Pod Autoscaler (VPA) automatically adjusts CPU and memory requests for your containers based on actual usage patterns. This eliminates resource waste from over-provisioned pods and prevents performance issues from under-provisioned workloads, helping you optimize both costs and reliability.
Step-by-step installation
Verify cluster prerequisites
Check that your cluster has metrics-server running and sufficient RBAC permissions for VPA components.
kubectl get deployment metrics-server -n kube-system
kubectl get nodes
# The --short flag was removed in kubectl 1.28; the plain command prints the concise form
kubectl version
Install metrics-server if missing
VPA requires metrics-server to collect resource usage data. Install it if not already present.
kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
Wait for metrics-server to be ready:
kubectl wait --for=condition=available --timeout=300s deployment/metrics-server -n kube-system
Clone VPA repository
Download the official VPA installation manifests from the autoscaler repository.
git clone https://github.com/kubernetes/autoscaler.git
cd autoscaler/vertical-pod-autoscaler
Install VPA components
Deploy the VPA admission controller, recommender, and updater components to your cluster.
./hack/vpa-install.sh
Verify all VPA components are running:
kubectl get pods -n kube-system | grep vpa
kubectl get deployment -n kube-system | grep vpa
Verify VPA custom resource definitions
Ensure VPA CRDs are properly registered in your cluster.
kubectl get crd | grep verticalpodautoscaler
kubectl api-resources | grep verticalpodautoscaler
Step-by-step configuration
Deploy sample application
Create a test deployment to demonstrate VPA functionality with realistic resource patterns.
# Demo workload that the VPA objects in this guide target.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-vpa-demo
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx-vpa-demo
  template:
    metadata:
      labels:
        app: nginx-vpa-demo
    spec:
      containers:
        - name: nginx
          image: nginx:1.25
          resources:
            requests:
              cpu: "100m"
              memory: "128Mi"
            limits:
              cpu: "200m"
              memory: "256Mi"
          ports:
            - containerPort: 80
---
# Service so in-cluster clients can reach the pods at
# nginx-vpa-demo.default.svc.cluster.local (the URL the load generator requests).
apiVersion: v1
kind: Service
metadata:
  name: nginx-vpa-demo
  namespace: default
spec:
  selector:
    app: nginx-vpa-demo
  ports:
    - port: 80
      targetPort: 80
kubectl apply -f nginx-deployment.yaml
Configure VPA in recommendation mode
Start with recommendation-only mode to observe VPA suggestions without automatic changes.
# Recommendation-only VPA: updateMode "Off" publishes suggestions without changing pods.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: nginx-vpa-recommender
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: nginx-vpa-demo
  updatePolicy:
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      - containerName: nginx
        # Bounds that recommendations are clamped to
        minAllowed:
          cpu: 50m
          memory: 64Mi
        maxAllowed:
          cpu: 500m
          memory: 512Mi
        controlledResources: ["cpu", "memory"]
kubectl apply -f nginx-vpa-rec.yaml
Generate load for meaningful recommendations
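Create some CPU and memory usage to help VPA generate realistic recommendations. The load generator calls nginx through a Service named nginx-vpa-demo; if that Service does not exist yet, create it first with: kubectl expose deployment nginx-vpa-demo --port=80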
# Pod that requests the nginx demo in a tight loop to produce measurable usage.
apiVersion: v1
kind: Pod
metadata:
  name: load-generator
spec:
  containers:
    - name: busybox
      image: busybox:1.36
      command:
        - /bin/sh
        - -c
        - |
          while true; do
            wget -q -O- http://nginx-vpa-demo.default.svc.cluster.local/
            sleep 0.1
          done
  restartPolicy: Never
kubectl apply -f load-generator.yaml
Configure VPA for automatic updates
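Enable automatic resource adjustments with proper update policies for production workloads. VPA does not support several VPA objects matching the same pods, so keep only one VPA per workload: either change the updateMode of the existing object or delete it before applying a new one that targets the same Deployment.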
# VPA with updateMode "Auto" for the nginx demo deployment.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: nginx-vpa-auto
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: nginx-vpa-demo
  updatePolicy:
    updateMode: "Auto"
    minReplicas: 1
  resourcePolicy:
    containerPolicies:
      - containerName: nginx
        minAllowed:
          cpu: 50m
          memory: 64Mi
        maxAllowed:
          cpu: 500m
          memory: 512Mi
        controlledResources: ["cpu", "memory"]
        controlledValues: "RequestsAndLimits"
kubectl apply -f nginx-vpa-auto.yaml
Configure VPA resource policies
Set up advanced resource policies with scaling bounds and controlled resource types.
# VPA with updateMode "Initial" and a wildcard container policy.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: production-app-vpa
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: nginx-vpa-demo
  updatePolicy:
    updateMode: "Initial"
  resourcePolicy:
    containerPolicies:
      # '*' applies this policy to every container without a more specific entry
      - containerName: '*'
        minAllowed:
          cpu: 100m
          memory: 128Mi
        maxAllowed:
          cpu: 2
          memory: 2Gi
        controlledResources: ["cpu", "memory"]
        controlledValues: "RequestsOnly"
        mode: Auto
kubectl apply -f advanced-vpa-policy.yaml
Monitor and tune VPA recommendations
View VPA recommendations
Check current resource recommendations and compare them with actual usage patterns.
kubectl describe vpa nginx-vpa-recommender
kubectl get vpa nginx-vpa-recommender -o yaml
Monitor resource usage trends
Compare VPA recommendations with actual pod resource consumption over time.
kubectl top pods -l app=nginx-vpa-demo
# [*] selects every pod and every container in the jsonpath expression
kubectl get pods -l app=nginx-vpa-demo -o jsonpath='{.items[*].spec.containers[*].resources}'
Configure VPA admission webhook
Verify the VPA admission controller is properly configured to intercept pod creation.
kubectl get mutatingwebhookconfigurations
kubectl get validatingwebhookconfigurations | grep vpa
Set up VPA monitoring dashboard
Create monitoring queries to track VPA effectiveness and resource optimization metrics.
#!/bin/bash
# vpa-monitor.sh - print VPA objects, their container recommendations and
# current pod resource usage for every namespace. Requires kubectl and jq.

echo "=== VPA Status ==="
kubectl get vpa --all-namespaces

echo -e "\n=== VPA Recommendations ==="
# List namespace/name pairs so VPAs outside the current namespace are included,
# matching the --all-namespaces scope of the other sections.
kubectl get vpa --all-namespaces \
  -o jsonpath='{range .items[*]}{.metadata.namespace}{"\t"}{.metadata.name}{"\n"}{end}' \
  | while IFS=$'\t' read -r vpa_namespace vpa_name; do
    [[ -n "$vpa_name" ]] || continue
    echo "${vpa_namespace}/${vpa_name}:"
    kubectl get vpa "$vpa_name" -n "$vpa_namespace" \
      -o jsonpath='{.status.recommendation.containerRecommendations}' | jq .
  done

echo -e "\n=== Resource Utilization ==="
kubectl top pods --all-namespaces
chmod +x vpa-monitor.sh
./vpa-monitor.sh
Deploy VPA policies for workload optimization
Configure namespace-wide VPA policies
Apply VPA policies across multiple deployments with consistent resource boundaries.
# ConfigMap holding a VPA manifest template; DEPLOYMENT_NAME and NAMESPACE are
# placeholders to substitute before applying the embedded manifest.
apiVersion: v1
kind: ConfigMap
metadata:
  name: vpa-policy-template
  namespace: production
data:
  vpa-template.yaml: |
    apiVersion: autoscaling.k8s.io/v1
    kind: VerticalPodAutoscaler
    metadata:
      name: DEPLOYMENT_NAME-vpa
      namespace: NAMESPACE
    spec:
      targetRef:
        apiVersion: apps/v1
        kind: Deployment
        name: DEPLOYMENT_NAME
      updatePolicy:
        updateMode: "Auto"
        minReplicas: 2
      resourcePolicy:
        containerPolicies:
          - containerName: '*'
            minAllowed:
              cpu: 100m
              memory: 128Mi
            maxAllowed:
              cpu: 4
              memory: 8Gi
            controlledResources: ["cpu", "memory"]
            controlledValues: "RequestsAndLimits"
kubectl apply -f namespace-vpa-policy.yaml
Configure VPA for StatefulSets
Apply VPA policies to StatefulSets with careful consideration of persistent storage and scaling patterns.
# VPA for the postgresql StatefulSet, using updateMode "Initial".
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: database-vpa
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: postgresql
  updatePolicy:
    updateMode: "Initial"
  resourcePolicy:
    containerPolicies:
      - containerName: postgresql
        minAllowed:
          cpu: 500m
          memory: 1Gi
        maxAllowed:
          cpu: 8
          memory: 32Gi
        controlledResources: ["cpu", "memory"]
        controlledValues: "RequestsOnly"
kubectl apply -f statefulset-vpa.yaml
Implement VPA exclusion policies
Configure workloads to exclude certain containers or resources from VPA management.
# Per-container policies: one container fully managed, one excluded, one memory-only.
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: selective-vpa
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: multi-container-app
  updatePolicy:
    updateMode: "Auto"
  resourcePolicy:
    containerPolicies:
      - containerName: app-container
        controlledResources: ["cpu", "memory"]
        mode: Auto
      # mode "Off" excludes this container from VPA management
      - containerName: sidecar-container
        mode: "Off"
      - containerName: monitoring-agent
        controlledResources: ["memory"]
        minAllowed:
          memory: 64Mi
        maxAllowed:
          memory: 512Mi
kubectl apply -f selective-vpa.yaml
Verify your setup
# Check VPA components are running
kubectl get pods -n kube-system | grep vpa
# Verify VPA CRDs are installed
kubectl get crd | grep verticalpodautoscaler
# Check VPA recommendations
kubectl get vpa --all-namespaces
kubectl describe vpa nginx-vpa-recommender
# Monitor resource changes
kubectl get pods -l app=nginx-vpa-demo -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[*].resources.requests}{"\n"}{end}'
# Check VPA admission webhook
kubectl get mutatingwebhookconfigurations | grep vpa
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| VPA shows no recommendations | Insufficient metrics data | Wait 24-48 hours for data collection, ensure metrics-server is running |
| Pods not getting updated automatically | Update policy set to "Off" or "Initial" | Change updateMode to "Auto" in VPA spec |
| VPA recommendations too high/low | Insufficient load or incorrect resource policies | Adjust minAllowed/maxAllowed bounds, generate realistic load patterns |
| Admission controller webhook errors | Certificate issues or RBAC permissions | kubectl logs -n kube-system deployment/vpa-admission-controller |
| VPA conflicts with HPA | Both autoscalers targeting same resource | Use HPA for CPU-based scaling, VPA for memory, or separate workloads |
| Resource updates causing downtime | Insufficient replicas during updates | Set minReplicas in updatePolicy, use "Initial" mode for critical services |
Next steps
- Implement Kubernetes workload rightsizing with VPA recommendations and cost analysis
- Configure Kubernetes cluster autoscaler with mixed instance types for cost optimization
- Set up Kubernetes custom metrics autoscaling with Prometheus adapter for application-specific scaling
- Configure Kubernetes resource quotas and limit ranges for namespace-level resource management
- Monitor Kubernetes cluster with Prometheus Operator for comprehensive observability
Running this in production?
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Production-grade Kubernetes VPA installation script.
# Distributions recognised by the package-manager detection:
# Ubuntu, Debian, AlmaLinux, Rocky Linux, CentOS, RHEL, Amazon Linux, Fedora.
set -euo pipefail

# Option defaults; the command-line flags may override them
CLEANUP_ON_EXIT=false
VPA_VERSION="latest"
NAMESPACE="kube-system"

# Escape sequences used to colorize log output (NC resets the color)
NC='\033[0m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Print the command-line help to stdout, then terminate the script with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "Options:" \
    " --namespace NAME VPA installation namespace (default: kube-system)" \
    " --vpa-version VER VPA version to install (default: latest)" \
    " --cleanup-on-fail Clean up on failure (default: false)" \
    " -h, --help Show this help"
  exit 1
}
# Parse arguments
# Options that take a value are validated first, so a missing value prints the
# usage text instead of aborting with an "unbound variable" error under `set -u`.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --namespace|--vpa-version)
      if [[ -z "${2:-}" ]]; then
        echo -e "${RED}Option $1 requires a value${NC}"
        usage
      fi
      if [[ "$1" == "--namespace" ]]; then
        NAMESPACE="$2"
      else
        VPA_VERSION="$2"
      fi
      shift 2
      ;;
    --cleanup-on-fail) CLEANUP_ON_EXIT=true; shift ;;
    -h|--help) usage ;;
    *) echo -e "${RED}Unknown option: $1${NC}"; usage ;;
  esac
done
# Detect distribution
# Chooses the package-manager commands from the ID field of /etc/os-release.
# The commands stay plain strings because install_dependencies expands them
# unquoted; failures of the "update" command are tolerated at that call site.
if [ -f /etc/os-release ]; then
  # shellcheck source=/dev/null
  . /etc/os-release
  # ${ID:-} keeps `set -u` from aborting when os-release defines no ID
  case "${ID:-}" in
    ubuntu|debian)
      PKG_MGR="apt"; PKG_INSTALL="apt install -y"; PKG_UPDATE="apt update" ;;
    almalinux|rocky|centos|rhel|ol|fedora)
      PKG_MGR="dnf"; PKG_INSTALL="dnf install -y"; PKG_UPDATE="dnf check-update" ;;
    amzn)
      PKG_MGR="yum"; PKG_INSTALL="yum install -y"; PKG_UPDATE="yum check-update" ;;
    *)
      echo -e "${RED}Unsupported distro: ${ID:-unknown}${NC}"; exit 1 ;;
  esac
else
  echo -e "${RED}/etc/os-release not found. Cannot detect distribution.${NC}"
  exit 1
fi
# Shared formatter: print message $2 wrapped in color code $1 and the reset code NC.
_log_colored() {
  echo -e "${1}${2}${NC}"
}

# Colored log helpers; each prints its first argument to stdout.
log_info() {
  _log_colored "${GREEN}" "$1"
}

log_warn() {
  _log_colored "${YELLOW}" "$1"
}

log_error() {
  _log_colored "${RED}" "$1"
}
# Cleanup function
# Best-effort rollback, active only when CLEANUP_ON_EXIT is "true".
# Removes the temporary repository clone, and removes metrics-server only when
# METRICS_SERVER_INSTALLED_BY_SCRIPT is "true": a metrics-server that existed
# before this run must never be deleted.
cleanup() {
  if [ "$CLEANUP_ON_EXIT" = true ]; then
    log_warn "Cleaning up on failure..."
    if [ "${METRICS_SERVER_INSTALLED_BY_SCRIPT:-false}" = true ]; then
      kubectl delete -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml --ignore-not-found=true 2>/dev/null || true
    fi
    rm -rf /tmp/autoscaler 2>/dev/null || true
  fi
}

# Run cleanup whenever the script exits with a non-zero status.
# An ERR trap is not inherited by shell functions unless `set -E` is active and
# is not triggered by an explicit `exit 1`, so it would miss failures raised
# inside functions; an EXIT trap that inspects the status covers both cases.
_cleanup_on_failure() {
  local status=$?
  if [ "$status" -ne 0 ]; then
    cleanup
  fi
}
trap _cleanup_on_failure EXIT
# Validate the environment before anything is installed.
# Globals:  KUBE_VERSION (written) - server version without the leading "v",
#           empty when it could not be determined
# Outputs:  progress, warnings and errors through the log_* helpers
# Exits:    1 when root/sudo, kubectl or cluster connectivity is missing
check_prerequisites() {
  log_info "[1/8] Checking prerequisites..."

  # Check if running as root or with sudo
  if [[ $EUID -ne 0 ]] && ! sudo -n true 2>/dev/null; then
    log_error "This script requires root privileges or sudo access"
    exit 1
  fi

  # Check for kubectl
  if ! command -v kubectl &> /dev/null; then
    log_error "kubectl is not installed or not in PATH"
    exit 1
  fi

  # Check cluster connectivity
  if ! kubectl cluster-info &> /dev/null; then
    log_error "Cannot connect to Kubernetes cluster"
    exit 1
  fi

  # Check cluster version compatibility.
  # `kubectl version -o json` is pretty-printed and reports both client and
  # server, so whitespace is stripped and only the serverVersion object is read.
  # (--short is not used: it was removed from kubectl in 1.28.)
  local version_json server_minor
  version_json=$(kubectl version -o json 2>/dev/null | tr -d '[:space:]') || version_json=""
  KUBE_VERSION=$(sed -n 's/.*"serverVersion":{[^}]*"gitVersion":"v\{0,1\}\([^"]*\)".*/\1/p' <<<"$version_json")
  # The minor field may carry a suffix such as "28+"; only the digits are kept
  server_minor=$(sed -n 's/.*"serverVersion":{[^}]*"minor":"\([0-9][0-9]*\).*/\1/p' <<<"$version_json")

  if [[ -z "$server_minor" ]]; then
    log_warn "Could not determine the Kubernetes server version; skipping compatibility check"
  elif (( 10#$server_minor < 16 )); then
    log_warn "Kubernetes version ${KUBE_VERSION:-1.${server_minor}} may not be fully compatible with VPA"
  fi

  log_info "Prerequisites check passed"
}
# Install git, curl and wget with the detected package manager.
# Globals: PKG_UPDATE, PKG_INSTALL (read) - command strings, expanded unquoted
#          on purpose so they split into command and arguments
install_dependencies() {
  log_info "[2/8] Installing system dependencies..."

  # Non-root users run the package manager through sudo
  local elevate=""
  if [[ $EUID -ne 0 ]]; then
    elevate="sudo"
  fi

  # A failing metadata refresh is tolerated; the install itself is not
  $elevate $PKG_UPDATE 2>/dev/null || true
  $elevate $PKG_INSTALL git curl wget
}
# Make sure metrics-server is deployed in kube-system, installing it if absent.
# Globals: METRICS_SERVER_INSTALLED_BY_SCRIPT (written) - set to "true" only
#          when this run deploys metrics-server, so later failure handling can
#          tell it apart from an installation that already existed
check_metrics_server() {
  log_info "[3/8] Checking metrics-server installation..."

  if kubectl get deployment metrics-server -n kube-system &> /dev/null; then
    log_info "Metrics-server is already installed"
    # Verify it's running
    if ! kubectl wait --for=condition=available --timeout=30s deployment/metrics-server -n kube-system &> /dev/null; then
      log_warn "Metrics-server exists but is not ready"
    fi
  else
    log_info "Installing metrics-server..."
    # Set before applying so a partially applied manifest is also attributed to this run
    METRICS_SERVER_INSTALLED_BY_SCRIPT=true
    kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
    log_info "Waiting for metrics-server to be ready..."
    kubectl wait --for=condition=available --timeout=300s deployment/metrics-server -n kube-system
  fi

  # Verify metrics are available; the pause gives metrics-server time to collect a first sample
  sleep 10
  if ! kubectl top nodes &> /dev/null; then
    log_warn "Metrics-server is running but metrics may not be available yet"
  fi
}
# Shallow-clone kubernetes/autoscaler into /tmp and enter the VPA directory.
# Globals: VPA_VERSION (read) - "latest" clones the default branch; any other
#          value is checked out as a branch or tag
# Side effect: the working directory becomes /tmp/autoscaler/vertical-pod-autoscaler
clone_vpa_repository() {
  log_info "[4/8] Cloning VPA repository..."

  cd /tmp
  if [ -d "autoscaler" ]; then
    rm -rf autoscaler
  fi

  local -a clone_args=(--depth=1)
  local requested_ref="${VPA_VERSION:-latest}"
  if [[ "$requested_ref" != "latest" ]]; then
    # VPA releases are tagged "vertical-pod-autoscaler-<version>" in the
    # autoscaler repository; a bare version number is expanded to that form,
    # anything else is passed through as a branch or tag name.
    if [[ "$requested_ref" =~ ^v?[0-9] ]]; then
      requested_ref="vertical-pod-autoscaler-${requested_ref#v}"
    fi
    clone_args+=(--branch "$requested_ref")
  fi

  git clone "${clone_args[@]}" https://github.com/kubernetes/autoscaler.git
  cd autoscaler/vertical-pod-autoscaler
}
# Run the upstream VPA install script from the current directory, then wait for
# the three VPA deployments in kube-system. Wait failures are tolerated here.
install_vpa_components() {
  log_info "[5/8] Installing VPA components..."

  # The installer must be executable before it is launched
  local installer="hack/vpa-install.sh"
  chmod 755 "$installer"
  "./${installer}"

  log_info "Waiting for VPA components to be ready..."
  sleep 15

  local component
  for component in vpa-admission-controller vpa-recommender vpa-updater; do
    kubectl wait --for=condition=available --timeout=300s "deployment/${component}" -n kube-system || true
  done
}
# Confirm that the VerticalPodAutoscaler CRD is registered and exposed as an
# API resource; exits with status 1 when either lookup finds nothing.
verify_vpa_crds() {
  log_info "[6/8] Verifying VPA Custom Resource Definitions..."

  # CRD objects present?
  kubectl get crd | grep verticalpodautoscaler > /dev/null 2>&1 || {
    log_error "VPA CRDs not found"
    exit 1
  }

  # Resource served by the API?
  kubectl api-resources | grep verticalpodautoscaler > /dev/null 2>&1 || {
    log_error "VPA API resources not available"
    exit 1
  }

  log_info "VPA CRDs verified successfully"
}
# Apply a small nginx Deployment and a recommendation-only VPA (updateMode
# "Off") in the default namespace, for checking that VPA works.
# The manifests are fed to `kubectl apply -f -` from here-documents; their
# indentation is significant YAML structure, and the quoted delimiter keeps the
# shell from expanding anything inside them.
create_sample_resources() {
  log_info "[7/8] Creating sample VPA resources for testing..."

  # Create a simple deployment for VPA testing
  cat <<'EOF' | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vpa-test-app
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vpa-test-app
  template:
    metadata:
      labels:
        app: vpa-test-app
    spec:
      containers:
        - name: nginx
          image: nginx:1.25
          resources:
            requests:
              cpu: "100m"
              memory: "128Mi"
            limits:
              cpu: "200m"
              memory: "256Mi"
          ports:
            - containerPort: 80
EOF

  # Create a VPA in recommendation mode
  cat <<'EOF' | kubectl apply -f -
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: vpa-test-recommender
  namespace: default
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: vpa-test-app
  updatePolicy:
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
      - containerName: nginx
        minAllowed:
          cpu: 50m
          memory: 64Mi
        maxAllowed:
          cpu: 500m
          memory: 512Mi
        controlledResources: ["cpu", "memory"]
EOF

  log_info "Sample VPA resources created successfully"
}
# Final health check of the VPA installation.
# Requires at least three Running VPA pods and three fully ready VPA
# deployments in kube-system, then waits up to about 30 seconds for the sample
# VPA to publish a recommendation (its absence only produces a warning).
# Exits: 1 when pods or deployments are not ready
verify_installation() {
  log_info "[8/8] Verifying VPA installation..."

  # Check VPA pods are running (column 3 of `kubectl get pods` is STATUS).
  # Declaration and assignment are separate so a kubectl failure is not masked.
  local pod_listing running_pods
  pod_listing=$(kubectl get pods -n kube-system) || pod_listing=""
  running_pods=$(awk '/vpa/ && $3 == "Running" { n++ } END { print n + 0 }' <<<"$pod_listing")
  if [ "$running_pods" -lt 3 ]; then
    log_error "Not all VPA components are running"
    grep vpa <<<"$pod_listing" || true
    exit 1
  fi

  # Check VPA deployments: READY (column 2) must be "n/n" with n > 0.
  # Comparing the two numbers works for any replica count and does not
  # mistake a value such as "11/12" for ready.
  local deployment_listing ready_deployments
  deployment_listing=$(kubectl get deployment -n kube-system) || deployment_listing=""
  ready_deployments=$(awk '
    /vpa/ {
      split($2, ready, "/")
      if (ready[1] + 0 > 0 && ready[1] + 0 == ready[2] + 0) n++
    }
    END { print n + 0 }' <<<"$deployment_listing")
  if [ "$ready_deployments" -lt 3 ]; then
    log_error "Not all VPA deployments are ready"
    grep vpa <<<"$deployment_listing" || true
    exit 1
  fi

  # Poll for a recommendation instead of sleeping a fixed 30 seconds
  local attempt recommendation=""
  for attempt in 1 2 3 4 5 6; do
    recommendation=$(kubectl get vpa vpa-test-recommender -n default \
      -o jsonpath='{.status.recommendation.containerRecommendations}' 2>/dev/null) || recommendation=""
    if [[ -n "$recommendation" ]]; then
      break
    fi
    sleep 5
  done

  if [[ -n "$recommendation" ]]; then
    log_info "VPA is generating recommendations successfully"
  else
    log_warn "VPA recommendations not yet available (this is normal and may take a few minutes)"
  fi

  log_info "VPA installation completed successfully!"
  echo ""
  log_info "Next steps:"
  echo "1. Monitor VPA recommendations: kubectl describe vpa vpa-test-recommender -n default"
  echo "2. View VPA status: kubectl get vpa -A"
  echo "3. Check VPA components: kubectl get pods -n kube-system | grep vpa"
  echo "4. To enable automatic updates, change updateMode from 'Off' to 'Auto'"
  echo ""
  log_warn "Remember to monitor your applications after enabling automatic VPA updates"
}
# Run every installation phase in order, then delete the temporary clone.
main() {
  log_info "Starting Kubernetes VPA installation..."

  local phase
  for phase in \
    check_prerequisites \
    install_dependencies \
    check_metrics_server \
    clone_vpa_repository \
    install_vpa_components \
    verify_vpa_crds \
    create_sample_resources \
    verify_installation
  do
    "$phase"
  done

  # Cleanup temporary files
  rm -rf /tmp/autoscaler
  log_info "VPA installation completed successfully!"
}
# Entry point: run the installer, passing along the script's positional parameters
main "$@"
Review the script before running. Execute with: bash install.sh