Set up automated daily backups for ScyllaDB with monitoring, restore procedures, and retention policies. Includes snapshot management, S3 storage integration, and comprehensive alerting for production NoSQL environments.
Prerequisites
- ScyllaDB cluster installed and running
- S3-compatible storage bucket
- SMTP server for email notifications
- Python 3.6+ with pip
- AWS CLI configured
What this solves
ScyllaDB requires reliable backup strategies to protect against data loss and enable disaster recovery. This tutorial configures automated daily backups using ScyllaDB's built-in snapshot capabilities, integrates with S3-compatible storage, and implements monitoring with email alerts. You'll set up backup retention policies, automated restore procedures, and comprehensive logging for production environments.
Step-by-step installation
Update system packages
Start by updating your package manager to ensure you have the latest security patches and package versions.
sudo apt update && sudo apt upgrade -y
Install backup dependencies
Install the AWS CLI for S3 integration, Python for backup scripts, and email utilities for notifications.
sudo apt install -y awscli python3-pip python3-venv mailutils jq curl
Create backup directories and user
Create dedicated directories for backup scripts and temporary storage with proper ownership.
sudo mkdir -p /opt/scylladb-backup/{scripts,logs,temp}
sudo useradd -r -s /bin/bash -d /opt/scylladb-backup scyllabackup
sudo chown -R scyllabackup:scyllabackup /opt/scylladb-backup
sudo chmod 755 /opt/scylladb-backup
sudo chmod 750 /opt/scylladb-backup/{scripts,logs,temp}
Configure AWS credentials
Set up AWS credentials for S3 backup storage. Create a configuration file with your S3 access details.
sudo -u scyllabackup mkdir -p /opt/scylladb-backup/.aws
Create /opt/scylladb-backup/.aws/credentials with your access keys:
[default]
aws_access_key_id = YOUR_ACCESS_KEY
aws_secret_access_key = YOUR_SECRET_KEY
Then create /opt/scylladb-backup/.aws/config with the region and output format:
[default]
region = us-east-1
output = json
Finally, lock down ownership and permissions on both files:
sudo chown -R scyllabackup:scyllabackup /opt/scylladb-backup/.aws
sudo chmod 600 /opt/scylladb-backup/.aws/{credentials,config}
Create backup configuration file
Define backup settings including retention policies, S3 bucket details, and notification preferences.
Create /opt/scylladb-backup/scripts/backup-config.conf. The backup scripts read this file with Python's configparser, which requires a section header and stores values verbatim — so use INI syntax, with a [DEFAULT] section and no quotes around values:
# ScyllaDB Backup Configuration
[DEFAULT]
# Connection settings. NOTE: 10000 is the Scylla REST API port; if your
# nodetool build uses JMX, set 7199 instead — verify for your Scylla version.
SCYLLA_HOST = 127.0.0.1
SCYLLA_PORT = 10000
# Whitespace-separated list of keyspaces to back up
KEYSPACES = mykeyspace1 mykeyspace2
# S3 Configuration
S3_BUCKET = my-scylla-backups
S3_PREFIX = scylladb/backups
S3_STORAGE_CLASS = STANDARD_IA
# Retention Settings
LOCAL_RETENTION_DAYS = 3
S3_RETENTION_DAYS = 30
# Email Notifications
EMAIL_ENABLED = true
EMAIL_TO = admin@example.com
EMAIL_FROM = scylla-backup@example.com
SMTP_SERVER = localhost
# Backup Settings
COMPRESSION = true
PARALLEL_UPLOADS = 4
MAX_BACKUP_SIZE_GB = 100
sudo chown scyllabackup:scyllabackup /opt/scylladb-backup/scripts/backup-config.conf
sudo chmod 640 /opt/scylladb-backup/scripts/backup-config.conf
Create backup script
Develop the main backup script that handles snapshots, compression, and S3 upload with error handling.
#!/usr/bin/env python3
"""Daily ScyllaDB backup: snapshot, compress, upload to S3, notify by email."""
import os
import sys
import subprocess
import datetime
import json
import logging
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import configparser
import shutil
import glob

# Load configuration (INI file with a [DEFAULT] section).
config = configparser.ConfigParser()
config.read('/opt/scylladb-backup/scripts/backup-config.conf')

# Setup logging: one dated log file per day, mirrored to stdout.
log_file = f"/opt/scylladb-backup/logs/backup-{datetime.datetime.now().strftime('%Y%m%d')}.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)
class ScyllaBackup:
    """Snapshot the configured keyspaces, archive them, and upload to S3.

    All settings come from the module-level ``config`` object
    (/opt/scylladb-backup/scripts/backup-config.conf, [DEFAULT] section).
    """

    def __init__(self):
        self.scylla_host = config.get('DEFAULT', 'SCYLLA_HOST')
        self.scylla_port = config.get('DEFAULT', 'SCYLLA_PORT')
        # KEYSPACES is a whitespace-separated list of keyspace names.
        self.keyspaces = config.get('DEFAULT', 'KEYSPACES').split()
        self.s3_bucket = config.get('DEFAULT', 'S3_BUCKET')
        self.s3_prefix = config.get('DEFAULT', 'S3_PREFIX')
        self.backup_dir = '/opt/scylladb-backup/temp'
        # One timestamp per run so snapshots and archives share a name.
        self.timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

    def send_notification(self, subject, message, is_error=False):
        """Email a status report; failures are logged, never raised."""
        if not config.getboolean('DEFAULT', 'EMAIL_ENABLED'):
            return
        try:
            msg = MIMEMultipart()
            msg['From'] = config.get('DEFAULT', 'EMAIL_FROM')
            msg['To'] = config.get('DEFAULT', 'EMAIL_TO')
            msg['Subject'] = f"ScyllaDB Backup: {subject}"
            if is_error:
                # Flag failure mails as high priority so they stand out.
                msg['X-Priority'] = '1'
            body = f"Backup timestamp: {self.timestamp}\n\n{message}"
            msg.attach(MIMEText(body, 'plain'))
            server = smtplib.SMTP(config.get('DEFAULT', 'SMTP_SERVER'))
            try:
                server.sendmail(msg['From'], msg['To'], msg.as_string())
            finally:
                # Close the SMTP connection even if sendmail fails.
                server.quit()
        except Exception as e:
            logger.error(f"Failed to send email notification: {e}")

    def create_snapshot(self, keyspace):
        """Create a nodetool snapshot for ``keyspace``; return its name.

        Raises CalledProcessError when nodetool fails.
        """
        snapshot_name = f"backup_{self.timestamp}"
        try:
            # NOTE(review): -p is the port nodetool connects on; confirm
            # SCYLLA_PORT matches what nodetool expects on this version.
            cmd = f"nodetool -h {self.scylla_host} -p {self.scylla_port} snapshot -t {snapshot_name} {keyspace}"
            subprocess.run(cmd.split(), capture_output=True, text=True, check=True)
            logger.info(f"Created snapshot {snapshot_name} for keyspace {keyspace}")
            return snapshot_name
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to create snapshot for {keyspace}: {e.stderr}")
            raise

    def find_snapshot_files(self, keyspace, snapshot_name):
        """Return absolute paths of every file in the keyspace's snapshot dirs."""
        data_dir = "/var/lib/scylla/data"
        snapshot_files = []
        keyspace_dir = os.path.join(data_dir, keyspace)
        if not os.path.exists(keyspace_dir):
            logger.warning(f"Keyspace directory not found: {keyspace_dir}")
            return snapshot_files
        # Each table directory has its own snapshots/<name> subdirectory.
        for table_dir in os.listdir(keyspace_dir):
            snapshot_path = os.path.join(keyspace_dir, table_dir, "snapshots", snapshot_name)
            if os.path.exists(snapshot_path):
                for file in os.listdir(snapshot_path):
                    snapshot_files.append(os.path.join(snapshot_path, file))
        return snapshot_files

    def compress_and_upload(self, keyspace, snapshot_name, files):
        """Tar the snapshot files and upload the archive to S3.

        Returns True on success. The local archive is removed on both
        success and failure so a failed upload cannot fill the temp dir.
        """
        if not files:
            logger.warning(f"No files found for keyspace {keyspace}")
            return False
        archive_name = f"{keyspace}_{snapshot_name}.tar.gz"
        archive_path = os.path.join(self.backup_dir, archive_name)
        try:
            # Create compressed archive
            cmd = ["tar", "-czf", archive_path] + files
            subprocess.run(cmd, check=True)
            logger.info(f"Created archive: {archive_name}")
            # Upload to S3 under a date-based prefix.
            s3_key = f"{self.s3_prefix}/{datetime.datetime.now().strftime('%Y/%m/%d')}/{archive_name}"
            upload_cmd = [
                "aws", "s3", "cp", archive_path, f"s3://{self.s3_bucket}/{s3_key}",
                "--storage-class", config.get('DEFAULT', 'S3_STORAGE_CLASS')
            ]
            # HOME override so the aws CLI finds /opt/scylladb-backup/.aws.
            subprocess.run(upload_cmd, check=True, env={**os.environ, 'HOME': '/opt/scylladb-backup'})
            logger.info(f"Uploaded to S3: s3://{self.s3_bucket}/{s3_key}")
            return True
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to compress/upload {keyspace}: {e}")
            return False
        finally:
            # Clean up the local archive whether or not the upload succeeded
            # (the original leaked it on failure).
            if os.path.exists(archive_path):
                os.remove(archive_path)

    def cleanup_snapshots(self):
        """Clear all on-disk snapshots via nodetool (best effort)."""
        try:
            cmd = f"nodetool -h {self.scylla_host} -p {self.scylla_port} clearsnapshot"
            subprocess.run(cmd.split(), check=True)
            logger.info("Cleaned up old snapshots")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to cleanup snapshots: {e}")

    def cleanup_old_backups(self):
        """Delete S3 objects older than S3_RETENTION_DAYS (best effort)."""
        retention_days = int(config.get('DEFAULT', 'S3_RETENTION_DAYS'))
        cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)
        try:
            # NOTE(review): the JMESPath filter compares LastModified against
            # a naive local-time ISO string; confirm timezone handling.
            list_cmd = [
                "aws", "s3api", "list-objects-v2",
                "--bucket", self.s3_bucket,
                "--prefix", self.s3_prefix,
                "--query", f"Contents[?LastModified<'{cutoff_date.isoformat()}']"
            ]
            result = subprocess.run(list_cmd, capture_output=True, text=True, check=True,
                                    env={**os.environ, 'HOME': '/opt/scylladb-backup'})
            # The CLI prints "null" or nothing when no objects match; guard
            # json.loads against empty stdout.
            stdout = result.stdout.strip()
            old_objects = json.loads(stdout) if stdout else None
            if old_objects:
                for obj in old_objects:
                    delete_cmd = ["aws", "s3", "rm", f"s3://{self.s3_bucket}/{obj['Key']}"]
                    subprocess.run(delete_cmd, check=True,
                                   env={**os.environ, 'HOME': '/opt/scylladb-backup'})
                    logger.info(f"Deleted old backup: {obj['Key']}")
        except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
            logger.error(f"Failed to cleanup old backups: {e}")

    def run_backup(self):
        """Back up every configured keyspace, then clean up and notify."""
        start_time = datetime.datetime.now()
        success_count = 0
        total_keyspaces = len(self.keyspaces)
        logger.info(f"Starting backup for keyspaces: {', '.join(self.keyspaces)}")
        try:
            for keyspace in self.keyspaces:
                try:
                    snapshot_name = self.create_snapshot(keyspace)
                    files = self.find_snapshot_files(keyspace, snapshot_name)
                    if self.compress_and_upload(keyspace, snapshot_name, files):
                        success_count += 1
                except Exception as e:
                    # One failed keyspace must not abort the whole run.
                    logger.error(f"Failed to backup keyspace {keyspace}: {e}")
            # Cleanup: on-disk snapshots and expired S3 archives.
            self.cleanup_snapshots()
            self.cleanup_old_backups()
            # Calculate duration
            duration = datetime.datetime.now() - start_time
            # Send notification
            if success_count == total_keyspaces:
                subject = "Backup Completed Successfully"
                message = f"All {total_keyspaces} keyspaces backed up successfully.\nDuration: {duration}"
                logger.info(message)
                self.send_notification(subject, message)
            else:
                subject = "Backup Completed with Errors"
                message = f"Backed up {success_count}/{total_keyspaces} keyspaces.\nDuration: {duration}\nCheck logs for details."
                logger.warning(message)
                self.send_notification(subject, message, is_error=True)
        except Exception as e:
            subject = "Backup Failed"
            message = f"Backup process failed: {str(e)}"
            logger.error(message)
            self.send_notification(subject, message, is_error=True)
            sys.exit(1)
if __name__ == "__main__":
    # Entry point: run one full backup cycle.
    backup = ScyllaBackup()
    backup.run_backup()
sudo chown scyllabackup:scyllabackup /opt/scylladb-backup/scripts/scylla-backup.py
sudo chmod 750 /opt/scylladb-backup/scripts/scylla-backup.py
Create restore script
Develop a restore script for disaster recovery scenarios with keyspace and timestamp selection.
#!/usr/bin/env python3
"""Restore ScyllaDB keyspaces from S3 backup archives (disaster recovery)."""
import os
import sys
import subprocess
import datetime
import json
import logging
import argparse
import configparser
import tempfile

# Load configuration (same INI file the backup script reads).
config = configparser.ConfigParser()
config.read('/opt/scylladb-backup/scripts/backup-config.conf')

# Setup logging: dated restore log plus console output.
log_file = f"/opt/scylladb-backup/logs/restore-{datetime.datetime.now().strftime('%Y%m%d')}.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)
class ScyllaRestore:
    """Download, extract, and restore ScyllaDB keyspace backups from S3.

    Uses the module-level ``config``/``logger``. A per-run temp directory
    is created in ``__init__`` and removed by ``cleanup()``.
    """

    def __init__(self):
        self.scylla_host = config.get('DEFAULT', 'SCYLLA_HOST')
        self.scylla_port = config.get('DEFAULT', 'SCYLLA_PORT')
        self.s3_bucket = config.get('DEFAULT', 'S3_BUCKET')
        self.s3_prefix = config.get('DEFAULT', 'S3_PREFIX')
        self.temp_dir = tempfile.mkdtemp(prefix='scylla-restore-')

    def list_available_backups(self, keyspace=None):
        """Print archives found in S3 and return them as a list.

        Archives are named ``<keyspace>_backup_<ts>.tar.gz`` under a
        date-based key prefix, so a keyspace filter must match on the file
        name after listing. (The original computed a keyspace prefix it
        never used, and that prefix could never match the upload layout.)
        """
        try:
            list_cmd = [
                "aws", "s3api", "list-objects-v2",
                "--bucket", self.s3_bucket,
                "--prefix", self.s3_prefix,
                "--query", "Contents[?contains(Key, '.tar.gz')]"
            ]
            result = subprocess.run(list_cmd, capture_output=True, text=True, check=True,
                                    env={**os.environ, 'HOME': '/opt/scylladb-backup'})
            # The CLI prints "null" or nothing when no objects match.
            stdout = result.stdout.strip()
            backups = json.loads(stdout) if stdout else None
            backups = backups or []
            if keyspace:
                backups = [b for b in backups
                           if os.path.basename(b['Key']).startswith(f"{keyspace}_")]
            if backups:
                print("Available backups:")
                for backup in sorted(backups, key=lambda x: x['LastModified'], reverse=True):
                    key = backup['Key']
                    size = backup['Size'] / (1024*1024)  # MB
                    date = backup['LastModified'][:19]
                    print(f"  {key} ({size:.1f}MB, {date})")
            else:
                print("No backups found")
            return backups
        except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
            logger.error(f"Failed to list backups: {e}")
            return []

    def download_backup(self, s3_key):
        """Download one archive into the temp dir; return its path or None."""
        local_path = os.path.join(self.temp_dir, os.path.basename(s3_key))
        try:
            download_cmd = [
                "aws", "s3", "cp", f"s3://{self.s3_bucket}/{s3_key}", local_path
            ]
            subprocess.run(download_cmd, check=True,
                           env={**os.environ, 'HOME': '/opt/scylladb-backup'})
            logger.info(f"Downloaded backup: {s3_key}")
            return local_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to download backup {s3_key}: {e}")
            return None

    def extract_backup(self, archive_path, keyspace):
        """Unpack an archive into a keyspace-specific subdir; return it or None."""
        extract_dir = os.path.join(self.temp_dir, f"extract_{keyspace}")
        os.makedirs(extract_dir, exist_ok=True)
        try:
            extract_cmd = ["tar", "-xzf", archive_path, "-C", extract_dir]
            subprocess.run(extract_cmd, check=True)
            logger.info(f"Extracted backup for keyspace {keyspace}")
            return extract_dir
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to extract backup: {e}")
            return None

    def restore_keyspace(self, keyspace, backup_date, dry_run=False):
        """Stop Scylla, replace the keyspace data directory, restart, verify.

        backup_date: timestamp string YYYYMMDD_HHMMSS matching the archive
        name. Returns True on success.
        """
        # Reconstruct the S3 key from the date-based layout used at upload time.
        s3_key = f"{self.s3_prefix}/{backup_date[:4]}/{backup_date[4:6]}/{backup_date[6:8]}/{keyspace}_backup_{backup_date}.tar.gz"
        if dry_run:
            print(f"[DRY RUN] Would restore {keyspace} from {s3_key}")
            return True
        try:
            # Download backup
            archive_path = self.download_backup(s3_key)
            if not archive_path:
                return False
            # Extract backup
            extract_dir = self.extract_backup(archive_path, keyspace)
            if not extract_dir:
                return False
            # Stop ScyllaDB while the data directory is swapped out.
            logger.info("Stopping ScyllaDB service")
            subprocess.run(["sudo", "systemctl", "stop", "scylla-server"], check=True)
            data_dir = f"/var/lib/scylla/data/{keyspace}"
            if os.path.exists(data_dir):
                subprocess.run(["sudo", "rm", "-rf", data_dir], check=True)
            # NOTE(review): the backup tars files by absolute path, so the
            # extracted tree may be nested (var/lib/scylla/...) rather than
            # flat — verify the extracted layout matches what Scylla expects
            # before relying on this copy in production.
            subprocess.run(["sudo", "cp", "-r", extract_dir, data_dir], check=True)
            subprocess.run(["sudo", "chown", "-R", "scylla:scylla", data_dir], check=True)
            # Start ScyllaDB
            logger.info("Starting ScyllaDB service")
            subprocess.run(["sudo", "systemctl", "start", "scylla-server"], check=True)
            # Give the node time to come up before verifying.
            import time
            time.sleep(30)
            # Pass the CQL statement as a single argv element; splitting the
            # quoted command string (as the original did) breaks the -e arg.
            verify_cmd = ["cqlsh", "-e", f"DESCRIBE KEYSPACE {keyspace};"]
            result = subprocess.run(verify_cmd, capture_output=True, text=True)
            if result.returncode == 0:
                logger.info(f"Successfully restored keyspace {keyspace}")
                return True
            else:
                logger.error(f"Restoration verification failed for {keyspace}")
                return False
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to restore keyspace {keyspace}: {e}")
            return False
        finally:
            # Cleanup
            if os.path.exists(self.temp_dir):
                subprocess.run(["rm", "-rf", self.temp_dir])

    def cleanup(self):
        """Remove the temp scratch directory if it still exists."""
        if os.path.exists(self.temp_dir):
            subprocess.run(["rm", "-rf", self.temp_dir])
def main():
    """CLI entry point: list available backups or restore one keyspace."""
    parser = argparse.ArgumentParser(description='ScyllaDB Restore Tool')
    parser.add_argument('--list', action='store_true', help='List available backups')
    parser.add_argument('--keyspace', help='Keyspace to restore')
    parser.add_argument('--date', help='Backup date (YYYYMMDD_HHMMSS)')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be restored')
    args = parser.parse_args()
    restore = ScyllaRestore()
    try:
        if args.list:
            restore.list_available_backups(args.keyspace)
        elif args.keyspace and args.date:
            success = restore.restore_keyspace(args.keyspace, args.date, args.dry_run)
            sys.exit(0 if success else 1)
        else:
            parser.print_help()
    finally:
        # Always remove the scratch directory, even on error or exit.
        restore.cleanup()

if __name__ == "__main__":
    main()
sudo chown scyllabackup:scyllabackup /opt/scylladb-backup/scripts/scylla-restore.py
sudo chmod 750 /opt/scylladb-backup/scripts/scylla-restore.py
Set up automated cron scheduling
Configure daily backup execution with proper logging and error handling using systemd timers.
Create /etc/systemd/system/scylla-backup.service:
[Unit]
Description=ScyllaDB Backup Service
After=scylla-server.service
Requires=scylla-server.service
[Service]
Type=oneshot
User=scyllabackup
Group=scyllabackup
ExecStart=/usr/bin/python3 /opt/scylladb-backup/scripts/scylla-backup.py
WorkingDirectory=/opt/scylladb-backup
Environment=HOME=/opt/scylladb-backup
StandardOutput=append:/opt/scylladb-backup/logs/systemd.log
StandardError=append:/opt/scylladb-backup/logs/systemd.log
Then create /etc/systemd/system/scylla-backup.timer:
[Unit]
Description=ScyllaDB Backup Timer
Requires=scylla-backup.service
[Timer]
OnCalendar=daily
RandomizedDelaySec=3600
Persistent=true
[Install]
WantedBy=timers.target
Reload systemd and enable the timer:
sudo systemctl daemon-reload
sudo systemctl enable scylla-backup.timer
sudo systemctl start scylla-backup.timer
Configure monitoring and health checks
Set up monitoring script to verify backup completion and send alerts for failures.
#!/usr/bin/env python3
# Backup health check: verifies that yesterday's backup exists in S3 and
# that the backup log contains no error markers; emails an alert otherwise.
import os
import sys
import datetime
import subprocess
import json
import configparser
import smtplib
from email.mime.text import MIMEText

# Shared settings file (the same one the backup script reads).
config = configparser.ConfigParser()
config.read('/opt/scylladb-backup/scripts/backup-config.conf')
def check_last_backup():
    """Return True when yesterday's backup exists in S3 and its log is clean.

    Sends an email alert (via send_alert) for every failure mode.
    """
    try:
        # Check for recent backups under the date-stamped prefix the backup
        # script uploads to (S3_PREFIX/YYYY/MM/DD).
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        date_prefix = f"{config.get('DEFAULT', 'S3_PREFIX')}/{yesterday.strftime('%Y/%m/%d')}"
        list_cmd = [
            "aws", "s3api", "list-objects-v2",
            "--bucket", config.get('DEFAULT', 'S3_BUCKET'),
            "--prefix", date_prefix
        ]
        result = subprocess.run(list_cmd, capture_output=True, text=True, check=True,
                                env={**os.environ, 'HOME': '/opt/scylladb-backup'})
        # The CLI may print nothing at all for an empty prefix, which would
        # make json.loads() raise — treat that as "no backups found".
        stdout = result.stdout.strip()
        objects = json.loads(stdout) if stdout else {}
        if 'Contents' not in objects or len(objects['Contents']) == 0:
            send_alert("No backups found", f"No backups found for {yesterday.strftime('%Y-%m-%d')}")
            return False
        # Check the backup log file for error markers.
        log_file = f"/opt/scylladb-backup/logs/backup-{yesterday.strftime('%Y%m%d')}.log"
        if os.path.exists(log_file):
            with open(log_file, 'r') as f:
                log_content = f.read()
            if 'ERROR' in log_content or 'Failed' in log_content:
                send_alert("Backup errors detected", f"Check log file: {log_file}")
                return False
        return True
    except Exception as e:
        send_alert("Backup monitoring failed", str(e))
        return False
def send_alert(subject, message):
    """Email an alert; failures are printed, never raised."""
    if not config.getboolean('DEFAULT', 'EMAIL_ENABLED'):
        return
    try:
        msg = MIMEText(f"ScyllaDB Backup Alert\n\n{message}")
        msg['Subject'] = f"ScyllaDB Alert: {subject}"
        msg['From'] = config.get('DEFAULT', 'EMAIL_FROM')
        msg['To'] = config.get('DEFAULT', 'EMAIL_TO')
        server = smtplib.SMTP(config.get('DEFAULT', 'SMTP_SERVER'))
        try:
            server.sendmail(msg['From'], msg['To'], msg.as_string())
        finally:
            # Close the connection even when sendmail fails.
            server.quit()
    except Exception as e:
        print(f"Failed to send alert: {e}")
if __name__ == "__main__":
    # Non-zero exit lets cron/systemd surface monitoring failures.
    if not check_last_backup():
        sys.exit(1)
sudo chown scyllabackup:scyllabackup /opt/scylladb-backup/scripts/backup-monitor.py
sudo chmod 750 /opt/scylladb-backup/scripts/backup-monitor.py
Set up log rotation
Configure logrotate to manage backup log files and prevent disk space issues.
# Rotate all ScyllaDB backup/restore logs daily, keeping 30 compressed copies.
/opt/scylladb-backup/logs/*.log {
daily
missingok
rotate 30
compress
delaycompress
notifempty
# copytruncate keeps the Python logging FileHandler's open descriptor valid.
copytruncate
# Rotate as the backup user so ownership and permissions are preserved.
su scyllabackup scyllabackup
}
Verify your setup
Test the backup system to ensure all components work correctly.
# Check backup service status
sudo systemctl status scylla-backup.timer
sudo systemctl list-timers scylla-backup.timer
# Test backup script manually
sudo -u scyllabackup /usr/bin/python3 /opt/scylladb-backup/scripts/scylla-backup.py
# Verify S3 connectivity
sudo -u scyllabackup aws s3 ls s3://your-bucket-name/
# Test restore script (list backups)
sudo -u scyllabackup /usr/bin/python3 /opt/scylladb-backup/scripts/scylla-restore.py --list
# Check log files
sudo tail -f /opt/scylladb-backup/logs/backup-$(date +%Y%m%d).log
Common issues
| Symptom | Cause | Fix |
|---|---|---|
| Permission denied on backup files | Incorrect file ownership | sudo chown -R scyllabackup:scyllabackup /opt/scylladb-backup |
| AWS CLI command fails | Missing credentials or region | Check /opt/scylladb-backup/.aws/config and credentials file |
| Backup timer not running | Systemd timer not enabled | sudo systemctl enable --now scylla-backup.timer |
| Snapshot creation fails | ScyllaDB not running or accessible | sudo systemctl status scylla-server and check host/port configuration |
| S3 upload fails | Network issues or wrong bucket permissions | Test with aws s3 ls s3://bucket-name/ and verify IAM permissions |
| Email notifications not sent | SMTP server configuration issues | Test mail server with echo "test" \| mail -s "test" user@example.com |
| Large backup files consume disk space | Temp directory not cleaned | Add cleanup in backup script and monitor /opt/scylladb-backup/temp |
Next steps
- Configure MinIO backup and disaster recovery with automated snapshots and replication
- Setup remote backup storage with S3-compatible encryption and automated retention policies
- Configure ScyllaDB cluster monitoring with Prometheus and Grafana dashboards
- Implement ScyllaDB disaster recovery with cross-region replication
- Setup ScyllaDB backup validation and automated restore testing
Automated install script
Run this to automate the entire setup
#!/usr/bin/env bash
# Installs automated daily ScyllaDB backups end to end: service account,
# AWS credentials, config file, backup script, and a systemd timer.
set -euo pipefail
# ScyllaDB Backup and Restore Automation Installer
# Production-quality script for multiple Linux distributions
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Default configuration (positional arguments; see usage() below)
S3_BUCKET="${1:-}"
AWS_ACCESS_KEY="${2:-}"
AWS_SECRET_KEY="${3:-}"
EMAIL_TO="${4:-admin@localhost}"
usage() {
    # Show the expected invocation, then abort the installer.
    cat <<USAGE
Usage: $0 <s3_bucket> <aws_access_key> <aws_secret_key> [email@domain.com]
Example: $0 my-scylla-backups AKIAIOSFODNN7EXAMPLE wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY admin@example.com
USAGE
    exit 1
}
# Timestamped status line in green.
log() {
    local ts
    ts="$(date +'%Y-%m-%d %H:%M:%S')"
    echo -e "${GREEN}[${ts}]${NC} $1"
}
# Yellow warning banner on stdout.
warn() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}
# Red error banner on stderr.
error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Roll back a partial installation when any command fails (ERR trap below).
cleanup() {
if [ $? -ne 0 ]; then
error "Installation failed. Cleaning up..."
systemctl stop scylladb-backup.timer 2>/dev/null || true
systemctl disable scylladb-backup.timer 2>/dev/null || true
rm -f /etc/systemd/system/scylladb-backup.* 2>/dev/null || true
userdel -r scyllabackup 2>/dev/null || true
rm -rf /opt/scylladb-backup 2>/dev/null || true
fi
}
trap cleanup ERR
# Validate arguments
if [ $# -lt 3 ] || [ -z "$S3_BUCKET" ] || [ -z "$AWS_ACCESS_KEY" ] || [ -z "$AWS_SECRET_KEY" ]; then
usage
fi
# Check if running as root
if [ "$EUID" -ne 0 ]; then
error "This script must be run as root"
exit 1
fi
# Auto-detect distribution and pick the matching package manager commands
if [ -f /etc/os-release ]; then
. /etc/os-release
case "$ID" in
ubuntu|debian)
PKG_MGR="apt"
PKG_UPDATE="apt update"
PKG_INSTALL="apt install -y"
MAIL_PKG="mailutils"
;;
almalinux|rocky|centos|rhel|ol|fedora)
PKG_MGR="dnf"
PKG_UPDATE="dnf update -y"
PKG_INSTALL="dnf install -y"
# NOTE(review): RHEL-family uses "mailx" instead of "mailutils"; also
# confirm "awscli" is available in the default repos on these distros.
MAIL_PKG="mailx"
;;
amzn)
PKG_MGR="yum"
PKG_UPDATE="yum update -y"
PKG_INSTALL="yum install -y"
MAIL_PKG="mailx"
;;
*)
error "Unsupported distribution: $ID"
exit 1
;;
esac
else
error "Cannot detect Linux distribution"
exit 1
fi
log "[1/8] Updating system packages..."
$PKG_UPDATE
log "[2/8] Installing backup dependencies..."
$PKG_INSTALL awscli python3-pip $MAIL_PKG jq curl
# Install Python packages. The "configparser" PyPI package is a Python 2
# backport and must NOT be installed on Python 3 — it can shadow the
# stdlib module; configparser already ships with Python 3.
pip3 install boto3
log "[3/8] Creating backup directories and user..."
mkdir -p /opt/scylladb-backup/{scripts,logs,temp}
# System account homed at the install root; "|| true" keeps re-runs idempotent.
useradd -r -s /bin/bash -d /opt/scylladb-backup scyllabackup 2>/dev/null || true
chown -R scyllabackup:scyllabackup /opt/scylladb-backup
chmod 755 /opt/scylladb-backup
chmod 750 /opt/scylladb-backup/{scripts,logs,temp}
log "[4/8] Configuring AWS credentials..."
sudo -u scyllabackup mkdir -p /opt/scylladb-backup/.aws
# Unquoted EOF: $AWS_ACCESS_KEY / $AWS_SECRET_KEY expand into the file.
cat > /opt/scylladb-backup/.aws/credentials << EOF
[default]
aws_access_key_id = $AWS_ACCESS_KEY
aws_secret_access_key = $AWS_SECRET_KEY
EOF
# Region is hard-coded; edit this file afterwards for a different region.
cat > /opt/scylladb-backup/.aws/config << EOF
[default]
region = us-east-1
output = json
EOF
# Credential files must be readable only by the backup user.
chown -R scyllabackup:scyllabackup /opt/scylladb-backup/.aws
chmod 600 /opt/scylladb-backup/.aws/{credentials,config}
log "[5/8] Creating backup configuration..."
# INI file consumed by Python's configparser; the [DEFAULT] section makes
# every key visible to config.get('DEFAULT', ...). Values are stored
# verbatim, so no quotes.
cat > /opt/scylladb-backup/scripts/backup-config.conf << EOF
[DEFAULT]
SCYLLA_HOST=127.0.0.1
SCYLLA_PORT=10000
KEYSPACES=system_schema
S3_BUCKET=$S3_BUCKET
S3_PREFIX=scylladb/backups
S3_STORAGE_CLASS=STANDARD_IA
LOCAL_RETENTION_DAYS=3
S3_RETENTION_DAYS=30
EMAIL_ENABLED=true
EMAIL_TO=$EMAIL_TO
EMAIL_FROM=scylla-backup@$(hostname -f)
SMTP_SERVER=localhost
COMPRESSION=true
PARALLEL_UPLOADS=4
MAX_BACKUP_SIZE_GB=100
EOF
chown scyllabackup:scyllabackup /opt/scylladb-backup/scripts/backup-config.conf
chmod 640 /opt/scylladb-backup/scripts/backup-config.conf
log "[6/8] Creating backup script..."
# Quoted 'EOF': nothing inside the heredoc is expanded by the installer
# shell. The embedded Python is written with correct indentation — the
# flattened version previously shown would not have been valid Python.
cat > /opt/scylladb-backup/scripts/backup.py << 'EOF'
#!/usr/bin/env python3
"""Snapshot all keyspaces via nodetool, archive per table, upload to S3."""
import os
import sys
import subprocess
import datetime
import json
import logging
import configparser
import shutil
import glob
import time

# Point the aws CLI at /opt/scylladb-backup/.aws for credentials.
os.environ['HOME'] = '/opt/scylladb-backup'

config = configparser.ConfigParser()
config.read('/opt/scylladb-backup/scripts/backup-config.conf')

log_file = f"/opt/scylladb-backup/logs/backup-{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}.log"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler(log_file), logging.StreamHandler(sys.stdout)])
logger = logging.getLogger(__name__)


def run_command(cmd):
    """Run a shell command with a 1h timeout; return (ok, output).

    NOTE: shell=True is acceptable here because every command string is
    built from trusted config values, never from user input.
    """
    try:
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=3600)
        if result.returncode != 0:
            logger.error(f"Command failed: {cmd}")
            logger.error(f"Error: {result.stderr}")
            return False, result.stderr
        return True, result.stdout
    except subprocess.TimeoutExpired:
        logger.error(f"Command timed out: {cmd}")
        return False, "Timeout"


def create_snapshot():
    """Snapshot all keyspaces with nodetool; return the snapshot name or None."""
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    snapshot_name = f"backup_{timestamp}"
    logger.info(f"Creating snapshot: {snapshot_name}")
    # Create snapshot using nodetool
    success, output = run_command(f"nodetool snapshot -t {snapshot_name}")
    if not success:
        logger.error("Failed to create snapshot")
        return None
    return snapshot_name


def upload_to_s3(local_path, s3_key):
    """Upload one archive to the configured bucket; return True on success."""
    bucket = config.get('DEFAULT', 'S3_BUCKET')
    storage_class = config.get('DEFAULT', 'S3_STORAGE_CLASS')
    cmd = f"aws s3 cp {local_path} s3://{bucket}/{s3_key} --storage-class {storage_class}"
    success, output = run_command(cmd)
    if success:
        logger.info(f"Uploaded {local_path} to s3://{bucket}/{s3_key}")
        return True
    else:
        logger.error(f"Failed to upload {local_path}")
        return False


def cleanup_old_backups():
    """Remove leftover temp directories older than LOCAL_RETENTION_DAYS."""
    retention_days = int(config.get('DEFAULT', 'LOCAL_RETENTION_DAYS'))
    cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)
    temp_dir = '/opt/scylladb-backup/temp'
    for item in os.listdir(temp_dir):
        item_path = os.path.join(temp_dir, item)
        if os.path.isdir(item_path):
            item_time = datetime.datetime.fromtimestamp(os.path.getctime(item_path))
            if item_time < cutoff_date:
                shutil.rmtree(item_path)
                logger.info(f"Removed old backup: {item}")


def main():
    """Full backup cycle: snapshot, archive each table, upload, clean up."""
    logger.info("Starting ScyllaDB backup process")
    try:
        snapshot_name = create_snapshot()
        if not snapshot_name:
            sys.exit(1)
        # Walk the data directory looking for the snapshot just created.
        data_dirs = ['/var/lib/scylla/data']
        for data_dir in data_dirs:
            if not os.path.exists(data_dir):
                continue
            for keyspace in os.listdir(data_dir):
                keyspace_path = os.path.join(data_dir, keyspace)
                if not os.path.isdir(keyspace_path):
                    continue
                for table in os.listdir(keyspace_path):
                    table_path = os.path.join(keyspace_path, table)
                    snapshot_path = os.path.join(table_path, 'snapshots', snapshot_name)
                    if os.path.exists(snapshot_path):
                        # One archive per table, tarred relative to the
                        # snapshot directory.
                        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
                        archive_name = f"{keyspace}_{table}_{timestamp}.tar.gz"
                        archive_path = f"/opt/scylladb-backup/temp/{archive_name}"
                        cmd = f"tar -czf {archive_path} -C {snapshot_path} ."
                        success, _ = run_command(cmd)
                        if success:
                            s3_key = f"{config.get('DEFAULT', 'S3_PREFIX')}/{keyspace}/{archive_name}"
                            upload_to_s3(archive_path, s3_key)
                            os.remove(archive_path)
        # Clear snapshot
        run_command(f"nodetool clearsnapshot -t {snapshot_name}")
        # Cleanup old backups
        cleanup_old_backups()
        logger.info("Backup completed successfully")
    except Exception as e:
        logger.error(f"Backup failed: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
EOF
chmod 755 /opt/scylladb-backup/scripts/backup.py
chown scyllabackup:scyllabackup /opt/scylladb-backup/scripts/backup.py
log "[7/8] Creating systemd service and timer..."
# Oneshot service runs the backup as the unprivileged backup user.
cat > /etc/systemd/system/scylladb-backup.service << EOF
[Unit]
Description=ScyllaDB Backup Service
After=scylla-server.service
[Service]
Type=oneshot
User=scyllabackup
Group=scyllabackup
WorkingDirectory=/opt/scylladb-backup
ExecStart=/usr/bin/python3 /opt/scylladb-backup/scripts/backup.py
StandardOutput=journal
StandardError=journal
EOF
# Daily timer with up to 30 min of jitter; Persistent=true catches missed runs.
cat > /etc/systemd/system/scylladb-backup.timer << EOF
[Unit]
Description=ScyllaDB Backup Timer
Requires=scylladb-backup.service
[Timer]
OnCalendar=daily
Persistent=true
RandomizedDelaySec=1800
[Install]
WantedBy=timers.target
EOF
systemctl daemon-reload
systemctl enable scylladb-backup.timer
systemctl start scylladb-backup.timer
log "[8/8] Verifying installation..."
# The timer must be active for daily runs to fire.
if systemctl is-active --quiet scylladb-backup.timer; then
log "✓ Backup timer is active"
else
error "✗ Backup timer is not active"
exit 1
fi
# The backup script must exist and be executable.
if [ -f /opt/scylladb-backup/scripts/backup.py ] && [ -x /opt/scylladb-backup/scripts/backup.py ]; then
log "✓ Backup script is executable"
else
error "✗ Backup script is not properly installed"
exit 1
fi
# The service account must exist.
if id scyllabackup &>/dev/null; then
log "✓ Backup user created successfully"
else
error "✗ Backup user creation failed"
exit 1
fi
log "ScyllaDB backup automation installed successfully!"
log "Backups will run daily and upload to S3 bucket: $S3_BUCKET"
log "Backup logs location: /opt/scylladb-backup/logs/"
log "Configuration file: /opt/scylladb-backup/scripts/backup-config.conf"
log "To run a manual backup: sudo -u scyllabackup python3 /opt/scylladb-backup/scripts/backup.py"
log "To check timer status: systemctl status scylladb-backup.timer"
Review the script before running. Execute with: bash install.sh <s3_bucket> <aws_access_key> <aws_secret_key> [email@example.com]