# File: dlx-ansible/playbooks/remediate-docker-storage.yml

---
# Detailed Docker storage cleanup for proxmox-01 dlx-docker container
# Targets: proxmox-01 host and dlx-docker LXC container
# Purpose: Reduce dlx-docker storage utilization from 81% to <75%
- name: "Cleanup Docker storage on proxmox-01"
  hosts: proxmox-01
  gather_facts: true
  vars:
    # Not referenced by any task in this play; kept so existing overrides keep working.
    docker_host_ip: "192.168.200.200"
    # Filesystem whose utilization is measured before and after the cleanup.
    docker_mount_point: "/mnt/pve/dlx-docker"
    # false = really remove items; true = analysis/report only (Phase 2 is skipped).
    cleanup_dry_run: false
    # Set to false to skip the all-in-one `docker system prune -a --volumes` step.
    aggressive_prune: true
    # Utilization (percent) the final summary compares against.
    target_usage_pct: 75
    # Free-space goal in GB; only reported in the final summary, not enforced.
    min_free_space_gb: 100
  tasks:
    - name: Pre-flight checks
      block:
        - name: Verify Docker is accessible
          command: docker --version
          register: docker_version
          changed_when: false

        - name: Display Docker version
          debug:
            msg: "Docker installed: {{ docker_version.stdout }}"

        # -P keeps every filesystem on a single line, -k pins the unit to 1 KiB blocks.
        - name: Get dlx-docker mount point info
          shell: df -Pk {{ docker_mount_point | quote }} | tail -1
          register: mount_info
          changed_when: false

        # The capacity column looks like "81%"; the % must be stripped before
        # `| int`, otherwise the filter silently yields 0.
        - name: Parse current utilization
          set_fact:
            docker_disk_usage: "{{ mount_info.stdout.split()[4] | replace('%', '') | int }}"
            docker_disk_total: "{{ mount_info.stdout.split()[1] | int }}"

        - name: Display current utilization
          debug:
            msg: |
              Docker Storage Status:
              Mount: {{ docker_mount_point }}
              Usage: {{ mount_info.stdout }}

    - name: "Phase 1: Analyze Docker resource usage"
      block:
        # Docker's Go-template fields use the same {{ }} delimiters as Jinja2,
        # so they are wrapped in raw blocks to reach the shell unmodified.
        - name: Get container disk usage
          shell: |
            {% raw %}
            docker ps -a --format "table {{.Names}}\t{{.State}}\t{{.Size}}" | \
              awk 'NR>1 {print $1, $2, $3}'
            {% endraw %}
          register: container_sizes
          changed_when: false

        - name: Display container sizes
          debug:
            msg: |
              Container Disk Usage:
              {{ container_sizes.stdout }}

        # No `table` prefix here: a header row would be sorted in with the data.
        - name: Get image disk usage
          shell: |
            {% raw %}
            docker images --format "{{.Repository}}\t{{.Size}}" | sort -k2 -hr
            {% endraw %}
          register: image_sizes
          changed_when: false

        - name: Display image sizes
          debug:
            msg: |
              Docker Image Sizes:
              {{ image_sizes.stdout }}

        - name: Find dangling resources
          block:
            - name: Count dangling images
              shell: docker images -f dangling=true -q | wc -l
              register: dangling_count
              changed_when: false

            - name: Count unused volumes
              shell: docker volume ls -f dangling=true -q | wc -l
              register: volume_count
              changed_when: false

            - name: Display dangling resources
              debug:
                msg: |
                  Dangling Resources:
                  - Dangling images: {{ dangling_count.stdout }} found
                  - Dangling volumes: {{ volume_count.stdout }} found

    # Every task in this phase is skipped when cleanup_dry_run is true.
    - name: "Phase 2: Remove unused resources"
      block:
        - name: Remove dangling images
          command: docker image prune -f
          register: image_prune
          when: not (cleanup_dry_run | bool)

        - name: Display pruned images
          debug:
            msg: "{{ image_prune.stdout }}"
          when: not (cleanup_dry_run | bool) and image_prune.changed

        - name: Remove dangling volumes
          command: docker volume prune -f
          register: volume_prune
          when: not (cleanup_dry_run | bool)

        - name: Display pruned volumes
          debug:
            msg: "{{ volume_prune.stdout }}"
          when: not (cleanup_dry_run | bool) and volume_prune.changed

        # Best effort: a failure here must not abort the rest of the cleanup.
        - name: Remove unused networks
          command: docker network prune -f
          register: network_prune
          when: not (cleanup_dry_run | bool)
          failed_when: false

        - name: Remove build cache
          command: docker builder prune -f -a
          register: cache_prune
          when: not (cleanup_dry_run | bool)
          failed_when: false  # May not be available in older Docker

        # Removes stopped containers, all unused images and unused volumes in
        # one go; disable with aggressive_prune=false.
        - name: Run full system prune (aggressive)
          command: docker system prune -a -f --volumes
          register: system_prune
          when: not (cleanup_dry_run | bool) and (aggressive_prune | bool)

        - name: Display system prune result
          debug:
            msg: "{{ system_prune.stdout }}"
          when: not (cleanup_dry_run | bool) and (aggressive_prune | bool)

    - name: "Phase 3: Verify cleanup results"
      block:
        - name: Get updated Docker stats
          command: docker system df
          register: docker_after
          changed_when: false

        - name: Display Docker stats after cleanup
          debug:
            msg: |
              Docker Stats After Cleanup:
              {{ docker_after.stdout }}

        - name: Get updated mount usage
          shell: df -Pk {{ docker_mount_point | quote }} | tail -1
          register: mount_after
          changed_when: false

        - name: Display mount usage after
          debug:
            msg: "Mount usage after: {{ mount_after.stdout }}"

    # Runs after Phase 2, so when the aggressive prune was executed the stopped
    # containers have already been removed and these lists are usually empty.
    - name: "Phase 4: Identify additional cleanup candidates"
      block:
        - name: Find stopped containers
          command: docker ps -a -f status=exited -q
          register: stopped_containers
          changed_when: false

        # .CreatedAt contains spaces ("2024-01-15 10:30:00 +0000 UTC"), so awk
        # must split on tabs for $2/$3 to be the container ID and name.
        # The date prefix compares correctly as a plain string against cutoff.
        - name: Find containers older than 30 days
          shell: |
            {% raw %}
            docker ps -a --format "{{.CreatedAt}}\t{{.ID}}\t{{.Names}}" | \
              awk -F'\t' -v cutoff="$(date -d '30 days ago' '+%Y-%m-%d')" \
                '$1 < cutoff {print $2, $3}' | head -5
            {% endraw %}
          register: old_containers
          changed_when: false

        - name: Display cleanup candidates
          debug:
            msg: |
              Additional Cleanup Candidates:
              Stopped containers ({{ stopped_containers.stdout_lines | length }}):
              {{ stopped_containers.stdout }}
              Containers older than 30 days:
              {{ old_containers.stdout or "None found" }}
              To remove stopped containers:
              docker container prune -f

    - name: "Phase 5: Space verification and summary"
      block:
        - name: Final space check
          shell: df -Pk {{ docker_mount_point | quote }} | tail -1
          register: final_df
          changed_when: false

        # df -k reports 1 KiB blocks: divide by 1024 * 1024 to get GiB.
        - name: Parse final utilization
          set_fact:
            final_total_gb: "{{ (df_fields[1] | int) // 1048576 }}"
            final_used_gb: "{{ (df_fields[2] | int) // 1048576 }}"
            final_avail_gb: "{{ (df_fields[3] | int) // 1048576 }}"
            final_usage_pct: "{{ df_fields[4] | replace('%', '') | int }}"
          vars:
            df_fields: "{{ final_df.stdout.split() }}"

        # The target check uses the post-cleanup percentage, not the value
        # captured during the pre-flight checks.
        - name: Display final status
          debug:
            msg: |
              ╔══════════════════════════════════════════════════════════════╗
              ║               DOCKER STORAGE CLEANUP COMPLETED               ║
              ╚══════════════════════════════════════════════════════════════╝
              Final Status: Total: {{ final_total_gb }}GB Used: {{ final_used_gb }}GB Available: {{ final_avail_gb }}GB Percentage: {{ final_usage_pct }}%
              Before cleanup: {{ docker_disk_usage }}%
              Free-space goal: {{ min_free_space_gb }}GB
              Target: <{{ target_usage_pct }}% utilization
              {% if (final_usage_pct | int) < (target_usage_pct | int) %}
              ✓ TARGET MET
              {% else %}
              ⚠️ TARGET NOT MET - May need manual cleanup of large images/containers
              {% endif %}
              Next Steps:
              1. Monitor for 24 hours to ensure stability
              2. Schedule weekly cleanup: docker system prune -af
              3. Configure log rotation to prevent regrowth
              4. Consider storing large images on dlx-nfs-* storage
              If still >80%:
              - Review running container logs (docker logs <id> | wc -l)
              - Migrate large containers to separate storage
              - Archive old build artifacts and analysis data
- name: "Configure automatic Docker cleanup on proxmox-01"
  hosts: proxmox-01
  gather_facts: true
  # Every task below writes root-owned state (root crontab, /etc, /usr/local/bin).
  become: true
  vars:
    # Filesystem watched by the hourly monitoring script.
    docker_mount_point: "/mnt/pve/dlx-docker"
    # Utilization (percent) above which the monitor logs a warning.
    storage_alert_threshold: 80
  tasks:
    - name: Create Docker cleanup cron job
      cron:
        name: "Weekly Docker system prune"
        weekday: "0"  # Sunday
        hour: "2"
        minute: "0"
        job: "docker system prune -af --volumes >> /var/log/docker-cleanup.log 2>&1"
        user: root

    - name: Create cleanup log rotation
      copy:
        content: |
          /var/log/docker-cleanup.log {
              daily
              rotate 7
              compress
              missingok
              notifempty
          }
        dest: /etc/logrotate.d/docker-cleanup
        owner: root
        group: root
        mode: "0644"

    # The script logs through syslog (tag docker-monitor). An unreadable mount
    # is reported as an error instead of tripping the numeric comparison.
    - name: Set up disk usage monitoring
      copy:
        content: |
          #!/bin/bash
          # Monitor Docker storage utilization
          THRESHOLD={{ storage_alert_threshold | int }}
          MOUNT_POINT={{ docker_mount_point | quote }}
          USAGE=$(df -P "$MOUNT_POINT" 2>/dev/null | awk 'NR==2 {gsub("%", "", $5); print $5}')
          if [ -z "$USAGE" ]; then
            echo "ERROR: cannot read usage of $MOUNT_POINT" | \
              logger -t docker-monitor -p local0.err
            exit 1
          fi
          if [ "$USAGE" -gt "$THRESHOLD" ]; then
            echo "WARNING: dlx-docker storage at ${USAGE}%" | \
              logger -t docker-monitor -p local0.warning
            # Could send alert here
          fi
        dest: /usr/local/bin/check-docker-storage.sh
        owner: root
        group: root
        mode: "0755"

    - name: Add monitoring to crontab
      cron:
        name: "Check Docker storage hourly"
        hour: "*"
        minute: "0"
        job: "/usr/local/bin/check-docker-storage.sh"
        user: root

    # cron evaluates schedules in the host's local time zone, so the summary
    # does not claim UTC.
    - name: Display automation setup
      debug:
        msg: |
          ✓ Configured automatic Docker cleanup
          - Weekly prune: Every Sunday at 02:00 (host local time)
          - Hourly monitoring: Checks storage usage
          - Log rotation: Daily rotation with 7-day retention
          View cleanup logs:
          tail -f /var/log/docker-cleanup.log