---
# Safe removal of stopped containers in Proxmox cluster
# Purpose: Reclaim space from unused LXC containers
# Safety:
#   - Defaults to a dry run (dry_run: true); nothing is destroyed until it is
#     explicitly set to false.
#   - Saves each stopped container's config and status before removal. This is
#     a CONFIG-ONLY backup: it does not contain container data and cannot be
#     fed to `pct restore`. Take a vzdump backup first if the data matters.
#   - Only destroys containers that exist on the current node and are stopped.
#
# Parsing note: `pct list` prints the columns "VMID Status Lock Name". The
# Lock column is usually empty, so status is always field 2 and the name is
# read as the last field ($NF).

- name: "Audit and safely remove stopped containers"
  hosts: proxmox
  gather_facts: true
  vars:
    backup_dir: "/tmp/pve-container-backups"
    containers_to_remove: []
    containers_to_keep: []
    create_backups: true
    dry_run: true  # Set to false to actually remove containers

  tasks:
    # Every host writes its own backups, so the directory must exist on every
    # host (not just the first one in the batch).
    - name: Create backup directory
      file:
        path: "{{ backup_dir }}"
        state: directory
        mode: "0755"
      when: create_backups | bool

    - name: List all LXC containers
      shell: pct list | tail -n +2 | awk '{print $1, $NF, $2}' | sort
      register: all_containers
      changed_when: false

    - name: Parse container list
      set_fact:
        container_list: "{{ all_containers.stdout_lines }}"

    - name: Display all containers on this host
      debug:
        msg: |
          All containers on {{ inventory_hostname }}:
          VMID Name Status
          ──────────────────────────────────────
          {% for line in container_list %}
          {{ line }}
          {% endfor %}

    - name: Identify stopped containers
      shell: |
        pct list | tail -n +2 | awk '$2 == "stopped" {print $1, $NF}' | sort
      register: stopped_containers
      changed_when: false

    - name: Display stopped containers
      debug:
        msg: |
          Stopped containers on {{ inventory_hostname }}:
          {{ stopped_containers.stdout or "None found" }}

    - name: "Block: Backup and prepare removal (if stopped containers exist)"
      when: stopped_containers.stdout_lines | length > 0
      block:
        # Allocated size is taken from the rootfs line of the container config
        # (e.g. "rootfs: local-lvm:vm-108-disk-0,size=200G").
        - name: Get detailed info for each stopped container
          shell: |
            pct list | tail -n +2 | awk '$2 == "stopped" {print $1, $NF}' |
            while read -r vmid name; do
              size=$(pct config "$vmid" 2>/dev/null < /dev/null |
                     sed -n 's/^rootfs:.*size=\([^,]*\).*/\1/p')
              echo "$vmid $name ${size:-unknown}"
            done
          register: container_sizes
          changed_when: false

        - name: Display container space usage
          debug:
            msg: |
              Stopped Container Sizes:
              VMID Name Allocated Space
              ─────────────────────────────────────────────
              {% for line in container_sizes.stdout_lines %}
              {{ line }}
              {% endfor %}

        - name: Create container backups
          block:
            # set -e makes the task (and therefore the removal that follows)
            # fail if any single config dump fails.
            - name: Backup container configs
              shell: |
                set -e
                pct list | tail -n +2 | awk '$2 == "stopped" {print $1, $NF}' |
                while read -r vmid name; do
                  echo "Backing up config for $vmid ($name)..."
                  pct config "$vmid" < /dev/null \
                    > "{{ backup_dir }}/container-${vmid}-${name}.conf"
                  echo "Backing up state for $vmid ($name)..."
                  pct status "$vmid" < /dev/null \
                    > "{{ backup_dir }}/container-${vmid}-${name}.status"
                done
              become: true
              register: backup_result
              when: create_backups | bool and not dry_run | bool

            - name: Display backup completion
              debug:
                msg: |
                  ✓ Container configurations backed up to {{ backup_dir }}/
                  Files:
                  {{ backup_result.stdout }}
              when:
                - create_backups | bool
                - not dry_run | bool
                - backup_result.changed

        - name: "Decision: Which containers to keep/remove"
          debug:
            msg: |
              CONTAINER REMOVAL DECISION MATRIX:
              ╔════════════════════════════════════════════════════════════════╗
              ║ Container           │ Size   │ Purpose            │ Action     ║
              ╠════════════════════════════════════════════════════════════════╣
              ║ dlx-wireguard (105) │ 32 GB  │ VPN service        │ REVIEW     ║
              ║ dlx-mysql-02 (108)  │ 200 GB │ MySQL replica      │ REMOVE     ║
              ║ dlx-mysql-03 (109)  │ 200 GB │ MySQL replica      │ REMOVE     ║
              ║ dlx-mattermost (107)│ 32 GB  │ Chat/comms         │ REMOVE     ║
              ║ dlx-nocodb (116)    │ 100 GB │ No-code database   │ REMOVE     ║
              ║ dlx-swarm-* (*)     │ 65 GB  │ Docker swarm nodes │ REMOVE     ║
              ║ dlx-kube-* (*)      │ 50 GB  │ Kubernetes nodes   │ REMOVE     ║
              ╚════════════════════════════════════════════════════════════════╝

              SAFE REMOVAL CANDIDATES (assuming dlx-mysql-01 is in use):
              - dlx-mysql-02, dlx-mysql-03: 400 GB combined
              - dlx-mattermost: 32 GB (if not using for comms)
              - dlx-nocodb: 100 GB (if not in use)
              - dlx-swarm nodes: 195 GB (if Swarm not active)
              - dlx-kube nodes: 150 GB (if Kubernetes not used)

              CONSERVATIVE APPROACH (recommended):
              - Keep: dlx-wireguard (has specific purpose)
              - Remove: All database replicas, swarm/kube nodes = 750+ GB

        - name: "Safety check: Verify before removal"
          debug:
            msg: |
              ⚠️ SAFETY CHECK - DO NOT PROCEED WITHOUT VERIFICATION:

              1. VERIFY BACKUPS:
                 ls -lh {{ backup_dir }}/
                 Should show .conf and .status files for all containers

              2. CHECK DEPENDENCIES:
                 - Is dlx-mysql-01 running and taking load?
                 - Are swarm/kube services actually needed?
                 - Is wireguard currently in use?

              3. DATABASE VERIFICATION:
                 If removing MySQL replicas:
                 - Check that dlx-mysql-01 is healthy
                 - Verify replication is not in progress
                 - Confirm no active connections from replicas

              4. FINAL CONFIRMATION:
                 Review each container's last modification time
                 pct status <vmid>

              Once verified, proceed with removal below.

        - name: "REMOVAL: Delete selected stopped containers"
          block:
            - name: Set containers to remove (customize as needed)
              set_fact:
                containers_to_remove:
                  - vmid: 108
                    name: dlx-mysql-02
                    size: 200
                  - vmid: 109
                    name: dlx-mysql-03
                    size: 200
                  - vmid: 107
                    name: dlx-mattermost
                    size: 32
                  - vmid: 116
                    name: dlx-nocodb
                    size: 100

            # The dry_run flag is normalised to a lowercase literal before the
            # shell compares it: a raw boolean renders as "True", which would
            # never equal "true" and would fall through to the destroy branch.
            # Containers that are absent from this node or not stopped are
            # skipped; --force is deliberately not used.
            - name: Remove containers (DRY RUN - set dry_run=false to execute)
              shell: |
                set -e
                status=$(pct status {{ item.vmid }} 2>/dev/null | awk '{print $2}')
                if [ -z "$status" ]; then
                  echo "SKIP: container {{ item.vmid }} ({{ item.name }}) not found on this host"
                elif [ "$status" != "stopped" ]; then
                  echo "SKIP: container {{ item.vmid }} ({{ item.name }}) is $status, not stopped"
                elif [ "{{ dry_run | bool | ternary('true', 'false') }}" = "true" ]; then
                  echo "DRY RUN: Would remove container {{ item.vmid }} ({{ item.name }})"
                else
                  echo "Removing container {{ item.vmid }} ({{ item.name }})..."
                  pct destroy {{ item.vmid }}
                  echo "Removed: {{ item.vmid }}"
                fi
              become: true
              loop: "{{ containers_to_remove }}"
              register: removal_result
              changed_when: "'Removed:' in removal_result.stdout"

            - name: Display removal results
              debug:
                msg: "{{ removal_result.results | map(attribute='stdout') | list }}"

            - name: Verify space freed
              shell: |
                df -h / | tail -1
                du -sh /var/lib/lxc/ 2>/dev/null || echo "LXC directory info"
              register: space_after
              changed_when: false

            # Only items whose task result is "changed" were really destroyed,
            # so the summary is computed from those rather than from the list.
            - name: Display freed space
              debug:
                msg: |
                  Space verification after removal:
                  {{ space_after.stdout }}

                  Summary:
                  Removed: {{ removal_result.results | selectattr('changed') | list | length }} containers
                  Space recovered: {{ removal_result.results | selectattr('changed') | map(attribute='item.size') | sum }} GB
                  Status: {% if not dry_run | bool %}✓ REMOVED{% else %}DRY RUN - not removed{% endif %}

# Second play of the same playbook. There is intentionally no "---" separator:
# Ansible loads a playbook file as a single YAML document.
- name: "Post-removal validation and reporting"
  hosts: proxmox
  gather_facts: false
  vars:
    # Play vars are not shared between plays; keep in sync with the play above.
    backup_dir: "/tmp/pve-container-backups"

  tasks:
    # printf "%d" prints 0 (not an empty string) when no container matches.
    - name: Final container count
      shell: |
        pct list | tail -n +2 | awk '
          $2 == "running" {running++}
          $2 == "stopped" {stopped++}
          END {printf "Total: %d (Running: %d, Stopped: %d)\n", NR, running, stopped}'
      register: final_count
      changed_when: false

    - name: Display final summary
      debug:
        msg: |
          ╔══════════════════════════════════════════════════════════════╗
          ║            STOPPED CONTAINER REMOVAL COMPLETED               ║
          ╚══════════════════════════════════════════════════════════════╝

          Final Container Status on {{ inventory_hostname }}:
          {{ final_count.stdout }}

          Backup Location: {{ backup_dir }}/
          (Configs retained for 30 days before automatic cleanup)

          To recover a removed container:
          pct restore <vmid> <backup-archive>

          Monitoring:
          - Watch for error messages from removed services
          - Monitor CPU and disk I/O for 48 hours
          - Review application logs for missing dependencies

          Next Step:
          Run: ansible-playbook playbooks/remediate-storage-critical-issues.yml
          To verify final storage utilization

    # Written on every host because both the backups and the "Host:" line are
    # per-host. The timestamp falls back to now() when facts are unavailable
    # (this play does not gather them itself).
    - name: Create recovery guide
      copy:
        content: |
          # Container Recovery Guide
          Generated: {{ ansible_date_time.iso8601 | default(now(utc=true).strftime('%Y-%m-%dT%H:%M:%SZ')) }}
          Host: {{ inventory_hostname }}

          ## Backed Up Containers
          Location: {{ backup_dir }}/

          These files contain container configuration and status only.
          They do NOT contain container data and are not restorable archives.

          To restore a container:
          ```bash
          # Review the saved config
          cat {{ backup_dir }}/container-VMID-NAME.conf

          # Restore to new VMID (e.g., 1000) from a vzdump archive or template
          pct restore 1000 /path/to/vzdump-lxc-VMID-DATE.tar.zst

          # Re-apply settings from the saved config as needed, then verify
          pct list | grep 1000
          pct status 1000
          ```

          ## Backup Retention
          - Automatic cleanup: 30 days
          - Manual archive: Copy to dlx-nfs-sdb-02 for longer retention
          - Format: container-{VMID}-{NAME}.conf
        dest: "/tmp/container-recovery-guide.txt"
        mode: "0644"