---
# Safe removal of stopped containers in a Proxmox cluster
# Purpose: Reclaim space from unused LXC containers
# Safety: Saves each container's config and status output before removal
#         (configuration only - container data is NOT backed up)

# Play: audit the LXC containers on every Proxmox host, save the configuration
# of each stopped container, and - only when dry_run is false - destroy the
# removal candidates that are actually stopped on that host.
#
# `pct list` prints the columns "VMID Status Lock Name". The Lock column is
# usually empty, so awk sees Status as $2 and Name as the last field ($NF).
- name: "Audit and safely remove stopped containers"
  hosts: proxmox
  gather_facts: true
  vars:
    # NOTE(review): /tmp is often cleared on reboot; consider a persistent
    # path if these config backups must survive a restart.
    backup_dir: "/tmp/pve-container-backups"
    # Overwritten below with the removal candidates stopped on this host.
    containers_to_remove: []
    # VMIDs (integers) or names that must never be removed.
    containers_to_keep: []
    create_backups: true
    dry_run: true  # Set to false to actually remove containers
  tasks:
    # Every host writes its own backups, so the directory must exist on
    # every host (no run_once / delegate_to here).
    - name: Create backup directory
      file:
        path: "{{ backup_dir }}"
        state: directory
        mode: "0755"
      when: create_backups | bool

    # Output is reordered to "VMID Name Status" to match the table header
    # printed by the display task below.
    - name: List all LXC containers
      shell: pct list | awk 'NR > 1 {print $1, $NF, $2}' | sort
      register: all_containers
      changed_when: false

    - name: Parse container list
      set_fact:
        container_list: "{{ all_containers.stdout_lines }}"

    - name: Display all containers on this host
      debug:
        msg: |
          All containers on {{ inventory_hostname }}:
          VMID Name Status
          ──────────────────────────────────────
          {% for line in container_list %}
          {{ line }}
          {% endfor %}

    # One "VMID Name" line per stopped container.
    - name: Identify stopped containers
      shell: |
        pct list | awk 'NR > 1 && $2 == "stopped" {print $1, $NF}' | sort
      register: stopped_containers
      changed_when: false

    - name: Display stopped containers
      debug:
        msg: |
          Stopped containers on {{ inventory_hostname }}:
          {{ stopped_containers.stdout or "None found" }}

    - name: "Block: Backup and prepare removal (if stopped containers exist)"
      when: stopped_containers.stdout_lines | length > 0
      block:
        # Reports `du` of /var/lib/lxc/<vmid>; prints 0 when the path is
        # missing or unreadable.
        - name: Get detailed info for each stopped container
          shell: |
            pct list | awk 'NR > 1 && $2 == "stopped" {print $1, $NF}' |
            while read -r vmid name; do
              size=$(du -sh "/var/lib/lxc/$vmid" 2>/dev/null | cut -f1)
              echo "$vmid $name ${size:-0}"
            done
          register: container_sizes
          changed_when: false

        - name: Display container space usage
          debug:
            msg: |
              Stopped Container Sizes:
              VMID Name Allocated Space
              ─────────────────────────────────────────────
              {% for line in container_sizes.stdout_lines %}
              {{ line }}
              {% endfor %}

        - name: Create container backups
          when: (create_backups | bool) and not (dry_run | bool)
          block:
            # `set -eu` makes the task fail as soon as one config cannot be
            # saved, so removal never proceeds on a partial backup.
            # Only configuration and status output are saved, not data.
            - name: Backup container configs
              shell: |
                set -eu
                pct list | awk 'NR > 1 && $2 == "stopped" {print $1, $NF}' |
                while read -r vmid name; do
                  echo "Backing up config for $vmid ($name)..."
                  pct config "$vmid" < /dev/null > "{{ backup_dir }}/container-${vmid}-${name}.conf"
                  echo "Backing up state for $vmid ($name)..."
                  pct status "$vmid" < /dev/null > "{{ backup_dir }}/container-${vmid}-${name}.status"
                done
              become: true
              register: backup_result

            - name: Display backup completion
              debug:
                msg: |
                  ✓ Container configurations backed up to {{ backup_dir }}/
                  Files:
                  {{ backup_result.stdout }}
              when: backup_result is changed

        # Static guidance text; it is not derived from the live container list.
        - name: "Decision: Which containers to keep/remove"
          debug:
            msg: |
              CONTAINER REMOVAL DECISION MATRIX:

              ╔════════════════════════════════════════════════════════════════╗
              ║ Container │ Size │ Purpose │ Action ║
              ╠════════════════════════════════════════════════════════════════╣
              ║ dlx-wireguard (105) │ 32 GB │ VPN service │ REVIEW ║
              ║ dlx-mysql-02 (108) │ 200 GB │ MySQL replica │ REMOVE ║
              ║ dlx-mysql-03 (109) │ 200 GB │ MySQL replica │ REMOVE ║
              ║ dlx-mattermost (107)│ 32 GB │ Chat/comms │ REMOVE ║
              ║ dlx-nocodb (116) │ 100 GB │ No-code database │ REMOVE ║
              ║ dlx-swarm-* (*) │ 65 GB │ Docker swarm nodes │ REMOVE ║
              ║ dlx-kube-* (*) │ 50 GB │ Kubernetes nodes │ REMOVE ║
              ╚════════════════════════════════════════════════════════════════╝

              SAFE REMOVAL CANDIDATES (assuming dlx-mysql-01 is in use):
              - dlx-mysql-02, dlx-mysql-03: 400 GB combined
              - dlx-mattermost: 32 GB (if not using for comms)
              - dlx-nocodb: 100 GB (if not in use)
              - dlx-swarm nodes: 195 GB (if Swarm not active)
              - dlx-kube nodes: 150 GB (if Kubernetes not used)

              CONSERVATIVE APPROACH (recommended):
              - Keep: dlx-wireguard (has specific purpose)
              - Remove: All database replicas, swarm/kube nodes = 750+ GB

        - name: "Safety check: Verify before removal"
          debug:
            msg: |
              ⚠️ SAFETY CHECK - DO NOT PROCEED WITHOUT VERIFICATION:

              1. VERIFY BACKUPS:
              ls -lh {{ backup_dir }}/
              Should show .conf and .status files for all containers

              2. CHECK DEPENDENCIES:
              - Is dlx-mysql-01 running and taking load?
              - Are swarm/kube services actually needed?
              - Is wireguard currently in use?

              3. DATABASE VERIFICATION:
              If removing MySQL replicas:
              - Check that dlx-mysql-01 is healthy
              - Verify replication is not in progress
              - Confirm no active connections from replicas

              4. FINAL CONFIRMATION:
              Review each container's last modification time
              pct status <vmid>

              Once verified, proceed with removal below.

        - name: "REMOVAL: Delete selected stopped containers"
          block:
            - name: Set removal candidates (customize as needed)
              set_fact:
                removal_candidates:
                  - vmid: 108
                    name: dlx-mysql-02
                    size: 200
                  - vmid: 109
                    name: dlx-mysql-03
                    size: 200
                  - vmid: 107
                    name: dlx-mattermost
                    size: 32
                  - vmid: 116
                    name: dlx-nocodb
                    size: 100

            # Only act on candidates reported as stopped on THIS host, and
            # never on anything listed in containers_to_keep.
            - name: Keep only candidates that are stopped on this host
              set_fact:
                containers_to_remove: >-
                  {{ removal_candidates
                     | selectattr('vmid', 'in', stopped_vmids)
                     | rejectattr('vmid', 'in', containers_to_keep)
                     | rejectattr('name', 'in', containers_to_keep)
                     | list }}
              vars:
                # First field of each "VMID Name" line, as integers.
                stopped_vmids: >-
                  {{ stopped_containers.stdout_lines
                     | map('regex_replace', ' .*$', '')
                     | map('int')
                     | list }}

            # A YAML boolean renders as "True"/"False" in Jinja, so the flag
            # is normalised to a lowercase literal before the shell compares
            # it; otherwise a dry run would fall into the destroy branch.
            # --force is omitted so pct refuses a container that has been
            # started since it was listed.
            - name: Remove containers (DRY RUN - set dry_run=false to execute)
              shell: |
                set -e
                if [ "{{ dry_run | bool | ternary('true', 'false') }}" = "true" ]; then
                  echo "DRY RUN: Would remove container {{ item.vmid }} ({{ item.name }})"
                else
                  echo "Removing container {{ item.vmid }} ({{ item.name }})..."
                  pct destroy {{ item.vmid }}
                  echo "Removed: {{ item.vmid }}"
                fi
              become: true
              loop: "{{ containers_to_remove }}"
              register: removal_result
              changed_when: not (dry_run | bool)

            - name: Display removal results
              debug:
                msg: "{{ removal_result.results | map(attribute='stdout') | list }}"

            - name: Verify space freed
              shell: |
                df -h / | tail -1
                du -sh /var/lib/lxc/ 2>/dev/null || echo "LXC directory info"
              register: space_after
              changed_when: false

            # "Space recovered" is the sum of the declared `size` values of
            # the selected containers, not a measured difference.
            - name: Display freed space
              debug:
                msg: |
                  Space verification after removal:
                  {{ space_after.stdout }}

                  Summary:
                  Removed: {{ containers_to_remove | length }} containers
                  Space recovered: {{ containers_to_remove | map(attribute='size') | sum }} GB
                  Status: {% if not (dry_run | bool) %}✓ REMOVED{% else %}DRY RUN - not removed{% endif %}
|
|
|
|
# Play: report the container counts after the removal play and write a
# recovery guide on each host.
#
# There is deliberately no '---' separator before this play: an Ansible
# playbook must be a single YAML document containing a list of plays.
- name: "Post-removal validation and reporting"
  hosts: proxmox
  gather_facts: false
  vars:
    # Play-level vars do not carry over from the previous play, so the
    # default is declared again here; keep both values in sync
    # (or pass -e backup_dir=... to set both at once).
    backup_dir: "/tmp/pve-container-backups"
  tasks:
    # `pct list` columns are "VMID Status Lock Name", so Status is $2.
    # printf "%d" prints 0 for a state with no containers instead of an
    # empty string.
    - name: Final container count
      shell: |
        pct list | awk '
          NR > 1 { total++ }
          NR > 1 && $2 == "running" { running++ }
          NR > 1 && $2 == "stopped" { stopped++ }
          END { printf "Total: %d (Running: %d, Stopped: %d)\n", total, running, stopped }'
      register: final_count
      changed_when: false

    # NOTE(review): nothing in this playbook implements the "30 days"
    # cleanup mentioned in the message - confirm it exists elsewhere.
    - name: Display final summary
      debug:
        msg: |
          ╔══════════════════════════════════════════════════════════════╗
          ║ STOPPED CONTAINER REMOVAL COMPLETED ║
          ╚══════════════════════════════════════════════════════════════╝

          Final Container Status on {{ inventory_hostname }}:
          {{ final_count.stdout }}

          Backup Location: {{ backup_dir }}/
          (Configs retained for 30 days before automatic cleanup)

          To recover a removed container:
          Recreate it with 'pct create', using the saved .conf file as a reference.
          (pct restore <new-vmid> <archive> needs a vzdump archive; the .conf files hold settings only)

          Monitoring:
          - Watch for error messages from removed services
          - Monitor CPU and disk I/O for 48 hours
          - Review application logs for missing dependencies

          Next Step:
          Run: ansible-playbook playbooks/remediate-storage-critical-issues.yml
          To verify final storage utilization

    # Written on every host because the guide names the host it describes.
    # now() is used for the timestamp so this play does not depend on facts
    # gathered by an earlier play.
    - name: Create recovery guide
      copy:
        content: |
          # Container Recovery Guide
          Generated: {{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}
          Host: {{ inventory_hostname }}

          ## Backed Up Containers
          Location: {{ backup_dir }}/

          To restore a container:
          ```bash
          # Inspect the saved configuration
          cat {{ backup_dir }}/container-VMID-NAME.conf

          # Restore to new VMID (e.g., 1000) from a vzdump archive, if one exists
          # (the saved .conf holds settings only and cannot be given to pct restore)
          pct restore 1000 /path/to/vzdump-lxc-VMID-backup.tar.zst

          # Verify
          pct list | grep 1000
          pct status 1000
          ```

          ## Backup Retention
          - Automatic cleanup: 30 days
          - Manual archive: Copy to dlx-nfs-sdb-02 for longer retention
          - Format: container-{VMID}-{NAME}.conf

        dest: "/tmp/container-recovery-guide.txt"
        mode: "0644"