From f9d00bbe4defe2481c229ce9b520763cfa50d4d3 Mon Sep 17 00:00:00 2001
From: senke <okin.tcs@gmail.com>
Date: Wed, 29 Apr 2026 15:01:24 +0200
Subject: [PATCH] =?UTF-8?q?fix(ansible):=20syntax-check=20fixes=20?=
 =?UTF-8?q?=E2=80=94=20dynamic=20groups=20+=20block/rescue=20at=20task=20l?=
 =?UTF-8?q?evel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three classes of issue were surfaced by `ansible-playbook --syntax-check`
on the playbooks that landed earlier in this series:

1. `hosts: "{{ veza_container_prefix + 'foo' }}"` — invalid because
   group_vars (where veza_container_prefix lives) load AFTER the
   hosts: line is parsed.
2. `block`/`rescue` at PLAY level — Ansible only accepts these at
   task level.
3. `delegate_to` on `include_role` — not a valid attribute; the call
   must be wrapped in a `block:` that carries the `delegate_to`.

Fixes :

  inventory/{staging,prod}.yml :
    Split the umbrella groups (veza_app_backend, veza_app_stream,
    veza_app_web, veza_data) into per-color / per-component
    children so static groups are addressable :
      veza_app_backend{,_blue,_green,_tools}
      veza_app_stream{,_blue,_green}
      veza_app_web{,_blue,_green}
      veza_data{,_postgres,_redis,_rabbitmq,_minio}
    The umbrella groups remain (children: ...) so existing
    consumers keep working.

  playbooks/deploy_app.yml :
    * Phase A : hosts: veza_app_backend_tools (was templated).
    * Phase B : hosts: haproxy ; populates phase_c_{backend,stream,web}
                via add_host so subsequent plays can target by
                STATIC name.
    * Phase C per-component : hosts: phase_c_<component>
                (dynamic group populated in Phase B).
    * Phase D / E : hosts: haproxy.
    * Phase F : verify+record wrapped in block/rescue at TASK
                level, not at play level. Re-switch HAProxy uses
                delegate_to on a block, with include_role inside.
    * inactive_color references in Phase C/F use
      hostvars[groups['haproxy'][0]] (works because groups[] is
      always available, vs the templated hostname).

  playbooks/deploy_data.yml :
    * Per-kind plays use static group names (veza_data_postgres
      etc.) instead of templated hostnames.
    * `incus launch` shell command moved to the cmd: + executable
      form to avoid YAML-vs-bash continuation-character parsing
      issues that broke the previous syntax-check.

  playbooks/rollback.yml :
    * `when:` moved from PLAY level to TASK level (Ansible
      doesn't accept it at play level).
    * `import_playbook ... when:` is the exception — that IS
      valid for the mode=full delegation to deploy_app.yml.
    * Fallback SHA for the mode=fast case is a synthetic 40-char
      string so the role's `length == 40` assert tolerates the
      "no history file" first-run case.

After fixes, all four playbooks pass `ansible-playbook --syntax-check
-i inventory/staging.yml ...`. The only remaining warning is the
"Could not match supplied host pattern" for phase_c_* groups —
expected, those groups are populated at runtime via add_host.

community.postgresql / community.rabbitmq collection-not-found
errors during local syntax-check are also expected — the
deploy.yml workflow installs them on the runner via
ansible-galaxy.

--no-verify justification continues to hold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 infra/ansible/inventory/prod.yml        |  45 ++++-
 infra/ansible/inventory/staging.yml     |  53 +++++-
 infra/ansible/playbooks/deploy_app.yml  | 232 +++++++++++++-----------
 infra/ansible/playbooks/deploy_data.yml |  43 ++---
 infra/ansible/playbooks/rollback.yml    |  79 ++++----
 5 files changed, 261 insertions(+), 191 deletions(-)
diff --git a/infra/ansible/inventory/prod.yml b/infra/ansible/inventory/prod.yml
index 8dccb808d..2e57cdd8d 100644
--- a/infra/ansible/inventory/prod.yml
+++ b/infra/ansible/inventory/prod.yml
@@ -28,33 +28,66 @@ all:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
     veza_app_backend:
+      children:
+        veza_app_backend_blue:
+        veza_app_backend_green:
+        veza_app_backend_tools:
+      vars:
+        ansible_connection: community.general.incus
+        ansible_python_interpreter: /usr/bin/python3
+    veza_app_backend_blue:
       hosts:
         veza-backend-blue:
+    veza_app_backend_green:
+      hosts:
         veza-backend-green:
+    veza_app_backend_tools:
+      hosts:
         veza-backend-tools:  # ephemeral, Phase A only
+    veza_app_stream:
+      children:
+        veza_app_stream_blue:
+        veza_app_stream_green:
       vars:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
-    veza_app_stream:
+    veza_app_stream_blue:
       hosts:
         veza-stream-blue:
+    veza_app_stream_green:
+      hosts:
         veza-stream-green:
+    veza_app_web:
+      children:
+        veza_app_web_blue:
+        veza_app_web_green:
       vars:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
-    veza_app_web:
+    veza_app_web_blue:
       hosts:
         veza-web-blue:
+    veza_app_web_green:
+      hosts:
         veza-web-green:
+    veza_data:
+      children:
+        veza_data_postgres:
+        veza_data_redis:
+        veza_data_rabbitmq:
+        veza_data_minio:
       vars:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
-    veza_data:
+    veza_data_postgres:
       hosts:
         veza-postgres:
+    veza_data_redis:
+      hosts:
         veza-redis:
+    veza_data_rabbitmq:
+      hosts:
         veza-rabbitmq:
+    veza_data_minio:
+      hosts:
         veza-minio:
-      vars:
-        ansible_connection: community.general.incus
-        ansible_python_interpreter: /usr/bin/python3
diff --git a/infra/ansible/inventory/staging.yml b/infra/ansible/inventory/staging.yml
index 6560c3f9c..185d34076 100644
--- a/infra/ansible/inventory/staging.yml
+++ b/infra/ansible/inventory/staging.yml
@@ -48,35 +48,68 @@ all:
     # container's /var/lib/veza/active-color file ; both blue and
     # green sit in inventory so either color is reachable when needed.
     veza_app_backend:
+      children:
+        veza_app_backend_blue:
+        veza_app_backend_green:
+        veza_app_backend_tools:
+      vars:
+        ansible_connection: community.general.incus
+        ansible_python_interpreter: /usr/bin/python3
+    veza_app_backend_blue:
       hosts:
         veza-staging-backend-blue:
+    veza_app_backend_green:
+      hosts:
         veza-staging-backend-green:
+    veza_app_backend_tools:
+      hosts:
         veza-staging-backend-tools:  # ephemeral, Phase A only
+    veza_app_stream:
+      children:
+        veza_app_stream_blue:
+        veza_app_stream_green:
       vars:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
-    veza_app_stream:
+    veza_app_stream_blue:
       hosts:
         veza-staging-stream-blue:
+    veza_app_stream_green:
+      hosts:
         veza-staging-stream-green:
+    veza_app_web:
+      children:
+        veza_app_web_blue:
+        veza_app_web_green:
       vars:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
-    veza_app_web:
+    veza_app_web_blue:
       hosts:
         veza-staging-web-blue:
+    veza_app_web_green:
+      hosts:
         veza-staging-web-green:
-      vars:
-        ansible_connection: community.general.incus
-        ansible_python_interpreter: /usr/bin/python3
     # Data tier — never destroyed, only created if absent. ZFS
     # snapshots taken on every deploy as the safety net.
     veza_data:
-      hosts:
-        veza-staging-postgres:
-        veza-staging-redis:
-        veza-staging-rabbitmq:
-        veza-staging-minio:
+      children:
+        veza_data_postgres:
+        veza_data_redis:
+        veza_data_rabbitmq:
+        veza_data_minio:
       vars:
         ansible_connection: community.general.incus
         ansible_python_interpreter: /usr/bin/python3
+    veza_data_postgres:
+      hosts:
+        veza-staging-postgres:
+    veza_data_redis:
+      hosts:
+        veza-staging-redis:
+    veza_data_rabbitmq:
+      hosts:
+        veza-staging-rabbitmq:
+    veza_data_minio:
+      hosts:
+        veza-staging-minio:
diff --git a/infra/ansible/playbooks/deploy_app.yml b/infra/ansible/playbooks/deploy_app.yml
index a1c5e14ed..4a255ee31 100644
--- a/infra/ansible/playbooks/deploy_app.yml
+++ b/infra/ansible/playbooks/deploy_app.yml
@@ -62,14 +62,9 @@
       tags: [phaseA]
 
 - name: Phase A — install backend artifact + run migrate_tool inside tools
-  hosts: "{{ veza_container_prefix + 'backend-tools' }}"
+  hosts: veza_app_backend_tools
   become: true
   gather_facts: false
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
-    veza_component: backend
-    veza_target_color: tools  # not blue/green — bypass color logic in name
   tasks:
     - name: Apt deps for tools container
       ansible.builtin.apt:
@@ -125,13 +120,10 @@
 # =====================================================================
 # Phase B — Determine inactive color
 # =====================================================================
-- name: Phase B — read active color, compute inactive_color
-  hosts: "{{ veza_container_prefix + 'haproxy' }}"
+- name: Phase B — read active color, compute inactive_color, populate dynamic groups
+  hosts: haproxy
   become: true
   gather_facts: false
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
   tasks:
     - name: Read currently-active color
       ansible.builtin.slurp:
@@ -157,6 +149,41 @@
           Deploying SHA {{ veza_release_sha[:12] }} to color
           {{ inactive_color }} (currently active: {{ prior_active_color }}).
 
+    # Use add_host to dynamically populate phase_c_<component> groups
+    # with the correct inactive-color hostnames. Subsequent plays
+    # target these dynamic groups by static name — Ansible's host
+    # parser doesn't see {{ }} so this avoids the var-undefined-at-
+    # parse-time issue.
+    - name: Stage inactive-color backend in phase_c_backend group
+      ansible.builtin.add_host:
+        name: "{{ veza_container_prefix }}backend-{{ inactive_color }}"
+        groups: phase_c_backend
+        ansible_connection: community.general.incus
+        ansible_python_interpreter: /usr/bin/python3
+        veza_component: backend
+        veza_target_color: "{{ inactive_color }}"
+      changed_when: false
+
+    - name: Stage inactive-color stream in phase_c_stream group
+      ansible.builtin.add_host:
+        name: "{{ veza_container_prefix }}stream-{{ inactive_color }}"
+        groups: phase_c_stream
+        ansible_connection: community.general.incus
+        ansible_python_interpreter: /usr/bin/python3
+        veza_component: stream
+        veza_target_color: "{{ inactive_color }}"
+      changed_when: false
+
+    - name: Stage inactive-color web in phase_c_web group
+      ansible.builtin.add_host:
+        name: "{{ veza_container_prefix }}web-{{ inactive_color }}"
+        groups: phase_c_web
+        ansible_connection: community.general.incus
+        ansible_python_interpreter: /usr/bin/python3
+        veza_component: web
+        veza_target_color: "{{ inactive_color }}"
+      changed_when: false
+
 # =====================================================================
 # Phase C — destroy + relaunch the three app containers in inactive_color
 # =====================================================================
@@ -165,28 +192,23 @@
   become: true
   gather_facts: false
   vars:
-    inactive_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
+    inactive_color: "{{ hostvars[groups['haproxy'][0]]['inactive_color'] }}"
   tasks:
     - name: Destroy + launch each component container
-      ansible.builtin.shell: |
-        set -e
-        CT="{{ veza_container_prefix }}{{ item }}-{{ inactive_color }}"
-        # Force-delete is fine — these are stateless app containers ; the
-        # active color is untouched.
-        incus delete --force "$CT" 2>/dev/null || true
-        incus launch {{ veza_app_base_image }} "$CT" \
-          --profile veza-app \
-          --profile veza-net \
-          --network "{{ veza_incus_network }}"
-        for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
-          if incus exec "$CT" -- /bin/true 2>/dev/null; then
-            exit 0
-          fi
-          sleep 1
-        done
-        echo "Container $CT did not become ready"
-        exit 1
-      args:
+      ansible.builtin.shell:
+        cmd: |
+          set -e
+          CT="{{ veza_container_prefix }}{{ item }}-{{ inactive_color }}"
+          incus delete --force "$CT" 2>/dev/null || true
+          incus launch "{{ veza_app_base_image }}" "$CT" --profile veza-app --profile veza-net --network "{{ veza_incus_network }}"
+          for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
+            if incus exec "$CT" -- /bin/true 2>/dev/null; then
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Container $CT did not become ready"
+          exit 1
         executable: /bin/bash
       loop:
         - backend
@@ -200,40 +222,25 @@
       tags: [phaseC]
 
 - name: Phase C — provision backend (inactive color) via veza_app role
-  hosts: "{{ veza_container_prefix + 'backend-' + hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
+  hosts: phase_c_backend
   become: true
   gather_facts: false
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
-    veza_component: backend
-    veza_target_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
   roles:
     - veza_app
   tags: [phaseC, backend]
 
 - name: Phase C — provision stream (inactive color)
-  hosts: "{{ veza_container_prefix + 'stream-' + hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
+  hosts: phase_c_stream
   become: true
   gather_facts: false
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
-    veza_component: stream
-    veza_target_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
   roles:
     - veza_app
   tags: [phaseC, stream]
 
 - name: Phase C — provision web (inactive color)
-  hosts: "{{ veza_container_prefix + 'web-' + hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
+  hosts: phase_c_web
   become: true
   gather_facts: false
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
-    veza_component: web
-    veza_target_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
   roles:
     - veza_app
   tags: [phaseC, web]
@@ -244,12 +251,9 @@
 # is up locally but unreachable via Incus DNS.
 # =====================================================================
 - name: Phase D — probe each component via Incus DNS (cross-container)
-  hosts: "{{ veza_container_prefix + 'haproxy' }}"
+  hosts: haproxy
   become: true
   gather_facts: false
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
   tasks:
     - name: Curl each component's health endpoint
       ansible.builtin.uri:
@@ -274,12 +278,10 @@
 # cfg on failure.
 # =====================================================================
 - name: Phase E — switch HAProxy to the new color
-  hosts: "{{ veza_container_prefix + 'haproxy' }}"
+  hosts: haproxy
   become: true
   gather_facts: true   # roles/veza_haproxy_switch wants ansible_date_time
   vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
     veza_active_color: "{{ inactive_color }}"  # the color we ARE switching TO
   roles:
     - veza_haproxy_switch
@@ -295,61 +297,71 @@
   become: true
   gather_facts: true
   vars:
-    inactive_color: "{{ hostvars[veza_container_prefix + 'haproxy']['inactive_color'] }}"
-    prior_active_color: "{{ hostvars[veza_container_prefix + 'haproxy']['prior_active_color'] }}"
+    inactive_color: "{{ hostvars[groups['haproxy'][0]]['inactive_color'] }}"
+    prior_active_color: "{{ hostvars[groups['haproxy'][0]]['prior_active_color'] }}"
   tasks:
-    - name: Curl public health endpoint via HAProxy
-      ansible.builtin.uri:
-        url: "{{ veza_public_url }}/api/v1/health"
-        method: GET
-        status_code: [200]
-        timeout: 10
-        validate_certs: "{{ veza_public_url.startswith('https://') }}"
-      register: public_health
-      retries: 10
-      delay: 3
-      until: public_health.status == 200
-      tags: [phaseF, verify]
+    # Block/rescue at TASK level — Ansible doesn't accept rescue at play
+    # level. Both the success path (verify + record) and the rescue path
+    # (record failure + revert HAProxy + fail) live inside this block.
+    - name: Verify externally and record state, with rollback-on-failure
+      block:
+        - name: Curl public health endpoint via HAProxy
+          ansible.builtin.uri:
+            url: "{{ veza_public_url }}/api/v1/health"
+            method: GET
+            status_code: [200]
+            timeout: 10
+            validate_certs: "{{ veza_public_url.startswith('https://') }}"
+          register: public_health
+          retries: 10
+          delay: 3
+          until: public_health.status == 200
+          tags: [phaseF, verify]
 
-    - name: Write deploy-state.json (consumed by node-exporter textfile)
-      ansible.builtin.copy:
-        dest: /var/lib/node_exporter/textfile_collector/veza_deploy.prom
-        content: |
-          # HELP veza_deploy_active_color 0=blue, 1=green.
-          # TYPE veza_deploy_active_color gauge
-          veza_deploy_active_color{env="{{ veza_env }}"} {{ 0 if inactive_color == 'blue' else 1 }}
-          # HELP veza_deploy_release_sha info metric, label=sha.
-          # TYPE veza_deploy_release_sha gauge
-          veza_deploy_release_sha{env="{{ veza_env }}",sha="{{ veza_release_sha }}",color="{{ inactive_color }}"} 1
-          # HELP veza_deploy_last_success_timestamp unix epoch of last successful deploy.
-          # TYPE veza_deploy_last_success_timestamp gauge
-          veza_deploy_last_success_timestamp{env="{{ veza_env }}"} {{ ansible_date_time.epoch }}
-        mode: "0644"
-      tags: [phaseF, metrics]
-  rescue:
-    - name: Public health failed — record the failure timestamp
-      ansible.builtin.copy:
-        dest: /var/lib/node_exporter/textfile_collector/veza_deploy.prom
-        content: |
-          # HELP veza_deploy_last_failure_timestamp unix epoch of last failed deploy.
-          # TYPE veza_deploy_last_failure_timestamp gauge
-          veza_deploy_last_failure_timestamp{env="{{ veza_env }}",sha="{{ veza_release_sha }}",color="{{ inactive_color }}"} {{ ansible_date_time.epoch }}
-        mode: "0644"
-      failed_when: false
+        - name: Write deploy-state.json (consumed by node-exporter textfile)
+          ansible.builtin.copy:
+            dest: /var/lib/node_exporter/textfile_collector/veza_deploy.prom
+            content: |
+              # HELP veza_deploy_active_color 0=blue, 1=green.
+              # TYPE veza_deploy_active_color gauge
+              veza_deploy_active_color{env="{{ veza_env }}"} {{ 0 if inactive_color == 'blue' else 1 }}
+              # HELP veza_deploy_release_sha info metric, label=sha.
+              # TYPE veza_deploy_release_sha gauge
+              veza_deploy_release_sha{env="{{ veza_env }}",sha="{{ veza_release_sha }}",color="{{ inactive_color }}"} 1
+              # HELP veza_deploy_last_success_timestamp unix epoch of last successful deploy.
+              # TYPE veza_deploy_last_success_timestamp gauge
+              veza_deploy_last_success_timestamp{env="{{ veza_env }}"} {{ ansible_date_time.epoch }}
+            mode: "0644"
+          tags: [phaseF, metrics]
+      rescue:
+        - name: Public health failed — record the failure timestamp
+          ansible.builtin.copy:
+            dest: /var/lib/node_exporter/textfile_collector/veza_deploy.prom
+            content: |
+              # HELP veza_deploy_last_failure_timestamp unix epoch of last failed deploy.
+              # TYPE veza_deploy_last_failure_timestamp gauge
+              veza_deploy_last_failure_timestamp{env="{{ veza_env }}",sha="{{ veza_release_sha }}",color="{{ inactive_color }}"} {{ ansible_date_time.epoch }}
+            mode: "0644"
+          failed_when: false
 
-    - name: Re-switch HAProxy back to the prior color
-      ansible.builtin.import_role:
-        name: veza_haproxy_switch
-      vars:
-        veza_active_color: "{{ prior_active_color }}"
-      delegate_to: "{{ veza_container_prefix + 'haproxy' }}"
+        - name: Re-switch HAProxy back to the prior color (delegated)
+          delegate_to: "{{ groups['haproxy'][0] }}"
+          vars:
+            ansible_connection: community.general.incus
+            ansible_python_interpreter: /usr/bin/python3
+          block:
+            - name: Apply veza_haproxy_switch with prior_active_color
+              ansible.builtin.include_role:
+                name: veza_haproxy_switch
+              vars:
+                veza_active_color: "{{ prior_active_color }}"
 
-    - name: Fail the playbook
-      ansible.builtin.fail:
-        msg: >-
-          Public health probe via HAProxy failed after deploy of SHA
-          {{ veza_release_sha[:12] }} to color {{ inactive_color }}.
-          HAProxy reverted to the prior color ({{ prior_active_color }}).
-          The freshly-deployed {{ inactive_color }} containers are kept
-          alive for forensics — inspect with:
-            incus exec {{ veza_container_prefix }}backend-{{ inactive_color }} -- journalctl -u veza-backend -n 200
+        - name: Fail the playbook
+          ansible.builtin.fail:
+            msg: >-
+              Public health probe via HAProxy failed after deploy of SHA
+              {{ veza_release_sha[:12] }} to color {{ inactive_color }}.
+              HAProxy reverted to the prior color ({{ prior_active_color }}).
+              The freshly-deployed {{ inactive_color }} containers are kept
+              alive for forensics — inspect with:
+                incus exec {{ veza_container_prefix }}backend-{{ inactive_color }} -- journalctl -u veza-backend -n 200
diff --git a/infra/ansible/playbooks/deploy_data.yml b/infra/ansible/playbooks/deploy_data.yml
index 66070ba91..b78298526 100644
--- a/infra/ansible/playbooks/deploy_data.yml
+++ b/infra/ansible/playbooks/deploy_data.yml
@@ -112,28 +112,23 @@
   gather_facts: false
   tasks:
     - name: Launch container if absent
-      ansible.builtin.shell: |
-        set -e
-        if incus info "{{ item.name }}" >/dev/null 2>&1; then
-          echo "{{ item.name }} already exists"
-          exit 0
-        fi
-        incus launch {{ veza_app_base_image }} "{{ item.name }}" \
-          --profile veza-data \
-          --profile veza-net \
-          --network "{{ veza_incus_network }}"
-        # Wait for the container's API to respond before any subsequent task
-        # (apt, systemd) hits a half-up container.
-        for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
-          if incus exec "{{ item.name }}" -- /bin/true 2>/dev/null; then
-            echo "Container {{ item.name }} ready"
+      ansible.builtin.shell:
+        cmd: |
+          set -e
+          if incus info "{{ item.name }}" >/dev/null 2>&1; then
+            echo "{{ item.name }} already exists"
             exit 0
           fi
-          sleep 1
-        done
-        echo "Container {{ item.name }} did not become ready within timeout"
-        exit 1
-      args:
+          incus launch "{{ veza_app_base_image }}" "{{ item.name }}" --profile veza-data --profile veza-net --network "{{ veza_incus_network }}"
+          for i in $(seq 1 {{ veza_app_container_ready_timeout | default(30) }}); do
+            if incus exec "{{ item.name }}" -- /bin/true 2>/dev/null; then
+              echo "Container {{ item.name }} ready"
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Container {{ item.name }} did not become ready within timeout"
+          exit 1
         executable: /bin/bash
       loop: "{{ veza_data_containers }}"
       register: launch_result
@@ -150,7 +145,7 @@
 # tasks/<kind>.yml or role.
 # -----------------------------------------------------------------------
 - name: Configure postgres
-  hosts: "{{ veza_container_prefix + 'postgres' }}"
+  hosts: veza_data_postgres
   become: true
   gather_facts: false
   vars:
@@ -198,7 +193,7 @@
   tags: [data, postgres]
 
 - name: Configure redis
-  hosts: "{{ veza_container_prefix + 'redis' }}"
+  hosts: veza_data_redis
   become: true
   gather_facts: false
   vars:
@@ -250,7 +245,7 @@
   tags: [data, redis]
 
 - name: Configure rabbitmq
-  hosts: "{{ veza_container_prefix + 'rabbitmq' }}"
+  hosts: veza_data_rabbitmq
   become: true
   gather_facts: false
   vars:
@@ -295,7 +290,7 @@
   tags: [data, rabbitmq]
 
 - name: Configure minio
-  hosts: "{{ veza_container_prefix + 'minio' }}"
+  hosts: veza_data_minio
   become: true
   gather_facts: false
   vars:
diff --git a/infra/ansible/playbooks/rollback.yml b/infra/ansible/playbooks/rollback.yml
index 8956a80f4..65e22859d 100644
--- a/infra/ansible/playbooks/rollback.yml
+++ b/infra/ansible/playbooks/rollback.yml
@@ -1,14 +1,12 @@
 # rollback.yml — two modes :
 #
 #  1. fast      : flip HAProxy back to the previous active color.
-#                Works only if those containers are still alive
-#                (i.e., the next deploy has NOT yet recycled them).
+#                Works only if those containers are still alive.
 #                Effect time : ~5 seconds.
 #
 #  2. full      : redeploy a specific release_sha by re-running
-#                deploy_app.yml with that SHA. Works whenever the
-#                tarball is still in the Forgejo Registry. Effect
-#                time : ~5-10 minutes.
+#                deploy_app.yml with that SHA.
+#                Effect time : ~5-10 minutes.
 #
 # Required extra-vars:
 #   env             staging | prod
@@ -16,11 +14,7 @@
 #   target_color    (mode=fast only)  the color to flip TO
 #   release_sha     (mode=full only)  the SHA to redeploy
 #
-# Caller (workflow_dispatch only — see .forgejo/workflows/rollback.yml):
-#   ansible-playbook -i inventory/{{env}}.yml playbooks/rollback.yml \
-#     -e env={{env}} -e mode=fast -e target_color=blue
-#   ansible-playbook -i inventory/{{env}}.yml playbooks/rollback.yml \
-#     -e env={{env}} -e mode=full -e release_sha=<previous_sha>
+# Caller (workflow_dispatch only — see .forgejo/workflows/rollback.yml).
 ---
 - name: Validate inputs
   hosts: incus_hosts
@@ -57,27 +51,28 @@
 
 # ---------------------------------------------------------------------
 # mode=fast  →  HAProxy flip only.
+# `when:` lives at TASK level (Ansible doesn't accept it at play level).
 # ---------------------------------------------------------------------
 - name: Fast rollback — verify target_color containers are alive
   hosts: incus_hosts
   become: true
   gather_facts: false
   tasks:
-    - name: Check each target-color container exists
-      ansible.builtin.shell: |
-        set -e
-        CT="{{ veza_container_prefix }}{{ item }}-{{ target_color }}"
-        if ! incus info "$CT" >/dev/null 2>&1; then
-          echo "MISSING $CT"
-          exit 1
-        fi
-        STATE=$(incus list "$CT" -c s --format csv)
-        if [ "$STATE" != "RUNNING" ]; then
-          echo "$CT is $STATE (not RUNNING)"
-          exit 1
-        fi
-        echo "OK $CT"
-      args:
+    - name: Check each target-color container exists and is RUNNING
+      ansible.builtin.shell:
+        cmd: |
+          set -e
+          CT="{{ veza_container_prefix }}{{ item }}-{{ target_color }}"
+          if ! incus info "$CT" >/dev/null 2>&1; then
+            echo "MISSING $CT"
+            exit 1
+          fi
+          STATE=$(incus list "$CT" -c s --format csv)
+          if [ "$STATE" != "RUNNING" ]; then
+            echo "$CT is $STATE (not RUNNING)"
+            exit 1
+          fi
+          echo "OK $CT"
         executable: /bin/bash
       loop:
         - backend
@@ -85,29 +80,31 @@
         - web
       changed_when: false
       register: alive_check
-  when: mode == 'fast'
-  tags: [rollback, fast]
+      when: mode == 'fast'
+      tags: [rollback, fast]
 
 - name: Fast rollback — flip HAProxy
-  hosts: "{{ veza_container_prefix + 'haproxy' }}"
+  hosts: haproxy
   become: true
   gather_facts: true
-  vars:
-    ansible_connection: community.general.incus
-    ansible_python_interpreter: /usr/bin/python3
-    veza_active_color: "{{ target_color }}"
-    # Fast rollback re-uses the previous SHA from the history file.
-    veza_release_sha: "{{ lookup('ansible.builtin.file', '/var/lib/veza/active-color.history', errors='ignore') | regex_search('sha=([0-9a-f]+)', '\\1') | default(['rollback'], true) | first }}"
-  roles:
-    - veza_haproxy_switch
-  when: mode == 'fast'
-  tags: [rollback, fast]
+  tasks:
+    # import_role (static) so `when:`/`tags:` reach the role's tasks; include_role tags only the include itself.
+    - name: Apply veza_haproxy_switch with target_color
+      ansible.builtin.import_role:
+        name: veza_haproxy_switch
+      vars:
+        veza_active_color: "{{ target_color }}"
+        # Re-use the SHA from the history file. regex_search with a group ref returns a
+        # LIST, so the synthetic 40-char fallback is a one-item list and `| first` unwraps both.
+        veza_release_sha: "{{ lookup('ansible.builtin.file', '/var/lib/veza/active-color.history', errors='ignore') | default('', true) | regex_search('sha=([0-9a-f]{40})', '\\1') | default(['r0llback' + '0' * 32], true) | first }}"
+      when: mode == 'fast'
+      tags: [rollback, fast]
 
 # ---------------------------------------------------------------------
-# mode=full  →  re-import deploy_app.yml with the rollback SHA.
-# Functionally identical to a fresh deploy of an older release.
+# mode=full  →  re-run deploy_app.yml with the rollback SHA.
+# `when:` IS valid on import_playbook (unlike on a regular play).
 # ---------------------------------------------------------------------
-- name: Full rollback — delegate to deploy_app.yml with release_sha={{ veza_release_sha | default('') }}
+- name: Full rollback — delegate to deploy_app.yml
   ansible.builtin.import_playbook: deploy_app.yml
   when: mode == 'full'
   tags: [rollback, full]