Skip to content

Nuage failed overcloud deployment debug script

psairam edited this page Jul 24, 2020 · 4 revisions
  1. As the stack user on the undercloud director, save the script below to a file named nuage-overcloud-debug.sh
#!/bin/bash
#
# nuage-overcloud-debug.sh
#
# Collects debug information after a failed deployment. Intended to be run
# as the stack user on the undercloud director.
#
#   * If the stackrc file is missing, the undercloud install log is copied
#     to /tmp/undercloud_failed/.
#   * If a FAILED stack exists and no server is in ERROR state, helper
#     scripts are generated under /tmp/overcloud_debug_scripts/, copied to
#     the overcloud nodes as heat-admin, executed there, and their output is
#     stored under /tmp/overcloud_failed_logs/.
#
# The early-exit paths (no stackrc, no FAILED stack, servers in ERROR)
# all finish with status 0.

stackrc=/home/stack/stackrc
undercloud_install_log=/home/stack/.instack/install-undercloud.log
undercloud_failed_dir=/tmp/undercloud_failed
failed_logs_dir=/tmp/overcloud_failed_logs
debug_scripts_dir=/tmp/overcloud_debug_scripts
remote_user=heat-admin
ssh_opts=(-o StrictHostKeyChecking=no -q)

# Write the helper that collects Open vSwitch information.
# Arguments: $1 - path of the script to create
write_compute_script() {
    # Quoted delimiter: the body must reach the node verbatim, without any
    # expansion happening on the undercloud.
    cat <<'EOF' > "$1"
#!/bin/bash
echo "======== /etc/default/openvswitch ========"
sudo grep -v -e '^$' -e '^#' /etc/default/openvswitch
echo "======== openvswitch service status ========"
sudo systemctl status openvswitch
echo "======= ovs-vsctl show ========"
sudo ovs-vsctl show
EOF
}

# Write the helper that collects AVRS / virtual-accelerator information.
# Arguments: $1 - path of the script to create
write_compute_avrs_script() {
    cat <<'EOF' > "$1"
#!/bin/bash
echo "======== /etc/fast-path.env ========"
sudo grep -v -e '^$' -e '^#' /etc/fast-path.env
echo "======== avrs service status ========"
sudo systemctl status avrs
echo "======== virtual-accelerator service status ========"
sudo systemctl status virtual-accelerator
echo "======== network service status ========"
sudo systemctl status network
EOF
}

# Write the helper that collects package and container information.
# Arguments: $1 - path of the script to create
write_all_nodes_script() {
    # Quoted delimiter so that $(...) and $id are evaluated on the node that
    # runs the helper, not while the helper is being generated.
    cat <<'EOF' > "$1"
#!/bin/bash
echo "======== yum list installed ========"
sudo yum list installed | grep 'nuage\|6wind\|virtual-accelerator'
echo "======= restarting containers ======="
sudo docker ps -a | grep Restarting
echo "======= logs of restarting containers if any ======="
for id in $(sudo docker ps -a --filter "status=restarting" --format "{{.ID}}"); do
    echo "==== $id restarting ===="
    # docker logs replays the container's stderr on stderr; merge it so it
    # ends up in the collected log as well.
    sudo docker logs "$id" 2>&1
done
echo "======= non-zero exit code containers ======="
echo "==== exit-code 1 ===="
sudo docker ps -a --filter "exited=1"
echo "==== exit-code 137 ===="
sudo docker ps -a --filter "exited=137"
echo "======= logs of non-zero exit code containers if any ======="
for id in $(sudo docker ps -a --filter "exited=1" --format "{{.ID}}"); do
    echo "==== $id exit-code 1 ===="
    sudo docker logs "$id" 2>&1
done
for id in $(sudo docker ps -a --filter "exited=137" --format "{{.ID}}"); do
    echo "==== $id exit-code 137 ===="
    sudo docker logs "$id" 2>&1
done
EOF
}

# Print one address per server, taken from the text after '=' in the
# Networks column of `openstack server list`.
# Arguments: extra arguments passed to `openstack server list`
node_addresses() {
    openstack server list -f value -c Networks "$@" | cut -d= -f2
}

# Copy a local script into the remote user's home directory on each node.
# Arguments: $1 - local script path; remaining - node addresses
copy_script_to_nodes() {
    local script=$1 node
    shift
    for node in "$@"; do
        echo "=== $node ==="
        scp "${ssh_opts[@]}" "$script" "${remote_user}@${node}:~/"
    done
}

# Run a previously copied script on each node; output goes to stdout.
# Arguments: $1 - script file name in the remote home directory;
#            remaining - node addresses
run_script_on_nodes() {
    local script_name=$1 node
    shift
    for node in "$@"; do
        echo "=== $node ==="
        ssh "${ssh_opts[@]}" "${remote_user}@${node}" "./${script_name}"
    done
}

main() {
    local stack_status nova_status stack_name
    local -a all_nodes compute_nodes avrs_nodes

    echo "Checking if undercloud deployed properly or not"

    if [[ ! -f $stackrc ]]; then
        echo "There were errors deploying undercloud"
        echo "Collecting undercloud logs"
        mkdir -p "$undercloud_failed_dir"
        if [[ -f $undercloud_install_log ]]; then
            cp "$undercloud_install_log" "$undercloud_failed_dir/"
        else
            echo "Undercloud install log not found: $undercloud_install_log" >&2
        fi
        return 0
    fi

    # shellcheck source=/dev/null
    source "$stackrc"

    echo "Checking if failed overcloud deployment exists or not"
    stack_status=$(openstack stack list | grep 'FAILED')
    if [[ -z $stack_status ]]; then
        echo "No FAILED stacks found"
        return 0
    fi

    mkdir -p "$failed_logs_dir" "$debug_scripts_dir"

    echo "Logging stack failures"
    # One call per stack: the command is given a single stack name each time,
    # so several stacks do not end up as extra positional arguments.
    while IFS= read -r stack_name; do
        [[ -n $stack_name ]] || continue
        openstack stack failures list "$stack_name" --long
    done < <(openstack stack list -f value -c 'Stack Name') \
        > "$failed_logs_dir/failures_list"
    openstack server list > "$failed_logs_dir/os_server_list"

    echo "Checking if all nodes are in ACTIVE state or not"
    nova_status=$(openstack server list | grep 'ERROR')
    if [[ -n $nova_status ]]; then
        echo "There were error while bringing up overcloud nodes"
        return 0
    fi

    echo "Generating scripts to collect logs from overcloud nodes"
    write_compute_script "$debug_scripts_dir/collect_compute_logs.sh"
    write_compute_avrs_script "$debug_scripts_dir/collect_compute_avrs_logs.sh"
    write_all_nodes_script "$debug_scripts_dir/collect_all_nodes.sh"
    chmod +x "$debug_scripts_dir"/*.sh

    # Query each node group once and reuse it for both copy and run.
    # NOTE(review): if --name is a pattern match, "compute" may also select
    # the computeavrs nodes — confirm whether that is intended.
    mapfile -t all_nodes < <(node_addresses)
    mapfile -t compute_nodes < <(node_addresses --name compute)
    mapfile -t avrs_nodes < <(node_addresses --name computeavrs)

    echo "Copying scripts to respective nodes"
    copy_script_to_nodes "$debug_scripts_dir/collect_all_nodes.sh" "${all_nodes[@]}"
    copy_script_to_nodes "$debug_scripts_dir/collect_compute_logs.sh" "${compute_nodes[@]}"
    copy_script_to_nodes "$debug_scripts_dir/collect_compute_avrs_logs.sh" "${avrs_nodes[@]}"

    echo "Collecting logs from all nodes"
    run_script_on_nodes collect_all_nodes.sh "${all_nodes[@]}" \
        > "$failed_logs_dir/collect_all.logs"

    echo "Collecting logs from all compute nodes"
    run_script_on_nodes collect_compute_logs.sh "${compute_nodes[@]}" \
        > "$failed_logs_dir/collect_compute.log"

    echo "Collecting logs from all computeavrs nodes"
    run_script_on_nodes collect_compute_avrs_logs.sh "${avrs_nodes[@]}" \
        > "$failed_logs_dir/collect_compute_avrs.log"
}

main "$@"
  1. Make the file executable by running the command below
$ chmod +x nuage-overcloud-debug.sh
  1. Run the script as shown below
$ ./nuage-overcloud-debug.sh
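  1. Once the script completes, the overcloud logs can be found under /tmp/overcloud_failed_logs/ (if the undercloud stackrc file is missing, the undercloud install log is copied to /tmp/undercloud_failed/ instead)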
Clone this wiki locally