From 4595db4666d06362fff2140d238cd83b0968d762 Mon Sep 17 00:00:00 2001 From: anamehra <54692434+anamehra@users.noreply.github.com> Date: Sun, 17 Dec 2023 04:48:15 -0800 Subject: [PATCH] Fixed determine/process reboot-cause service dependency (#17406) Signed-off-by: anamehra anamehra@cisco.com Why I did it Fixes #16990 for the 202305/202205 branches. Note: This PR is for 202305 and 202205. For master, a new PR will be raised that uses a new field (Upholds=) provided by Debian bookworm to handle restarting the processes after a dependency failure. 1. The determine-reboot-cause and process-reboot-cause services do not start if the database service fails to restart on the first attempt. Even if the database service succeeds on the next attempt, these reboot-cause services do not start. 2. The process-reboot-cause service also does not restart if the docker or database service restarts, which leads to an empty reboot-cause history. 3. deploy-mg from sonic-mgmt also triggers a docker service restart. The restart of the docker service causes the issue stated in 2 above. The docker restart also triggers determine-reboot-cause to restart, which creates an additional reboot-cause file in the history and modifies the last reboot-cause. This PR fixes these issues by making both processes start again once their dependencies are met after a dependency failure, making both processes restart when the database service restarts, and preventing duplicate processing of the last reboot reason. Work item tracking Microsoft ADO 25892856 How I did it Modified the systemd unit files to make the determine-reboot-cause and process-reboot-cause services restartable when the database service restarts. On a restart, the determine-reboot-cause service should not create a new reboot-cause entry in the database. Added a check that distinguishes a first start from a restart so that no entry is created in the restart case. 
How to verify it On a single-ASIC pizza box: Install the image and check the reboot-cause history. Restart the database service and verify that the determine-reboot-cause and process-reboot-cause services also restart. Verify that reboot-cause shows correct data and that no new entry is created for the restart. On a chassis: Install the image and check the reboot-cause history. Restart the database service and verify that the determine-reboot-cause and process-reboot-cause services also restart. Verify that reboot-cause shows correct data and that no new entry is created for the restart. Reboot the LC. On the Supervisor, stop the database-chassis service. Let the database service on the LC fail the first time; determine-reboot-cause and process-reboot-cause will fail to start due to the dependency failure. Start database-chassis on the Supervisor. The database service on the LC should now start successfully. Verify that determine-reboot-cause and process-reboot-cause also start. Verify the show reboot-cause history output. --- ...host-services-data.determine-reboot-cause.service | 5 +++-- ...c-host-services-data.process-reboot-cause.service | 12 ++++++++++-- ...nic-host-services-data.process-reboot-cause.timer | 9 --------- 3 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.timer diff --git a/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service b/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service index e834b9337331..fa9e70773fd2 100644 --- a/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service +++ b/src/sonic-host-services-data/debian/sonic-host-services-data.determine-reboot-cause.service @@ -1,7 +1,8 @@ [Unit] Description=Reboot cause determination service -Requires=rc-local.service database.service -After=rc-local.service database.service +Requires=rc-local.service +After=rc-local.service +Wants=process-reboot-cause.service 
[Service] Type=oneshot diff --git a/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.service b/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.service index 14af8868e164..712b7e00d482 100644 --- a/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.service +++ b/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.service @@ -1,8 +1,16 @@ [Unit] Description=Retrieve the reboot cause from the history files and save them to StateDB -Requires=database.service determine-reboot-cause.service +PartOf=database.service After=database.service determine-reboot-cause.service - + [Service] Type=simple +ExecStartPre=/usr/bin/systemctl is-active database +ExecStartPre=/usr/bin/systemctl is-active determine-reboot-cause +Restart=on-failure +RestartSec=30 +RemainAfterExit=yes ExecStart=/usr/local/bin/process-reboot-cause + +[Install] +WantedBy=multi-user.target diff --git a/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.timer b/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.timer deleted file mode 100644 index 222c51a79a03..000000000000 --- a/src/sonic-host-services-data/debian/sonic-host-services-data.process-reboot-cause.timer +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Delays process-reboot-cause until network is stably connected - -[Timer] -OnBootSec=1min 30 sec -Unit=process-reboot-cause.service - -[Install] -WantedBy=timers.target