From eb5ebeef4da56070f5c409c0c33b41dd019f87bd Mon Sep 17 00:00:00 2001 From: Marco Fargetta Date: Wed, 2 Aug 2023 16:10:09 +0200 Subject: [PATCH] Health check verify all clones `pki-healthcheck` stops its activity in case a clone is not working and reports the error. If multiple clones are configured, the check should go ahead and verify the other clones. With this fix, all clones are verified during the health check. --- .../clones/connectivity_and_data.py | 93 +++++++++++-------- 1 file changed, 53 insertions(+), 40 deletions(-) diff --git a/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py b/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py index d9bb480f7f9..fccf20e11f1 100644 --- a/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py +++ b/base/server/healthcheck/pki/server/healthcheck/clones/connectivity_and_data.py @@ -21,6 +21,7 @@ class ClonesConnectivyAndDataCheck(ClonesPlugin): Assure master and clones within a pki instance are reachable """ def check_ca_clones(self): + host_error = [] for host in self.clone_cas: cur_clone_msg = ' Host: ' + host.Hostname + ' Port: ' + host.SecurePort # Reach out and get some certs, to serve as a data and connectivity check @@ -40,11 +41,12 @@ def check_ca_clones(self): raise BaseException('CA clone problem reading data.' + cur_clone_msg) except BaseException as e: logger.error("Internal server error %s", e) - raise BaseException('Internal error testing CA clone.' + cur_clone_msg) + host_error.append(BaseException('Internal error testing CA clone.' 
+ cur_clone_msg)) - return + return host_error def check_kra_clones(self): + host_error = [] for host in self.clone_kras: url = 'https://' + host.Hostname + ':' + host.SecurePort @@ -58,13 +60,15 @@ def check_kra_clones(self): logger.info('KRA at %s is %s', url, status) if status != 'running': - raise Exception('KRA at %s is %s' % (url, status)) + raise BaseException('KRA at %s is %s' % (url, status)) - except Exception as e: + except BaseException as e: logger.error('Unable to reach KRA at %s: %s', url, e) - raise Exception('Unable to reach KRA at %s: %s' % (url, e)) + host_error.append(BaseException('Unable to reach KRA at %s: %s' % (url, e))) + return host_error def check_ocsp_clones(self): + host_error = [] for host in self.clone_ocsps: url = 'https://' + host.Hostname + ':' + host.SecurePort @@ -78,13 +82,15 @@ def check_ocsp_clones(self): logger.info('OCSP at %s is %s', url, status) if status != 'running': - raise Exception('OCSP at %s is %s' % (url, status)) + raise BaseException('OCSP at %s is %s' % (url, status)) - except Exception as e: + except BaseException as e: logger.error('Unable to reach OCSP at %s: %s', url, e) - raise Exception('Unable to reach OCSP at %s: %s' % (url, e)) + host_error.append(BaseException('Unable to reach OCSP at %s: %s' % (url, e))) + return host_error def check_tks_clones(self): + host_error = [] for host in self.clone_tkss: url = 'https://' + host.Hostname + ':' + host.SecurePort @@ -98,13 +104,15 @@ def check_tks_clones(self): logger.info('TKS at %s is %s', url, status) if status != 'running': - raise Exception('TKS at %s is %s' % (url, status)) + raise BaseException('TKS at %s is %s' % (url, status)) - except Exception as e: + except BaseException as e: logger.error('Unable to reach TKS at %s: %s', url, e) - raise Exception('Unable to reach TKS at %s: %s' % (url, e)) + host_error.append(BaseException('Unable to reach TKS at %s: %s' % (url, e))) + return host_error def check_tps_clones(self): + host_error = [] for host in 
self.clone_tpss: url = 'https://' + host.Hostname + ':' + host.SecurePort @@ -118,11 +126,12 @@ def check_tps_clones(self): logger.info('TPS at %s is %s', url, status) if status != 'running': - raise Exception('TPS at %s is %s' % (url, status)) + raise BaseException('TPS at %s is %s' % (url, status)) - except Exception as e: + except BaseException as e: logger.error('Unable to reach TPS at %s: %s', url, e) - raise Exception('Unable to reach TPS at %s: %s' % (url, e)) + host_error.append(BaseException('Unable to reach TPS at %s: %s' % (url, e))) + return host_error @duration def check(self): @@ -147,51 +156,55 @@ def check(self): logger.info('About to check the subsystem clones') hard_msg = ' Clones tested successfully, or not present.' - try: - self.check_ca_clones() + host_error = self.check_ca_clones() + if not host_error: yield Result(self, constants.SUCCESS, instance_name=self.instance.name, status='CA' + hard_msg) + else: + for err in host_error: + yield Result(self, constants.ERROR, + status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - except BaseException as e: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(e)) - - try: - self.check_kra_clones() + host_error = self.check_kra_clones() + if not host_error: yield Result(self, constants.SUCCESS, instance_name=self.instance.name, status='KRA' + hard_msg) - except BaseException as e: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(e)) + else: + for err in host_error: + yield Result(self, constants.ERROR, + status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - try: - self.check_ocsp_clones() + host_error = self.check_ocsp_clones() + if not host_error: yield Result(self, constants.SUCCESS, instance_name=self.instance.name, status='OCSP' + hard_msg) - except BaseException as e: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(e)) + else: + for err in host_error: + 
yield Result(self, constants.ERROR, + status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - try: - self.check_tks_clones() + host_error = self.check_tks_clones() + if not host_error: yield Result(self, constants.SUCCESS, instance_name=self.instance.name, status='TKS' + hard_msg) - except BaseException as e: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(e)) + else: + for err in host_error: + yield Result(self, constants.ERROR, + status='ERROR: %s' % self.instance.name + ' : ' + str(err)) - try: - self.check_tps_clones() + host_error = self.check_tps_clones() + if not host_error: yield Result(self, constants.SUCCESS, instance_name=self.instance.name, status="TPS Clones tested successfully, or not present.") - except BaseException as e: - yield Result(self, constants.ERROR, - status='ERROR: %s' % self.instance.name + ' : ' + str(e)) + else: + for err in host_error: + yield Result(self, constants.ERROR, + status='ERROR: %s' % self.instance.name + ' : ' + str(err)) else: yield Result(self, constants.SUCCESS, instance_name=self.instance.name,