Skip to content

Commit

Permalink
Add control plane ready timers
Browse files Browse the repository at this point in the history
  • Loading branch information
morenod committed Oct 7, 2024
1 parent b4fe8db commit 4384ad0
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 11 deletions.
50 changes: 42 additions & 8 deletions libs/platforms/azure/hypershiftcli/hypershiftcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def get_metadata(self, platform, cluster_name):
except Exception as err:
self.logging.error(f"Cannot load metadata for cluster {cluster_name} from {self.environment['mgmt_cluster_name']}")
self.logging.error(err)
metadata['status'] = "not found"
return metadata
metadata["cluster_name"] = result.get("metadata", {}).get("name", None)
metadata["cluster_id"] = result.get("spec", {}).get("clusterID", None)
metadata["network_type"] = result.get("spec", {}).get("networking", {}).get("networkType", None)
Expand Down Expand Up @@ -191,6 +193,7 @@ def delete_cluster(self, platform, cluster_name):
cluster_info = platform.environment["clusters"][cluster_name]
cluster_start_time = int(datetime.datetime.utcnow().timestamp())
cluster_info["uuid"] = self.environment["uuid"]
cluster_info["timestamp"] = datetime.datetime.utcnow().isoformat()
cluster_info['mgmt_cluster_name'] = self.environment['mgmt_cluster_name']
cluster_info["install_method"] = "hypershiftcli"
cluster_info['resource_group_name'] = "rg-" + cluster_name
Expand Down Expand Up @@ -225,9 +228,34 @@ def delete_cluster(self, platform, cluster_name):
self.logging.error(err)
self.logging.error(f"Failed to write metadata_destroy.json file located at {cluster_info['path']}")
if self.es is not None:
cluster_info["timestamp"] = datetime.datetime.utcnow().isoformat()
self.es.index_metadata(cluster_info)

def wait_for_controlplane_ready(self, cluster_name, wait_time):
    """Poll the management cluster until the hosted control plane is available.

    Runs ``oc get hostedcluster -n clusters <cluster_name> -o json`` once per
    second, using the kubeconfig stored in ``self.environment['mc_kubeconfig']``,
    and inspects ``status.conditions`` of the returned object.

    Args:
        cluster_name: Name of the hosted cluster to query.
        wait_time: Maximum time to wait, in minutes.

    Returns:
        The number of whole seconds elapsed (never negative) when a condition
        with message "The hosted control plane is available" and status
        "True" is found; ``0`` if ``self.utils.force_terminate`` is set while
        waiting; ``None`` if ``wait_time`` expires first.
    """
    myenv = os.environ.copy()
    myenv["KUBECONFIG"] = self.environment['mc_kubeconfig']
    # A monotonic clock keeps the measured duration immune to wall-clock
    # adjustments and avoids mixing float and truncated-int timestamps, which
    # could previously yield a negative elapsed value.
    starting_time = time.monotonic()
    deadline = starting_time + wait_time * 60
    while time.monotonic() < deadline:
        if self.utils.force_terminate:
            self.logging.error(f"Exiting install times capturing on {cluster_name} cluster after capturing Ctrl-C")
            return 0
        self.logging.info(f"Getting cluster information for cluster {cluster_name} on {self.environment['mgmt_cluster_name']}")
        _, cluster_status_out, _ = self.utils.subprocess_exec("oc get hostedcluster -n clusters " + cluster_name + " -o json", extra_params={"env": myenv, "universal_newlines": True})
        elapsed = int(round(time.monotonic() - starting_time))
        try:
            # "status" or "conditions" may be absent or null while the
            # resource is still being created; treat both as "no conditions".
            cluster_status = (json.loads(cluster_status_out).get("status") or {}).get("conditions") or []
        except Exception as err:
            # Best effort: empty or malformed command output is retried until
            # the deadline instead of aborting the installation flow.
            self.logging.error(f"Cannot load command result for cluster {cluster_name}. Waiting 1 seconds for next check...")
            self.logging.error(err)
            time.sleep(1)
            continue
        # Use .get() so conditions that omit "message" or "status" are simply
        # skipped rather than raising KeyError.
        control_plane_ready = any(
            isinstance(item, dict)
            and item.get("message") == "The hosted control plane is available"
            and item.get("status") == "True"
            for item in cluster_status
        )
        if control_plane_ready:
            self.logging.info(f"Control Plane for cluster {cluster_name} is ready after {elapsed} seconds")
            return elapsed
        self.logging.info(f"Control Plane for cluster {cluster_name} not ready after {elapsed} seconds, waiting 1 second for the next check")
        time.sleep(1)
    # Timeout: make the outcome explicit instead of silently falling through.
    self.logging.error(f"Control Plane for cluster {cluster_name} was not ready after waiting {wait_time} minutes")
    return None

def wait_for_cluster_ready(self, cluster_name, wait_time):
myenv = os.environ.copy()
myenv["KUBECONFIG"] = self.environment['mc_kubeconfig']
Expand Down Expand Up @@ -259,7 +287,8 @@ def _wait_for_workers(self, kubeconfig, worker_nodes, wait_time, cluster_name, m
myenv = os.environ.copy()
myenv["KUBECONFIG"] = kubeconfig
result = [machinepool_name]
starting_time = datetime.datetime.utcnow().timestamp()

starting_time = int(datetime.datetime.utcnow().timestamp())
self.logging.debug(f"Waiting {wait_time} minutes for nodes to be Ready on cluster {cluster_name} until {datetime.datetime.fromtimestamp(starting_time + wait_time * 60)}")
while datetime.datetime.utcnow().timestamp() < starting_time + wait_time * 60:
if self.utils.force_terminate:
Expand All @@ -286,7 +315,7 @@ def _wait_for_workers(self, kubeconfig, worker_nodes, wait_time, cluster_name, m
if ready_nodes == worker_nodes:
self.logging.info(f"Found {ready_nodes}/{worker_nodes} ready nodes on machinepool {machinepool_name} for cluster {cluster_name}. Stopping wait.")
result.append(ready_nodes)
result.append(int(datetime.datetime.utcnow().timestamp()))
result.append(int(datetime.datetime.utcnow().timestamp()) - starting_time)
return result
else:
self.logging.info(f"Found {ready_nodes}/{worker_nodes} ready nodes on machinepool {machinepool_name} for cluster {cluster_name}. Waiting 15 seconds for next check...")
Expand All @@ -302,6 +331,7 @@ def create_cluster(self, platform, cluster_name):
myenv["KUBECONFIG"] = self.environment['mc_kubeconfig']
cluster_info = platform.environment["clusters"][cluster_name]
cluster_info["uuid"] = self.environment["uuid"]
cluster_info["timestamp"] = datetime.datetime.utcnow().isoformat()
cluster_info["hostedclusters"] = self.environment["cluster_count"]
cluster_info["install_method"] = "hypershiftcli"
cluster_info['mgmt_cluster_name'] = self.environment['mgmt_cluster_name']
Expand Down Expand Up @@ -357,12 +387,15 @@ def create_cluster(self, platform, cluster_name):
# cluster_info["mc_namespace_timing"] = mc_namespace.result() - cluster_start_time if platform.environment["mc_kubeconfig"] != "" else None
# cluster_start_time_on_mc = mc_namespace.result()
cluster_end_time = int(datetime.datetime.utcnow().timestamp())
index_time = datetime.datetime.utcnow().isoformat()
# # Getting metadata again to update the cluster status
cluster_info["metadata"] = self.get_metadata(platform, cluster_name)
cluster_info["install_duration"] = cluster_end_time - cluster_start_time
self.logging.info(f"Waiting up to 10 minutes until cluster {cluster_name} control plane will be ready on {self.environment['mgmt_cluster_name']}")
cluster_info["cluster_controlplane_ready_delta"] = self.wait_for_controlplane_ready(cluster_name, 10)
cluster_info["cluster_controlplane_ready_total"] = sum(x or 0 for x in [cluster_info["install_duration"], cluster_info["cluster_controlplane_ready_delta"]])
self.logging.info(f"Waiting 60 minutes until cluster {cluster_name} status on {self.environment['mgmt_cluster_name']} will be completed")
cluster_info["cluster_ready"] = self.wait_for_cluster_ready(cluster_name, 60)
cluster_info["cluster_ready_delta"] = self.wait_for_cluster_ready(cluster_name, 60)
cluster_info["cluster_ready_total"] = sum(x or 0 for x in [cluster_info["cluster_controlplane_ready_total"], cluster_info["cluster_ready_delta"]])
cluster_info["kubeconfig"] = self.download_kubeconfig(cluster_name, cluster_info["path"])
if not cluster_info["kubeconfig"]:
self.logging.error(f"Failed to download kubeconfig file for cluster {cluster_name}. Disabling wait for workers and workload execution")
Expand All @@ -380,7 +413,8 @@ def create_cluster(self, platform, cluster_name):
if result[0] == cluster_name:
default_pool_workers = int(result[1])
if default_pool_workers == cluster_info["workers"]:
cluster_info["workers_ready"] = result[2] - cluster_start_time
cluster_info["workers_ready_delta"] = result[2]
cluster_info["workers_ready_total"] = sum(x or 0 for x in [cluster_info["cluster_ready_total"], cluster_info["workers_ready_delta"]])
else:
cluster_info['workers_ready'] = None
cluster_info['status'] = "Completed, missing workers"
Expand All @@ -389,7 +423,8 @@ def create_cluster(self, platform, cluster_name):
extra_pool_workers = int(result[1])
if "extra_machinepool" in platform.environment and extra_pool_workers == platform.environment["extra_machinepool"]["replicas"]:
# cluster_info["extra_pool_workers_ready"] = result[2] - extra_machine_pool_start_time
cluster_info["extra_pool_workers_ready"] = result[2] - cluster_start_time
cluster_info["extra_pool_workers_ready_delta"] = result[2]
cluster_info["extra_pool_workers_ready_total"] = sum(x or 0 for x in [cluster_info["cluster_ready_total"], cluster_info["extra_poolworkers_ready_delta"]])
else:
cluster_info["extra_pool_workers_ready"] = None
cluster_info['status'] = "Completed, missing extra pool workers"
Expand All @@ -403,7 +438,6 @@ def create_cluster(self, platform, cluster_name):
self.logging.error(err)
self.logging.error(f"Failed to write metadata_install.json file located at {cluster_info['path']}")
if self.es is not None:
cluster_info["timestamp"] = index_time
self.es.index_metadata(cluster_info)
self.logging.info("Indexing Management cluster stats")
os.environ["START_TIME"] = f"{cluster_start_time}"
Expand Down
6 changes: 3 additions & 3 deletions libs/platforms/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ def __init__(self, arguments, logging, utils, es):
self.environment["workers_wait_time"] = (
arguments["workers_wait_time"] if arguments["wait_for_workers"] else None
)
if arguments["install_clusters"].lower() == "true":
if str(arguments["install_clusters"]).lower() == "true":
self.environment["install_clusters"] = True
else:
self.environment["install_clusters"] = False

self.environment['load'] = {}
if arguments['enable_workload'].lower() == "true":
if str(arguments['enable_workload']).lower() == "true":
self.environment['load']['enabled'] = "true"

self.environment['load']["workload"] = arguments["workload"]
Expand All @@ -76,7 +76,7 @@ def __init__(self, arguments, logging, utils, es):

self.environment["wildcard_options"] = arguments["wildcard_options"]

if arguments["cleanup_clusters"].lower() == "true":
if str(arguments["cleanup_clusters"]).lower() == "true":
self.environment["cleanup_clusters"] = True
self.environment["wait_before_cleanup"] = arguments["wait_before_cleanup"]
self.environment["delay_between_cleanup"] = arguments[
Expand Down

0 comments on commit 4384ad0

Please sign in to comment.