Skip to content

Commit

Permalink
updated k8s jobs for cluster activate (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
geoffrey1330 authored Sep 9, 2024
1 parent b3ca9e6 commit d17d8ca
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 13 deletions.
75 changes: 63 additions & 12 deletions charts/latest/spdk-csi/templates/config-map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,61 @@ metadata:
name: mgmt-api-script-cm
data:
action.py: |
import json, os, requests
import json
import os
import requests
from time import sleep
def get_node_statuses(cluster_ip, cluster_uuid, cluster_secret, timeout=10):
    """Fetch the list of storage nodes from the management API.

    Sends ``GET {cluster_ip}/storagenode`` with an ``Authorization`` header
    of the form ``"<cluster_uuid> <cluster_secret>"``.

    Args:
        cluster_ip: Base URL of the management API; used verbatim as the
            URL prefix.
        cluster_uuid: Cluster identifier, first half of the auth header.
        cluster_secret: Cluster secret, second half of the auth header.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without it a stalled connection would block the caller's
            polling loop indefinitely.

    Returns:
        The value of the ``results`` key of the JSON response. An empty
        list is returned when the request fails, the status code is not
        200, or ``results`` is missing or null.
    """
    url = f"{cluster_ip}/storagenode"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"{cluster_uuid} {cluster_secret}"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            # `or []` covers an explicit JSON null so callers can always
            # iterate over the return value.
            return response.json().get('results', []) or []
        print(f"Failed to get node statuses, Status Code: {response.status_code}")
    except Exception as e:
        # Deliberately broad: this function is polled in a retry loop, so
        # any failure is reported and treated as "no nodes yet".
        print(f"Error occurred while getting node statuses: {e}")
    return []
# Wait until at least three storage nodes report status 'online', then request
# cluster activation with PUT {cluster_ip}/cluster/activate/{cluster_uuid}.
# Polls get_node_statuses() up to 60 times, sleeping 5 seconds after each
# unsuccessful check, and prints a final message if the threshold is never met.
# NOTE(review): indentation was lost in this view, so the nesting described in
# the comments below is inferred from statement order - confirm against the chart.
def activate_cluster_if_needed(cluster_ip, cluster_uuid, cluster_secret):
# Maximum number of polling attempts.
retries = 60
while retries > 0:
node_statuses = get_node_statuses(cluster_ip, cluster_uuid, cluster_secret)
# Keep only the node entries whose 'status' field equals 'online'.
online_nodes = [node for node in node_statuses if node.get('status') == 'online']
# Activation is attempted only once three or more nodes are online.
if len(online_nodes) >= 3:
print("Proceeding with cluster activation.")
url = f"{cluster_ip}/cluster/activate/{cluster_uuid}"
# Same header format as the node status request: "<uuid> <secret>".
headers = {
"Content-Type": "application/json",
"Authorization": f"{cluster_uuid} {cluster_secret}"
}
try:
# NOTE(review): no timeout is passed, so this call can block indefinitely.
response = requests.put(url, headers=headers)
if response.status_code == 200:
print("Successfully activated the cluster.")
return
else:
print(f"Failed to activate the cluster, Status Code: {response.status_code}")
except Exception as e:
print(f"Error occurred while activating the cluster: {e}")
# NOTE(review): presumably this return sits at the level of the try, ending
# the function after a single activation attempt whether or not it
# succeeded - confirm; if it belongs to the except branch, a non-200
# response would instead fall through to the retry path below.
return
# Not enough online nodes yet: consume one attempt and wait before polling again.
retries -= 1
print(f"Not enough 'online' nodes. Retrying in 5 seconds... Remaining retries: {retries}")
sleep(5)
# Reached only when the loop ends without an activation attempt.
print("Cluster not activated: Number of 'online' storage nodes is less than 3 after maximum retries.")
# Load environment variables
action_type = os.getenv("ACTION_TYPE")
uuid = os.getenv("SNODE_UUID", "")
secret = json.loads(os.getenv("SPDKCSI_SECRET"))
Expand All @@ -127,22 +179,18 @@ data:
"Authorization": f"{cluster_uuid} {cluster_secret}"
}
if action_type in ["sn_restart", "sn_shutdown", "sn_remove"] and uuid:
# Check the action type and perform the appropriate action
if action_type == "cl_activate":
# Check if we should activate the cluster
activate_cluster_if_needed(cluster_ip, cluster_uuid, cluster_secret)
elif action_type in ["sn_restart", "sn_shutdown", "sn_remove"] and uuid:
if action_type == "sn_restart":
url = f"{cluster_ip}/storagenode/restart/{uuid}"
elif action_type == "sn_shutdown":
url = f"{cluster_ip}/storagenode/shutdown/{uuid}?force=True"
elif action_type == "sn_remove":
url = f"{cluster_ip}/storagenode/remove/{uuid}"
elif action_type == "cl_activate":
url = f"{cluster_ip}/cluster/activate/{cluster_uuid}"
elif action_type == "sn_idle":
print("No action needed for 'sn_idle'.")
else:
print(f"Invalid action type or UUID is missing for action: {action_type}")
exit(1) # Exit the script if the action type is invalid or UUID is missing
if (action_type in ["sn_restart", "sn_shutdown", "sn_remove"] and uuid) or action_type == "cl_activate":
try:
response = requests.get(url, headers=headers)
if response.status_code == 200:
Expand All @@ -151,5 +199,8 @@ data:
print(f"Failed to execute action: {action_type} for UUID: {uuid}, Status Code: {response.status_code}")
except Exception as e:
print(f"Error occurred: {e}")
elif action_type == "sn_idle":
print("No action needed for 'sn_idle'.")
else:
print("No GET request sent because the action type is 'sn_idle' or UUID is missing.")
print(f"Invalid action type or UUID is missing for action: {action_type}")
exit(1)
2 changes: 1 addition & 1 deletion charts/latest/spdk-csi/templates/job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ spec:
fieldRef:
fieldPath: metadata.namespace
- name: ACTION_TYPE
value: "sn_idle" #options: "sn_idle", "sn_restart", "sn_shutdown", "sn_remove", "cl_activate"
value: "cl_activate" #options: "sn_idle", "sn_restart", "sn_shutdown", "sn_remove", "cl_activate"
- name: SNODE_UUID
value: ""
volumeMounts:
Expand Down

0 comments on commit d17d8ca

Please sign in to comment.