Skip to content

Commit

Permalink
fix: no default resource limits (#768)
Browse files Browse the repository at this point in the history
  • Loading branch information
barnabasbusa authored Sep 19, 2024
1 parent 8fec454 commit 4c4831b
Show file tree
Hide file tree
Showing 26 changed files with 565 additions and 910 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ participants:
# Resource management for el containers
# CPU is in millicores
# RAM is in MB
# Defaults are set per client
# Defaults to 0, which results in no resource limits
el_min_cpu: 0
el_max_cpu: 0
el_min_mem: 0
Expand Down Expand Up @@ -278,7 +278,7 @@ participants:
# Resource management for cl containers
# CPU is in millicores
# RAM is in MB
# Defaults are set per client
# Defaults to 0, which results in no resource limits
cl_min_cpu: 0
cl_max_cpu: 0
cl_min_mem: 0
Expand Down Expand Up @@ -340,7 +340,7 @@ participants:
# Resource management for vc containers
# CPU is in millicores
# RAM is in MB
# Defaults are set per client
# Defaults to 0, which results in no resource limits
vc_min_cpu: 0
vc_max_cpu: 0
vc_min_mem: 0
Expand Down
50 changes: 33 additions & 17 deletions src/cl/cl_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def launch(
or constants.NETWORK_NAME.shadowfork in network_params.network
else None
)

network_name = shared_utils.get_network_name(network_params.network)
for index, participant in enumerate(participants):
cl_type = participant.cl_type
el_type = participant.el_type
Expand All @@ -104,6 +104,26 @@ def launch(
global_node_selectors,
)

tolerations = input_parser.get_client_tolerations(
participant.cl_tolerations, participant.tolerations, global_tolerations
)

(
cl_min_cpu,
cl_max_cpu,
cl_min_mem,
cl_max_mem,
cl_volume_size,
) = shared_utils.get_cpu_mem_resource_limits(
participant.cl_min_cpu,
participant.cl_max_cpu,
participant.cl_min_mem,
participant.cl_max_mem,
participant.cl_volume_size,
network_name,
participant.cl_type,
)

if cl_type not in cl_launchers:
fail(
"Unsupported launcher '{0}', need one of '{1}'".format(
Expand Down Expand Up @@ -158,10 +178,10 @@ def launch(
el_context,
full_name,
new_cl_node_validator_keystores,
participant.cl_min_cpu,
participant.cl_max_cpu,
participant.cl_min_mem,
participant.cl_max_mem,
cl_min_cpu,
cl_max_cpu,
cl_min_mem,
cl_max_mem,
participant.snooper_enabled,
snooper_engine_context,
participant.blobber_enabled,
Expand All @@ -170,10 +190,8 @@ def launch(
participant.cl_extra_env_vars,
participant.cl_extra_labels,
persistent,
participant.cl_volume_size,
participant.cl_tolerations,
participant.tolerations,
global_tolerations,
cl_volume_size,
tolerations,
node_selectors,
participant.use_separate_vc,
participant.keymanager_enabled,
Expand All @@ -195,10 +213,10 @@ def launch(
el_context,
full_name,
new_cl_node_validator_keystores,
participant.cl_min_cpu,
participant.cl_max_cpu,
participant.cl_min_mem,
participant.cl_max_mem,
cl_min_cpu,
cl_max_cpu,
cl_min_mem,
cl_max_mem,
participant.snooper_enabled,
snooper_engine_context,
participant.blobber_enabled,
Expand All @@ -207,10 +225,8 @@ def launch(
participant.cl_extra_env_vars,
participant.cl_extra_labels,
persistent,
participant.cl_volume_size,
participant.cl_tolerations,
participant.tolerations,
global_tolerations,
cl_volume_size,
tolerations,
node_selectors,
participant.use_separate_vc,
participant.keymanager_enabled,
Expand Down
80 changes: 29 additions & 51 deletions src/cl/grandine/grandine_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ BEACON_DISCOVERY_PORT_NUM = 9000
BEACON_HTTP_PORT_NUM = 4000
BEACON_METRICS_PORT_NUM = 8008

# The min/max CPU/memory that the beacon node can use
BEACON_MIN_CPU = 50
BEACON_MIN_MEMORY = 1024

BEACON_METRICS_PATH = "/metrics"

MIN_PEERS = 1
Expand Down Expand Up @@ -58,9 +54,7 @@ def launch(
extra_labels,
persistent,
cl_volume_size,
cl_tolerations,
participant_tolerations,
global_tolerations,
tolerations,
node_selectors,
use_separate_vc,
keymanager_enabled,
Expand All @@ -74,33 +68,8 @@ def launch(
participant_log_level, global_log_level, VERBOSITY_LEVELS
)

tolerations = input_parser.get_client_tolerations(
cl_tolerations, participant_tolerations, global_tolerations
)

extra_params = [param for param in extra_params]

network_name = shared_utils.get_network_name(launcher.network)

cl_min_cpu = int(cl_min_cpu) if int(cl_min_cpu) > 0 else BEACON_MIN_CPU
cl_max_cpu = (
int(cl_max_cpu)
if int(cl_max_cpu) > 0
else constants.RAM_CPU_OVERRIDES[network_name]["grandine_max_cpu"]
)
cl_min_mem = int(cl_min_mem) if int(cl_min_mem) > 0 else BEACON_MIN_MEMORY
cl_max_mem = (
int(cl_max_mem)
if int(cl_max_mem) > 0
else constants.RAM_CPU_OVERRIDES[network_name]["grandine_max_mem"]
)

cl_volume_size = (
int(cl_volume_size)
if int(cl_volume_size) > 0
else constants.VOLUME_SIZE[network_name]["grandine_volume_size"]
)

config = get_beacon_config(
plan,
launcher.el_cl_genesis_data,
Expand Down Expand Up @@ -386,33 +355,42 @@ def get_beacon_config(
persistent_key="data-{0}".format(service_name),
size=cl_volume_size,
)

return ServiceConfig(
image=image,
ports=used_ports,
public_ports=public_ports,
cmd=cmd,
env_vars=extra_env_vars,
files=files,
private_ip_address_placeholder=constants.PRIVATE_IP_ADDRESS_PLACEHOLDER,
ready_conditions=cl_node_ready_conditions.get_ready_conditions(
config_args = {
"image": image,
"ports": used_ports,
"public_ports": public_ports,
"cmd": cmd,
"files": files,
"env_vars": extra_env_vars,
"private_ip_address_placeholder": constants.PRIVATE_IP_ADDRESS_PLACEHOLDER,
"ready_conditions": cl_node_ready_conditions.get_ready_conditions(
constants.HTTP_PORT_ID
),
min_cpu=cl_min_cpu,
max_cpu=cl_max_cpu,
min_memory=cl_min_mem,
max_memory=cl_max_mem,
labels=shared_utils.label_maker(
"labels": shared_utils.label_maker(
constants.CL_TYPE.grandine,
constants.CLIENT_TYPES.cl,
image,
el_context.client_name,
extra_labels,
),
user=User(uid=0, gid=0),
tolerations=tolerations,
node_selectors=node_selectors,
)
"tolerations": tolerations,
"node_selectors": node_selectors,
"user": User(uid=0, gid=0),
}

if cl_min_cpu > 0:
config_args["min_cpu"] = cl_min_cpu

if cl_max_cpu > 0:
config_args["max_cpu"] = cl_max_cpu

if cl_min_mem > 0:
config_args["min_memory"] = cl_min_mem

if cl_max_mem > 0:
config_args["max_memory"] = cl_max_mem

return ServiceConfig(**config_args)


def new_grandine_launcher(
Expand Down
73 changes: 28 additions & 45 deletions src/cl/lighthouse/lighthouse_launcher.star
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,7 @@ def launch(
extra_labels,
persistent,
cl_volume_size,
cl_tolerations,
participant_tolerations,
global_tolerations,
tolerations,
node_selectors,
use_separate_vc,
keymanager_enabled,
Expand All @@ -77,31 +75,6 @@ def launch(
participant_log_level, global_log_level, VERBOSITY_LEVELS
)

tolerations = input_parser.get_client_tolerations(
cl_tolerations, participant_tolerations, global_tolerations
)

network_name = shared_utils.get_network_name(launcher.network)

cl_min_cpu = int(cl_min_cpu) if int(cl_min_cpu) > 0 else BEACON_MIN_CPU
cl_max_cpu = (
int(cl_max_cpu)
if int(cl_max_cpu) > 0
else constants.RAM_CPU_OVERRIDES[network_name]["lighthouse_max_cpu"]
)
cl_min_mem = int(cl_min_mem) if int(cl_min_mem) > 0 else BEACON_MIN_MEMORY
cl_max_mem = (
int(cl_max_mem)
if int(cl_max_mem) > 0
else constants.RAM_CPU_OVERRIDES[network_name]["lighthouse_max_mem"]
)

cl_volume_size = (
int(cl_volume_size)
if int(cl_volume_size) > 0
else constants.VOLUME_SIZE[network_name]["lighthouse_volume_size"]
)

# Launch Beacon node
beacon_config = get_beacon_config(
plan,
Expand Down Expand Up @@ -374,31 +347,41 @@ def get_beacon_config(
)
env = {RUST_BACKTRACE_ENVVAR_NAME: RUST_FULL_BACKTRACE_KEYWORD}
env.update(extra_env_vars)
return ServiceConfig(
image=image,
ports=used_ports,
public_ports=public_ports,
cmd=cmd,
files=files,
env_vars=env,
private_ip_address_placeholder=constants.PRIVATE_IP_ADDRESS_PLACEHOLDER,
ready_conditions=cl_node_ready_conditions.get_ready_conditions(
config_args = {
"image": image,
"ports": used_ports,
"public_ports": public_ports,
"cmd": cmd,
"files": files,
"env_vars": env,
"private_ip_address_placeholder": constants.PRIVATE_IP_ADDRESS_PLACEHOLDER,
"ready_conditions": cl_node_ready_conditions.get_ready_conditions(
constants.HTTP_PORT_ID
),
min_cpu=cl_min_cpu,
max_cpu=cl_max_cpu,
min_memory=cl_min_mem,
max_memory=cl_max_mem,
labels=shared_utils.label_maker(
"labels": shared_utils.label_maker(
constants.CL_TYPE.lighthouse,
constants.CLIENT_TYPES.cl,
image,
el_context.client_name,
extra_labels,
),
tolerations=tolerations,
node_selectors=node_selectors,
)
"tolerations": tolerations,
"node_selectors": node_selectors,
}

if cl_min_cpu > 0:
config_args["min_cpu"] = cl_min_cpu

if cl_max_cpu > 0:
config_args["max_cpu"] = cl_max_cpu

if cl_min_mem > 0:
config_args["min_memory"] = cl_min_mem

if cl_max_mem > 0:
config_args["max_memory"] = cl_max_mem

return ServiceConfig(**config_args)


def new_lighthouse_launcher(el_cl_genesis_data, jwt_file, network_params):
Expand Down
Loading

0 comments on commit 4c4831b

Please sign in to comment.