Skip to content

Commit

Permalink
noisy training update
Browse files (browse the repository at this point in the history)
  • Loading branch information
MarcoCali0 committed Oct 8, 2024
1 parent a937f96 commit c4a9b2c
Showing 1 changed file with 39 additions and 23 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -44,7 +44,7 @@ def random_reset_func():
observation[3] += np.random.rand() * 0.05

else:
rand = np.random.rand(4) * 0.01
rand = np.random.rand(4) * 0.03
rand[2:] = rand[2:] - 0.05
observation = [-1.0, -1.0, 0.0, 0.0] + rand

Expand Down Expand Up @@ -87,14 +87,12 @@ def reward_func(self, terminated, action):
+self.V()
+ 2 * (1 + costheta2) ** 2
- self.T()
- 10 * lambda_action * np.square(a)
- 3 * lambda_delta * delta_action
)
else:
reward = (
(1 - np.abs(a)) * self.V() # for pendubot
self.V() # for pendubot
- lambda_action * np.square(a)
- 3 * lambda_velocities * (omega1**2 + omega2**2)
- 5 * lambda_velocities * (omega1**2 + omega2**2)
- 3 * lambda_delta * delta_action
)
else:
Expand All @@ -103,6 +101,39 @@ def reward_func(self, terminated, action):

def reset(self, seed=None, options=None):
super().reset(seed, options)

## noise

# process_noise_sigmas = [0.0, 0.0, 0.0, 0.0]
process_noise_sigmas = np.max(
[[0] * 4, np.random.normal(loc=[0.01] * 4, scale=[0.01] * 4)], axis=0
)

# meas_noise_sigmas = [0.0, 0.0, 0.1, 0.1]
meas_noise_sigmas = np.max(
[[0] * 4, np.random.normal(loc=[0.01] * 4, scale=[0.01] * 4)], axis=0
)

delay_mode = "posvel"
# delay = 0.05
delay = np.max([0, np.random.normal(loc=0.05, scale=0.01)])

# u_noise_sigmas = [0.01, 0.01]
u_noise_sigmas = np.max(
[[0, 0], np.random.normal(loc=[0.01] * 2, scale=[0.01] * 2)], axis=0
)

# u_responsiveness = 0.9
u_responsiveness = np.min([np.random.normal(loc=0.9, scale=0.05), 1])

simulator.set_process_noise(process_noise_sigmas=process_noise_sigmas)
simulator.set_measurement_parameters(
meas_noise_sigmas=meas_noise_sigmas, delay=delay, delay_mode=delay_mode
)
simulator.set_motor_parameters(
u_noise_sigmas=u_noise_sigmas, u_responsiveness=u_responsiveness
)

perturbation_array, _, _, _ = get_random_gauss_perturbation_array(
10, dt, 2, 1.0, [0.05, 0.1], [0.4, 0.6]
)
Expand All @@ -127,7 +158,7 @@ def reset(self, seed=None, options=None):
dt = 0.01


FOLDER_ID = f"{os.path.basename(__file__)}-{max_torque}-{robustness}-{WINDOW_SIZE}-{int(INCLUDE_TIME)}-{dt}-v2"
FOLDER_ID = f"{os.path.basename(__file__)}-{max_torque}-{robustness}-{WINDOW_SIZE}-{int(INCLUDE_TIME)}-{dt}"
TERMINATION = False

# setting log path for the training
Expand All @@ -154,26 +185,11 @@ def reset(self, seed=None, options=None):
mpar = model_parameters(filepath=model_par_path)
mpar.set_torque_limit(torque_limit)

## noise
process_noise_sigmas = [0.0, 0.0, 0.0, 0.0]
meas_noise_sigmas = [0.0, 0.0, 0.1, 0.1]
delay_mode = "posvel"
delay = 0.05
u_noise_sigmas = [0.01, 0.01]
u_responsiveness = 0.9

plant = SymbolicDoublePendulum(model_pars=mpar)
simulator = CustomSimulator(
plant=plant, robustness=robustness, max_torque=max_torque, robot=robot, model=model
)

simulator.set_process_noise(process_noise_sigmas=process_noise_sigmas)
simulator.set_measurement_parameters(
meas_noise_sigmas=meas_noise_sigmas, delay=delay, delay_mode=delay_mode
)
simulator.set_motor_parameters(
u_noise_sigmas=u_noise_sigmas, u_responsiveness=u_responsiveness
)

eval_simulator = Simulator(plant=plant)

Expand Down Expand Up @@ -244,7 +260,7 @@ def wrap(env):
n_envs=n_envs,
env_kwargs={
"dynamics_func": dynamics_func,
"reset_func": zero_reset_func,
"reset_func": random_reset_func,
"terminates": TERMINATION,
"obs_space": obs_space,
"act_space": act_space,
Expand Down Expand Up @@ -291,7 +307,7 @@ def wrap(env):
)

setproctitle(
f"noisy_training (reward v2)-> robot={robot} max_torque={max_torque}Nm robustness={robustness} history_length={WINDOW_SIZE}"
f"noisy_training (domain and noise randomization) -> robot={robot} max_torque={max_torque}Nm robustness={robustness}"
)

agent.learn(total_timesteps=training_steps, callback=eval_callback)

0 comments on commit c4a9b2c

Please sign in to comment.