From 7b050a30cc159035beac5838847f7179ac82a9da Mon Sep 17 00:00:00 2001 From: Sai Kishor Kothakota Date: Mon, 22 Apr 2024 22:55:46 +0200 Subject: [PATCH 1/6] deactivate the controllers when they return error similar to the hardware errors --- controller_manager/doc/userdoc.rst | 6 ++++++ controller_manager/src/controller_manager.cpp | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/controller_manager/doc/userdoc.rst b/controller_manager/doc/userdoc.rst index c8aa68d774..f25ff302e2 100644 --- a/controller_manager/doc/userdoc.rst +++ b/controller_manager/doc/userdoc.rst @@ -228,3 +228,9 @@ Restarting hardware If hardware gets restarted then you should go through its lifecycle again. This can be simply achieved by returning ``ERROR`` from ``write`` and ``read`` methods of interface implementation. **NOT IMPLEMENTED YET - PLEASE STOP/RESTART ALL CONTROLLERS MANUALLY FOR NOW** The controller manager detects that and stops all the controllers that are commanding that hardware and restarts broadcasters that are listening to its states. + +Hardware and Controller Errors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the hardware during it's ``read`` or ``write`` method returns ``return_type::ERROR``, the controller manager will stop all controllers that are using the hardware's command and state interfaces. +Likewise, if a controller returns ``return_type::ERROR`` from its ``update`` method, the controller manager will deactivate the respective controller. In future, the controller manager will try to start any fallback controllers if available. diff --git a/controller_manager/src/controller_manager.cpp b/controller_manager/src/controller_manager.cpp index eadd1b0d78..aaf7518725 100644 --- a/controller_manager/src/controller_manager.cpp +++ b/controller_manager/src/controller_manager.cpp @@ -2023,6 +2023,7 @@ controller_interface::return_type ControllerManager::update( ++update_loop_counter_; update_loop_counter_ %= update_rate_; + std::vector failed_controllers_list; for (const auto & loaded_controller : rt_controller_list) { // TODO(v-lopez) we could cache this information @@ -2061,11 +2062,25 @@ controller_interface::return_type ControllerManager::update( if (controller_ret != controller_interface::return_type::OK) { + failed_controllers_list.push_back(loaded_controller.info.name); ret = controller_ret; } } } } + if (!failed_controllers_list.empty()) + { + std::string failed_controllers; + for (const auto & controller : failed_controllers_list) + { + failed_controllers += controller + " "; + } + RCLCPP_ERROR( + get_logger(), "Deactivating controllers: %s, as their update resulted in an error", + failed_controllers.c_str()); + + deactivate_controllers(rt_controller_list, failed_controllers_list); + } // there are controllers to (de)activate if (switch_params_.do_switch) From bff813c1f48c37e030519db71cc9ef3bc0409dab Mon Sep 17 00:00:00 2001 From: Sai Kishor Kothakota Date: Mon, 22 Apr 2024 23:52:53 +0200 Subject: [PATCH 2/6] Better logging format --- controller_manager/src/controller_manager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controller_manager/src/controller_manager.cpp b/controller_manager/src/controller_manager.cpp index aaf7518725..e3729b90f8 100644 --- a/controller_manager/src/controller_manager.cpp +++ b/controller_manager/src/controller_manager.cpp @@ -2073,10 +2073,10 @@ controller_interface::return_type ControllerManager::update( std::string failed_controllers; for (const auto & controller : failed_controllers_list) { - failed_controllers += controller + " "; + failed_controllers += "\n\t- " + controller; } RCLCPP_ERROR( - get_logger(), "Deactivating controllers: %s, as their update resulted in an error", + get_logger(), "Deactivating following controllers as their update resulted in an error :%s", failed_controllers.c_str()); deactivate_controllers(rt_controller_list, failed_controllers_list); From 3b0186662e77e49a106a8d3fbdc6c6d8990379e9 Mon Sep 17 00:00:00 2001 From: Sai Kishor Kothakota Date: Mon, 22 Apr 2024 23:53:36 +0200 Subject: [PATCH 3/6] If external_commands_for_testing_ is set to not finite then return error in update cycle in test_controller --- .../test/test_controller/test_controller.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/controller_manager/test/test_controller/test_controller.cpp b/controller_manager/test/test_controller/test_controller.cpp index 7585ae36e5..df41ebf34e 100644 --- a/controller_manager/test/test_controller/test_controller.cpp +++ b/controller_manager/test/test_controller/test_controller.cpp @@ -76,6 +76,14 @@ controller_interface::return_type TestController::update( { for (size_t i = 0; i < command_interfaces_.size(); ++i) { + if (!std::isfinite(external_commands_for_testing_[i])) + { + RCLCPP_ERROR( + get_node()->get_logger(), + "External command value for command interface '%s' is not finite", + command_interfaces_[i].get_name().c_str()); + return controller_interface::return_type::ERROR; + } RCLCPP_INFO( get_node()->get_logger(), "Setting value of command interface '%s' to %f", command_interfaces_[i].get_name().c_str(), external_commands_for_testing_[i]); From a702a17d8d3fe46a337349b05c9f80b0a1b2ac6a Mon Sep 17 00:00:00 2001 From: Sai Kishor Kothakota Date: Mon, 22 Apr 2024 23:54:10 +0200 Subject: [PATCH 4/6] Add stop_controllers_on_controller_error test case --- ...roller_manager_hardware_error_handling.cpp | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/controller_manager/test/test_controller_manager_hardware_error_handling.cpp b/controller_manager/test/test_controller_manager_hardware_error_handling.cpp index 4c800d41c2..6e2fba23db 100644 --- a/controller_manager/test/test_controller_manager_hardware_error_handling.cpp +++ b/controller_manager/test/test_controller_manager_hardware_error_handling.cpp @@ -405,6 +405,106 @@ TEST_P(TestControllerManagerWithTestableCM, stop_controllers_on_hardware_read_er } } +TEST_P(TestControllerManagerWithTestableCM, stop_controllers_on_controller_error) +{ + auto strictness = GetParam().strictness; + SetupAndConfigureControllers(strictness); + + rclcpp_lifecycle::State state_active( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, + hardware_interface::lifecycle_state_names::ACTIVE); + + { + EXPECT_EQ(controller_interface::return_type::OK, cm_->update(time_, PERIOD)); + EXPECT_GE(test_controller_actuator->internal_counter, 1u) + << "Controller is started at the end of update"; + EXPECT_GE(test_controller_system->internal_counter, 1u) + << "Controller is started at the end of update"; + EXPECT_GE(test_broadcaster_all->internal_counter, 1u) + << "Controller is started at the end of update"; + EXPECT_GE(test_broadcaster_sensor->internal_counter, 1u) + << "Controller is started at the end of update"; + } + + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_controller_actuator->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_controller_system->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_broadcaster_all->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_broadcaster_sensor->get_state().id()); + + // Execute first time without any errors + { + auto new_counter = test_controller_actuator->internal_counter + 1; + EXPECT_EQ(controller_interface::return_type::OK, cm_->update(time_, PERIOD)); + EXPECT_EQ(test_controller_actuator->internal_counter, new_counter) << "Execute without errors"; + EXPECT_EQ(test_controller_system->internal_counter, new_counter) << "Execute without errors"; + EXPECT_EQ(test_broadcaster_all->internal_counter, new_counter) << "Execute without errors"; + EXPECT_EQ(test_broadcaster_sensor->internal_counter, new_counter) << "Execute without errors"; + } + + // Simulate error in update method of the controllers but not in hardware + test_controller_actuator->external_commands_for_testing_[0] = + std::numeric_limits::quiet_NaN(); + test_controller_system->external_commands_for_testing_[0] = + std::numeric_limits::quiet_NaN(); + { + auto new_counter = test_controller_actuator->internal_counter + 1; + EXPECT_EQ(controller_interface::return_type::ERROR, cm_->update(time_, PERIOD)); + EXPECT_EQ(test_controller_actuator->internal_counter, new_counter) + << "Executes the current cycle and returns ERROR"; + EXPECT_EQ( + test_controller_actuator->get_state().id(), + lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE); + EXPECT_EQ(test_controller_system->internal_counter, new_counter) + << "Executes the current cycle and returns ERROR"; + EXPECT_EQ( + test_controller_system->get_state().id(), lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE); + EXPECT_EQ(test_broadcaster_all->internal_counter, new_counter) + << "Execute without errors to write value"; + EXPECT_EQ(test_broadcaster_sensor->internal_counter, new_counter) + << "Execute without errors to write value"; + } + + { + auto previous_counter = test_controller_actuator->internal_counter; + auto new_counter = test_controller_system->internal_counter + 1; + + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE, + test_controller_actuator->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE, test_controller_system->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_broadcaster_all->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_broadcaster_sensor->get_state().id()); + + EXPECT_EQ(controller_interface::return_type::OK, cm_->update(time_, PERIOD)); + EXPECT_EQ(test_controller_actuator->internal_counter, previous_counter) + << "Cannot execute as it should be currently deactivated"; + EXPECT_EQ(test_controller_system->internal_counter, previous_counter) + << "Cannot execute as it should be currently deactivated"; + EXPECT_EQ(test_broadcaster_all->internal_counter, new_counter) + << "Broadcaster all interfaces without errors"; + EXPECT_EQ(test_broadcaster_sensor->internal_counter, new_counter) + << "Execute without errors to write value"; + + // The states shouldn't change as there are no more controller errors + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE, + test_controller_actuator->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE, test_controller_system->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_broadcaster_all->get_state().id()); + EXPECT_EQ( + lifecycle_msgs::msg::State::PRIMARY_STATE_ACTIVE, test_broadcaster_sensor->get_state().id()); + } +} + TEST_P(TestControllerManagerWithTestableCM, stop_controllers_on_hardware_write_error) { auto strictness = GetParam().strictness; From 45ea56a2a8ac5a15dcec7f0205fa26322ec4da20 Mon Sep 17 00:00:00 2001 From: Sai Kishor Kothakota Date: Tue, 23 Apr 2024 00:04:56 +0200 Subject: [PATCH 5/6] remove the outdated docs --- controller_manager/doc/userdoc.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/controller_manager/doc/userdoc.rst b/controller_manager/doc/userdoc.rst index f25ff302e2..6097d5cbdb 100644 --- a/controller_manager/doc/userdoc.rst +++ b/controller_manager/doc/userdoc.rst @@ -225,9 +225,7 @@ Note that not all controllers have to be restarted, e.g., broadcasters. Restarting hardware ^^^^^^^^^^^^^^^^^^^^^ -If hardware gets restarted then you should go through its lifecycle again. -This can be simply achieved by returning ``ERROR`` from ``write`` and ``read`` methods of interface implementation. -**NOT IMPLEMENTED YET - PLEASE STOP/RESTART ALL CONTROLLERS MANUALLY FOR NOW** The controller manager detects that and stops all the controllers that are commanding that hardware and restarts broadcasters that are listening to its states. +If hardware gets restarted then you should go through its lifecycle again inorder to reconfigure and export the interfaces Hardware and Controller Errors ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 81cc113812c8497c1181f63d8436d9efa730189c Mon Sep 17 00:00:00 2001 From: Sai Kishor Kothakota Date: Wed, 24 Apr 2024 19:51:10 +0200 Subject: [PATCH 6/6] Fix the userdoc Co-authored-by: Bence Magyar --- controller_manager/doc/userdoc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller_manager/doc/userdoc.rst b/controller_manager/doc/userdoc.rst index 6097d5cbdb..4deb85291b 100644 --- a/controller_manager/doc/userdoc.rst +++ b/controller_manager/doc/userdoc.rst @@ -225,7 +225,7 @@ Note that not all controllers have to be restarted, e.g., broadcasters. Restarting hardware ^^^^^^^^^^^^^^^^^^^^^ -If hardware gets restarted then you should go through its lifecycle again inorder to reconfigure and export the interfaces +If hardware gets restarted then you should go through its lifecycle again in order to reconfigure and export the interfaces Hardware and Controller Errors ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^