Skip to content

Commit

Permalink
Handle hardware errors in Resource Manager (#805)
Browse files Browse the repository at this point in the history
* Add code for deactivating controller when hardware gets an error on read and write.

(cherry picked from commit e748824)
  • Loading branch information
destogl committed Aug 9, 2023
1 parent bf80cd9 commit 126c647
Show file tree
Hide file tree
Showing 8 changed files with 491 additions and 71 deletions.
34 changes: 32 additions & 2 deletions hardware_interface/include/hardware_interface/resource_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ class SensorInterface;
class SystemInterface;
class ResourceStorage;

struct HardwareReadWriteStatus
{
bool ok;
std::vector<std::string> failed_hardware_names;
};

class HARDWARE_INTERFACE_PUBLIC ResourceManager
{
public:
Expand Down Expand Up @@ -166,6 +172,26 @@ class HARDWARE_INTERFACE_PUBLIC ResourceManager
*/
void remove_controller_reference_interfaces(const std::string & controller_name);

/// Cache mapping between hardware and controllers using it
/**
* Find mapping between controller and hardware based on interfaces controller with
* \p controller_name is using and cache those for later usage.
*
* \param[in] controller_name name of the controller which interfaces are provided.
* \param[in] interfaces list of interfaces controller with \p controller_name is using.
*/
void cache_controller_to_hardware(
const std::string & controller_name, const std::vector<std::string> & interfaces);

/// Return cached controllers for a specific hardware.
/**
* Return list of cached controller names that use the hardware with name \p hardware_name.
*
* \param[in] hardware_name the name of the hardware for which cached controllers should be returned.
* \returns list of cached controller names that depend on hardware with name \p hardware_name.
*/
std::vector<std::string> get_cached_controllers_to_hardware(const std::string & hardware_name);

/// Checks whether a command interface is already claimed.
/**
* Any command interface can only be claimed by a single instance.
Expand Down Expand Up @@ -345,7 +371,7 @@ class HARDWARE_INTERFACE_PUBLIC ResourceManager
* Part of the real-time critical update loop.
* It is realtime-safe if used hadware interfaces are implemented adequately.
*/
void read(const rclcpp::Time & time, const rclcpp::Duration & period);
HardwareReadWriteStatus read(const rclcpp::Time & time, const rclcpp::Duration & period);

/// Write all loaded hardware components.
/**
Expand All @@ -354,7 +380,7 @@ class HARDWARE_INTERFACE_PUBLIC ResourceManager
* Part of the real-time critical update loop.
* It is realtime-safe if used hadware interfaces are implemented adequately.
*/
void write(const rclcpp::Time & time, const rclcpp::Duration & period);
HardwareReadWriteStatus write(const rclcpp::Time & time, const rclcpp::Duration & period);

/// Activates all available hardware components in the system.
/**
Expand All @@ -373,7 +399,11 @@ class HARDWARE_INTERFACE_PUBLIC ResourceManager

mutable std::recursive_mutex resource_interfaces_lock_;
mutable std::recursive_mutex claimed_command_interfaces_lock_;
mutable std::recursive_mutex resources_lock_;
std::unique_ptr<ResourceStorage> resource_storage_;

// Structure to store read and write status so it is not initialized in the real-time loop
HardwareReadWriteStatus read_write_status;
};

} // namespace hardware_interface
Expand Down
2 changes: 2 additions & 0 deletions hardware_interface/src/actuator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ const rclcpp_lifecycle::State & Actuator::get_state() const { return impl_->get_

return_type Actuator::read(const rclcpp::Time & time, const rclcpp::Duration & period)
{
// TODO(destogl): discuss what should be default return value, e.g., "NOT_EXECUTED"
return_type result = return_type::ERROR;
if (
impl_->get_state().id() == lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE ||
Expand All @@ -234,6 +235,7 @@ return_type Actuator::read(const rclcpp::Time & time, const rclcpp::Duration & p

return_type Actuator::write(const rclcpp::Time & time, const rclcpp::Duration & period)
{
// TODO(destogl): discuss what should be default return value, e.g., "NOT_EXECUTED"
return_type result = return_type::ERROR;
if (
impl_->get_state().id() == lifecycle_msgs::msg::State::PRIMARY_STATE_INACTIVE ||
Expand Down
235 changes: 166 additions & 69 deletions hardware_interface/src/resource_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class ResourceStorage
component_info.class_type = hardware_info.hardware_class_type;

hardware_info_map_.insert(std::make_pair(component_info.name, component_info));
hardware_used_by_controllers_.insert(
std::make_pair(component_info.name, std::vector<std::string>()));
}

template <class HardwareT>
Expand Down Expand Up @@ -195,6 +197,58 @@ class ResourceStorage
return result;
}

void remove_all_hardware_interfaces_from_available_list(const std::string & hardware_name)
{
// remove all command interfaces from available list
for (const auto & interface : hardware_info_map_[hardware_name].command_interfaces)
{
auto found_it = std::find(
available_command_interfaces_.begin(), available_command_interfaces_.end(), interface);

if (found_it != available_command_interfaces_.end())
{
available_command_interfaces_.erase(found_it);
RCUTILS_LOG_DEBUG_NAMED(
"resource_manager", "(hardware '%s'): '%s' command interface removed from available list",
hardware_name.c_str(), interface.c_str());
}
else
{
// TODO(destogl): do here error management if interfaces are only partially added into
// "available" list - this should never be the case!
RCUTILS_LOG_WARN_NAMED(
"resource_manager",
"(hardware '%s'): '%s' command interface not in available list. "
"This should not happen (hint: multiple cleanup calls).",
hardware_name.c_str(), interface.c_str());
}
}
// remove all state interfaces from available list
for (const auto & interface : hardware_info_map_[hardware_name].state_interfaces)
{
auto found_it = std::find(
available_state_interfaces_.begin(), available_state_interfaces_.end(), interface);

if (found_it != available_state_interfaces_.end())
{
available_state_interfaces_.erase(found_it);
RCUTILS_LOG_DEBUG_NAMED(
"resource_manager", "(hardware '%s'): '%s' state interface removed from available list",
hardware_name.c_str(), interface.c_str());
}
else
{
// TODO(destogl): do here error management if interfaces are only partially added into
// "available" list - this should never be the case!
RCUTILS_LOG_WARN_NAMED(
"resource_manager",
"(hardware '%s'): '%s' state interface not in available list. "
"This should not happen (hint: multiple cleanup calls).",
hardware_name.c_str(), interface.c_str());
}
}
}

template <class HardwareT>
bool cleanup_hardware(HardwareT & hardware)
{
Expand All @@ -204,55 +258,7 @@ class ResourceStorage

if (result)
{
// remove all command interfaces from available list
for (const auto & interface : hardware_info_map_[hardware.get_name()].command_interfaces)
{
auto found_it = std::find(
available_command_interfaces_.begin(), available_command_interfaces_.end(), interface);

if (found_it != available_command_interfaces_.end())
{
available_command_interfaces_.erase(found_it);
RCUTILS_LOG_DEBUG_NAMED(
"resource_manager",
"(hardware '%s'): '%s' command interface removed from available list",
hardware.get_name().c_str(), interface.c_str());
}
else
{
// TODO(destogl): do here error management if interfaces are only partially added into
// "available" list - this should never be the case!
RCUTILS_LOG_WARN_NAMED(
"resource_manager",
"(hardware '%s'): '%s' command interface not in available list."
" This can happen due to multiple calls to 'cleanup'",
hardware.get_name().c_str(), interface.c_str());
}
}
// remove all state interfaces from available list
for (const auto & interface : hardware_info_map_[hardware.get_name()].state_interfaces)
{
auto found_it = std::find(
available_state_interfaces_.begin(), available_state_interfaces_.end(), interface);

if (found_it != available_state_interfaces_.end())
{
available_state_interfaces_.erase(found_it);
RCUTILS_LOG_DEBUG_NAMED(
"resource_manager", "(hardware '%s'): '%s' state interface removed from available list",
hardware.get_name().c_str(), interface.c_str());
}
else
{
// TODO(destogl): do here error management if interfaces are only partially added into
// "available" list - this should never be the case!
RCUTILS_LOG_WARN_NAMED(
"resource_manager",
"(hardware '%s'): '%s' state interface not in available list. "
"This can happen due to multiple calls to 'cleanup'",
hardware.get_name().c_str(), interface.c_str());
}
}
remove_all_hardware_interfaces_from_available_list(hardware.get_name());
}
return result;
}
Expand Down Expand Up @@ -545,6 +551,10 @@ class ResourceStorage

std::unordered_map<std::string, HardwareComponentInfo> hardware_info_map_;

/// Mapping between hardware and controllers that are using it (accessing data from it)
std::unordered_map<std::string, std::vector<std::string>> hardware_used_by_controllers_;

/// Mapping between controllers and list of reference interfaces they are using
std::unordered_map<std::string, std::vector<std::string>> controllers_reference_interfaces_map_;

/// Storage of all available state interfaces
Expand Down Expand Up @@ -614,6 +624,11 @@ void ResourceManager::load_urdf(const std::string & urdf, bool validate_interfac
{
validate_storage(hardware_info);
}

std::lock_guard<std::recursive_mutex> guard(resources_lock_);
read_write_status.failed_hardware_names.reserve(
resource_storage_->actuators_.size() + resource_storage_->sensors_.size() +
resource_storage_->systems_.size());
}

LoanedStateInterface ResourceManager::claim_state_interface(const std::string & key)
Expand Down Expand Up @@ -719,6 +734,51 @@ void ResourceManager::remove_controller_reference_interfaces(const std::string &
resource_storage_->remove_command_interfaces(interface_names);
}

void ResourceManager::cache_controller_to_hardware(
const std::string & controller_name, const std::vector<std::string> & interfaces)
{
for (const auto & interface : interfaces)
{
bool found = false;
for (const auto & [hw_name, hw_info] : resource_storage_->hardware_info_map_)
{
auto cmd_itf_it =
std::find(hw_info.command_interfaces.begin(), hw_info.command_interfaces.end(), interface);
if (cmd_itf_it != hw_info.command_interfaces.end())
{
found = true;
}
auto state_itf_it =
std::find(hw_info.state_interfaces.begin(), hw_info.state_interfaces.end(), interface);
if (state_itf_it != hw_info.state_interfaces.end())
{
found = true;
}

if (found)
{
// check if controller exist already in the list and if not add it
auto controllers = resource_storage_->hardware_used_by_controllers_[hw_name];
auto ctrl_it = std::find(controllers.begin(), controllers.end(), controller_name);
if (ctrl_it == controllers.end())
{
// add because it does not exist
controllers.reserve(controllers.size() + 1);
controllers.push_back(controller_name);
}
resource_storage_->hardware_used_by_controllers_[hw_name] = controllers;
break;
}
}
}
}

std::vector<std::string> ResourceManager::get_cached_controllers_to_hardware(
const std::string & hardware_name)
{
return resource_storage_->hardware_used_by_controllers_[hardware_name];
}

// CM API: Called in "update"-thread
bool ResourceManager::command_interface_is_claimed(const std::string & key) const
{
Expand Down Expand Up @@ -807,19 +867,31 @@ size_t ResourceManager::sensor_components_size() const
void ResourceManager::import_component(
std::unique_ptr<ActuatorInterface> actuator, const HardwareInfo & hardware_info)
{
std::lock_guard<std::recursive_mutex> guard(resources_lock_);
resource_storage_->initialize_actuator(std::move(actuator), hardware_info);
read_write_status.failed_hardware_names.reserve(
resource_storage_->actuators_.size() + resource_storage_->sensors_.size() +
resource_storage_->systems_.size());
}

void ResourceManager::import_component(
std::unique_ptr<SensorInterface> sensor, const HardwareInfo & hardware_info)
{
std::lock_guard<std::recursive_mutex> guard(resources_lock_);
resource_storage_->initialize_sensor(std::move(sensor), hardware_info);
read_write_status.failed_hardware_names.reserve(
resource_storage_->actuators_.size() + resource_storage_->sensors_.size() +
resource_storage_->systems_.size());
}

void ResourceManager::import_component(
std::unique_ptr<SystemInterface> system, const HardwareInfo & hardware_info)
{
std::lock_guard<std::recursive_mutex> guard(resources_lock_);
resource_storage_->initialize_system(std::move(system), hardware_info);
read_write_status.failed_hardware_names.reserve(
resource_storage_->actuators_.size() + resource_storage_->sensors_.size() +
resource_storage_->systems_.size());
}

size_t ResourceManager::system_components_size() const
Expand Down Expand Up @@ -1001,32 +1073,57 @@ return_type ResourceManager::set_component_state(
return result;
}

void ResourceManager::read(const rclcpp::Time & time, const rclcpp::Duration & period)
HardwareReadWriteStatus ResourceManager::read(
const rclcpp::Time & time, const rclcpp::Duration & period)
{
for (auto & component : resource_storage_->actuators_)
{
component.read(time, period);
}
for (auto & component : resource_storage_->sensors_)
{
component.read(time, period);
}
for (auto & component : resource_storage_->systems_)
std::lock_guard<std::recursive_mutex> guard(resources_lock_);
read_write_status.ok = true;
read_write_status.failed_hardware_names.clear();

auto read_components = [&](auto & components)
{
component.read(time, period);
}
for (auto & component : components)
{
if (component.read(time, period) != return_type::OK)
{
read_write_status.ok = false;
read_write_status.failed_hardware_names.push_back(component.get_name());
resource_storage_->remove_all_hardware_interfaces_from_available_list(component.get_name());
}
}
};

read_components(resource_storage_->actuators_);
read_components(resource_storage_->sensors_);
read_components(resource_storage_->systems_);

return read_write_status;
}

void ResourceManager::write(const rclcpp::Time & time, const rclcpp::Duration & period)
HardwareReadWriteStatus ResourceManager::write(
const rclcpp::Time & time, const rclcpp::Duration & period)
{
for (auto & component : resource_storage_->actuators_)
{
component.write(time, period);
}
for (auto & component : resource_storage_->systems_)
std::lock_guard<std::recursive_mutex> guard(resources_lock_);
read_write_status.ok = true;
read_write_status.failed_hardware_names.clear();

auto write_components = [&](auto & components)
{
component.write(time, period);
}
for (auto & component : components)
{
if (component.write(time, period) != return_type::OK)
{
read_write_status.ok = false;
read_write_status.failed_hardware_names.push_back(component.get_name());
resource_storage_->remove_all_hardware_interfaces_from_available_list(component.get_name());
}
}
};

write_components(resource_storage_->actuators_);
write_components(resource_storage_->systems_);

return read_write_status;
}

void ResourceManager::validate_storage(
Expand Down
Loading

0 comments on commit 126c647

Please sign in to comment.