diff --git a/include/gtsam_points/types/gaussian_voxelmap.hpp b/include/gtsam_points/types/gaussian_voxelmap.hpp index 6717b2a4..b0e0ccdf 100644 --- a/include/gtsam_points/types/gaussian_voxelmap.hpp +++ b/include/gtsam_points/types/gaussian_voxelmap.hpp @@ -39,10 +39,10 @@ double overlap(const GaussianVoxelMap::ConstPtr& target, const PointCloud::Const /** * @brief Calculate the fraction of points fell in targets' voxels - * @param target Set of target voxelized frames - * @param source Source frame - * @param delta Set of T_target_source - * @return Overlap rate + * @param target Set of target voxelized frames + * @param source Source frame + * @param Ts_target_source Set of T_target_source + * @return Overlap rate */ double overlap( const std::vector& targets, @@ -60,43 +60,64 @@ overlap(const std::vector& targets, const PointCloud::ConstPtr& sou /** * @brief Calculate the fraction of points fell in target voxels on GPU * @note Source points and target voxelmap must be pre-allocated on GPU. - * @param target Target voxelmap - * @param source Source frame - * @param delta_gpu T_target_source (on GPU memory) - * @return Overlap rate + * @param target Target voxelmap + * @param source Source frame + * @param T_target_source_gpu T_target_source (on GPU memory) + * @return Overlap rate */ double overlap_gpu( const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, - const Eigen::Isometry3f* delta_gpu, + const Eigen::Isometry3f* T_target_source_gpu, CUstream_st* stream = 0); /** * @brief Calculate the fraction of points fell in target voxels on GPU * @note Source points and target voxelmap must be pre-allocated on GPU. - * @param target Target voxelmap - * @param source Source frame - * @param delta T_target_source - * @return Overlap rate + * @param target Target voxelmap + * @param source Source frame + * @param T_target_source T_target_source + * @return Overlap rate */ -double -overlap_gpu(const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& delta, CUstream_st* stream = 0); +double overlap_gpu( + const GaussianVoxelMap::ConstPtr& target, + const PointCloud::ConstPtr& source, + const Eigen::Isometry3d& T_target_source, + CUstream_st* stream = 0); /// @brief Calculate the fraction of points fell in target voxels on GPU -double overlap_gpu(const PointCloud::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& delta, CUstream_st* stream = 0); +double overlap_gpu( + const PointCloud::ConstPtr& target, + const PointCloud::ConstPtr& source, + const Eigen::Isometry3d& T_target_source, + CUstream_st* stream = 0); /** * @brief Calculate the fraction of points fell in targets' voxels on GPU * @note Source points and targets' voxelmap must be pre-allocated on GPU. - * @param targets Set of target voxelized frames - * @param source Source frame - * @param deltas Set of T_target_source - * @return Overlap rate + * @param targets Set of target voxelized frames + * @param source Source frame + * @param Ts_target_source Set of T_target_source + * @return Overlap rate */ double overlap_gpu( const std::vector& targets, const PointCloud::ConstPtr& source, - const std::vector& deltas, + const std::vector& Ts_target_source, + CUstream_st* stream = 0); + +/** + * @breif Batch calculation of the fraction of points fell in targets' voxels on GPU + * @note Source points and targets' voxelmap must be pre-allocated on GPU. + * @param targets Set of target voxelized frames + * @param sources Set of source frames + * @param Ts_target_source Set of T_target_source + * @return Overlap rate + */ +std::vector overlap_gpu( + const std::vector& targets, + const std::vector& sources, + const std::vector& Ts_target_source, CUstream_st* stream = 0); /// @brief Calculate the fraction of points fell in targets' voxels on GPU @@ -104,25 +125,39 @@ template std::enable_if_t, double> overlap_gpu( const std::vector& targets, const PointCloud::ConstPtr& source, - const std::vector& deltas, + const std::vector& Ts_target_source, CUstream_st* stream = 0) { const std::vector targets_(targets.begin(), targets.end()); - return overlap_gpu(targets_, source, deltas, stream); + return overlap_gpu(targets_, source, Ts_target_source, stream); +} + +/// @brief Calculate the fraction of points fell in targets' voxels on GPU +template +std:: + enable_if_t || !std::is_same_v, std::vector> + overlap_gpu( + const std::vector& targets, + const std::vector& sources, + const std::vector& Ts_target_source, + CUstream_st* stream = 0) { + const std::vector targets_(targets.begin(), targets.end()); + const std::vector sources_(sources.begin(), sources.end()); + return overlap_gpu(targets_, sources_, Ts_target_source, stream); } // Automatically select CPU or GPU method -double overlap_auto(const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& delta); +double overlap_auto(const GaussianVoxelMap::ConstPtr& target, const PointCloud::ConstPtr& source, const Eigen::Isometry3d& T_target_source); double overlap_auto( const std::vector& targets, const PointCloud::ConstPtr& source, - const std::vector& deltas); + const std::vector& Ts_target_source); template std::enable_if_t, double> -overlap_auto(const std::vector& targets, const PointCloud::ConstPtr& source, const std::vector& deltas) { +overlap_auto(const std::vector& targets, const PointCloud::ConstPtr& source, const std::vector& Ts_target_source) { const std::vector targets_(targets.begin(), targets.end()); - return overlap_auto(targets_, source, deltas); + return overlap_auto(targets_, source, Ts_target_source); } } // namespace gtsam_points \ No newline at end of file diff --git a/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu b/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu index 36e6f5cd..9ccdae46 100644 --- a/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu +++ b/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu @@ -285,4 +285,81 @@ double overlap_gpu( return static_cast(num_inliers_cpu) / source->size(); } +std::vector overlap_gpu( + const std::vector& targets_, + const std::vector& sources, + const std::vector& Ts_target_source_, + CUstream_st* stream) { + if (targets_.size() != sources.size()) { + std::cerr << "error: The number of target voxelmaps and source point clouds must be the same!!" << std::endl; + abort(); + } + + size_t max_num_points = 0; + + std::vector targets(targets_.size()); + for (int i = 0; i < targets_.size(); i++) { + targets[i] = std::dynamic_pointer_cast(targets_[i]); + if (!targets[i]) { + std::cerr << "error: Failed to cast target voxelmap to GaussianVoxelMapGPU!!" << std::endl; + } + + if (!sources[i]->has_points_gpu()) { + std::cerr << "error: GPU source points have not been allocated!!" << std::endl; + } + + max_num_points = std::max(max_num_points, sources[i]->size()); + } + + std::vector h_deltas(Ts_target_source_.size()); + std::transform(Ts_target_source_.begin(), Ts_target_source_.end(), h_deltas.begin(), [](const Eigen::Isometry3d& delta) { + return delta.cast(); + }); + + Eigen::Isometry3f* deltas; + check_error << cudaMallocAsync(&deltas, sizeof(Eigen::Isometry3f) * Ts_target_source_.size(), stream); + check_error << cudaMemcpyAsync(deltas, h_deltas.data(), sizeof(Eigen::Isometry3f) * Ts_target_source_.size(), cudaMemcpyHostToDevice, stream); + + bool* overlap; + check_error << cudaMallocAsync(&overlap, sizeof(bool) * max_num_points, stream); + + int* num_inliers; + check_error << cudaMallocAsync(&num_inliers, sizeof(float) * sources.size(), stream); + + char* temp_storage = nullptr; + size_t temp_storage_bytes = 0; + + for (int i = 0; i < targets_.size(); i++) { + const auto& source = sources[i]; + const auto& target = targets[i]; + thrust::transform( + thrust::cuda::par_nosync.on(stream), + thrust::device_ptr(source->points_gpu), + thrust::device_ptr(source->points_gpu) + source->size(), + thrust::device_ptr(overlap), + overlap_count_kernel(*target, thrust::device_ptr(deltas + i))); + + if (i == 0) { + cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, overlap, num_inliers + i, source->size(), thrust::plus(), 0, stream); + check_error << cudaMallocAsync(&temp_storage, temp_storage_bytes, stream); + } + cub::DeviceReduce::Reduce(temp_storage, temp_storage_bytes, overlap, num_inliers + i, source->size(), thrust::plus(), 0, stream); + } + + std::vector h_num_inliers(sources.size()); + check_error << cudaMemcpyAsync(h_num_inliers.data(), num_inliers, sizeof(int) * sources.size(), cudaMemcpyDeviceToHost, stream); + + std::vector overlaps(sources.size()); + for (int i = 0; i < sources.size(); i++) { + overlaps[i] = static_cast(h_num_inliers[i]) / sources[i]->size(); + } + + check_error << cudaFreeAsync(deltas, stream); + check_error << cudaFreeAsync(overlap, stream); + check_error << cudaFreeAsync(temp_storage, stream); + check_error << cudaFreeAsync(num_inliers, stream); + + return overlaps; +} + } // namespace gtsam_points \ No newline at end of file