diff --git a/.gitignore b/.gitignore index 2a027b3..bb8bd5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # VSCode -.vscode/* +.vscode/ !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json diff --git a/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt new file mode 100644 index 0000000..34eff57 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt @@ -0,0 +1,92 @@ +cmake_minimum_required(VERSION 3.8) +project(valve_detection) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 20) +endif() + +add_compile_options(-Wall -Wextra -Wpedantic) + +find_package(ament_cmake REQUIRED) + +find_package(rclcpp REQUIRED) +find_package(rclcpp_components REQUIRED) +find_package(message_filters REQUIRED) + +find_package(sensor_msgs REQUIRED) +find_package(vision_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(std_msgs REQUIRED) +find_package(cv_bridge REQUIRED) +find_package(OpenCV REQUIRED) + +find_package(PCL REQUIRED) +find_package(pcl_conversions REQUIRED) + +find_package(tf2 REQUIRED) +find_package(tf2_ros REQUIRED) + +find_package(vortex_msgs REQUIRED) +find_package(vortex_utils REQUIRED) + +find_package(vortex_utils_ros REQUIRED) + +include_directories(include) + +set(LIB_NAME "${PROJECT_NAME}_component") + +add_library(${LIB_NAME} SHARED + src/depth_image_processing.cpp + src/detection_utils.cpp + src/pcl_extraction.cpp + src/pose_estimator.cpp + src/ros_utils.cpp + src/valve_pose_ros.cpp +) + +target_link_libraries(${LIB_NAME} PUBLIC + ${OpenCV_LIBS} + ${PCL_LIBRARIES} +) + +target_include_directories(${LIB_NAME} PUBLIC + $ + $ + ${OpenCV_INCLUDE_DIRS} + ${PCL_INCLUDE_DIRS} +) + +ament_target_dependencies(${LIB_NAME} PUBLIC + rclcpp + rclcpp_components + message_filters + sensor_msgs + vision_msgs + geometry_msgs + std_msgs + cv_bridge + OpenCV + pcl_conversions + tf2 + tf2_ros + vortex_msgs + vortex_utils + vortex_utils_ros +) + +rclcpp_components_register_node( + ${LIB_NAME} + PLUGIN "valve_detection::ValvePoseNode" + EXECUTABLE ${PROJECT_NAME}_node +) + +install(TARGETS ${LIB_NAME} + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin +) + +install(DIRECTORY include/ DESTINATION include) +install(DIRECTORY config launch DESTINATION share/${PROJECT_NAME}) + +ament_package() diff --git a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml new file mode 100644 index 0000000..3833a88 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml @@ -0,0 +1,99 @@ +/**: + ros__parameters: + # Inputs + depth_image_sub_topic: "/camera/camera/depth/image_rect_raw" + detections_sub_topic: "/obb_detections_output" + depth_image_info_topic: "/camera/camera/depth/camera_info" + color_image_info_topic: "/yolo_obb_encoder/internal/resize/camera_info" + + # Depth-to-color extrinsic + # TF frame names (without drone prefix) used to look up the depth-to-color + # transform from /tf_static. The node will wait until this transform is + # available before initialising the detector. + # NOTE: When launched via valve_intervention.launch.py these are overridden + # from cameras.yaml. Values here are standalone-launch defaults. + depth_frame_id: "front_camera_depth_optical" + color_frame_id: "front_camera_color_optical" + + # Outputs + landmarks_pub_topic: "/valve_landmarks" + + # TF frame used as the frame_id for all published poses and landmarks. + # Must match the depth optical frame in the robot's TF tree. + # NOTE: When launched via valve_intervention.launch.py this is set from + # cameras.yaml depth_frame_id. + output_frame_id: "front_camera_depth_optical" + + # Prepended to all TF frame names (e.g. "nautilus" -> "nautilus/front_camera_depth_optical"). + drone: "nautilus" + + # YOLO letterbox reference size + yolo_img_width: 640 + yolo_img_height: 640 + + # Undistort bounding-box detections using the color camera distortion + # coefficients from camera_info. Samples edge midpoints and center, + # undistorts them, and refits the OBB — gives a tighter fit than + # corner-based undistortion for round objects like valves. + undistort_detections: false + + # Annulus and plane fit + # Fraction of the bounding-box half-size used as the annulus ring radius + # (unitless, 0–1). At 0.8 the ring samples the outer 80 % of the box, + # avoiding the central hub while staying inside the valve rim. + # Tuned empirically on recorded valve footage. + annulus_radius_ratio: 0.8 + + # Maximum point-to-plane distance (metres) for a depth point to be counted + # as a RANSAC inlier. 0.01 m (1 cm) tolerates typical RealSense depth noise + # at ~0.5–1 m range without accepting gross outliers. + plane_ransac_threshold: 0.01 + + # Number of RANSAC iterations. 150 gives >99 % probability of finding a + # good plane even when the inlier ratio drops to ~30 %. + plane_ransac_max_iterations: 150 + + # Duplicate-detection suppression + # Two bounding boxes are considered the same valve when their + # axis-aligned IoU exceeds this threshold OR when the smaller box + # overlaps the larger by more than 70 % (intersection-over-minimum). + # Maximum 2 landmarks are published per frame. + iou_duplicate_threshold: 0.5 + + # Pose offset + # Shift the estimated position along the plane normal by this amount (metres). + # Value taken from the official valve CAD file: distance between the valve + # face frame and the inside length of the handle is 0.02 m. + valve_handle_offset: 0.02 + + debug_visualize: true # enable all debug visualization topics + + # Debug visualization only active when debug_visualize: true + debug: + # All detected valve poses in one PoseArray – useful for visualising + # multiple simultaneous detections in Foxglove. + valve_poses_pub_topic: "/valve_poses" + + # Annulus point cloud (points sampled inside the bounding-box ring used + # for plane fitting) projected into 3D – useful for verifying the depth + # extraction region. + depth_cloud_pub_topic: "/valve_depth_cloud" + + # Depth image false-coloured with COLORMAP_TURBO and OBB outlines drawn + # on top – useful for checking detection alignment on the depth frame. + depth_colormap_pub_topic: "/valve_detection_depth_colormap" + # Colormap scaling range in raw 16UC1 depth units (millimetres). + # Min ~0 mm clips near-zero noise; max ~1125 mm (≈1.1 m) covers the + # expected close-range operating distance for valve interaction. + depth_colormap_value_min: 0.1 + depth_colormap_value_max: 1125.5 + + # Points sampled from the annulus region of the bounding box, collected + # as a byproduct of pose estimation – useful for verifying the depth + # extraction region in 3D. + annulus_pub_topic: "/bbx_annulus_pcl" + + # RANSAC plane inlier points, collected as a byproduct of pose estimation + # – useful for checking that the fitted plane normal (and therefore the + # final pose orientation) is correct. + plane_pub_topic: "/annulus_plane_pcl" diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp new file mode 100644 index 0000000..ad757e8 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp @@ -0,0 +1,47 @@ +#ifndef VALVE_DETECTION__DEPTH_IMAGE_PROCESSING_HPP_ +#define VALVE_DETECTION__DEPTH_IMAGE_PROCESSING_HPP_ + +#include +#include +#include +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +/** + * @brief Back-projects a depth pixel to a 3D point in the camera frame. + * + * Assumes no lens distortion — this is valid for depth images, which are + * typically rectified before publication by the camera driver. + */ +void project_pixel_to_point(int u, + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& out); + +/// @brief Undistorts a bounding box by undistorting the edge midpoints and +/// center, then refitting an oriented bounding box. +/// TODO: Had problems with this, when it undistorted the orientation would be +/// off by a constant offset Workaround is to just disable this and undistort +/// the whole image instead (done upstream in perception_setup) +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr); + +// Projects a color image pixel to depth image coordinates. +// u_c, v_c: pixel coordinates in the color image. +// Z: depth of the point in the color camera frame (metres). +cv::Point2f project_color_pixel_to_depth(float u_c, + float v_c, + float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr); + +} // namespace valve_detection + +#endif // VALVE_DETECTION__DEPTH_IMAGE_PROCESSING_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp new file mode 100644 index 0000000..f849923 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp @@ -0,0 +1,22 @@ +#ifndef VALVE_DETECTION__DETECTION_UTILS_HPP_ +#define VALVE_DETECTION__DETECTION_UTILS_HPP_ + +#include +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +/** + * @brief Greedy NMS: returns indices of kept detections (max 2). + * + * scored_boxes: (score, bbox) pairs. Two boxes are duplicates when IoMin + * (intersection / min-area) or IoU exceeds iou_duplicate_threshold. + */ +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold); + +} // namespace valve_detection + +#endif // VALVE_DETECTION__DETECTION_UTILS_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp new file mode 100644 index 0000000..9967d55 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp @@ -0,0 +1,48 @@ +#ifndef VALVE_DETECTION__PCL_EXTRACTION_HPP_ +#define VALVE_DETECTION__PCL_EXTRACTION_HPP_ + +#include +#include +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, then checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. +void extract_annulus_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 metres, depth frame + const BoundingBox& color_bbox, // annulus defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& out); + +// Extracts all valid depth points whose color-frame projection falls inside +// the oriented bounding box. Output points are in the color camera frame. +void extract_bbox_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 metres, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are in the depth camera frame. +void extract_bbox_pcl_depth( + const cv::Mat& depth_image, // CV_32FC1 metres, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +} // namespace valve_detection + +#endif // VALVE_DETECTION__PCL_EXTRACTION_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp new file mode 100644 index 0000000..c8252a3 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp @@ -0,0 +1,104 @@ +#ifndef VALVE_DETECTION__POSE_ESTIMATOR_HPP_ +#define VALVE_DETECTION__POSE_ESTIMATOR_HPP_ + +#include "valve_detection/pcl_extraction.hpp" +#include "valve_detection/types.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace valve_detection { + +enum class DetectorMode { standard, debug }; + +struct DebugOutput { + pcl::PointCloud::Ptr annulus_cloud; + pcl::PointCloud::Ptr plane_cloud; +}; + +struct DetectionResult { + Pose pose; + bool valid{false}; + // Populated when DetectorMode::debug is used: + pcl::PointCloud::Ptr annulus_cloud; + pcl::PointCloud::Ptr plane_cloud; +}; + +class PoseEstimator { + public: + PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset); + + void set_color_image_properties(const ImageProperties& props); + void set_depth_image_properties(const ImageProperties& props); + void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); + + /// @brief Computes letterbox scale and padding from color image dimensions + /// and YOLO input size. Must be called after set_color_image_properties(). + void compute_letterbox_transform(); + + /// @brief Remaps a bounding box from YOLO letterbox coordinates to original + /// image coordinates. + BoundingBox letterbox_to_image_coords(const BoundingBox& bbox) const; + + DetectionResult compute_pose_from_depth( + const cv::Mat& depth_image, + const BoundingBox& bbox_org, + DetectorMode mode = DetectorMode::standard) const; + + private: + bool segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coeff, + pcl::PointIndices::Ptr& inliers) const; + + Eigen::Vector3f compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction) const; + Eigen::Vector3f find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction, + const Eigen::Vector3f& ray_origin = Eigen::Vector3f::Zero()) const; + Eigen::Vector3f shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const; + // Variant that works entirely in depth frame: color rays are rotated by + // R_depth_from_color = R^T before intersecting the depth-frame plane. + Eigen::Matrix3f create_rotation_matrix_depth( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle, + const Eigen::Vector3f& ray_origin, + const Eigen::Matrix3f& R_depth_from_color) const; + + ImageProperties color_image_properties_{}; + ImageProperties depth_image_properties_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + + int yolo_img_width_; + int yolo_img_height_; + float annulus_radius_ratio_; + float plane_ransac_threshold_; + int plane_ransac_max_iterations_; + float valve_handle_offset_; + + double letterbox_scale_factor_{1.0}; + double letterbox_pad_x_{0}; + double letterbox_pad_y_{0}; + + mutable Eigen::Vector3f filter_direction_{Eigen::Vector3f::Zero()}; +}; + +} // namespace valve_detection + +#endif // VALVE_DETECTION__POSE_ESTIMATOR_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp new file mode 100644 index 0000000..4cc8e1f --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp @@ -0,0 +1,44 @@ +#ifndef VALVE_DETECTION__TYPES_HPP_ +#define VALVE_DETECTION__TYPES_HPP_ + +#include +#include +#include +#include + +namespace valve_detection { + +struct CameraIntrinsics { + double fx{0}, fy{0}, cx{0}, cy{0}; + std::array dist{0, 0, 0, 0, 0}; // k1, k2, p1, p2, k3 +}; + +struct ImageDimensions { + int width{0}, height{0}; +}; + +struct ImageProperties { + CameraIntrinsics intr; + ImageDimensions dim; +}; + +struct BoundingBox { + float center_x{0}; + float center_y{0}; + float size_x{0}; + float size_y{0}; + float theta{0}; // radians +}; + +using Pose = vortex::utils::types::Pose; + +// Rigid transform from depth camera frame to color camera frame. +// Rotation R and translation t satisfy: P_color = R * P_depth + t +struct DepthColorExtrinsic { + Eigen::Matrix3f R{Eigen::Matrix3f::Identity()}; + Eigen::Vector3f t{Eigen::Vector3f::Zero()}; +}; + +} // namespace valve_detection + +#endif // VALVE_DETECTION__TYPES_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp new file mode 100644 index 0000000..e067d7d --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "valve_detection/types.hpp" +#include "vortex_msgs/msg/landmark_array.hpp" + +#include + +namespace valve_detection { + +// Converts a ROS BoundingBox2D message to the internal BoundingBox type. +BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b); + +// Builds a PoseArray message from a list of poses. +geometry_msgs::msg::PoseArray make_pose_array( + const std::vector& poses, + const std_msgs::msg::Header& header); + +// Builds a LandmarkArray message from a list of poses. +// Subtype is always 0 (unset); resolved downstream by valve_subtype_resolver. +vortex_msgs::msg::LandmarkArray make_landmark_array( + const std::vector& poses, + const std_msgs::msg::Header& header); + +// Decodes a ROS depth image to a CV_32FC1 mat in metres. +cv::Mat decode_depth_to_float( + const sensor_msgs::msg::Image::ConstSharedPtr& depth); + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp new file mode 100644 index 0000000..4b69e9a --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp @@ -0,0 +1,129 @@ +#ifndef VALVE_DETECTION_ROS__VALVE_POSE_ROS_HPP_ +#define VALVE_DETECTION_ROS__VALVE_POSE_ROS_HPP_ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include "valve_detection/detection_utils.hpp" +#include "valve_detection/pcl_extraction.hpp" +#include "valve_detection/pose_estimator.hpp" +#include "valve_detection/types.hpp" + +#include +#include +#include +#include "vortex_msgs/msg/landmark_array.hpp" +#include "vortex_msgs/msg/landmark_subtype.hpp" +#include "vortex_msgs/msg/landmark_type.hpp" + +namespace valve_detection { + +class ValvePoseNode : public rclcpp::Node { + public: + explicit ValvePoseNode(const rclcpp::NodeOptions& options); + + private: + // Node setup — called from constructor. + void declare_params(); + void init_subscriptions(); + void try_activate_detector(); + void lookup_extrinsic(); + + // Camera info callbacks (one-shot). + void color_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg); + void depth_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg); + + // Main synchronized callback: depth + detections. + void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); + + // sync_cb helpers + std::vector> collect_scored_boxes( + const vision_msgs::msg::Detection2DArray& det) const; + void publish_empty_results(const std_msgs::msg::Header& header) const; + cv::Mat build_depth_colormap( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) const; + void publish_debug(const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const std::vector& boxes, + const std::vector& poses, + const pcl::PointCloud& ann_cloud, + const pcl::PointCloud& pln_cloud) const; + + using SyncPolicy = message_filters::sync_policies::ApproximateTime< + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray>; + + // params + bool debug_visualize_; + float iou_duplicate_threshold_; + std::string output_frame_id_; + + // estimator config params (stored for deferred detector init) + int yolo_w_, yolo_h_; + float annulus_ratio_, ransac_thresh_, handle_offset_; + int ransac_iters_; + bool undistort_detections_{false}; + + // camera data (owned by node, passed to estimator and depth functions) + ImageProperties color_props_{}; + ImageProperties depth_props_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + bool color_props_ready_{false}; + bool depth_props_ready_{false}; + bool extrinsic_ready_{false}; + + // TF2 lookup for depth-to-color extrinsic + std::string depth_frame_id_; + std::string color_frame_id_; + std::shared_ptr tf_buffer_; + std::shared_ptr tf_listener_; + rclcpp::TimerBase::SharedPtr extrinsic_timer_; + + // estimator + std::unique_ptr detector_; + + // subs + rclcpp::Subscription::SharedPtr + color_cam_info_sub_; + rclcpp::Subscription::SharedPtr + depth_cam_info_sub_; + message_filters::Subscriber depth_sub_; + message_filters::Subscriber det_sub_; + std::shared_ptr> sync_; + + // pubs + rclcpp::Publisher::SharedPtr pose_pub_; + rclcpp::Publisher::SharedPtr landmark_pub_; + rclcpp::Publisher::SharedPtr depth_colormap_pub_; + rclcpp::Publisher::SharedPtr annulus_pub_; + rclcpp::Publisher::SharedPtr plane_pub_; + rclcpp::Publisher::SharedPtr + depth_cloud_pub_; + + float depth_colormap_vmin_; + float depth_colormap_vmax_; +}; + +} // namespace valve_detection + +#endif // VALVE_DETECTION_ROS__VALVE_POSE_ROS_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py new file mode 100644 index 0000000..3cbba5b --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py @@ -0,0 +1,129 @@ +import os + +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + + +def generate_launch_description(): + cfg = os.path.join( + get_package_share_directory('valve_detection'), + 'config', + 'valve_detection_params.yaml', + ) + + return LaunchDescription( + [ + # Input topics + DeclareLaunchArgument( + 'depth_image_sub_topic', + default_value='/camera/camera/depth/image_rect_raw', + description='Depth image topic', + ), + DeclareLaunchArgument( + 'detections_sub_topic', + default_value='/obb_detections_output', + description='YOLO detections topic', + ), + DeclareLaunchArgument( + 'depth_image_info_topic', + default_value='/camera/camera/depth/camera_info', + description='Depth camera info topic', + ), + DeclareLaunchArgument( + 'color_image_info_topic', + default_value='/yolo_obb_encoder/internal/resize/camera_info', + description='Color camera info topic (from DNN encoder)', + ), + # Frame IDs + DeclareLaunchArgument( + 'depth_frame_id', + default_value='front_camera_depth_optical', + description='Depth camera optical frame ID (without drone prefix)', + ), + DeclareLaunchArgument( + 'color_frame_id', + default_value='front_camera_color_optical', + description='Color camera optical frame ID (without drone prefix)', + ), + # Output topic + DeclareLaunchArgument( + 'landmarks_pub_topic', + default_value='/valve_landmarks', + description='Output valve landmarks topic', + ), + DeclareLaunchArgument( + 'output_frame_id', + default_value='front_camera_depth_optical', + description='Output frame ID for published poses (without drone prefix)', + ), + # Drone identifier + DeclareLaunchArgument( + 'drone', + default_value='nautilus', + description='Robot name, prepended to TF frame IDs (e.g. moby, orca)', + ), + # Detection processing + DeclareLaunchArgument( + 'undistort_detections', + default_value='false', + description='Undistort bounding-box detections using color camera distortion', + ), + # Debug visualization + DeclareLaunchArgument( + 'debug_visualize', + default_value='true', + description='Enable all debug visualization topics', + ), + # Node container with parameters from launch arguments + ComposableNodeContainer( + name='valve_detection_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + ComposableNode( + package='valve_detection', + plugin='valve_detection::ValvePoseNode', + name='valve_pose_node', + parameters=[ + cfg, + { + 'depth_image_sub_topic': LaunchConfiguration( + 'depth_image_sub_topic' + ), + 'detections_sub_topic': LaunchConfiguration( + 'detections_sub_topic' + ), + 'depth_image_info_topic': LaunchConfiguration( + 'depth_image_info_topic' + ), + 'color_image_info_topic': LaunchConfiguration( + 'color_image_info_topic' + ), + 'depth_frame_id': LaunchConfiguration('depth_frame_id'), + 'color_frame_id': LaunchConfiguration('color_frame_id'), + 'landmarks_pub_topic': LaunchConfiguration( + 'landmarks_pub_topic' + ), + 'output_frame_id': LaunchConfiguration( + 'output_frame_id' + ), + 'drone': LaunchConfiguration('drone'), + 'undistort_detections': LaunchConfiguration( + 'undistort_detections' + ), + 'debug_visualize': LaunchConfiguration( + 'debug_visualize' + ), + }, + ], + ) + ], + output='screen', + ), + ] + ) diff --git a/mission/tacc/visual_inspection/valve_detection/package.xml b/mission/tacc/visual_inspection/valve_detection/package.xml new file mode 100644 index 0000000..1cd75f4 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/package.xml @@ -0,0 +1,33 @@ + + + + valve_detection + 0.1.0 + Valve pose estimation from YOLOv26 OBB detections + depth image. + you + Apache-2.0 + + ament_cmake + rosidl_default_generators + + rclcpp + rclcpp_components + message_filters + sensor_msgs + vision_msgs + geometry_msgs + std_msgs + cv_bridge + pcl_conversions + tf2 + tf2_ros + vortex_msgs + vortex_utils + vortex_utils_ros + + rosidl_default_runtime + + + ament_cmake + + diff --git a/mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py b/mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py new file mode 100755 index 0000000..6767ce4 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +import math + +import rclpy +from geometry_msgs.msg import PoseArray +from rclpy.node import Node +from scipy.spatial.transform import Rotation +from tf2_ros import Buffer, TransformListener +from vision_msgs.msg import Detection2DArray +from vortex_utils_ros.qos_profiles import sensor_data_profile + + +class ThetaMonitor(Node): + def __init__(self): + super().__init__("theta_monitor") + + # TF lookup for depth-to-color extrinsic + self.tf_buffer = Buffer() + self.tf_listener = TransformListener(self.tf_buffer, self) + self.extrinsic_printed = False + self.create_timer(1.0, self.print_extrinsic) + + self.create_subscription( + Detection2DArray, + "/obb_detections_output", + self.obb_cb, + sensor_data_profile(10), + ) + self.create_subscription( + PoseArray, "/valve_poses", self.pose_cb, sensor_data_profile(10) + ) + + def print_extrinsic(self): + if self.extrinsic_printed: + return + try: + tf = self.tf_buffer.lookup_transform( + "nautilus/front_camera_color_optical", + "nautilus/front_camera_depth_optical", + rclpy.time.Time(), + ) + q = tf.transform.rotation + t = tf.transform.translation + r = Rotation.from_quat([q.x, q.y, q.z, q.w]) + rot = r.as_matrix() + roll, pitch, yaw = r.as_euler("xyz", degrees=True) + + self.get_logger().info( + f"\n{'=' * 60}\n" + f"DEPTH-TO-COLOR EXTRINSIC (depth -> color)\n" + f" Translation: [{t.x:.6f}, {t.y:.6f}, {t.z:.6f}]\n" + f" RPY (xyz): roll={roll:.4f}° pitch={pitch:.4f}° yaw={yaw:.4f}°\n" + f" Rotation matrix:\n" + f" [{rot[0, 0]:9.6f} {rot[0, 1]:9.6f} {rot[0, 2]:9.6f}]\n" + f" [{rot[1, 0]:9.6f} {rot[1, 1]:9.6f} {rot[1, 2]:9.6f}]\n" + f" [{rot[2, 0]:9.6f} {rot[2, 1]:9.6f} {rot[2, 2]:9.6f}]\n" + f"\n" + f" R_depth_from_color (R^T, used in pose_estimator):\n" + f" [{rot[0, 0]:9.6f} {rot[1, 0]:9.6f} {rot[2, 0]:9.6f}]\n" + f" [{rot[0, 1]:9.6f} {rot[1, 1]:9.6f} {rot[2, 1]:9.6f}]\n" + f" [{rot[0, 2]:9.6f} {rot[1, 2]:9.6f} {rot[2, 2]:9.6f}]\n" + f"{'=' * 60}" + ) + self.extrinsic_printed = True + except Exception as e: + self.get_logger().warn(f"Waiting for TF: {e}") + + def obb_cb(self, msg: Detection2DArray): + for i, det in enumerate(msg.detections): + theta_rad = det.bbox.center.theta + theta_deg = math.degrees(theta_rad) + self.get_logger().info( + f"[OBB det {i}] theta: {theta_rad:.4f} rad ({theta_deg:.2f} deg)" + ) + + def pose_cb(self, msg: PoseArray): + for i, pose in enumerate(msg.poses): + q = pose.orientation + r = Rotation.from_quat([q.x, q.y, q.z, q.w]) + roll, pitch, yaw = r.as_euler("xyz", degrees=True) + self.get_logger().info( + f"[POSE {i}] " + f"roll: {roll:7.2f}° pitch: {pitch:7.2f}° yaw: {yaw:7.2f}° " + f"pos: ({pose.position.x:.3f}, {pose.position.y:.3f}, {pose.position.z:.3f})" + ) + + +def main(): + rclpy.init() + rclpy.spin(ThetaMonitor()) + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp new file mode 100644 index 0000000..70802aa --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp @@ -0,0 +1,124 @@ +// depth_image_processing.cpp +// Depth-to-3D back-projection and color-to-depth pixel projection. +#include "valve_detection/depth_image_processing.hpp" +#include +#include +#include +#include +#include +#include + +namespace valve_detection { + +/// TODO: Had problems with this, when it undistorted the orientation would be +/// off by a constant offset Workaround is to just disable this and undistort +/// the whole image instead (done upstream in perception_setup) +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, + intr.fy, intr.cy, 0, 0, 1); + const cv::Mat D = (cv::Mat_(5, 1) << intr.dist[0], intr.dist[1], + intr.dist[2], intr.dist[3], intr.dist[4]); + + const float angle_deg = bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(bbox.center_x, bbox.center_y), + cv::Size2f(bbox.size_x, bbox.size_y), angle_deg); + + // Extract 4 corners to compute edge midpoints. + cv::Point2f corners[4]; + rrect.points(corners); + + // 5 sample points: 4 edge midpoints + bbox center. + std::vector pts; + pts.reserve(5); + for (int i = 0; i < 4; ++i) + pts.emplace_back((corners[i] + corners[(i + 1) % 4]) * 0.5f); + pts.emplace_back(bbox.center_x, bbox.center_y); + + std::vector undistorted; + cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); + + // Anchor result to the undistorted center (last point). + const cv::Point2f undist_center = undistorted.back(); + + // Refit an OBB to the 4 undistorted edge midpoints. + std::vector midpoints(undistorted.begin(), + undistorted.begin() + 4); + cv::RotatedRect fitted = cv::minAreaRect(midpoints); + + // minAreaRect returns angles in [-90, 0) and can flip which side it treats + // as the major axis, producing an angle ~90° off from the original for + // nearly-square boxes (e.g. circular valves). Normalise to stay within 90° + // of the input angle and swap width/height accordingly. + float fitted_angle_deg = fitted.angle; + float diff = fitted_angle_deg - angle_deg; + while (diff > 90.0f) { + diff -= 180.0f; + fitted_angle_deg -= 180.0f; + } + while (diff < -90.0f) { + diff += 180.0f; + fitted_angle_deg += 180.0f; + } + const bool swapped = std::abs(fitted_angle_deg - fitted.angle) > 45.0f; + + BoundingBox result = bbox; + result.center_x = undist_center.x; + result.center_y = undist_center.y; + result.size_x = swapped ? fitted.size.height : fitted.size.width; + result.size_y = swapped ? fitted.size.width : fitted.size.height; + result.theta = fitted_angle_deg * static_cast(M_PI) / 180.0f; + return result; +} + +// Back-projects a depth pixel (u, v, depth) to a 3D point using camera +// intrinsics. +void project_pixel_to_point(const int u, + const int v, + const float depth, + const double fx, + const double fy, + const double cx, + const double cy, + pcl::PointXYZ& out) { + if (depth <= 0.0f || std::isnan(depth) || std::isinf(depth)) { + out.x = out.y = out.z = std::numeric_limits::quiet_NaN(); + return; + } + out.x = static_cast((u - cx) * depth / fx); + out.y = static_cast((v - cy) * depth / fy); + out.z = depth; +} + +// Projects a color image pixel to depth image coordinates using the full +// intrinsic + extrinsic pipeline. +cv::Point2f project_color_pixel_to_depth(const float u_c, + const float v_c, + const float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr) { + // Back-project color pixel -> 3-D point in color camera frame. + const float Xc = (u_c - static_cast(color_props.intr.cx)) * Z / + static_cast(color_props.intr.fx); + const float Yc = (v_c - static_cast(color_props.intr.cy)) * Z / + static_cast(color_props.intr.fy); + const Eigen::Vector3f Pc(Xc, Yc, Z); + + // Transform to depth camera frame: P_depth = R^T * (P_color - t) + const Eigen::Vector3f Pd = extr.R.transpose() * (Pc - extr.t); + + if (Pd.z() <= 0.0f) + return {u_c, v_c}; // degenerate – return original + + // Project into depth image. + const float u_d = + static_cast(depth_props.intr.fx) * Pd.x() / Pd.z() + + static_cast(depth_props.intr.cx); + const float v_d = + static_cast(depth_props.intr.fy) * Pd.y() / Pd.z() + + static_cast(depth_props.intr.cy); + return {u_d, v_d}; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp new file mode 100644 index 0000000..c2ed083 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp @@ -0,0 +1,86 @@ +#include "valve_detection/detection_utils.hpp" + +#include +#include + +namespace valve_detection { + +// Greedy NMS (non-maximum suppression) that keeps at most 2 detections. +// +// Boxes are processed in descending confidence order. For each kept box, +// all remaining boxes that overlap it above the IoU threshold OR whose +// intersection-over-minimum (IoM) exceeds 70 % are suppressed. IoM +// catches the case where a small spurious detection sits inside a +// larger, higher-confidence one. +// +// Uses axis-aligned bounding rectangles for the overlap test (ignoring +// OBB rotation) since the YOLO OBB angles are small and the cost of +// exact OBB intersection is not justified here. +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold) { + const size_t n = scored_boxes.size(); + if (n == 0) + return {}; + + // Sort indices by descending confidence score. + std::vector> order; + order.reserve(n); + for (size_t i = 0; i < n; ++i) + order.emplace_back(scored_boxes[i].first, i); + std::sort(order.begin(), order.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + std::vector kept; + std::vector suppressed(n, false); + + for (size_t si = 0; si < order.size() && kept.size() < 2; ++si) { + const size_t i = order[si].second; + if (suppressed[i]) + continue; + + kept.push_back(i); + + // Compute AABB of the kept box for overlap tests. + const BoundingBox& bi = scored_boxes[i].second; + const float ai = bi.size_x * bi.size_y; + const float bx1i = bi.center_x - bi.size_x * 0.5f; + const float by1i = bi.center_y - bi.size_y * 0.5f; + const float bx2i = bi.center_x + bi.size_x * 0.5f; + const float by2i = bi.center_y + bi.size_y * 0.5f; + + // Suppress lower-confidence boxes that overlap too much. + for (size_t sj = si + 1; sj < order.size(); ++sj) { + const size_t j = order[sj].second; + if (suppressed[j]) + continue; + + const BoundingBox& bj = scored_boxes[j].second; + const float aj = bj.size_x * bj.size_y; + const float bx1j = bj.center_x - bj.size_x * 0.5f; + const float by1j = bj.center_y - bj.size_y * 0.5f; + const float bx2j = bj.center_x + bj.size_x * 0.5f; + const float by2j = bj.center_y + bj.size_y * 0.5f; + + // AABB intersection rectangle. + const float ix1 = std::max(bx1i, bx1j); + const float iy1 = std::max(by1i, by1j); + const float ix2 = std::min(bx2i, bx2j); + const float iy2 = std::min(by2i, by2j); + + if (ix2 <= ix1 || iy2 <= iy1) + continue; // no overlap + + const float inter = (ix2 - ix1) * (iy2 - iy1); + const float iou = inter / (ai + aj - inter); + const float iom = inter / std::min(ai, aj); + + if (iou > iou_duplicate_threshold || iom > 0.7f) + suppressed[j] = true; + } + } + + return kept; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp b/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp new file mode 100644 index 0000000..24e5ea6 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp @@ -0,0 +1,296 @@ +#include "valve_detection/pcl_extraction.hpp" +#include "valve_detection/depth_image_processing.hpp" + +#include +#include +#include +#include +#include + +namespace valve_detection { + +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, and checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. +void extract_annulus_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + const float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Annulus defined in color-image coordinates. + const float cx_c = color_bbox.center_x; + const float cy_c = color_bbox.center_y; + const float outer_rx = color_bbox.size_x * 0.5f; + const float outer_ry = color_bbox.size_y * 0.5f; + + // Require at least 4 pixels in each dimension (2 px half-extent) before + // sampling — smaller boxes contain no usable depth points. + if (outer_rx < 2.0f || outer_ry < 2.0f) + return; + + // Inner ellipse scaled down by annulus_radius_ratio, forming the ring. + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + // The depth and color images have different resolutions and focal lengths. + // Scale color-space bbox bounds into depth-image coordinates to get a + // coarse candidate region, then add a margin to cover the lateral shift + // introduced by the depth-to-color translation. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + constexpr int kMargin = 30; + + const int u0_d = + std::max(0, static_cast((cx_c - outer_rx) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((cx_c + outer_rx) * scale) + kMargin); + const int v0_d = + std::max(0, static_cast((cy_c - outer_ry) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((cy_c + outer_ry) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project depth pixel to 3-D point in depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Apply extrinsic to move the point into the color camera frame, + // where the annulus is defined. + const Eigen::Vector3f P_c = extrinsic.R * P_d + extrinsic.t; + if (P_c.z() <= 0.0f) + continue; + + // Project the color-frame point onto the color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Normalise offset by outer half-extents; keep only points inside + // the outer ellipse (sum of squares ≤ 1). + const float dxo = (u_c - cx_c) / outer_rx; + const float dyo = (v_c - cy_c) / outer_ry; + if (dxo * dxo + dyo * dyo > 1.0f) + continue; // outside outer ellipse + + // Discard points inside the inner ellipse to form the ring. + const float dxi = (u_c - cx_c) / inner_rx; + const float dyi = (v_c - cy_c) / inner_ry; + if (dxi * dxi + dyi * dyi <= 1.0f) + continue; // inside inner ellipse + + // Store the point in the color camera frame. + pcl::PointXYZ p; + p.x = P_c.x(); + p.y = P_c.y(); + p.z = P_c.z(); + out->points.push_back(p); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Extracts depth points whose color-frame projection falls inside the oriented +// bounding box. +void extract_bbox_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Pre-compute the OBB axes in color-image space for a fast rotated-rect + // test. + const float cos_t = std::cos(color_bbox.theta); + const float sin_t = std::sin(color_bbox.theta); + const float half_w = color_bbox.size_x * 0.5f; + const float half_h = color_bbox.size_y * 0.5f; + + // Approximate search region in depth-image space. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const float r = std::sqrt(half_w * half_w + half_h * half_h); + constexpr int kMargin = 30; + + const int u0_d = std::max( + 0, static_cast((color_bbox.center_x - r) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((color_bbox.center_x + r) * scale) + kMargin); + const int v0_d = std::max( + 0, static_cast((color_bbox.center_y - r) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((color_bbox.center_y + r) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Transform to color camera frame. + const Eigen::Vector3f P_c = extrinsic.R * P_d + extrinsic.t; + if (P_c.z() <= 0.0f) + continue; + + // Project onto color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Rotate into the OBB local frame and test against the + // half-extents. + const float dx = u_c - color_bbox.center_x; + const float dy = v_c - color_bbox.center_y; + const float local_x = cos_t * dx + sin_t * dy; + const float local_y = -sin_t * dx + cos_t * dy; + + if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) + continue; + + out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are stored in the depth camera frame. +void extract_bbox_pcl_depth(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Sample a representative depth near the bbox center to project the color + // OBB corners into depth image space. The valve is roughly flat, so a + // single Z gives a good approximation for all 4 corners. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const int u_c_d = std::clamp(static_cast(color_bbox.center_x * scale), + 0, depth_image.cols - 1); + const int v_c_d = std::clamp(static_cast(color_bbox.center_y * scale), + 0, depth_image.rows - 1); + + float Z_est = 0.0f; + constexpr int kSampleRadius = 5; + for (int dv = -kSampleRadius; dv <= kSampleRadius && Z_est <= 0.0f; ++dv) { + for (int du = -kSampleRadius; du <= kSampleRadius && Z_est <= 0.0f; + ++du) { + const int u = std::clamp(u_c_d + du, 0, depth_image.cols - 1); + const int v = std::clamp(v_c_d + dv, 0, depth_image.rows - 1); + const float z = depth_image.at(v, u); + if (z > 0.0f && !std::isnan(z) && !std::isinf(z)) + Z_est = z; + } + } + if (Z_est <= 0.0f) + return; + + // Project the 4 color OBB corners into depth image coordinates. + const float angle_deg = + color_bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect color_rrect( + cv::Point2f(color_bbox.center_x, color_bbox.center_y), + cv::Size2f(color_bbox.size_x, color_bbox.size_y), angle_deg); + cv::Point2f color_corners[4]; + color_rrect.points(color_corners); + + std::vector depth_corners(4); + for (int i = 0; i < 4; ++i) { + depth_corners[i] = project_color_pixel_to_depth( + color_corners[i].x, color_corners[i].y, Z_est, color_props, + depth_props, extrinsic); + } + + // Fit a rotated rect to the 4 projected corners in depth image space. + const cv::RotatedRect depth_rrect = cv::minAreaRect(depth_corners); + const float angle_d = depth_rrect.angle * static_cast(M_PI) / 180.0f; + const float cos_d = std::cos(angle_d); + const float sin_d = std::sin(angle_d); + const float half_w_d = depth_rrect.size.width * 0.5f; + const float half_h_d = depth_rrect.size.height * 0.5f; + const float cx_d = depth_rrect.center.x; + const float cy_d = depth_rrect.center.y; + + // Bounding rectangle of the depth OBB. + const float r_d = std::sqrt(half_w_d * half_w_d + half_h_d * half_h_d); + const int u0_d = std::max(0, static_cast(cx_d - r_d) - 1); + const int u1_d = + std::min(depth_image.cols - 1, static_cast(cx_d + r_d) + 1); + const int v0_d = std::max(0, static_cast(cy_d - r_d) - 1); + const int v1_d = + std::min(depth_image.rows - 1, static_cast(cy_d + r_d) + 1); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + // Test against the depth-image OBB — no matrix multiply. + const float dx = u_d - cx_d; + const float dy = v_d - cy_d; + if (std::abs(cos_d * dx + sin_d * dy) > half_w_d || + std::abs(-sin_d * dx + cos_d * dy) > half_h_d) + continue; + + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to 3D depth frame. + out->points.push_back( + {static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx), + static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy), + z_d}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp new file mode 100644 index 0000000..d8c4596 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp @@ -0,0 +1,313 @@ +// pose_estimator.cpp +// RANSAC plane fitting, normal/ray-plane intersection, and 3D pose computation. +#include "valve_detection/pose_estimator.hpp" + +#include +#include + +namespace valve_detection { + +// Initializes YOLO dimensions, annulus ratio, RANSAC parameters, and handle +// offset. +PoseEstimator::PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset) + : yolo_img_width_(yolo_img_width), + yolo_img_height_(yolo_img_height), + annulus_radius_ratio_(annulus_radius_ratio), + plane_ransac_threshold_(plane_ransac_threshold), + plane_ransac_max_iterations_(plane_ransac_max_iterations), + valve_handle_offset_(valve_handle_offset) {} + +// Stores color camera intrinsics and image dimensions. +void PoseEstimator::set_color_image_properties(const ImageProperties& props) { + color_image_properties_ = props; +} + +// Stores depth camera intrinsics and image dimensions. +void PoseEstimator::set_depth_image_properties(const ImageProperties& props) { + depth_image_properties_ = props; +} + +// Stores the depth-to-color extrinsic transform. +void PoseEstimator::set_depth_color_extrinsic(const DepthColorExtrinsic& extr) { + depth_color_extrinsic_ = extr; +} + +// Computes letterbox scale factor and padding from the color image and YOLO +// dimensions. +void PoseEstimator::compute_letterbox_transform() { + int org_image_width = color_image_properties_.dim.width; + int org_image_height = color_image_properties_.dim.height; + + letterbox_scale_factor_ = + std::min(static_cast(yolo_img_width_) / org_image_width, + static_cast(yolo_img_height_) / org_image_height); + + double resized_width = org_image_width * letterbox_scale_factor_; + double resized_height = org_image_height * letterbox_scale_factor_; + + letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; + letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; +} + +// Remaps a bounding box from YOLO letterbox space back to original image +// coordinates. +BoundingBox PoseEstimator::letterbox_to_image_coords( + const BoundingBox& bbox) const { + BoundingBox transformed = bbox; + transformed.center_x = + (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; + transformed.center_y = + (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; + transformed.size_x /= letterbox_scale_factor_; + transformed.size_y /= letterbox_scale_factor_; + return transformed; +} + +// Fits a plane to the point cloud using RANSAC; returns false if no inliers are +// found. +bool PoseEstimator::segment_plane( + const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const { + if (cloud->points.empty()) + return false; + + pcl::SACSegmentation seg; + seg.setOptimizeCoefficients(true); + seg.setModelType(pcl::SACMODEL_PLANE); + seg.setMethodType(pcl::SAC_RANSAC); + seg.setDistanceThreshold(plane_ransac_threshold_); + seg.setMaxIterations(plane_ransac_max_iterations_); + seg.setInputCloud(cloud); + seg.segment(*inliers, *coefficients); + + return !inliers->indices.empty(); +} + +// Extracts and normalizes the plane normal, flipping it to face the camera. +Eigen::Vector3f PoseEstimator::compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const { + if (!coefficients || coefficients->values.size() < 3) + return Eigen::Vector3f::Zero(); + + Eigen::Vector3f normal(coefficients->values[0], coefficients->values[1], + coefficients->values[2]); + normal.normalize(); + + if (normal.dot(ray_direction) > 0.0f) + normal = -normal; + return normal; +} + +// Finds the 3D point where a ray intersects the fitted plane. +// ray_origin defaults to the camera origin (zero), which is correct for the +// color-frame path. Pass the actual origin when the ray does not start at +// the frame origin (e.g. the color camera origin expressed in depth frame). +Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction, + const Eigen::Vector3f& ray_origin) const { + // A plane ax+by+cz+d=0 has 4 coefficients: [a, b, c, d]. + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Vector3f::Zero(); + + const Eigen::Vector3f plane_normal(coefficients->values[0], + coefficients->values[1], + coefficients->values[2]); + const float D = coefficients->values[3]; + const float denom = plane_normal.dot(ray_direction); + + if (std::abs(denom) < 1e-6f) + return Eigen::Vector3f::Zero(); + + const float lambda = -(plane_normal.dot(ray_origin) + D) / denom; + return ray_origin + lambda * ray_direction; +} + +// Shifts a 3D point along the plane normal by the valve handle offset. +Eigen::Vector3f PoseEstimator::shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const { + return intersection_point + (plane_normal * valve_handle_offset_); +} + +// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected +// bbox angle (X), working entirely in the depth camera frame. Color-image +// rays are rotated into depth frame before intersecting the plane, and +// ray_origin is the color camera origin expressed in depth frame. +// +// Axis convention (valve body frame): +// X = handle dir -> roll = valve face tilt +// Y = cross product -> pitch = valve face tilt +// Z = plane normal -> yaw = handle rotation around normal +Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle, + const Eigen::Vector3f& ray_origin, + const Eigen::Matrix3f& R_depth_from_color) const { + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f z_axis = plane_normal; + const float D = coefficients->values[3]; + const float fx = static_cast(color_image_properties_.intr.fx); + const float fy = static_cast(color_image_properties_.intr.fy); + const float cx = static_cast(color_image_properties_.intr.cx); + const float cy = static_cast(color_image_properties_.intr.cy); + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + const Eigen::Matrix3f Kinv = K.inverse(); + + // Two image points along the bbox angle through the principal point. + const float len = 50.0f; + const Eigen::Vector3f p1(cx, cy, 1.f); + const Eigen::Vector3f p2(cx + len * std::cos(angle), + cy + len * std::sin(angle), 1.f); + + // Back-project color pixels to rays, then rotate into depth frame. + const Eigen::Vector3f r1 = (R_depth_from_color * (Kinv * p1)).normalized(); + const Eigen::Vector3f r2 = (R_depth_from_color * (Kinv * p2)).normalized(); + + // Intersect each ray (from color origin in depth frame) with the plane. + const float denom1 = z_axis.dot(r1); + const float denom2 = z_axis.dot(r2); + if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) + return Eigen::Matrix3f::Identity(); + + const float n_dot_O = z_axis.dot(ray_origin); + const Eigen::Vector3f X1 = ray_origin + (-(n_dot_O + D) / denom1) * r1; + const Eigen::Vector3f X2 = ray_origin + (-(n_dot_O + D) / denom2) * r2; + + // Compute in-plane direction corresponding to the image line angle. + Eigen::Vector3f x_axis = (X2 - X1).normalized(); + x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); + + // Ensure consistent direction (avoid flipping between frames). + // On the first valid computation filter_direction_ is zero; just accept + // whatever direction the geometry gives us. + if (!filter_direction_.isZero()) { + if (filter_direction_.dot(x_axis) < 0) + x_axis = -x_axis; + } + filter_direction_ = x_axis; + + const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + x_axis = y_axis.cross(z_axis).normalized(); + + Eigen::Matrix3f rot; + rot.col(0) = x_axis; + rot.col(1) = y_axis; + rot.col(2) = z_axis; + return rot; +} + +// Extracts a point cloud from the depth image, fits a plane, and returns the +// valve pose. +DetectionResult PoseEstimator::compute_pose_from_depth( + const cv::Mat& depth_image, + const BoundingBox& bbox_org, + DetectorMode mode) const { + // Reject bounding boxes whose center projects entirely outside the depth + // image. The scale factor maps from color pixel space to depth pixel + // space. + const float scale = + (depth_image_properties_.intr.fx > 0.0 && + color_image_properties_.intr.fx > 0.0) + ? static_cast(depth_image_properties_.intr.fx / + color_image_properties_.intr.fx) + : 1.0f; + const float u_d = bbox_org.center_x * scale; + const float v_d = bbox_org.center_y * scale; + if (u_d < 0.0f || u_d >= static_cast(depth_image.cols) || + v_d < 0.0f || v_d >= static_cast(depth_image.rows)) + return {}; + + pcl::PointCloud::Ptr cloud( + new pcl::PointCloud); + + // Extract points directly in the depth camera frame; only the OBB + // membership test is done in the color frame (a per-pixel projection + // that cannot be avoided without approximating the bbox in depth space). + extract_bbox_pcl_depth(depth_image, bbox_org, color_image_properties_, + depth_image_properties_, depth_color_extrinsic_, + cloud); + + if (cloud->points.size() < 4) + return {}; + + DetectionResult result; + + if (mode == DetectorMode::debug) { + result.annulus_cloud = + std::make_shared>(*cloud); + } + + pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); + pcl::PointIndices::Ptr inliers(new pcl::PointIndices); + if (!segment_plane(cloud, coeff, inliers)) + return {}; + + if (mode == DetectorMode::debug) { + result.plane_cloud = std::make_shared>(); + for (int idx : inliers->indices) + result.plane_cloud->points.push_back(cloud->points[idx]); + result.plane_cloud->width = + static_cast(result.plane_cloud->points.size()); + result.plane_cloud->height = 1; + result.plane_cloud->is_dense = false; + } + + // The color bbox center defines a ray that originates at the color + // camera, not the depth camera. Express that ray in depth frame: + // origin = -R^T * t (color camera origin in depth frame) + // direction = R^T * K_c⁻¹ * [cx, cy, 1]ᵀ (normalized) + const Eigen::Matrix3f R_depth_from_color = + depth_color_extrinsic_.R.transpose(); + const Eigen::Vector3f color_origin_in_depth_frame = + -(R_depth_from_color * depth_color_extrinsic_.t); + + const float fx_c = static_cast(color_image_properties_.intr.fx); + const float fy_c = static_cast(color_image_properties_.intr.fy); + const float cx_c = static_cast(color_image_properties_.intr.cx); + const float cy_c = static_cast(color_image_properties_.intr.cy); + const Eigen::Vector3f r_c((bbox_org.center_x - cx_c) / fx_c, + (bbox_org.center_y - cy_c) / fy_c, 1.0f); + const Eigen::Vector3f ray = (R_depth_from_color * r_c).normalized(); + + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return {}; + + const Eigen::Vector3f pos = + find_ray_plane_intersection(coeff, ray, color_origin_in_depth_frame); + if (pos.isZero()) + return {}; + + const Eigen::Vector3f pos_shifted = shift_point_along_normal(pos, normal); + + // The OBB theta gives the direction of size_x. The valve handle + // aligns with the longer side of the bounding box, so rotate by + // 90° when size_y > size_x. + float handle_angle = bbox_org.theta; + if (bbox_org.size_y > bbox_org.size_x) + handle_angle += static_cast(M_PI / 2.0); + + const Eigen::Matrix3f rot = create_rotation_matrix_depth( + coeff, normal, handle_angle, color_origin_in_depth_frame, + R_depth_from_color); + result.pose = + Pose::from_eigen(pos_shifted.cast(), + Eigen::Quaternionf(rot).normalized().cast()); + result.valid = true; + return result; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp new file mode 100644 index 0000000..fc44bfd --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp @@ -0,0 +1,79 @@ +#include "valve_detection_ros/ros_utils.hpp" + +#include +#include +#include "vortex_msgs/msg/landmark.hpp" + +namespace valve_detection { + +BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b) { + BoundingBox o; + o.center_x = static_cast(b.center.position.x); + o.center_y = static_cast(b.center.position.y); + o.size_x = static_cast(b.size_x); + o.size_y = static_cast(b.size_y); + o.theta = static_cast(b.center.theta); // radians + return o; +} + +geometry_msgs::msg::PoseArray make_pose_array( + const std::vector& poses, + const std_msgs::msg::Header& header) { + geometry_msgs::msg::PoseArray msg; + msg.header = header; + msg.poses.reserve(poses.size()); + for (const auto& p : poses) { + geometry_msgs::msg::Pose po; + po.position.x = p.x; + po.position.y = p.y; + po.position.z = p.z; + po.orientation.x = p.qx; + po.orientation.y = p.qy; + po.orientation.z = p.qz; + po.orientation.w = p.qw; + msg.poses.push_back(po); + } + return msg; +} + +vortex_msgs::msg::LandmarkArray make_landmark_array( + const std::vector& poses, + const std_msgs::msg::Header& header) { + vortex_msgs::msg::LandmarkArray out; + out.header = header; + out.landmarks.reserve(poses.size()); + for (size_t i = 0; i < poses.size(); ++i) { + vortex_msgs::msg::Landmark lm; + lm.header = header; + lm.id = static_cast(i); + lm.type.value = vortex_msgs::msg::LandmarkType::VALVE; + lm.pose.pose.position.x = poses[i].x; + lm.pose.pose.position.y = poses[i].y; + lm.pose.pose.position.z = poses[i].z; + lm.pose.pose.orientation.x = poses[i].qx; + lm.pose.pose.orientation.y = poses[i].qy; + lm.pose.pose.orientation.z = poses[i].qz; + lm.pose.pose.orientation.w = poses[i].qw; + out.landmarks.push_back(lm); + } + return out; +} + +cv::Mat decode_depth_to_float( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) { + cv::Mat depth_img; + // RealSense publishes depth as 16UC1 (uint16 millimetres). + // cv_bridge type-casts without scaling, so we must divide by 1000. + if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1) { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "16UC1"); + cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); + } else { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "32FC1"); + depth_img = cv_depth->image.clone(); + } + return depth_img; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp new file mode 100644 index 0000000..1ce850f --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -0,0 +1,396 @@ +// valve_pose_ros.cpp +// ROS node: subscribes to depth and detections; publishes valve poses +// and visualizations. +#include "valve_detection_ros/valve_pose_ros.hpp" +#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection_ros/ros_utils.hpp" + +#include + +#include + +#include + +#include + +namespace valve_detection { + +using std::placeholders::_1; +using std::placeholders::_2; + +ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) + : Node("valve_pose_node", options) { + declare_params(); + init_subscriptions(); +} + +void ValvePoseNode::declare_params() { + debug_visualize_ = declare_parameter("debug_visualize"); + iou_duplicate_threshold_ = static_cast( + declare_parameter("iou_duplicate_threshold")); + + const std::string frame_base = + declare_parameter("output_frame_id"); + if (frame_base.empty()) { + throw std::runtime_error("output_frame_id must not be empty"); + } + const std::string drone = + declare_parameter("drone", "nautilus"); + output_frame_id_ = drone + "/" + frame_base; + + // Estimator config params + yolo_w_ = declare_parameter("yolo_img_width"); + yolo_h_ = declare_parameter("yolo_img_height"); + annulus_ratio_ = declare_parameter("annulus_radius_ratio"); + ransac_thresh_ = declare_parameter("plane_ransac_threshold"); + ransac_iters_ = declare_parameter("plane_ransac_max_iterations"); + handle_offset_ = declare_parameter("valve_handle_offset"); + undistort_detections_ = declare_parameter("undistort_detections"); + + // TF frame IDs for the depth-to-color extrinsic lookup. + const std::string depth_frame_base = + declare_parameter("depth_frame_id"); + const std::string color_frame_base = + declare_parameter("color_frame_id"); + depth_frame_id_ = drone + "/" + depth_frame_base; + color_frame_id_ = drone + "/" + color_frame_base; + + // TF2 buffer and listener for extrinsic lookup. + tf_buffer_ = std::make_shared(get_clock()); + tf_listener_ = std::make_shared(*tf_buffer_); + + // Periodically attempt to look up the static transform until it arrives. + extrinsic_timer_ = + create_wall_timer(std::chrono::milliseconds(500), + std::bind(&ValvePoseNode::lookup_extrinsic, this)); + + if (debug_visualize_) { + const auto pose_topic = + declare_parameter("debug.valve_poses_pub_topic"); + const auto depth_cloud_topic = + declare_parameter("debug.depth_cloud_pub_topic"); + const auto depth_color_topic = + declare_parameter("debug.depth_colormap_pub_topic"); + const auto ann_topic = + declare_parameter("debug.annulus_pub_topic"); + const auto pln_topic = + declare_parameter("debug.plane_pub_topic"); + depth_colormap_vmin_ = static_cast( + declare_parameter("debug.depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast( + declare_parameter("debug.depth_colormap_value_max")); + + const auto sensor_qos = + vortex::utils::qos_profiles::reliable_profile(10); + pose_pub_ = create_publisher(pose_topic, + sensor_qos); + depth_cloud_pub_ = create_publisher( + depth_cloud_topic, sensor_qos); + depth_colormap_pub_ = create_publisher( + depth_color_topic, sensor_qos); + annulus_pub_ = create_publisher( + ann_topic, sensor_qos); + plane_pub_ = create_publisher( + pln_topic, sensor_qos); + } + + const auto lm_topic = declare_parameter("landmarks_pub_topic"); + landmark_pub_ = create_publisher( + lm_topic, vortex::utils::qos_profiles::reliable_profile(10)); + + try_activate_detector(); +} + +void ValvePoseNode::lookup_extrinsic() { + if (extrinsic_ready_) + return; + + try { + const auto tf = tf_buffer_->lookupTransform( + color_frame_id_, depth_frame_id_, tf2::TimePointZero); + const auto& t = tf.transform.translation; + const auto& q = tf.transform.rotation; + + const Eigen::Quaternionf quat( + static_cast(q.w), static_cast(q.x), + static_cast(q.y), static_cast(q.z)); + depth_color_extrinsic_.R = quat.toRotationMatrix(); + depth_color_extrinsic_.t = + Eigen::Vector3f(static_cast(t.x), static_cast(t.y), + static_cast(t.z)); + + extrinsic_ready_ = true; + extrinsic_timer_->cancel(); + RCLCPP_INFO(get_logger(), + "Depth-to-color extrinsic loaded from TF (%s -> %s, " + "t=[%.4f, %.4f, %.4f])", + depth_frame_id_.c_str(), color_frame_id_.c_str(), t.x, t.y, + t.z); + + if (detector_) { + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + } else { + try_activate_detector(); + } + } catch (const tf2::TransformException& ex) { + RCLCPP_WARN_THROTTLE( + get_logger(), *get_clock(), 5000, "Waiting for TF: %s -> %s: %s", + depth_frame_id_.c_str(), color_frame_id_.c_str(), ex.what()); + } +} + +void ValvePoseNode::try_activate_detector() { + if (!color_props_ready_ || !depth_props_ready_ || !extrinsic_ready_ || + detector_) + return; + + detector_ = std::make_unique(yolo_w_, yolo_h_, + annulus_ratio_, ransac_thresh_, + ransac_iters_, handle_offset_); + detector_->set_color_image_properties(color_props_); + detector_->set_depth_image_properties(depth_props_); + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + detector_->compute_letterbox_transform(); + RCLCPP_INFO(get_logger(), "Detector initialised"); +} + +void ValvePoseNode::init_subscriptions() { + const auto depth_topic = + declare_parameter("depth_image_sub_topic"); + const auto det_topic = + declare_parameter("detections_sub_topic"); + const auto depth_info_topic = + declare_parameter("depth_image_info_topic"); + const auto color_info_topic = + declare_parameter("color_image_info_topic"); + + const auto info_qos = vortex::utils::qos_profiles::sensor_data_profile(1); + + color_cam_info_sub_ = create_subscription( + color_info_topic, info_qos, + std::bind(&ValvePoseNode::color_camera_info_cb, this, _1)); + depth_cam_info_sub_ = create_subscription( + depth_info_topic, info_qos, + std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); + + const auto data_qos = vortex::utils::qos_profiles::sensor_data_profile(10); + depth_sub_.subscribe(this, depth_topic, data_qos.get_rmw_qos_profile()); + det_sub_.subscribe(this, det_topic, data_qos.get_rmw_qos_profile()); + + // Slop set to 100 ms to accommodate the detection pipeline latency + // (~66 ms observed between depth and detection timestamps). + sync_ = std::make_shared>( + SyncPolicy(20), depth_sub_, det_sub_); + sync_->setMaxIntervalDuration(rclcpp::Duration::from_seconds(0.15)); + sync_->registerCallback(std::bind(&ValvePoseNode::sync_cb, this, _1, _2)); +} + +// One-shot callback that overrides color intrinsics and distortion from the +// camera_info topic. +void ValvePoseNode::color_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg) { + color_props_.intr.fx = msg->k[0]; + color_props_.intr.fy = msg->k[4]; + color_props_.intr.cx = msg->k[2]; + color_props_.intr.cy = msg->k[5]; + for (size_t i = 0; i < 5 && i < msg->d.size(); ++i) + color_props_.intr.dist[i] = msg->d[i]; + color_props_.dim.width = static_cast(msg->width); + color_props_.dim.height = static_cast(msg->height); + color_props_ready_ = true; + color_cam_info_sub_.reset(); + RCLCPP_INFO(get_logger(), "Color camera_info received (fx=%.2f fy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy); + if (detector_) { + detector_->set_color_image_properties(color_props_); + detector_->compute_letterbox_transform(); + } else { + try_activate_detector(); + } +} + +// One-shot callback that overrides depth intrinsics from the camera_info topic. +void ValvePoseNode::depth_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg) { + depth_props_.intr.fx = msg->k[0]; + depth_props_.intr.fy = msg->k[4]; + depth_props_.intr.cx = msg->k[2]; + depth_props_.intr.cy = msg->k[5]; + depth_props_.dim.width = static_cast(msg->width); + depth_props_.dim.height = static_cast(msg->height); + depth_props_ready_ = true; + depth_cam_info_sub_.reset(); + RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", + depth_props_.intr.fx, depth_props_.intr.fy); + if (detector_) { + detector_->set_depth_image_properties(depth_props_); + } else { + try_activate_detector(); + } +} + +std::vector> ValvePoseNode::collect_scored_boxes( + const vision_msgs::msg::Detection2DArray& det) const { + std::vector> boxes; + boxes.reserve(det.detections.size()); + for (const auto& d : det.detections) { + const float score = + d.results.empty() + ? static_cast(d.bbox.size_x * d.bbox.size_y) + : static_cast(d.results[0].hypothesis.score); + boxes.emplace_back(score, to_bbox(d.bbox)); + } + return boxes; +} + +void ValvePoseNode::publish_empty_results( + const std_msgs::msg::Header& header) const { + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array({}, header)); + landmark_pub_->publish(make_landmark_array({}, header)); +} + +cv::Mat ValvePoseNode::build_depth_colormap( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) const { + cv_bridge::CvImageConstPtr cv_raw = cv_bridge::toCvShare(depth, "16UC1"); + cv::Mat depth_f; + cv_raw->image.convertTo(depth_f, CV_32FC1); + const float scale = 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); + cv::Mat depth_8u; + depth_f.convertTo(depth_8u, CV_8UC1, scale, -depth_colormap_vmin_ * scale); + cv::Mat colormap; + cv::applyColorMap(depth_8u, colormap, cv::COLORMAP_TURBO); + return colormap; +} + +void ValvePoseNode::publish_debug( + const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const std::vector& boxes, + const std::vector& poses, + const pcl::PointCloud& ann_cloud, + const pcl::PointCloud& pln_cloud) const { + std_msgs::msg::Header pcl_header = depth->header; + pcl_header.frame_id = output_frame_id_; + + if (depth_cloud_pub_ && depth_cloud_pub_->get_subscription_count() > 0) { + sensor_msgs::msg::PointCloud2 msg; + pcl::toROSMsg(ann_cloud, msg); + msg.header = pcl_header; + depth_cloud_pub_->publish(msg); + } + + if (depth_colormap_pub_) { + cv::Mat colormap = build_depth_colormap(depth); + for (size_t i = 0; i < boxes.size(); ++i) { + const auto& box = boxes[i]; + + // poses[i] is in the depth camera frame. + // project_color_pixel_to_depth needs Z in the color camera frame, + // so transform the 3-D point first. + const Eigen::Vector3f P_depth(static_cast(poses[i].x), + static_cast(poses[i].y), + static_cast(poses[i].z)); + const Eigen::Vector3f P_color = + depth_color_extrinsic_.R * P_depth + depth_color_extrinsic_.t; + const float Z_color = P_color.z(); + if (Z_color <= 0.0f) + continue; + + const float angle_deg = + box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), + angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + for (auto& c : corners) + c = project_color_pixel_to_depth(c.x, c.y, Z_color, + color_props_, depth_props_, + depth_color_extrinsic_); + for (int j = 0; j < 4; ++j) + cv::line(colormap, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); + } + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", colormap).toImageMsg()); + } + + if (annulus_pub_ && plane_pub_) { + sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; + pcl::toROSMsg(ann_cloud, ann_msg); + pcl::toROSMsg(pln_cloud, pln_msg); + ann_msg.header = pcl_header; + pln_msg.header = pcl_header; + annulus_pub_->publish(ann_msg); + plane_pub_->publish(pln_msg); + } +} + +// Main synchronized callback: runs NMS, estimates poses, and publishes all +// outputs. +void ValvePoseNode::sync_cb( + const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) { + if (!depth || !det || !detector_) + return; + + const auto scored_boxes = collect_scored_boxes(*det); + const std::vector kept = + filter_duplicate_detections(scored_boxes, iou_duplicate_threshold_); + + std_msgs::msg::Header pose_header = depth->header; + pose_header.frame_id = output_frame_id_; + + if (det->detections.empty()) { + publish_empty_results(pose_header); + if (debug_visualize_ && depth_colormap_pub_) + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", + build_depth_colormap(depth)) + .toImageMsg()); + return; + } + + const cv::Mat depth_img = decode_depth_to_float(depth); + const DetectorMode mode = + debug_visualize_ ? DetectorMode::debug : DetectorMode::standard; + + std::vector raw_boxes; + std::vector poses; + pcl::PointCloud ann_dbg, pln_dbg; + + for (size_t idx : kept) { + const BoundingBox& yolo_box = scored_boxes[idx].second; + // YOLO outputs in 640×640 letterbox space — convert to color image + // space, then optionally correct for lens distortion. + BoundingBox color_box = detector_->letterbox_to_image_coords(yolo_box); + if (undistort_detections_) + color_box = undistort_bbox(color_box, color_props_.intr); + + const auto result = + detector_->compute_pose_from_depth(depth_img, color_box, mode); + if (!result.valid) + continue; + + // Keep raw_boxes and poses aligned: only push when pose succeeded. + raw_boxes.push_back(color_box); + poses.push_back(result.pose); + if (mode == DetectorMode::debug) { + if (result.annulus_cloud) + ann_dbg += *result.annulus_cloud; + if (result.plane_cloud) + pln_dbg += *result.plane_cloud; + } + } + + if (debug_visualize_) + publish_debug(depth, raw_boxes, poses, ann_dbg, pln_dbg); + + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array(poses, pose_header)); + landmark_pub_->publish(make_landmark_array(poses, pose_header)); +} + +} // namespace valve_detection + +RCLCPP_COMPONENTS_REGISTER_NODE(valve_detection::ValvePoseNode) diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt b/mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt new file mode 100644 index 0000000..6f4ee13 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 3.8) +project(valve_subtype_resolver) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) +endif() + +add_compile_options(-Wall -Wextra -Wpedantic) + +find_package(ament_cmake REQUIRED) +find_package(rclcpp REQUIRED) +find_package(rclcpp_components REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(std_msgs REQUIRED) +find_package(tf2 REQUIRED) +find_package(tf2_ros REQUIRED) +find_package(tf2_geometry_msgs REQUIRED) +find_package(vortex_msgs REQUIRED) + +set(LIB_NAME "${PROJECT_NAME}_component") + +add_library(${LIB_NAME} SHARED + src/valve_subtype_resolver.cpp +) + +target_include_directories(${LIB_NAME} PUBLIC + $ + $ +) + +ament_target_dependencies(${LIB_NAME} PUBLIC + rclcpp + rclcpp_components + geometry_msgs + std_msgs + tf2 + tf2_ros + tf2_geometry_msgs + vortex_msgs +) + +rclcpp_components_register_node( + ${LIB_NAME} + PLUGIN "valve_subtype_resolver::ValveSubtypeResolverNode" + EXECUTABLE ${PROJECT_NAME}_node +) + +install(TARGETS ${LIB_NAME} + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin +) + +install(DIRECTORY include/ DESTINATION include) +install(DIRECTORY config launch DESTINATION share/${PROJECT_NAME}) + +ament_package() diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml b/mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml new file mode 100644 index 0000000..1f03aa2 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml @@ -0,0 +1,32 @@ +/**: + ros__parameters: + # ── Inputs ──────────────────────────────────────────────────────────────── + # Landmarks published by valve_detection (subtype unset, 0). + landmarks_sub_topic: "/valve_landmarks" + + # ── Output ──────────────────────────────────────────────────────────────── + # Republished landmarks with subtype resolved. + landmarks_pub_topic: "/valve_landmarks_typed" + + # ── Robot identity ──────────────────────────────────────────────────────── + # Prepended to all TF frame names (e.g. "moby" → "moby/odom"). + # Override at launch time to switch between robots. + drone: "moby" + + # ── TF ──────────────────────────────────────────────────────────────────── + # Base name of the world/odometry frame (without drone prefix). + # The full frame used for TF lookup is: /. + # The full chain front_camera_depth_optical → base_link → odom must be + # available in the TF tree (static + dynamic). + odom_frame: "odom" + + # ── Subtype classification ───────────────────────────────────────────── + # Threshold on |world_normal.z|. + # + # The valve plane normal is rotated into the world frame. If its Z + # component exceeds this threshold the valve face is classified as: + # VALVE_VERTICAL (1) — normal points mostly up/down (wheel axis is vertical) + # VALVE_HORIZONTAL (2) — normal is mostly in the XY plane (wheel axis is horizontal) + # + # 0.5 corresponds to a 45-degree tilt boundary. + vertical_threshold: 0.5 diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp b/mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp new file mode 100644 index 0000000..335ef65 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include + +namespace valve_subtype_resolver { + +class ValveSubtypeResolverNode : public rclcpp::Node { + public: + explicit ValveSubtypeResolverNode(const rclcpp::NodeOptions& options); + + private: + void landmarks_cb(const vortex_msgs::msg::LandmarkArray::SharedPtr msg); + + // Uses the TF tree to rotate the valve plane normal into the world frame, + // then returns VALVE_VERTICAL (1), VALVE_HORIZONTAL (2), or 0 (unknown). + int resolve_subtype(const vortex_msgs::msg::Landmark& landmark); + + // TF2 + std::shared_ptr tf_buffer_; + std::shared_ptr tf_listener_; + + rclcpp::Subscription::SharedPtr + landmark_sub_; + rclcpp::Publisher::SharedPtr landmark_pub_; + + std::string world_frame_; + + // Threshold on |world_normal.z|: above this the valve is VALVE_VERTICAL + // (normal points up/down), below it VALVE_HORIZONTAL (normal in XY plane). + double vertical_threshold_; +}; + +} // namespace valve_subtype_resolver diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py b/mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py new file mode 100644 index 0000000..5649d23 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py @@ -0,0 +1,34 @@ +import os + +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from launch_ros.actions import Node + + +def generate_launch_description(): + config = os.path.join( + get_package_share_directory("valve_subtype_resolver"), + "config", + "valve_subtype_resolver_params.yaml", + ) + + drone_arg = DeclareLaunchArgument( + "drone", + default_value="moby", + description="Robot name, prepended to TF frame IDs (e.g. moby, orca)", + ) + + return LaunchDescription( + [ + drone_arg, + Node( + package="valve_subtype_resolver", + executable="valve_subtype_resolver_node", + name="valve_subtype_resolver_node", + parameters=[config, {"drone": LaunchConfiguration("drone")}], + output="screen", + ), + ] + ) diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/package.xml b/mission/tacc/visual_inspection/valve_subtype_resolver/package.xml new file mode 100644 index 0000000..a9924dd --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/package.xml @@ -0,0 +1,26 @@ + + + + valve_subtype_resolver + 0.1.0 + Subscribes to untyped valve landmarks; uses the TF tree to rotate the valve normal into the world frame and republishes with the resolved subtype (VALVE_VERTICAL / VALVE_HORIZONTAL). + you + Apache-2.0 + + ament_cmake + + rclcpp + rclcpp_components + geometry_msgs + std_msgs + tf2 + tf2_ros + tf2_geometry_msgs + vortex_msgs + + rosidl_default_runtime + + + ament_cmake + + diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp b/mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp new file mode 100644 index 0000000..7f24e1c --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp @@ -0,0 +1,108 @@ +// valve_subtype_resolver.cpp +// Subscribes to valve landmarks (subtype unset) published by valve_detection. +// For each landmark, looks up the full transform from the landmark frame to the +// world frame via TF2, rotates the valve plane normal into the world frame, and +// classifies the valve as VALVE_VERTICAL or VALVE_HORIZONTAL based on how much +// the normal deviates from the world Z-axis. Republishes with subtype set. +#include "valve_subtype_resolver/valve_subtype_resolver.hpp" + +#include +#include + +namespace valve_subtype_resolver { + +using std::placeholders::_1; + +ValveSubtypeResolverNode::ValveSubtypeResolverNode( + const rclcpp::NodeOptions& options) + : Node("valve_subtype_resolver_node", options) { + const std::string drone = declare_parameter("drone", "moby"); + const std::string odom_frame_base = + declare_parameter("odom_frame", "odom"); + world_frame_ = drone + "/" + odom_frame_base; + vertical_threshold_ = declare_parameter("vertical_threshold", 0.5); + + const auto landmarks_in = declare_parameter( + "landmarks_sub_topic", "/valve_landmarks"); + const auto landmarks_out = declare_parameter( + "landmarks_pub_topic", "/valve_landmarks_typed"); + + tf_buffer_ = std::make_shared(get_clock()); + tf_listener_ = std::make_shared(*tf_buffer_); + + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + + landmark_sub_ = create_subscription( + landmarks_in, qos, + std::bind(&ValveSubtypeResolverNode::landmarks_cb, this, _1)); + + landmark_pub_ = + create_publisher(landmarks_out, qos); +} + +void ValveSubtypeResolverNode::landmarks_cb( + const vortex_msgs::msg::LandmarkArray::SharedPtr msg) { + vortex_msgs::msg::LandmarkArray out = *msg; + for (auto& lm : out.landmarks) + lm.subtype.value = static_cast(resolve_subtype(lm)); + landmark_pub_->publish(out); +} + +// Resolves the valve subtype by rotating the valve plane normal into the world +// frame and checking its Z component. +// +// The valve plane normal is the Z-axis of the landmark orientation quaternion. +// It is transformed from the landmark frame into the world frame via a TF2 +// lookup that traverses the full chain: +// /front_camera_depth_optical → /front_camera_depth +// → /front_camera_link → /base_link → /odom +// +// Classification: +// |world_nz| >= vertical_threshold → VALVE_VERTICAL (1) +// The normal points mostly up or down; the valve face is horizontal, +// meaning the valve wheel spins around a vertical axis. +// |world_nz| < vertical_threshold → VALVE_HORIZONTAL (2) +// The normal is mostly in the XY plane; the valve face is vertical, +// meaning the valve wheel spins around a horizontal axis. +// +// Returns 0 (unknown) if the TF lookup fails. +int ValveSubtypeResolverNode::resolve_subtype( + const vortex_msgs::msg::Landmark& landmark) { + geometry_msgs::msg::TransformStamped transform; + try { + transform = tf_buffer_->lookupTransform( + world_frame_, landmark.header.frame_id, tf2::TimePointZero); + } catch (const tf2::TransformException& ex) { + RCLCPP_WARN_THROTTLE(get_logger(), *get_clock(), 1000, + "TF lookup failed: %s", ex.what()); + return 0; + } + + // Extract the valve plane normal (Z-axis of the landmark orientation). + // Rotating [0,0,1] by quaternion q gives: + // nx = 2(qx*qz + qw*qy) + // ny = 2(qy*qz - qw*qx) + // nz = 1 - 2(qx^2 + qy^2) + const auto& lq = landmark.pose.pose.orientation; + geometry_msgs::msg::Vector3Stamped normal_cam; + normal_cam.header = landmark.header; + normal_cam.vector.x = 2.0 * (lq.x * lq.z + lq.w * lq.y); + normal_cam.vector.y = 2.0 * (lq.y * lq.z - lq.w * lq.x); + normal_cam.vector.z = 1.0 - 2.0 * (lq.x * lq.x + lq.y * lq.y); + + // Rotate the normal into the world frame. + // tf2::doTransform on a Vector3Stamped applies only the rotation. + geometry_msgs::msg::Vector3Stamped normal_world; + tf2::doTransform(normal_cam, normal_world, transform); + + using ST = vortex_msgs::msg::LandmarkSubtype; + if (std::abs(normal_world.vector.z) >= vertical_threshold_) + return ST::VALVE_VERTICAL; + return ST::VALVE_HORIZONTAL; +} + +} // namespace valve_subtype_resolver + +RCLCPP_COMPONENTS_REGISTER_NODE( + valve_subtype_resolver::ValveSubtypeResolverNode)