From 49d746760d550c85e103fa13bd57349361b817f5 Mon Sep 17 00:00:00 2001 From: jens Date: Wed, 25 Mar 2026 22:14:23 +0100 Subject: [PATCH 1/7] feat: add valve-detection and valve-subtype-resolver to vortex-cv/mission/tacc/visual_inspection --- .../valve_detection/CMakeLists.txt | 82 +++ .../config/valve_detection_params.yaml | 134 +++++ .../depth_image_processing.hpp | 84 +++ .../valve_detection/pose_estimator.hpp | 90 +++ .../include/valve_detection/types.hpp | 46 ++ .../include/valve_detection_ros/ros_utils.hpp | 35 ++ .../valve_detection_ros/valve_pose_ros.hpp | 93 ++++ .../launch/valve_detection.launch.py | 52 ++ .../valve_detection/package.xml | 30 + .../src/depth_image_processing.cpp | 515 ++++++++++++++++++ .../valve_detection/src/pose_estimator.cpp | 374 +++++++++++++ .../valve_detection/src/ros_utils.cpp | 82 +++ .../valve_detection/src/valve_pose_ros.cpp | 336 ++++++++++++ .../valve_subtype_resolver/CMakeLists.txt | 57 ++ .../config/valve_subtype_resolver_params.yaml | 32 ++ .../valve_subtype_resolver.hpp | 41 ++ .../launch/valve_subtype_resolver.launch.py | 34 ++ .../valve_subtype_resolver/package.xml | 26 + .../src/valve_subtype_resolver.cpp | 108 ++++ 19 files changed, 2251 insertions(+) create mode 100644 mission/tacc/visual_inspection/valve_detection/CMakeLists.txt create mode 100644 mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py create mode 100644 mission/tacc/visual_inspection/valve_detection/package.xml create mode 100644 mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp create mode 100644 mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp create mode 100644 mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp create mode 100644 mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp create mode 100644 mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt create mode 100644 mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml create mode 100644 mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp create mode 100644 mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py create mode 100644 mission/tacc/visual_inspection/valve_subtype_resolver/package.xml create mode 100644 mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp diff --git a/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt new file mode 100644 index 0000000..21c3cd5 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt @@ -0,0 +1,82 @@ +cmake_minimum_required(VERSION 3.8) +project(valve_detection) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 20) +endif() + +add_compile_options(-Wall -Wextra -Wpedantic) + +find_package(ament_cmake REQUIRED) + +find_package(rclcpp REQUIRED) +find_package(rclcpp_components REQUIRED) +find_package(message_filters REQUIRED) + +find_package(sensor_msgs REQUIRED) +find_package(vision_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(std_msgs REQUIRED) +find_package(cv_bridge REQUIRED) +find_package(OpenCV REQUIRED) + +find_package(PCL REQUIRED) +find_package(pcl_conversions REQUIRED) + +find_package(vortex_msgs REQUIRED) +find_package(vortex_utils REQUIRED) + +include_directories(include) + +set(LIB_NAME "${PROJECT_NAME}_component") + +add_library(${LIB_NAME} SHARED + src/depth_image_processing.cpp + src/pose_estimator.cpp + src/ros_utils.cpp + src/valve_pose_ros.cpp +) + +target_link_libraries(${LIB_NAME} PUBLIC + ${OpenCV_LIBS} + ${PCL_LIBRARIES} +) + +target_include_directories(${LIB_NAME} PUBLIC + $ + $ + ${OpenCV_INCLUDE_DIRS} + ${PCL_INCLUDE_DIRS} +) + +ament_target_dependencies(${LIB_NAME} PUBLIC + rclcpp + rclcpp_components + message_filters + sensor_msgs + vision_msgs + geometry_msgs + std_msgs + cv_bridge + OpenCV + pcl_conversions + vortex_msgs + vortex_utils +) + +rclcpp_components_register_node( + ${LIB_NAME} + PLUGIN "valve_detection::ValvePoseNode" + EXECUTABLE ${PROJECT_NAME}_node +) + +install(TARGETS ${LIB_NAME} + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin +) + +install(DIRECTORY include/ DESTINATION include) +install(DIRECTORY config launch DESTINATION share/${PROJECT_NAME}) + +ament_package() diff --git a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml new file mode 100644 index 0000000..fc44fa6 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml @@ -0,0 +1,134 @@ +/**: + ros__parameters: + # ── Inputs ──────────────────────────────────────────────────────────────── + # depth_image_sub_topic: "/realsense/D555_409122300281_Depth" + depth_image_sub_topic: "/camera/camera/depth/image_rect_raw" + detections_sub_topic: "/yolo_obb_object_detection/detections" + + # Depth camera-info topic (optional – used to load depth intrinsics if published). + # Using the realsense depth camera_info since the depth image is from the realsense bag. + depth_image_info_topic: "/camera/camera/depth/camera_info" + + # Color camera-info topic (optional – overrides the fallback params below if published). + color_image_info_topic: "/moby/front_camera/camera_info" + + # Color camera intrinsics – fallback values used when color_image_info_topic is not published. + # Source: /camera/camera/color/camera_info + color_fx: 452.121521 + color_fy: 451.737976 + color_cx: 438.254059 + color_cy: 248.703217 + color_image_width: 896 + color_image_height: 504 + + # Distortion coefficients [k1, k2, p1, p2, k3] (plumb_bob model) + color_d1: -0.0548105500638485 + color_d2: 0.0597584918141365 + color_d3: 0.000486430013552308 + color_d4: 0.00031599000794813 + color_d5: -0.0192314591258764 + + # Depth camera intrinsics (fallback when depth camera_info is not published) + # Source: /camera/camera/depth/camera_info + depth_fx: 449.449707 + depth_fy: 449.449707 + depth_cx: 444.819946 + depth_cy: 248.226578 + depth_image_width: 896 + depth_image_height: 504 + + # ── Depth-to-color extrinsic ────────────────────────────────────────────── + # Translation (metres) of the depth origin expressed in the color camera + # frame. Rotation is assumed to be identity (both optical frames share the + # same orientation for the RealSense D555). + # Source: ros2 topic echo /realsense/extrinsics/depth_to_color + depth_to_color_tx: -0.059 + depth_to_color_ty: 0.0 + depth_to_color_tz: 0.0 + + # ── Outputs ─────────────────────────────────────────────────────────────── + # Primary output consumed by downstream nodes (e.g. EKF, mission planner). + landmarks_pub_topic: "/valve_landmarks" + + # ── YOLO letterbox reference size ───────────────────────────────────────── + yolo_img_width: 640 + yolo_img_height: 640 + + # ── Annulus and plane fit ───────────────────────────────────────────────── + # Fraction of the bounding-box half-size used as the annulus ring radius + # (unitless, 0–1). At 0.8 the ring samples the outer 80 % of the box, + # avoiding the central hub while staying inside the valve rim. + # Tuned empirically on recorded valve footage. + annulus_radius_ratio: 0.8 + + # Maximum point-to-plane distance (metres) for a depth point to be counted + # as a RANSAC inlier. 0.01 m (1 cm) tolerates typical RealSense depth noise + # at ~0.5–1 m range without accepting gross outliers. + plane_ransac_threshold: 0.01 + + # Number of RANSAC iterations. 50 gives >99 % probability of finding a + # good plane when the inlier ratio is ≥ 50 %, while keeping CPU load low. + plane_ransac_max_iterations: 50 + + # ── Duplicate-detection suppression ────────────────────────────────────── + # Two bounding boxes are considered the same valve when their + # axis-aligned IoU exceeds this threshold OR when the smaller box + # overlaps the larger by more than 70 % (intersection-over-minimum). + # Maximum 2 landmarks are published per frame. + iou_duplicate_threshold: 0.5 + + # ── Pose offset ─────────────────────────────────────────────────────────── + # Shift the estimated position along the plane normal by this amount (metres). + # Value taken from the official valve CAD file: distance between the valve + # face frame and the inside length of the handle is 0.065 m. + valve_handle_offset: 0.065 + + # ── Behaviour toggles ───────────────────────────────────────────────────── + debug_visualize: false # enable all debug visualization topics + + # ── Debug visualization (only active when debug_visualize: true) ────────── + debug: + # All detected valve poses in one PoseArray – useful for visualising + # multiple simultaneous detections in Foxglove. + valve_poses_pub_topic: "/valve_poses" + + # Annulus point cloud (points sampled inside the bounding-box ring used + # for plane fitting) projected into 3D – useful for verifying the depth + # extraction region. + depth_cloud_pub_topic: "/valve_depth_cloud" + + # Depth image false-coloured with COLORMAP_TURBO and OBB outlines drawn + # on top – useful for checking detection alignment on the depth frame. + depth_colormap_pub_topic: "/valve_detection_depth_colormap" + # Colormap scaling range in raw 16UC1 depth units (millimetres). + # Min ~0 mm clips near-zero noise; max ~1125 mm (≈1.1 m) covers the + # expected close-range operating distance for valve interaction. + depth_colormap_value_min: 0.1 + depth_colormap_value_max: 1125.5 + + # Points sampled from the annulus region of the bounding box, collected + # as a byproduct of pose estimation – useful for verifying the depth + # extraction region in 3D. + annulus_pub_topic: "/bbx_annulus_pcl" + + # RANSAC plane inlier points, collected as a byproduct of pose estimation + # – useful for checking that the fitted plane normal (and therefore the + # final pose orientation) is correct. + plane_pub_topic: "/annulus_plane_pcl" + + # ── Landmark fields ─────────────────────────────────────────────────────── + landmark_type: 5 # VALVE + # subtype is intentionally unset here (0 = UNKNOWN); resolved downstream + # by the valve_subtype_resolver node running on the PI. + + # ── Output frame ────────────────────────────────────────────────────────── + # TF frame used as the frame_id for all published poses and landmarks. + # Must match the depth optical frame in the robot's TF tree. + # Depth is from the realsense bag, pretending to be moby/depth_camera/image_depth. + # See: auv_setup/description/moby.urdf.xacro + output_frame_id: "front_camera_depth_optical" + input_color_frame_id: "front_camera_color_optical" + + # Prepended to all TF frame names (e.g. "moby" → "moby/front_camera_depth_optical"). + # Override at launch time to switch between robots: drone:=orca + drone: "moby" diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp new file mode 100644 index 0000000..ae6e582 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp @@ -0,0 +1,84 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +void project_pixel_to_point(const int u, + const int v, + const float depth, + const double fx, + const double fy, + const double cx, + const double cy, + pcl::PointXYZ& out); + +void extract_annulus_pcl( + const cv::Mat& depth_image, // CV_32FC1 meters + const BoundingBox& bbox, // in ORIGINAL image pixels + const ImageProperties& img_props, + const float annulus_radius_ratio, // inner radius = outer*ratio + pcl::PointCloud::Ptr& out); + +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, then checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. +void extract_annulus_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // annulus defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + const float annulus_radius_ratio, + pcl::PointCloud::Ptr& out); + +// Extracts all valid depth points whose color-frame projection falls inside +// the oriented bounding box. Output points are in the color camera frame. +void extract_bbox_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are stored in the depth camera frame. +void extract_bbox_pcl_depth( + const cv::Mat& depth_image, // CV_32FC1 meters, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +// Project a color image pixel to depth image coordinates. +// u_c, v_c: pixel coordinates in the color image. +// Z: depth of the point in the color camera frame (metres). +cv::Point2f project_color_pixel_to_depth(const float u_c, + const float v_c, + const float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr); + +// Returns undistorted copy of bbox (center_x/y corrected for lens distortion). +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr); + +// Greedy NMS: returns indices of kept detections (max 2). +// scored_boxes: (score, bbox) pairs. Two boxes are duplicates when IoMin +// (intersection / min-area) or IoU exceeds iou_duplicate_threshold. +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold); + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp new file mode 100644 index 0000000..d734a3d --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp @@ -0,0 +1,90 @@ +#pragma once + +#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/types.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace valve_detection { + +class PoseEstimator { + public: + PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset); + + void set_color_image_properties(const ImageProperties& props); + void set_depth_image_properties(const ImageProperties& props); + void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); + + void calculate_letterbox_padding(); + BoundingBox transform_bounding_box(const BoundingBox& bbox) const; + + PoseResult compute_pose_from_depth( + const cv::Mat& depth_image, // CV_32FC1 meters + const BoundingBox& bbox_org, // in original image pixels + pcl::PointCloud::Ptr annulus_dbg, + pcl::PointCloud::Ptr plane_dbg, + bool debug_visualize) const; + + private: + bool segment_plane(const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coeff, + pcl::PointIndices::Ptr& inliers) const; + + Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; + Eigen::Vector3f compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction) const; + Eigen::Vector3f find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coeff, + const Eigen::Vector3f& ray_direction, + const Eigen::Vector3f& ray_origin = Eigen::Vector3f::Zero()) const; + Eigen::Vector3f shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const; + Eigen::Matrix3f create_rotation_matrix( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle) const; + // Variant that works entirely in depth frame: color rays are rotated by + // R_dc = R^T before intersecting the depth-frame plane. + Eigen::Matrix3f create_rotation_matrix_depth( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle, + const Eigen::Vector3f& ray_origin, + const Eigen::Matrix3f& R_dc) const; + + ImageProperties color_image_properties_{}; + ImageProperties depth_image_properties_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + bool has_depth_props_{false}; + + int yolo_img_width_; + int yolo_img_height_; + float annulus_radius_ratio_; + float plane_ransac_threshold_; + int plane_ransac_max_iterations_; + float valve_handle_offset_; + + double letterbox_scale_factor_{1.0}; + double letterbox_pad_x_{0}; + double letterbox_pad_y_{0}; + + mutable Eigen::Vector3f filter_direction_{1, 0, 0}; +}; + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp new file mode 100644 index 0000000..c2c8067 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp @@ -0,0 +1,46 @@ +#pragma once +#include +#include +#include +#include + +namespace valve_detection { + +struct CameraIntrinsics { + double fx{0}, fy{0}, cx{0}, cy{0}; + // Plumb-bob distortion coefficients [k1, k2, p1, p2, k3]. + std::array dist{0, 0, 0, 0, 0}; +}; + +struct ImageDimensions { + int width{0}, height{0}; +}; + +struct ImageProperties { + CameraIntrinsics intr; + ImageDimensions dim; +}; + +struct BoundingBox { + float center_x{0}; + float center_y{0}; + float size_x{0}; + float size_y{0}; + float theta{0}; // radians +}; + +using Pose = vortex::utils::types::Pose; + +struct PoseResult { + Pose result; + bool result_valid{false}; +}; + +// Rigid transform from depth camera frame to color camera frame. +// Rotation R and translation t satisfy: P_color = R * P_depth + t +struct DepthColorExtrinsic { + Eigen::Matrix3f R{Eigen::Matrix3f::Identity()}; + Eigen::Vector3f t{Eigen::Vector3f::Zero()}; +}; + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp new file mode 100644 index 0000000..6f080bc --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "valve_detection/types.hpp" +#include "vortex_msgs/msg/landmark_array.hpp" + +#include + +namespace valve_detection { + +// Converts a ROS BoundingBox2D message to the internal BoundingBox type. +BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b); + +// Builds a PoseArray message from a list of poses. +geometry_msgs::msg::PoseArray make_pose_array( + const std::vector& poses, + const std_msgs::msg::Header& header); + +// Builds a LandmarkArray message from a list of poses. +// Subtype is always 0 (unset); resolved downstream by valve_subtype_resolver. +vortex_msgs::msg::LandmarkArray make_landmark_array( + const std::vector& poses, + const std_msgs::msg::Header& header, + int type); + +// Decodes a ROS depth image to a CV_32FC1 mat in metres. +cv::Mat decode_depth_to_float( + const sensor_msgs::msg::Image::ConstSharedPtr& depth); + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp new file mode 100644 index 0000000..06d6a08 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp @@ -0,0 +1,93 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/pose_estimator.hpp" +#include "valve_detection/types.hpp" + +#include +#include +#include +#include "vortex_msgs/msg/landmark_array.hpp" +#include "vortex_msgs/msg/landmark_subtype.hpp" +#include "vortex_msgs/msg/landmark_type.hpp" + +namespace valve_detection { + +class ValvePoseNode : public rclcpp::Node { + public: + explicit ValvePoseNode(const rclcpp::NodeOptions& options); + + private: + // Node setup — called from constructor. + void setup_estimator(); + void setup_publishers(const rclcpp::QoS& qos); + void setup_subscribers(const rclcpp::QoS& qos); + + // Camera info callbacks (one-shot, override config fallback). + void color_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg); + void depth_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg); + + // Main synchronized callback: depth + detections. + void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); + + using SyncPolicy = message_filters::sync_policies::ApproximateTime< + sensor_msgs::msg::Image, + vision_msgs::msg::Detection2DArray>; + + // params + bool debug_visualize_; + float iou_duplicate_threshold_; + std::string output_frame_id_; + int landmark_type_; + + // camera data (owned by node, passed to estimator and depth functions) + ImageProperties color_props_{}; + ImageProperties depth_props_{}; + DepthColorExtrinsic depth_color_extrinsic_{}; + + // estimator + std::unique_ptr detector_; + + // subs + rclcpp::Subscription::SharedPtr + color_cam_info_sub_; + rclcpp::Subscription::SharedPtr + depth_cam_info_sub_; + message_filters::Subscriber depth_sub_; + message_filters::Subscriber det_sub_; + std::shared_ptr> sync_; + + // pubs + rclcpp::Publisher::SharedPtr pose_pub_; + rclcpp::Publisher::SharedPtr landmark_pub_; + rclcpp::Publisher::SharedPtr depth_colormap_pub_; + rclcpp::Publisher::SharedPtr annulus_pub_; + rclcpp::Publisher::SharedPtr plane_pub_; + rclcpp::Publisher::SharedPtr + depth_cloud_pub_; + + float depth_colormap_vmin_; + float depth_colormap_vmax_; +}; + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py new file mode 100644 index 0000000..e62e731 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py @@ -0,0 +1,52 @@ +import os + +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from launch_ros.actions import ComposableNodeContainer +from launch_ros.descriptions import ComposableNode + + +def generate_launch_description(): + cfg = os.path.join( + get_package_share_directory('valve_detection'), + 'config', + 'valve_detection_params.yaml', + ) + + drone_arg = DeclareLaunchArgument( + 'drone', + default_value='moby', + description='Robot name, prepended to TF frame IDs (e.g. moby, orca)', + ) + + debug_visualize_arg = DeclareLaunchArgument( + 'debug_visualize', + default_value='false', + description='Enable debug visualization topics (valve_pose, valve_poses, annulus, plane)', + ) + + container = ComposableNodeContainer( + name='valve_detection_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + ComposableNode( + package='valve_detection', + plugin='valve_detection::ValvePoseNode', + name='valve_pose_node', + parameters=[ + cfg, + { + 'drone': LaunchConfiguration('drone'), + 'debug_visualize': LaunchConfiguration('debug_visualize'), + }, + ], + ) + ], + output='screen', + ) + + return LaunchDescription([drone_arg, debug_visualize_arg, container]) diff --git a/mission/tacc/visual_inspection/valve_detection/package.xml b/mission/tacc/visual_inspection/valve_detection/package.xml new file mode 100644 index 0000000..9be782b --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/package.xml @@ -0,0 +1,30 @@ + + + + valve_detection + 0.1.0 + Valve pose estimation from YOLOv26 OBB detections + depth image. + you + Apache-2.0 + + ament_cmake + rosidl_default_generators + + rclcpp + rclcpp_components + message_filters + sensor_msgs + vision_msgs + geometry_msgs + std_msgs + cv_bridge + pcl_conversions + vortex_msgs + vortex_utils + + rosidl_default_runtime + + + ament_cmake + + diff --git a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp new file mode 100644 index 0000000..2c3feae --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp @@ -0,0 +1,515 @@ +// depth_image_processing.cpp +// Depth-to-3D back-projection, point cloud extraction, and color-to-depth pixel +// projection. +#include "valve_detection/depth_image_processing.hpp" +#include +#include +#include +#include +#include +#include + +namespace valve_detection { + +// Back-projects a depth pixel (u, v, depth) to a 3D point using camera +// intrinsics. +void project_pixel_to_point(const int u, + const int v, + const float depth, + const double fx, + const double fy, + const double cx, + const double cy, + pcl::PointXYZ& out) { + if (depth <= 0.0f || std::isnan(depth) || std::isinf(depth)) { + out.x = out.y = out.z = std::numeric_limits::quiet_NaN(); + return; + } + out.x = static_cast((u - cx) * depth / fx); + out.y = static_cast((v - cy) * depth / fy); + out.z = depth; +} + +// Extracts depth points that fall inside an elliptic annulus around the bbox +// center (no alignment). +void extract_annulus_pcl(const cv::Mat& depth_image, + const BoundingBox& bbox, + const ImageProperties& img_props, + const float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) { + out->clear(); + + const float cx = bbox.center_x; + const float cy = bbox.center_y; + // Outer ellipse half-extents match the bounding box. + const float outer_rx = bbox.size_x * 0.5f; + const float outer_ry = bbox.size_y * 0.5f; + + // Require at least 4 pixels in each dimension (2 px half-extent) before + // sampling — smaller boxes contain no usable depth points. + if (outer_rx < 2.0f || outer_ry < 2.0f) + return; + + // Inner ellipse is scaled down by annulus_radius_ratio, creating a ring + // that avoids the central hub and samples only the valve rim. + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + // Bounding rectangle of the outer ellipse in pixel space. + const int u0 = static_cast(std::floor(cx - outer_rx)); + const int u1 = static_cast(std::ceil(cx + outer_rx)); + const int v0 = static_cast(std::floor(cy - outer_ry)); + const int v1 = static_cast(std::ceil(cy + outer_ry)); + + for (int v = v0; v <= v1; ++v) { + if (v < 0 || v >= depth_image.rows) + continue; + for (int u = u0; u <= u1; ++u) { + if (u < 0 || u >= depth_image.cols) + continue; + + // Normalise pixel offset by the outer half-extents; the point is + // inside the outer ellipse when the sum of squares ≤ 1. + const float dxo = (u - cx) / outer_rx; + const float dyo = (v - cy) / outer_ry; + const bool inside_outer = (dxo * dxo + dyo * dyo) <= 1.0f; + + // Same test for the inner ellipse; keep only points outside it + // to form the annular ring. + const float dxi = (u - cx) / inner_rx; + const float dyi = (v - cy) / inner_ry; + const bool outside_inner = (dxi * dxi + dyi * dyi) > 1.0f; + + if (!inside_outer || !outside_inner) + continue; + + // Back-project the depth pixel to a 3-D point in camera space. + const float z = depth_image.at(v, u); + pcl::PointXYZ p; + project_pixel_to_point(u, v, z, img_props.intr.fx, + img_props.intr.fy, img_props.intr.cx, + img_props.intr.cy, p); + + // Discard invalid points (zero/NaN depth produces NaN coords). + if (!std::isnan(p.x) && !std::isnan(p.y) && !std::isnan(p.z)) { + out->points.push_back(p); + } + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, and checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. +void extract_annulus_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + const float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Annulus defined in color-image coordinates. + const float cx_c = color_bbox.center_x; + const float cy_c = color_bbox.center_y; + const float outer_rx = color_bbox.size_x * 0.5f; + const float outer_ry = color_bbox.size_y * 0.5f; + + // Require at least 4 pixels in each dimension (2 px half-extent) before + // sampling — smaller boxes contain no usable depth points. + if (outer_rx < 2.0f || outer_ry < 2.0f) + return; + + // Inner ellipse scaled down by annulus_radius_ratio, forming the ring. + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + // The depth and color images have different resolutions and focal lengths. + // Scale color-space bbox bounds into depth-image coordinates to get a + // coarse candidate region, then add a margin to cover the lateral shift + // introduced by the depth-to-color translation. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + constexpr int kMargin = 30; + + const int u0_d = + std::max(0, static_cast((cx_c - outer_rx) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((cx_c + outer_rx) * scale) + kMargin); + const int v0_d = + std::max(0, static_cast((cy_c - outer_ry) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((cy_c + outer_ry) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project depth pixel to 3-D point in depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Apply extrinsic to move the point into the color camera frame, + // where the annulus is defined. + const Eigen::Vector3f P_c = extr.R * P_d + extr.t; + if (P_c.z() <= 0.0f) + continue; + + // Project the color-frame point onto the color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Normalise offset by outer half-extents; keep only points inside + // the outer ellipse (sum of squares ≤ 1). + const float dxo = (u_c - cx_c) / outer_rx; + const float dyo = (v_c - cy_c) / outer_ry; + if (dxo * dxo + dyo * dyo > 1.0f) + continue; // outside outer ellipse + + // Discard points inside the inner ellipse to form the ring. + const float dxi = (u_c - cx_c) / inner_rx; + const float dyi = (v_c - cy_c) / inner_ry; + if (dxi * dxi + dyi * dyi <= 1.0f) + continue; // inside inner ellipse + + // Store the point in the color camera frame. + pcl::PointXYZ p; + p.x = P_c.x(); + p.y = P_c.y(); + p.z = P_c.z(); + out->points.push_back(p); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Extracts depth points whose color-frame projection falls inside the oriented +// bounding box. +void extract_bbox_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Pre-compute the OBB axes in color-image space for a fast rotated-rect + // test. + const float cos_t = std::cos(color_bbox.theta); + const float sin_t = std::sin(color_bbox.theta); + const float half_w = color_bbox.size_x * 0.5f; + const float half_h = color_bbox.size_y * 0.5f; + + // Approximate search region in depth-image space. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const float r = std::sqrt(half_w * half_w + half_h * half_h); + constexpr int kMargin = 30; + + const int u0_d = std::max( + 0, static_cast((color_bbox.center_x - r) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((color_bbox.center_x + r) * scale) + kMargin); + const int v0_d = std::max( + 0, static_cast((color_bbox.center_y - r) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((color_bbox.center_y + r) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Transform to color camera frame. + const Eigen::Vector3f P_c = extr.R * P_d + extr.t; + if (P_c.z() <= 0.0f) + continue; + + // Project onto color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Rotate into the OBB local frame and test against the + // half-extents. + const float dx = u_c - color_bbox.center_x; + const float dy = v_c - color_bbox.center_y; + const float local_x = cos_t * dx + sin_t * dy; + const float local_y = -sin_t * dx + cos_t * dy; + + if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) + continue; + + out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are stored in the depth camera frame. +void extract_bbox_pcl_depth(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Sample a representative depth near the bbox center to project the color + // OBB corners into depth image space. The valve is roughly flat, so a + // single Z gives a good approximation for all 4 corners. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const int u_c_d = std::clamp(static_cast(color_bbox.center_x * scale), + 0, depth_image.cols - 1); + const int v_c_d = std::clamp(static_cast(color_bbox.center_y * scale), + 0, depth_image.rows - 1); + + float Z_est = 0.0f; + constexpr int kSampleRadius = 5; + for (int dv = -kSampleRadius; dv <= kSampleRadius && Z_est <= 0.0f; ++dv) { + for (int du = -kSampleRadius; du <= kSampleRadius && Z_est <= 0.0f; + ++du) { + const int u = std::clamp(u_c_d + du, 0, depth_image.cols - 1); + const int v = std::clamp(v_c_d + dv, 0, depth_image.rows - 1); + const float z = depth_image.at(v, u); + if (z > 0.0f && !std::isnan(z) && !std::isinf(z)) + Z_est = z; + } + } + if (Z_est <= 0.0f) + return; + + // Project the 4 color OBB corners into depth image coordinates. + const float angle_deg = + color_bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect color_rrect( + cv::Point2f(color_bbox.center_x, color_bbox.center_y), + cv::Size2f(color_bbox.size_x, color_bbox.size_y), angle_deg); + cv::Point2f color_corners[4]; + color_rrect.points(color_corners); + + std::vector depth_corners(4); + for (int i = 0; i < 4; ++i) { + depth_corners[i] = + project_color_pixel_to_depth(color_corners[i].x, color_corners[i].y, + Z_est, color_props, depth_props, extr); + } + + // Fit a rotated rect to the 4 projected corners in depth image space. + const cv::RotatedRect depth_rrect = cv::minAreaRect(depth_corners); + const float angle_d = depth_rrect.angle * static_cast(M_PI) / 180.0f; + const float cos_d = std::cos(angle_d); + const float sin_d = std::sin(angle_d); + const float half_w_d = depth_rrect.size.width * 0.5f; + const float half_h_d = depth_rrect.size.height * 0.5f; + const float cx_d = depth_rrect.center.x; + const float cy_d = depth_rrect.center.y; + + // Bounding rectangle of the depth OBB. + const float r_d = std::sqrt(half_w_d * half_w_d + half_h_d * half_h_d); + const int u0_d = std::max(0, static_cast(cx_d - r_d) - 1); + const int u1_d = + std::min(depth_image.cols - 1, static_cast(cx_d + r_d) + 1); + const int v0_d = std::max(0, static_cast(cy_d - r_d) - 1); + const int v1_d = + std::min(depth_image.rows - 1, static_cast(cy_d + r_d) + 1); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + // Test against the depth-image OBB — no matrix multiply. + const float dx = u_d - cx_d; + const float dy = v_d - cy_d; + if (std::abs(cos_d * dx + sin_d * dy) > half_w_d || + std::abs(-sin_d * dx + cos_d * dy) > half_h_d) + continue; + + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to 3D depth frame. + out->points.push_back( + {static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx), + static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy), + z_d}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Projects a color image pixel to depth image coordinates using the full +// intrinsic + extrinsic pipeline. +cv::Point2f project_color_pixel_to_depth(const float u_c, + const float v_c, + const float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr) { + // Back-project color pixel → 3-D point in color camera frame. + const float Xc = (u_c - static_cast(color_props.intr.cx)) * Z / + static_cast(color_props.intr.fx); + const float Yc = (v_c - static_cast(color_props.intr.cy)) * Z / + static_cast(color_props.intr.fy); + const Eigen::Vector3f Pc(Xc, Yc, Z); + + // Transform to depth camera frame: P_depth = R^T * (P_color - t) + const Eigen::Vector3f Pd = extr.R.transpose() * (Pc - extr.t); + + if (Pd.z() <= 0.0f) + return {u_c, v_c}; // degenerate – return original + + // Project into depth image. + const float u_d = + static_cast(depth_props.intr.fx) * Pd.x() / Pd.z() + + static_cast(depth_props.intr.cx); + const float v_d = + static_cast(depth_props.intr.fy) * Pd.y() / Pd.z() + + static_cast(depth_props.intr.cy); + return {u_d, v_d}; +} + +// Corrects the bbox center for lens distortion using the given intrinsics. +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, + intr.fy, intr.cy, 0, 0, 1); + const cv::Mat D = (cv::Mat_(5, 1) << intr.dist[0], intr.dist[1], + intr.dist[2], intr.dist[3], intr.dist[4]); + // Build a RotatedRect and extract all 4 corners. + const float angle_deg = bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(bbox.center_x, bbox.center_y), + cv::Size2f(bbox.size_x, bbox.size_y), angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + + // Undistort all 4 corners. + std::vector pts(corners, corners + 4); + std::vector undistorted; + cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); + + // Refit an OBB to the undistorted corners. + cv::RotatedRect fitted = cv::minAreaRect(undistorted); + + BoundingBox result = bbox; + result.center_x = fitted.center.x; + result.center_y = fitted.center.y; + result.size_x = fitted.size.width; + result.size_y = fitted.size.height; + result.theta = fitted.angle * static_cast(M_PI) / 180.0f; + return result; +} + +// Greedy NMS: sorts by score descending, keeps at most 2 non-overlapping boxes. +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold) { + const size_t n = scored_boxes.size(); + if (n == 0) + return {}; + + std::vector> order; + order.reserve(n); + for (size_t i = 0; i < n; ++i) + order.emplace_back(scored_boxes[i].first, i); + std::sort(order.begin(), order.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + std::vector kept; + std::vector suppressed(n, false); + + for (size_t si = 0; si < order.size() && kept.size() < 2; ++si) { + const size_t i = order[si].second; + if (suppressed[i]) + continue; + + kept.push_back(i); + + const BoundingBox& bi = scored_boxes[i].second; + const float ai = bi.size_x * bi.size_y; + const float bx1i = bi.center_x - bi.size_x * 0.5f; + const float by1i = bi.center_y - bi.size_y * 0.5f; + const float bx2i = bi.center_x + bi.size_x * 0.5f; + const float by2i = bi.center_y + bi.size_y * 0.5f; + + for (size_t sj = si + 1; sj < order.size(); ++sj) { + const size_t j = order[sj].second; + if (suppressed[j]) + continue; + + const BoundingBox& bj = scored_boxes[j].second; + const float aj = bj.size_x * bj.size_y; + const float bx1j = bj.center_x - bj.size_x * 0.5f; + const float by1j = bj.center_y - bj.size_y * 0.5f; + const float bx2j = bj.center_x + bj.size_x * 0.5f; + const float by2j = bj.center_y + bj.size_y * 0.5f; + + const float ix1 = std::max(bx1i, bx1j); + const float iy1 = std::max(by1i, by1j); + const float ix2 = std::min(bx2i, bx2j); + const float iy2 = std::min(by2i, by2j); + + if (ix2 <= ix1 || iy2 <= iy1) + continue; + + const float inter = (ix2 - ix1) * (iy2 - iy1); + const float iou = inter / (ai + aj - inter); + const float iom = inter / std::min(ai, aj); + + if (iou > iou_duplicate_threshold || iom > 0.7f) + suppressed[j] = true; + } + } + + return kept; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp new file mode 100644 index 0000000..bfd0b85 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp @@ -0,0 +1,374 @@ +// pose_estimator.cpp +// RANSAC plane fitting, normal/ray-plane intersection, and 3D pose computation. +#include "valve_detection/pose_estimator.hpp" + +namespace valve_detection { + +// Initializes YOLO dimensions, annulus ratio, RANSAC parameters, and handle +// offset. +PoseEstimator::PoseEstimator(int yolo_img_width, + int yolo_img_height, + float annulus_radius_ratio, + float plane_ransac_threshold, + int plane_ransac_max_iterations, + float valve_handle_offset) + : yolo_img_width_(yolo_img_width), + yolo_img_height_(yolo_img_height), + annulus_radius_ratio_(annulus_radius_ratio), + plane_ransac_threshold_(plane_ransac_threshold), + plane_ransac_max_iterations_(plane_ransac_max_iterations), + valve_handle_offset_(valve_handle_offset) {} + +// Stores color camera intrinsics and image dimensions. +void PoseEstimator::set_color_image_properties(const ImageProperties& props) { + color_image_properties_ = props; +} + +// Stores depth camera intrinsics and image dimensions. +void PoseEstimator::set_depth_image_properties(const ImageProperties& props) { + depth_image_properties_ = props; + has_depth_props_ = (props.intr.fx > 0.0); +} + +// Stores the depth-to-color extrinsic transform. +void PoseEstimator::set_depth_color_extrinsic(const DepthColorExtrinsic& extr) { + depth_color_extrinsic_ = extr; +} + +// Computes letterbox scale factor and padding from the color image and YOLO +// dimensions. +void PoseEstimator::calculate_letterbox_padding() { + int org_image_width = color_image_properties_.dim.width; + int org_image_height = color_image_properties_.dim.height; + + letterbox_scale_factor_ = + std::min(static_cast(yolo_img_width_) / org_image_width, + static_cast(yolo_img_height_) / org_image_height); + + double resized_width = org_image_width * letterbox_scale_factor_; + double resized_height = org_image_height * letterbox_scale_factor_; + + letterbox_pad_x_ = (yolo_img_width_ - resized_width) / 2.0; + letterbox_pad_y_ = (yolo_img_height_ - resized_height) / 2.0; +} + +// Remaps a bounding box from YOLO letterbox space back to original image +// coordinates. +BoundingBox PoseEstimator::transform_bounding_box( + const BoundingBox& bbox) const { + BoundingBox transformed = bbox; + transformed.center_x = + (bbox.center_x - letterbox_pad_x_) / letterbox_scale_factor_; + transformed.center_y = + (bbox.center_y - letterbox_pad_y_) / letterbox_scale_factor_; + transformed.size_x /= letterbox_scale_factor_; + transformed.size_y /= letterbox_scale_factor_; + return transformed; +} + +// Fits a plane to the point cloud using RANSAC; returns false if no inliers are +// found. +bool PoseEstimator::segment_plane( + const pcl::PointCloud::Ptr& cloud, + pcl::ModelCoefficients::Ptr& coefficients, + pcl::PointIndices::Ptr& inliers) const { + if (cloud->points.empty()) + return false; + + pcl::SACSegmentation seg; + seg.setOptimizeCoefficients(true); + seg.setModelType(pcl::SACMODEL_PLANE); + seg.setMethodType(pcl::SAC_RANSAC); + seg.setDistanceThreshold(plane_ransac_threshold_); + seg.setMaxIterations(plane_ransac_max_iterations_); + seg.setInputCloud(cloud); + seg.segment(*inliers, *coefficients); + + return !inliers->indices.empty(); +} + +// Returns the normalized 3D ray direction from the camera origin through the +// bbox center. +Eigen::Vector3f PoseEstimator::get_ray_direction( + const BoundingBox& bbox) const { + const float xc = + (bbox.center_x - static_cast(color_image_properties_.intr.cx)) / + static_cast(color_image_properties_.intr.fx); + const float yc = + (bbox.center_y - static_cast(color_image_properties_.intr.cy)) / + static_cast(color_image_properties_.intr.fy); + return Eigen::Vector3f(xc, yc, 1.0f).normalized(); +} + +// Extracts and normalizes the plane normal, flipping it to face the camera. +Eigen::Vector3f PoseEstimator::compute_plane_normal( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction) const { + if (!coefficients || coefficients->values.size() < 3) + return Eigen::Vector3f::Zero(); + + Eigen::Vector3f normal(coefficients->values[0], coefficients->values[1], + coefficients->values[2]); + normal.normalize(); + + if (normal.dot(ray_direction) > 0.0f) + normal = -normal; + return normal; +} + +// Finds the 3D point where a ray intersects the fitted plane. +// ray_origin defaults to the camera origin (zero), which is correct for the +// color-frame path. Pass the actual origin when the ray does not start at +// the frame origin (e.g. the color camera origin expressed in depth frame). +Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& ray_direction, + const Eigen::Vector3f& ray_origin) const { + // A plane ax+by+cz+d=0 has 4 coefficients: [a, b, c, d]. + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Vector3f::Zero(); + + const Eigen::Vector3f plane_normal(coefficients->values[0], + coefficients->values[1], + coefficients->values[2]); + const float D = coefficients->values[3]; + const float denom = plane_normal.dot(ray_direction); + + if (std::abs(denom) < 1e-6f) + return Eigen::Vector3f::Zero(); + + const float lambda = -(plane_normal.dot(ray_origin) + D) / denom; + return ray_origin + lambda * ray_direction; +} + +// Shifts a 3D point along the plane normal by the valve handle offset. +Eigen::Vector3f PoseEstimator::shift_point_along_normal( + const Eigen::Vector3f& intersection_point, + const Eigen::Vector3f& plane_normal) const { + return intersection_point + (plane_normal * valve_handle_offset_); +} + +// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected bbox +// angle (X). +Eigen::Matrix3f PoseEstimator::create_rotation_matrix( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle) const { + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f z_axis = plane_normal; + const float D = coefficients->values[3]; + const float fx = static_cast(color_image_properties_.intr.fx); + const float fy = static_cast(color_image_properties_.intr.fy); + const float cx = static_cast(color_image_properties_.intr.cx); + const float cy = static_cast(color_image_properties_.intr.cy); + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + const Eigen::Matrix3f Kinv = K.inverse(); + + // Two image points along the bbox angle through the principal point. + const float len = 50.0f; + const Eigen::Vector3f p1(cx, cy, 1.f); + const Eigen::Vector3f p2(cx + len * std::cos(angle), + cy + len * std::sin(angle), 1.f); + + // Back project points to rays. + const Eigen::Vector3f r1 = (Kinv * p1).normalized(); + const Eigen::Vector3f r2 = (Kinv * p2).normalized(); + + // Compute intersections of rays with the plane. + const float denom1 = z_axis.dot(r1); + const float denom2 = z_axis.dot(r2); + if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f X1 = (-D / denom1) * r1; + const Eigen::Vector3f X2 = (-D / denom2) * r2; + + // Compute in-plane direction corresponding to the image line angle. + Eigen::Vector3f x_axis = (X2 - X1).normalized(); + + // Project onto the plane (for numerical stability). + x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); + + // Ensure consistent direction (avoid flipping frame between frames). + if (filter_direction_.dot(x_axis) < 0) + x_axis = -x_axis; + filter_direction_ = x_axis; + + const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + x_axis = y_axis.cross(z_axis).normalized(); + + Eigen::Matrix3f rot; + rot.col(0) = x_axis; // X_obj: direction of the image line + rot.col(1) = y_axis; // Y_obj: perpendicular in-plane + rot.col(2) = z_axis; // Z_obj: plane normal + return rot; +} + +// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected +// bbox angle (X), working entirely in the depth camera frame. Color-image +// rays are rotated into depth frame before intersecting the plane, and +// ray_origin is the color camera origin expressed in depth frame. +Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( + const pcl::ModelCoefficients::Ptr& coefficients, + const Eigen::Vector3f& plane_normal, + float angle, + const Eigen::Vector3f& ray_origin, + const Eigen::Matrix3f& R_dc) const { + if (!coefficients || coefficients->values.size() < 4) + return Eigen::Matrix3f::Identity(); + + const Eigen::Vector3f z_axis = plane_normal; + const float D = coefficients->values[3]; + const float fx = static_cast(color_image_properties_.intr.fx); + const float fy = static_cast(color_image_properties_.intr.fy); + const float cx = static_cast(color_image_properties_.intr.cx); + const float cy = static_cast(color_image_properties_.intr.cy); + + Eigen::Matrix3f K; + K << fx, 0, cx, 0, fy, cy, 0, 0, 1; + const Eigen::Matrix3f Kinv = K.inverse(); + + // Two image points along the bbox angle through the principal point. + const float len = 50.0f; + const Eigen::Vector3f p1(cx, cy, 1.f); + const Eigen::Vector3f p2(cx + len * std::cos(angle), + cy + len * std::sin(angle), 1.f); + + // Back-project color pixels to rays, then rotate into depth frame. + const Eigen::Vector3f r1 = (R_dc * (Kinv * p1)).normalized(); + const Eigen::Vector3f r2 = (R_dc * (Kinv * p2)).normalized(); + + // Intersect each ray (from color origin in depth frame) with the plane. + const float denom1 = z_axis.dot(r1); + const float denom2 = z_axis.dot(r2); + if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) + return Eigen::Matrix3f::Identity(); + + const float n_dot_O = z_axis.dot(ray_origin); + const Eigen::Vector3f X1 = ray_origin + (-(n_dot_O + D) / denom1) * r1; + const Eigen::Vector3f X2 = ray_origin + (-(n_dot_O + D) / denom2) * r2; + + // Compute in-plane direction corresponding to the image line angle. + Eigen::Vector3f x_axis = (X2 - X1).normalized(); + x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); + + // Ensure consistent direction (avoid flipping between frames). + if (filter_direction_.dot(x_axis) < 0) + x_axis = -x_axis; + filter_direction_ = x_axis; + + const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); + x_axis = y_axis.cross(z_axis).normalized(); + + Eigen::Matrix3f rot; + rot.col(0) = x_axis; + rot.col(1) = y_axis; + rot.col(2) = z_axis; + return rot; +} + +// Extracts a point cloud from the depth image, fits a plane, and returns the +// valve pose. +PoseResult PoseEstimator::compute_pose_from_depth( + const cv::Mat& depth_image, + const BoundingBox& bbox_org, + pcl::PointCloud::Ptr annulus_dbg, + pcl::PointCloud::Ptr plane_dbg, + bool debug_visualize) const { + pcl::PointCloud::Ptr cloud( + new pcl::PointCloud); + + if (has_depth_props_) { + // Extract points directly in the depth camera frame; only the OBB + // membership test is done in the color frame (a per-pixel projection + // that cannot be avoided without approximating the bbox in depth + // space). + extract_bbox_pcl_depth(depth_image, bbox_org, color_image_properties_, + depth_image_properties_, depth_color_extrinsic_, + cloud); + } else { + extract_annulus_pcl(depth_image, bbox_org, color_image_properties_, + annulus_radius_ratio_, cloud); + } + + if (cloud->points.size() < 4) + return {}; + + if (debug_visualize && annulus_dbg) + *annulus_dbg += *cloud; + + pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); + pcl::PointIndices::Ptr inliers(new pcl::PointIndices); + if (!segment_plane(cloud, coeff, inliers)) + return {}; + + if (debug_visualize && plane_dbg) { + for (int idx : inliers->indices) + plane_dbg->points.push_back(cloud->points[idx]); + plane_dbg->width = static_cast(plane_dbg->points.size()); + plane_dbg->height = 1; + plane_dbg->is_dense = false; + } + + PoseResult out; + + if (has_depth_props_) { + // The color bbox center defines a ray that originates at the color + // camera, not the depth camera. Express that ray in depth frame: + // origin = -R^T * t (color camera origin in depth frame) + // direction = R^T * K_c⁻¹ * [cx, cy, 1]ᵀ (normalized) + const Eigen::Matrix3f R_dc = depth_color_extrinsic_.R.transpose(); + const Eigen::Vector3f O_d = -(R_dc * depth_color_extrinsic_.t); + + const float fx_c = static_cast(color_image_properties_.intr.fx); + const float fy_c = static_cast(color_image_properties_.intr.fy); + const float cx_c = static_cast(color_image_properties_.intr.cx); + const float cy_c = static_cast(color_image_properties_.intr.cy); + const Eigen::Vector3f r_c((bbox_org.center_x - cx_c) / fx_c, + (bbox_org.center_y - cy_c) / fy_c, 1.0f); + const Eigen::Vector3f ray = (R_dc * r_c).normalized(); + + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return {}; + + const Eigen::Vector3f pos = + find_ray_plane_intersection(coeff, ray, O_d); + if (pos.isZero()) + return {}; + + const Eigen::Vector3f pos_shifted = + shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = create_rotation_matrix_depth( + coeff, normal, bbox_org.theta, O_d, R_dc); + out.result = Pose::from_eigen( + pos_shifted.cast(), + Eigen::Quaternionf(rot).normalized().cast()); + } else { + const Eigen::Vector3f ray = get_ray_direction(bbox_org); + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return {}; + + const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); + if (pos.isZero()) + return {}; + + const Eigen::Vector3f pos_shifted = + shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = + create_rotation_matrix(coeff, normal, bbox_org.theta); + out.result = Pose::from_eigen( + pos_shifted.cast(), + Eigen::Quaternionf(rot).normalized().cast()); + } + + out.result_valid = true; + return out; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp new file mode 100644 index 0000000..7ecfb58 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp @@ -0,0 +1,82 @@ +#include "valve_detection_ros/ros_utils.hpp" + +#include +#include +#include "vortex_msgs/msg/landmark.hpp" + +namespace valve_detection { + +BoundingBox to_bbox(const vision_msgs::msg::BoundingBox2D& b) { + BoundingBox o; + o.center_x = static_cast(b.center.position.x); + o.center_y = static_cast(b.center.position.y); + o.size_x = static_cast(b.size_x); + o.size_y = static_cast(b.size_y); + o.theta = static_cast(b.center.theta); // radians + return o; +} + +geometry_msgs::msg::PoseArray make_pose_array( + const std::vector& poses, + const std_msgs::msg::Header& header) { + geometry_msgs::msg::PoseArray msg; + msg.header = header; + msg.poses.reserve(poses.size()); + for (const auto& p : poses) { + geometry_msgs::msg::Pose po; + po.position.x = p.x; + po.position.y = p.y; + po.position.z = p.z; + po.orientation.x = p.qx; + po.orientation.y = p.qy; + po.orientation.z = p.qz; + po.orientation.w = p.qw; + msg.poses.push_back(po); + } + return msg; +} + +vortex_msgs::msg::LandmarkArray make_landmark_array( + const std::vector& poses, + const std_msgs::msg::Header& header, + int type) { + vortex_msgs::msg::LandmarkArray out; + out.header = header; + out.landmarks.reserve(poses.size()); + for (size_t i = 0; i < poses.size(); ++i) { + vortex_msgs::msg::Landmark lm; + lm.header = header; + lm.id = static_cast(i); + lm.type.value = type; + lm.subtype.value = 0; // unset — resolved by valve_subtype_resolver + lm.pose.pose.position.x = poses[i].x; + lm.pose.pose.position.y = poses[i].y; + lm.pose.pose.position.z = poses[i].z; + lm.pose.pose.orientation.x = poses[i].qx; + lm.pose.pose.orientation.y = poses[i].qy; + lm.pose.pose.orientation.z = poses[i].qz; + lm.pose.pose.orientation.w = poses[i].qw; + out.landmarks.push_back(lm); + } + return out; +} + +cv::Mat decode_depth_to_float( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) { + cv::Mat depth_img; + // RealSense publishes depth as 16UC1 (uint16 millimetres). + // cv_bridge type-casts without scaling, so we must divide by 1000. + if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || + depth->encoding == "16UC1") { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "16UC1"); + cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); + } else { + cv_bridge::CvImageConstPtr cv_depth = + cv_bridge::toCvShare(depth, "32FC1"); + depth_img = cv_depth->image.clone(); + } + return depth_img; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp new file mode 100644 index 0000000..a684963 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -0,0 +1,336 @@ +// valve_pose_ros.cpp +// ROS node: subscribes to depth and detections; publishes valve poses +// and visualizations. +#include "valve_detection_ros/valve_pose_ros.hpp" +#include "valve_detection_ros/ros_utils.hpp" + +#include + +#include + +namespace valve_detection { + +using std::placeholders::_1; +using std::placeholders::_2; + +ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) + : Node("valve_pose_node", options) { + debug_visualize_ = declare_parameter("debug_visualize"); + iou_duplicate_threshold_ = static_cast( + declare_parameter("iou_duplicate_threshold")); + const std::string drone = declare_parameter("drone", "moby"); + const std::string frame_base = + declare_parameter("output_frame_id"); + output_frame_id_ = frame_base.empty() ? "" : drone + "/" + frame_base; + landmark_type_ = declare_parameter("landmark_type"); + + setup_estimator(); + + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + setup_publishers(qos); + setup_subscribers(qos); +} + +void ValvePoseNode::setup_estimator() { + const int yolo_w = declare_parameter("yolo_img_width"); + const int yolo_h = declare_parameter("yolo_img_height"); + const float annulus_ratio = + declare_parameter("annulus_radius_ratio"); + const float ransac_thresh = + declare_parameter("plane_ransac_threshold"); + const int ransac_iters = + declare_parameter("plane_ransac_max_iterations"); + const float handle_off = declare_parameter("valve_handle_offset"); + + detector_ = std::make_unique( + yolo_w, yolo_h, annulus_ratio, ransac_thresh, ransac_iters, handle_off); + + depth_color_extrinsic_.R = Eigen::Matrix3f::Identity(); + depth_color_extrinsic_.t.x() = + static_cast(declare_parameter("depth_to_color_tx")); + depth_color_extrinsic_.t.y() = + static_cast(declare_parameter("depth_to_color_ty")); + depth_color_extrinsic_.t.z() = + static_cast(declare_parameter("depth_to_color_tz")); + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + + color_props_.intr.fx = declare_parameter("color_fx"); + color_props_.intr.fy = declare_parameter("color_fy"); + color_props_.intr.cx = declare_parameter("color_cx"); + color_props_.intr.cy = declare_parameter("color_cy"); + color_props_.dim.width = declare_parameter("color_image_width"); + color_props_.dim.height = declare_parameter("color_image_height"); + color_props_.intr.dist[0] = declare_parameter("color_d1"); + color_props_.intr.dist[1] = declare_parameter("color_d2"); + color_props_.intr.dist[2] = declare_parameter("color_d3"); + color_props_.intr.dist[3] = declare_parameter("color_d4"); + color_props_.intr.dist[4] = declare_parameter("color_d5"); + detector_->set_color_image_properties(color_props_); + detector_->calculate_letterbox_padding(); + RCLCPP_INFO(get_logger(), + "Color intrinsics loaded from config as fallback " + "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy, + color_props_.intr.cx, color_props_.intr.cy); + + depth_props_.intr.fx = declare_parameter("depth_fx"); + depth_props_.intr.fy = declare_parameter("depth_fy"); + depth_props_.intr.cx = declare_parameter("depth_cx"); + depth_props_.intr.cy = declare_parameter("depth_cy"); + depth_props_.dim.width = declare_parameter("depth_image_width"); + depth_props_.dim.height = declare_parameter("depth_image_height"); + detector_->set_depth_image_properties(depth_props_); + RCLCPP_INFO(get_logger(), + "Depth intrinsics loaded from config as fallback " + "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + depth_props_.intr.fx, depth_props_.intr.fy, + depth_props_.intr.cx, depth_props_.intr.cy); +} + +void ValvePoseNode::setup_publishers(const rclcpp::QoS& qos) { + const auto lm_topic = declare_parameter("landmarks_pub_topic"); + landmark_pub_ = + create_publisher(lm_topic, qos); + + if (!debug_visualize_) + return; + + const auto pose_topic = + declare_parameter("debug.valve_poses_pub_topic"); + const auto depth_cloud_topic = + declare_parameter("debug.depth_cloud_pub_topic"); + const auto depth_color_topic = + declare_parameter("debug.depth_colormap_pub_topic"); + const auto ann_topic = + declare_parameter("debug.annulus_pub_topic"); + const auto pln_topic = + declare_parameter("debug.plane_pub_topic"); + depth_colormap_vmin_ = static_cast( + declare_parameter("debug.depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast( + declare_parameter("debug.depth_colormap_value_max")); + + pose_pub_ = + create_publisher(pose_topic, qos); + depth_cloud_pub_ = + create_publisher(depth_cloud_topic, qos); + depth_colormap_pub_ = + create_publisher(depth_color_topic, qos); + annulus_pub_ = + create_publisher(ann_topic, qos); + plane_pub_ = + create_publisher(pln_topic, qos); +} + +void ValvePoseNode::setup_subscribers(const rclcpp::QoS& qos) { + const auto depth_topic = + declare_parameter("depth_image_sub_topic"); + const auto det_topic = + declare_parameter("detections_sub_topic"); + const auto depth_info_topic = + declare_parameter("depth_image_info_topic"); + const auto color_info_topic = + declare_parameter("color_image_info_topic"); + + const auto info_qos = + rclcpp::QoS(rclcpp::KeepLast(1)) + .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE) + .durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); + + color_cam_info_sub_ = create_subscription( + color_info_topic, info_qos, + std::bind(&ValvePoseNode::color_camera_info_cb, this, _1)); + depth_cam_info_sub_ = create_subscription( + depth_info_topic, info_qos, + std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); + + depth_sub_.subscribe(this, depth_topic, qos.get_rmw_qos_profile()); + det_sub_.subscribe(this, det_topic, qos.get_rmw_qos_profile()); + + sync_ = std::make_shared>( + SyncPolicy(10), depth_sub_, det_sub_); + sync_->registerCallback(std::bind(&ValvePoseNode::sync_cb, this, _1, _2)); +} + +// One-shot callback that overrides color intrinsics and distortion from the +// camera_info topic. +void ValvePoseNode::color_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg) { + color_props_.intr.fx = msg->k[0]; + color_props_.intr.fy = msg->k[4]; + color_props_.intr.cx = msg->k[2]; + color_props_.intr.cy = msg->k[5]; + color_props_.dim.width = static_cast(msg->width); + color_props_.dim.height = static_cast(msg->height); + + if (msg->d.size() >= 5) { + for (size_t i = 0; i < 5; ++i) + color_props_.intr.dist[i] = msg->d[i]; + } + + detector_->set_color_image_properties(color_props_); + detector_->calculate_letterbox_padding(); + + color_cam_info_sub_.reset(); // one-shot + RCLCPP_INFO(get_logger(), + "Color camera_info received, overriding config fallback " + "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy, + color_props_.intr.cx, color_props_.intr.cy); +} + +// One-shot callback that overrides depth intrinsics from the camera_info topic. +void ValvePoseNode::depth_camera_info_cb( + const sensor_msgs::msg::CameraInfo::SharedPtr msg) { + depth_props_.intr.fx = msg->k[0]; + depth_props_.intr.fy = msg->k[4]; + depth_props_.intr.cx = msg->k[2]; + depth_props_.intr.cy = msg->k[5]; + depth_props_.dim.width = static_cast(msg->width); + depth_props_.dim.height = static_cast(msg->height); + + detector_->set_depth_image_properties(depth_props_); + + depth_cam_info_sub_.reset(); // one-shot + RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", + depth_props_.intr.fx, depth_props_.intr.fy); +} + +// Main synchronized callback: runs NMS, estimates poses, and publishes all +// outputs. +void ValvePoseNode::sync_cb( + const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) { + if (!depth || !det) + return; + + cv::Mat depth_color; + const bool publish_colormap = + debug_visualize_ && depth_colormap_pub_ && + depth_colormap_pub_->get_subscription_count() > 0; + if (publish_colormap) { + cv_bridge::CvImageConstPtr cv_raw = + cv_bridge::toCvShare(depth, "16UC1"); + cv::Mat depth_f; + cv_raw->image.convertTo(depth_f, CV_32FC1); + const float scale = + 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); + cv::Mat depth_8u; + depth_f.convertTo(depth_8u, CV_8UC1, scale, + -depth_colormap_vmin_ * scale); + cv::applyColorMap(depth_8u, depth_color, cv::COLORMAP_TURBO); + } + + if (det->detections.empty()) { + // Publish empty arrays to clear stale data from previous detections. + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array({}, depth->header)); + landmark_pub_->publish( + make_landmark_array({}, depth->header, landmark_type_)); + if (publish_colormap) { + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", depth_color) + .toImageMsg()); + } + return; + } + + std::vector> scored_boxes; + scored_boxes.reserve(det->detections.size()); + for (const auto& d : det->detections) { + float score = 0.0f; + if (!d.results.empty()) { + score = static_cast(d.results[0].hypothesis.score); + } else { + score = static_cast(d.bbox.size_x * d.bbox.size_y); + } + scored_boxes.emplace_back(score, to_bbox(d.bbox)); + } + const std::vector kept = + filter_duplicate_detections(scored_boxes, iou_duplicate_threshold_); + + const cv::Mat depth_img = decode_depth_to_float(depth); + + pcl::PointCloud::Ptr ann_dbg( + new pcl::PointCloud); + pcl::PointCloud::Ptr pln_dbg( + new pcl::PointCloud); + + std::vector raw_boxes; + std::vector poses; + + for (size_t idx : kept) { + const BoundingBox& yolo_box = scored_boxes[idx].second; + BoundingBox org_box = undistort_bbox(yolo_box, color_props_.intr); + raw_boxes.push_back(yolo_box); + + const auto pose_result = detector_->compute_pose_from_depth( + depth_img, org_box, ann_dbg, pln_dbg, true); + if (pose_result.result_valid) + poses.push_back(pose_result.result); + } + + if (debug_visualize_ && depth_cloud_pub_ && + depth_cloud_pub_->get_subscription_count() > 0) { + sensor_msgs::msg::PointCloud2 cloud_msg; + pcl::toROSMsg(*ann_dbg, cloud_msg); + cloud_msg.header = depth->header; + depth_cloud_pub_->publish(cloud_msg); + } + + if (publish_colormap) { + for (size_t i = 0; i < raw_boxes.size(); ++i) { + const auto& box = raw_boxes[i]; + const float Z = (i < poses.size() && poses[i].z > 0.0) + ? static_cast(poses[i].z) + : 0.0f; + + const float angle_deg = + box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), + angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + + if (Z > 0.0f) { + for (auto& c : corners) { + c = project_color_pixel_to_depth(c.x, c.y, Z, color_props_, + depth_props_, + depth_color_extrinsic_); + } + } + for (int j = 0; j < 4; ++j) { + cv::line(depth_color, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); + } + } + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", depth_color) + .toImageMsg()); + } + + if (debug_visualize_ && annulus_pub_ && plane_pub_) { + sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; + pcl::toROSMsg(*ann_dbg, ann_msg); + pcl::toROSMsg(*pln_dbg, pln_msg); + ann_msg.header = depth->header; + pln_msg.header = depth->header; + annulus_pub_->publish(ann_msg); + plane_pub_->publish(pln_msg); + } + + std_msgs::msg::Header pose_header = depth->header; + if (!output_frame_id_.empty()) + pose_header.frame_id = output_frame_id_; + + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array(poses, pose_header)); + landmark_pub_->publish( + make_landmark_array(poses, pose_header, landmark_type_)); +} + +} // namespace valve_detection + +RCLCPP_COMPONENTS_REGISTER_NODE(valve_detection::ValvePoseNode) diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt b/mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt new file mode 100644 index 0000000..6f4ee13 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/CMakeLists.txt @@ -0,0 +1,57 @@ +cmake_minimum_required(VERSION 3.8) +project(valve_subtype_resolver) + +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) +endif() + +add_compile_options(-Wall -Wextra -Wpedantic) + +find_package(ament_cmake REQUIRED) +find_package(rclcpp REQUIRED) +find_package(rclcpp_components REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(std_msgs REQUIRED) +find_package(tf2 REQUIRED) +find_package(tf2_ros REQUIRED) +find_package(tf2_geometry_msgs REQUIRED) +find_package(vortex_msgs REQUIRED) + +set(LIB_NAME "${PROJECT_NAME}_component") + +add_library(${LIB_NAME} SHARED + src/valve_subtype_resolver.cpp +) + +target_include_directories(${LIB_NAME} PUBLIC + $ + $ +) + +ament_target_dependencies(${LIB_NAME} PUBLIC + rclcpp + rclcpp_components + geometry_msgs + std_msgs + tf2 + tf2_ros + tf2_geometry_msgs + vortex_msgs +) + +rclcpp_components_register_node( + ${LIB_NAME} + PLUGIN "valve_subtype_resolver::ValveSubtypeResolverNode" + EXECUTABLE ${PROJECT_NAME}_node +) + +install(TARGETS ${LIB_NAME} + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin +) + +install(DIRECTORY include/ DESTINATION include) +install(DIRECTORY config launch DESTINATION share/${PROJECT_NAME}) + +ament_package() diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml b/mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml new file mode 100644 index 0000000..1f03aa2 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/config/valve_subtype_resolver_params.yaml @@ -0,0 +1,32 @@ +/**: + ros__parameters: + # ── Inputs ──────────────────────────────────────────────────────────────── + # Landmarks published by valve_detection (subtype unset, 0). + landmarks_sub_topic: "/valve_landmarks" + + # ── Output ──────────────────────────────────────────────────────────────── + # Republished landmarks with subtype resolved. + landmarks_pub_topic: "/valve_landmarks_typed" + + # ── Robot identity ──────────────────────────────────────────────────────── + # Prepended to all TF frame names (e.g. "moby" → "moby/odom"). + # Override at launch time to switch between robots. + drone: "moby" + + # ── TF ──────────────────────────────────────────────────────────────────── + # Base name of the world/odometry frame (without drone prefix). + # The full frame used for TF lookup is: /. + # The full chain front_camera_depth_optical → base_link → odom must be + # available in the TF tree (static + dynamic). + odom_frame: "odom" + + # ── Subtype classification ───────────────────────────────────────────── + # Threshold on |world_normal.z|. + # + # The valve plane normal is rotated into the world frame. If its Z + # component exceeds this threshold the valve face is classified as: + # VALVE_VERTICAL (1) — normal points mostly up/down (wheel axis is vertical) + # VALVE_HORIZONTAL (2) — normal is mostly in the XY plane (wheel axis is horizontal) + # + # 0.5 corresponds to a 45-degree tilt boundary. + vertical_threshold: 0.5 diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp b/mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp new file mode 100644 index 0000000..335ef65 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/include/valve_subtype_resolver/valve_subtype_resolver.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include + +namespace valve_subtype_resolver { + +class ValveSubtypeResolverNode : public rclcpp::Node { + public: + explicit ValveSubtypeResolverNode(const rclcpp::NodeOptions& options); + + private: + void landmarks_cb(const vortex_msgs::msg::LandmarkArray::SharedPtr msg); + + // Uses the TF tree to rotate the valve plane normal into the world frame, + // then returns VALVE_VERTICAL (1), VALVE_HORIZONTAL (2), or 0 (unknown). + int resolve_subtype(const vortex_msgs::msg::Landmark& landmark); + + // TF2 + std::shared_ptr tf_buffer_; + std::shared_ptr tf_listener_; + + rclcpp::Subscription::SharedPtr + landmark_sub_; + rclcpp::Publisher::SharedPtr landmark_pub_; + + std::string world_frame_; + + // Threshold on |world_normal.z|: above this the valve is VALVE_VERTICAL + // (normal points up/down), below it VALVE_HORIZONTAL (normal in XY plane). + double vertical_threshold_; +}; + +} // namespace valve_subtype_resolver diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py b/mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py new file mode 100644 index 0000000..5649d23 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/launch/valve_subtype_resolver.launch.py @@ -0,0 +1,34 @@ +import os + +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from launch_ros.actions import Node + + +def generate_launch_description(): + config = os.path.join( + get_package_share_directory("valve_subtype_resolver"), + "config", + "valve_subtype_resolver_params.yaml", + ) + + drone_arg = DeclareLaunchArgument( + "drone", + default_value="moby", + description="Robot name, prepended to TF frame IDs (e.g. moby, orca)", + ) + + return LaunchDescription( + [ + drone_arg, + Node( + package="valve_subtype_resolver", + executable="valve_subtype_resolver_node", + name="valve_subtype_resolver_node", + parameters=[config, {"drone": LaunchConfiguration("drone")}], + output="screen", + ), + ] + ) diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/package.xml b/mission/tacc/visual_inspection/valve_subtype_resolver/package.xml new file mode 100644 index 0000000..a9924dd --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/package.xml @@ -0,0 +1,26 @@ + + + + valve_subtype_resolver + 0.1.0 + Subscribes to untyped valve landmarks; uses the TF tree to rotate the valve normal into the world frame and republishes with the resolved subtype (VALVE_VERTICAL / VALVE_HORIZONTAL). + you + Apache-2.0 + + ament_cmake + + rclcpp + rclcpp_components + geometry_msgs + std_msgs + tf2 + tf2_ros + tf2_geometry_msgs + vortex_msgs + + rosidl_default_runtime + + + ament_cmake + + diff --git a/mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp b/mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp new file mode 100644 index 0000000..7f24e1c --- /dev/null +++ b/mission/tacc/visual_inspection/valve_subtype_resolver/src/valve_subtype_resolver.cpp @@ -0,0 +1,108 @@ +// valve_subtype_resolver.cpp +// Subscribes to valve landmarks (subtype unset) published by valve_detection. +// For each landmark, looks up the full transform from the landmark frame to the +// world frame via TF2, rotates the valve plane normal into the world frame, and +// classifies the valve as VALVE_VERTICAL or VALVE_HORIZONTAL based on how much +// the normal deviates from the world Z-axis. Republishes with subtype set. +#include "valve_subtype_resolver/valve_subtype_resolver.hpp" + +#include +#include + +namespace valve_subtype_resolver { + +using std::placeholders::_1; + +ValveSubtypeResolverNode::ValveSubtypeResolverNode( + const rclcpp::NodeOptions& options) + : Node("valve_subtype_resolver_node", options) { + const std::string drone = declare_parameter("drone", "moby"); + const std::string odom_frame_base = + declare_parameter("odom_frame", "odom"); + world_frame_ = drone + "/" + odom_frame_base; + vertical_threshold_ = declare_parameter("vertical_threshold", 0.5); + + const auto landmarks_in = declare_parameter( + "landmarks_sub_topic", "/valve_landmarks"); + const auto landmarks_out = declare_parameter( + "landmarks_pub_topic", "/valve_landmarks_typed"); + + tf_buffer_ = std::make_shared(get_clock()); + tf_listener_ = std::make_shared(*tf_buffer_); + + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + + landmark_sub_ = create_subscription( + landmarks_in, qos, + std::bind(&ValveSubtypeResolverNode::landmarks_cb, this, _1)); + + landmark_pub_ = + create_publisher(landmarks_out, qos); +} + +void ValveSubtypeResolverNode::landmarks_cb( + const vortex_msgs::msg::LandmarkArray::SharedPtr msg) { + vortex_msgs::msg::LandmarkArray out = *msg; + for (auto& lm : out.landmarks) + lm.subtype.value = static_cast(resolve_subtype(lm)); + landmark_pub_->publish(out); +} + +// Resolves the valve subtype by rotating the valve plane normal into the world +// frame and checking its Z component. +// +// The valve plane normal is the Z-axis of the landmark orientation quaternion. +// It is transformed from the landmark frame into the world frame via a TF2 +// lookup that traverses the full chain: +// /front_camera_depth_optical → /front_camera_depth +// → /front_camera_link → /base_link → /odom +// +// Classification: +// |world_nz| >= vertical_threshold → VALVE_VERTICAL (1) +// The normal points mostly up or down; the valve face is horizontal, +// meaning the valve wheel spins around a vertical axis. +// |world_nz| < vertical_threshold → VALVE_HORIZONTAL (2) +// The normal is mostly in the XY plane; the valve face is vertical, +// meaning the valve wheel spins around a horizontal axis. +// +// Returns 0 (unknown) if the TF lookup fails. +int ValveSubtypeResolverNode::resolve_subtype( + const vortex_msgs::msg::Landmark& landmark) { + geometry_msgs::msg::TransformStamped transform; + try { + transform = tf_buffer_->lookupTransform( + world_frame_, landmark.header.frame_id, tf2::TimePointZero); + } catch (const tf2::TransformException& ex) { + RCLCPP_WARN_THROTTLE(get_logger(), *get_clock(), 1000, + "TF lookup failed: %s", ex.what()); + return 0; + } + + // Extract the valve plane normal (Z-axis of the landmark orientation). + // Rotating [0,0,1] by quaternion q gives: + // nx = 2(qx*qz + qw*qy) + // ny = 2(qy*qz - qw*qx) + // nz = 1 - 2(qx^2 + qy^2) + const auto& lq = landmark.pose.pose.orientation; + geometry_msgs::msg::Vector3Stamped normal_cam; + normal_cam.header = landmark.header; + normal_cam.vector.x = 2.0 * (lq.x * lq.z + lq.w * lq.y); + normal_cam.vector.y = 2.0 * (lq.y * lq.z - lq.w * lq.x); + normal_cam.vector.z = 1.0 - 2.0 * (lq.x * lq.x + lq.y * lq.y); + + // Rotate the normal into the world frame. + // tf2::doTransform on a Vector3Stamped applies only the rotation. + geometry_msgs::msg::Vector3Stamped normal_world; + tf2::doTransform(normal_cam, normal_world, transform); + + using ST = vortex_msgs::msg::LandmarkSubtype; + if (std::abs(normal_world.vector.z) >= vertical_threshold_) + return ST::VALVE_VERTICAL; + return ST::VALVE_HORIZONTAL; +} + +} // namespace valve_subtype_resolver + +RCLCPP_COMPONENTS_REGISTER_NODE( + valve_subtype_resolver::ValveSubtypeResolverNode) From ef5161ae33e067f169dd91a39a1c6cc3808ebbc1 Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Sat, 28 Mar 2026 03:28:13 +0100 Subject: [PATCH 2/7] refactor: pose estimation and ROS integration for valve detection - Updated PoseEstimator class to improve depth image handling and ray-plane intersection logic. - Renamed methods for clarity, including changing `calculate_letterbox_padding` to `compute_letterbox_transform`. - Enhanced the `compute_pose_from_depth` method to return a structured `DetectionResult` instead of `PoseResult`. - Simplified the handling of bounding boxes and their transformations between color and depth images. - Improved the ValvePoseNode class by restructuring parameter declarations and initialization. - Added robust error handling for TF lookups and improved logging for extrinsic transformations. - Streamlined the synchronization of depth and detection messages, ensuring better performance and clarity in the processing pipeline. - Updated debug visualization features to provide clearer outputs during development. --- .gitignore | 2 +- .../valve_detection/CMakeLists.txt | 7 + .../config/valve_detection_params.yaml | 84 +-- .../depth_image_processing.hpp | 98 +--- .../valve_detection/detection_utils.hpp | 33 ++ .../valve_detection/pcl_extraction.hpp | 48 ++ .../valve_detection/pose_estimator.hpp | 53 +- .../include/valve_detection/types.hpp | 9 +- .../include/valve_detection_ros/ros_utils.hpp | 3 +- .../valve_detection_ros/valve_pose_ros.hpp | 47 +- .../launch/valve_detection.launch.py | 11 +- .../valve_detection/package.xml | 2 + .../src/depth_image_processing.cpp | 458 +--------------- .../valve_detection/src/detection_utils.cpp | 110 ++++ .../valve_detection/src/pcl_extraction.cpp | 296 +++++++++++ .../valve_detection/src/pose_estimator.cpp | 233 +++----- .../valve_detection/src/ros_utils.cpp | 9 +- .../valve_detection/src/valve_pose_ros.cpp | 496 ++++++++++-------- 18 files changed, 975 insertions(+), 1024 deletions(-) create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp create mode 100644 mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp create mode 100644 mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp diff --git a/.gitignore b/.gitignore index 2a027b3..bb8bd5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # VSCode -.vscode/* +.vscode/ !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json diff --git a/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt index 21c3cd5..94069eb 100644 --- a/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt +++ b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt @@ -23,6 +23,9 @@ find_package(OpenCV REQUIRED) find_package(PCL REQUIRED) find_package(pcl_conversions REQUIRED) +find_package(tf2 REQUIRED) +find_package(tf2_ros REQUIRED) + find_package(vortex_msgs REQUIRED) find_package(vortex_utils REQUIRED) @@ -32,6 +35,8 @@ set(LIB_NAME "${PROJECT_NAME}_component") add_library(${LIB_NAME} SHARED src/depth_image_processing.cpp + src/detection_utils.cpp + src/pcl_extraction.cpp src/pose_estimator.cpp src/ros_utils.cpp src/valve_pose_ros.cpp @@ -60,6 +65,8 @@ ament_target_dependencies(${LIB_NAME} PUBLIC cv_bridge OpenCV pcl_conversions + tf2 + tf2_ros vortex_msgs vortex_utils ) diff --git a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml index fc44fa6..c68676d 100644 --- a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml +++ b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml @@ -2,49 +2,21 @@ ros__parameters: # ── Inputs ──────────────────────────────────────────────────────────────── # depth_image_sub_topic: "/realsense/D555_409122300281_Depth" - depth_image_sub_topic: "/camera/camera/depth/image_rect_raw" - detections_sub_topic: "/yolo_obb_object_detection/detections" - - # Depth camera-info topic (optional – used to load depth intrinsics if published). - # Using the realsense depth camera_info since the depth image is from the realsense bag. - depth_image_info_topic: "/camera/camera/depth/camera_info" - - # Color camera-info topic (optional – overrides the fallback params below if published). - color_image_info_topic: "/moby/front_camera/camera_info" - - # Color camera intrinsics – fallback values used when color_image_info_topic is not published. - # Source: /camera/camera/color/camera_info - color_fx: 452.121521 - color_fy: 451.737976 - color_cx: 438.254059 - color_cy: 248.703217 - color_image_width: 896 - color_image_height: 504 - - # Distortion coefficients [k1, k2, p1, p2, k3] (plumb_bob model) - color_d1: -0.0548105500638485 - color_d2: 0.0597584918141365 - color_d3: 0.000486430013552308 - color_d4: 0.00031599000794813 - color_d5: -0.0192314591258764 - - # Depth camera intrinsics (fallback when depth camera_info is not published) - # Source: /camera/camera/depth/camera_info - depth_fx: 449.449707 - depth_fy: 449.449707 - depth_cx: 444.819946 - depth_cy: 248.226578 - depth_image_width: 896 - depth_image_height: 504 + depth_image_sub_topic: "/realsense_d555/depth/image" + detections_sub_topic: "/obb_detections_output" + + # Depth camera-info topic (intrinsics loaded from this topic). + depth_image_info_topic: "/realsense_d555/depth/camera_info" + + # Color camera-info topic (intrinsics and distortion loaded from this topic). + color_image_info_topic: "/yolo_obb_encoder/internal/crop/camera_info" # ── Depth-to-color extrinsic ────────────────────────────────────────────── - # Translation (metres) of the depth origin expressed in the color camera - # frame. Rotation is assumed to be identity (both optical frames share the - # same orientation for the RealSense D555). - # Source: ros2 topic echo /realsense/extrinsics/depth_to_color - depth_to_color_tx: -0.059 - depth_to_color_ty: 0.0 - depth_to_color_tz: 0.0 + # TF frame names (without drone prefix) used to look up the depth-to-color + # transform from /tf_static. The node will wait until this transform is + # available before initialising the detector. + depth_frame_id: "front_camera_depth_optical" + color_frame_id: "front_camera_color_optical" # ── Outputs ─────────────────────────────────────────────────────────────── # Primary output consumed by downstream nodes (e.g. EKF, mission planner). @@ -83,8 +55,19 @@ # face frame and the inside length of the handle is 0.065 m. valve_handle_offset: 0.065 + # ── Output frame ────────────────────────────────────────────────────────── + # TF frame used as the frame_id for all published poses and landmarks. + # Must match the depth optical frame in the robot's TF tree. + # Depth is from the realsense bag, pretending to be nautilus/depth_camera/image_depth. + # See: auv_setup/description/nautilus.urdf.xacro + output_frame_id: "front_camera_depth_optical" + + # Prepended to all TF frame names (e.g. "moby" → "moby/front_camera_depth_optical"). + # Override at launch time to switch between robots: drone:=orca + drone: "nautilus" + # ── Behaviour toggles ───────────────────────────────────────────────────── - debug_visualize: false # enable all debug visualization topics + debug_visualize: true # enable all debug visualization topics # ── Debug visualization (only active when debug_visualize: true) ────────── debug: @@ -115,20 +98,3 @@ # – useful for checking that the fitted plane normal (and therefore the # final pose orientation) is correct. plane_pub_topic: "/annulus_plane_pcl" - - # ── Landmark fields ─────────────────────────────────────────────────────── - landmark_type: 5 # VALVE - # subtype is intentionally unset here (0 = UNKNOWN); resolved downstream - # by the valve_subtype_resolver node running on the PI. - - # ── Output frame ────────────────────────────────────────────────────────── - # TF frame used as the frame_id for all published poses and landmarks. - # Must match the depth optical frame in the robot's TF tree. - # Depth is from the realsense bag, pretending to be moby/depth_camera/image_depth. - # See: auv_setup/description/moby.urdf.xacro - output_frame_id: "front_camera_depth_optical" - input_color_frame_id: "front_camera_color_optical" - - # Prepended to all TF frame names (e.g. "moby" → "moby/front_camera_depth_optical"). - # Override at launch time to switch between robots: drone:=orca - drone: "moby" diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp index ae6e582..c51303b 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp @@ -1,84 +1,38 @@ -#pragma once +#ifndef VALVE_DETECTION__DEPTH_IMAGE_PROCESSING_HPP_ +#define VALVE_DETECTION__DEPTH_IMAGE_PROCESSING_HPP_ #include #include #include -#include -#include #include "valve_detection/types.hpp" namespace valve_detection { -void project_pixel_to_point(const int u, - const int v, - const float depth, - const double fx, - const double fy, - const double cx, - const double cy, - pcl::PointXYZ& out); - -void extract_annulus_pcl( - const cv::Mat& depth_image, // CV_32FC1 meters - const BoundingBox& bbox, // in ORIGINAL image pixels - const ImageProperties& img_props, - const float annulus_radius_ratio, // inner radius = outer*ratio - pcl::PointCloud::Ptr& out); - -// Iterates depth pixels, back-projects each with depth intrinsics, applies -// the extrinsic transform, then checks whether the resulting color-frame -// projection falls inside the elliptic annulus defined by color_bbox. -// Output points are in the color camera frame. -void extract_annulus_pcl_aligned( - const cv::Mat& depth_image, // CV_32FC1 meters, depth frame - const BoundingBox& color_bbox, // annulus defined in color pixels - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extrinsic, - const float annulus_radius_ratio, - pcl::PointCloud::Ptr& out); - -// Extracts all valid depth points whose color-frame projection falls inside -// the oriented bounding box. Output points are in the color camera frame. -void extract_bbox_pcl_aligned( - const cv::Mat& depth_image, // CV_32FC1 meters, depth frame - const BoundingBox& color_bbox, // OBB defined in color pixels - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extrinsic, - pcl::PointCloud::Ptr& out); - -// Projects the 4 corners of the color OBB into depth image space once, fits -// an OBB to those projected corners, then tests depth pixels directly against -// that depth-image OBB — no per-pixel matrix multiply needed. Output points -// are stored in the depth camera frame. -void extract_bbox_pcl_depth( - const cv::Mat& depth_image, // CV_32FC1 meters, depth frame - const BoundingBox& color_bbox, // OBB defined in color pixels - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extrinsic, - pcl::PointCloud::Ptr& out); - -// Project a color image pixel to depth image coordinates. +/** + * @brief Back-projects a depth pixel to a 3D point in the camera frame. + * + * Assumes no lens distortion — this is valid for depth images, which are + * typically rectified before publication by the camera driver. + */ +void project_pixel_to_point(int u, + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& out); + +// Projects a color image pixel to depth image coordinates. // u_c, v_c: pixel coordinates in the color image. // Z: depth of the point in the color camera frame (metres). -cv::Point2f project_color_pixel_to_depth(const float u_c, - const float v_c, - const float Z, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr); - -// Returns undistorted copy of bbox (center_x/y corrected for lens distortion). -BoundingBox undistort_bbox(const BoundingBox& bbox, - const CameraIntrinsics& intr); - -// Greedy NMS: returns indices of kept detections (max 2). -// scored_boxes: (score, bbox) pairs. Two boxes are duplicates when IoMin -// (intersection / min-area) or IoU exceeds iou_duplicate_threshold. -std::vector filter_duplicate_detections( - const std::vector>& scored_boxes, - float iou_duplicate_threshold); +cv::Point2f project_color_pixel_to_depth(float u_c, + float v_c, + float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr); } // namespace valve_detection + +#endif // VALVE_DETECTION__DEPTH_IMAGE_PROCESSING_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp new file mode 100644 index 0000000..27f0470 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp @@ -0,0 +1,33 @@ +#ifndef VALVE_DETECTION__DETECTION_UTILS_HPP_ +#define VALVE_DETECTION__DETECTION_UTILS_HPP_ + +#include +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +/** + * @brief Returns an undistorted copy of bbox. + * + * Undistorts the 4 edge midpoints of the OBB and the center point using the + * given intrinsics, then refits an oriented bounding box to the result. Using + * edge midpoints (rather than corners) produces a better fit around circular + * objects such as valves. + */ +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr); + +/** + * @brief Greedy NMS: returns indices of kept detections (max 2). + * + * scored_boxes: (score, bbox) pairs. Two boxes are duplicates when IoMin + * (intersection / min-area) or IoU exceeds iou_duplicate_threshold. + */ +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold); + +} // namespace valve_detection + +#endif // VALVE_DETECTION__DETECTION_UTILS_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp new file mode 100644 index 0000000..9967d55 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pcl_extraction.hpp @@ -0,0 +1,48 @@ +#ifndef VALVE_DETECTION__PCL_EXTRACTION_HPP_ +#define VALVE_DETECTION__PCL_EXTRACTION_HPP_ + +#include +#include +#include +#include "valve_detection/types.hpp" + +namespace valve_detection { + +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, then checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. +void extract_annulus_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 metres, depth frame + const BoundingBox& color_bbox, // annulus defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + float annulus_radius_ratio, + pcl::PointCloud::Ptr& out); + +// Extracts all valid depth points whose color-frame projection falls inside +// the oriented bounding box. Output points are in the color camera frame. +void extract_bbox_pcl_aligned( + const cv::Mat& depth_image, // CV_32FC1 metres, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are in the depth camera frame. +void extract_bbox_pcl_depth( + const cv::Mat& depth_image, // CV_32FC1 metres, depth frame + const BoundingBox& color_bbox, // OBB defined in color pixels + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out); + +} // namespace valve_detection + +#endif // VALVE_DETECTION__PCL_EXTRACTION_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp index d734a3d..f5c739c 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp @@ -1,6 +1,7 @@ -#pragma once +#ifndef VALVE_DETECTION__POSE_ESTIMATOR_HPP_ +#define VALVE_DETECTION__POSE_ESTIMATOR_HPP_ -#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/pcl_extraction.hpp" #include "valve_detection/types.hpp" #include @@ -10,12 +11,26 @@ #include #include #include -#include #include -#include +#include namespace valve_detection { +enum class DetectorMode { standard, debug }; + +struct DebugOutput { + pcl::PointCloud::Ptr annulus_cloud; + pcl::PointCloud::Ptr plane_cloud; +}; + +struct DetectionResult { + Pose pose; + bool valid{false}; + // Populated when DetectorMode::debug is used: + pcl::PointCloud::Ptr annulus_cloud; + pcl::PointCloud::Ptr plane_cloud; +}; + class PoseEstimator { public: PoseEstimator(int yolo_img_width, @@ -29,22 +44,23 @@ class PoseEstimator { void set_depth_image_properties(const ImageProperties& props); void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); - void calculate_letterbox_padding(); - BoundingBox transform_bounding_box(const BoundingBox& bbox) const; + /// @brief Computes letterbox scale and padding from color image dimensions and YOLO input size. + /// Must be called after set_color_image_properties(). + void compute_letterbox_transform(); - PoseResult compute_pose_from_depth( - const cv::Mat& depth_image, // CV_32FC1 meters - const BoundingBox& bbox_org, // in original image pixels - pcl::PointCloud::Ptr annulus_dbg, - pcl::PointCloud::Ptr plane_dbg, - bool debug_visualize) const; + /// @brief Remaps a bounding box from YOLO letterbox coordinates to original image coordinates. + BoundingBox letterbox_to_image_coords(const BoundingBox& bbox) const; + + DetectionResult compute_pose_from_depth( + const cv::Mat& depth_image, + const BoundingBox& bbox_org, + DetectorMode mode = DetectorMode::standard) const; private: bool segment_plane(const pcl::PointCloud::Ptr& cloud, pcl::ModelCoefficients::Ptr& coeff, pcl::PointIndices::Ptr& inliers) const; - Eigen::Vector3f get_ray_direction(const BoundingBox& bbox) const; Eigen::Vector3f compute_plane_normal( const pcl::ModelCoefficients::Ptr& coeff, const Eigen::Vector3f& ray_direction) const; @@ -55,23 +71,18 @@ class PoseEstimator { Eigen::Vector3f shift_point_along_normal( const Eigen::Vector3f& intersection_point, const Eigen::Vector3f& plane_normal) const; - Eigen::Matrix3f create_rotation_matrix( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& plane_normal, - float angle) const; // Variant that works entirely in depth frame: color rays are rotated by - // R_dc = R^T before intersecting the depth-frame plane. + // R_depth_from_color = R^T before intersecting the depth-frame plane. Eigen::Matrix3f create_rotation_matrix_depth( const pcl::ModelCoefficients::Ptr& coefficients, const Eigen::Vector3f& plane_normal, float angle, const Eigen::Vector3f& ray_origin, - const Eigen::Matrix3f& R_dc) const; + const Eigen::Matrix3f& R_depth_from_color) const; ImageProperties color_image_properties_{}; ImageProperties depth_image_properties_{}; DepthColorExtrinsic depth_color_extrinsic_{}; - bool has_depth_props_{false}; int yolo_img_width_; int yolo_img_height_; @@ -88,3 +99,5 @@ class PoseEstimator { }; } // namespace valve_detection + +#endif // VALVE_DETECTION__POSE_ESTIMATOR_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp index c2c8067..32a0cce 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp @@ -1,6 +1,7 @@ -#pragma once +#ifndef VALVE_DETECTION__TYPES_HPP_ +#define VALVE_DETECTION__TYPES_HPP_ + #include -#include #include #include @@ -9,7 +10,7 @@ namespace valve_detection { struct CameraIntrinsics { double fx{0}, fy{0}, cx{0}, cy{0}; // Plumb-bob distortion coefficients [k1, k2, p1, p2, k3]. - std::array dist{0, 0, 0, 0, 0}; + double dist_k1{0}, dist_k2{0}, dist_p1{0}, dist_p2{0}, dist_k3{0}; }; struct ImageDimensions { @@ -44,3 +45,5 @@ struct DepthColorExtrinsic { }; } // namespace valve_detection + +#endif // VALVE_DETECTION__TYPES_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp index 6f080bc..e067d7d 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/ros_utils.hpp @@ -25,8 +25,7 @@ geometry_msgs::msg::PoseArray make_pose_array( // Subtype is always 0 (unset); resolved downstream by valve_subtype_resolver. vortex_msgs::msg::LandmarkArray make_landmark_array( const std::vector& poses, - const std_msgs::msg::Header& header, - int type); + const std_msgs::msg::Header& header); // Decodes a ROS depth image to a CV_32FC1 mat in metres. cv::Mat decode_depth_to_float( diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp index 06d6a08..0e9a939 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp @@ -1,4 +1,5 @@ -#pragma once +#ifndef VALVE_DETECTION_ROS__VALVE_POSE_ROS_HPP_ +#define VALVE_DETECTION_ROS__VALVE_POSE_ROS_HPP_ #include #include @@ -13,11 +14,15 @@ #include #include +#include +#include + #include #include #include -#include "valve_detection/depth_image_processing.hpp" +#include "valve_detection/detection_utils.hpp" +#include "valve_detection/pcl_extraction.hpp" #include "valve_detection/pose_estimator.hpp" #include "valve_detection/types.hpp" @@ -36,9 +41,10 @@ class ValvePoseNode : public rclcpp::Node { private: // Node setup — called from constructor. - void setup_estimator(); - void setup_publishers(const rclcpp::QoS& qos); - void setup_subscribers(const rclcpp::QoS& qos); + void declare_params(); + void init_subscriptions(); + void try_activate_detector(); + void lookup_extrinsic(); // Camera info callbacks (one-shot, override config fallback). void color_camera_info_cb( @@ -50,6 +56,19 @@ class ValvePoseNode : public rclcpp::Node { void sync_cb(const sensor_msgs::msg::Image::ConstSharedPtr& depth, const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det); + // sync_cb helpers + std::vector> collect_scored_boxes( + const vision_msgs::msg::Detection2DArray& det) const; + void publish_empty_results(const std_msgs::msg::Header& header) const; + cv::Mat build_depth_colormap( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) const; + void publish_debug( + const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const std::vector& boxes, + const std::vector& poses, + const pcl::PointCloud& ann_cloud, + const pcl::PointCloud& pln_cloud) const; + using SyncPolicy = message_filters::sync_policies::ApproximateTime< sensor_msgs::msg::Image, vision_msgs::msg::Detection2DArray>; @@ -58,12 +77,26 @@ class ValvePoseNode : public rclcpp::Node { bool debug_visualize_; float iou_duplicate_threshold_; std::string output_frame_id_; - int landmark_type_; + + // estimator config params (stored for deferred detector init) + int yolo_w_, yolo_h_; + float annulus_ratio_, ransac_thresh_, handle_offset_; + int ransac_iters_; // camera data (owned by node, passed to estimator and depth functions) ImageProperties color_props_{}; ImageProperties depth_props_{}; DepthColorExtrinsic depth_color_extrinsic_{}; + bool color_props_ready_{false}; + bool depth_props_ready_{false}; + bool extrinsic_ready_{false}; + + // TF2 lookup for depth-to-color extrinsic + std::string depth_frame_id_; + std::string color_frame_id_; + std::shared_ptr tf_buffer_; + std::shared_ptr tf_listener_; + rclcpp::TimerBase::SharedPtr extrinsic_timer_; // estimator std::unique_ptr detector_; @@ -91,3 +124,5 @@ class ValvePoseNode : public rclcpp::Node { }; } // namespace valve_detection + +#endif // VALVE_DETECTION_ROS__VALVE_POSE_ROS_HPP_ diff --git a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py index e62e731..bc8c08a 100644 --- a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py +++ b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py @@ -17,16 +17,10 @@ def generate_launch_description(): drone_arg = DeclareLaunchArgument( 'drone', - default_value='moby', + default_value='nautilus', description='Robot name, prepended to TF frame IDs (e.g. moby, orca)', ) - debug_visualize_arg = DeclareLaunchArgument( - 'debug_visualize', - default_value='false', - description='Enable debug visualization topics (valve_pose, valve_poses, annulus, plane)', - ) - container = ComposableNodeContainer( name='valve_detection_container', namespace='', @@ -41,7 +35,6 @@ def generate_launch_description(): cfg, { 'drone': LaunchConfiguration('drone'), - 'debug_visualize': LaunchConfiguration('debug_visualize'), }, ], ) @@ -49,4 +42,4 @@ def generate_launch_description(): output='screen', ) - return LaunchDescription([drone_arg, debug_visualize_arg, container]) + return LaunchDescription([drone_arg, container]) diff --git a/mission/tacc/visual_inspection/valve_detection/package.xml b/mission/tacc/visual_inspection/valve_detection/package.xml index 9be782b..643f6ec 100644 --- a/mission/tacc/visual_inspection/valve_detection/package.xml +++ b/mission/tacc/visual_inspection/valve_detection/package.xml @@ -19,6 +19,8 @@ std_msgs cv_bridge pcl_conversions + tf2 + tf2_ros vortex_msgs vortex_utils diff --git a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp index 2c3feae..3b9ec2a 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp @@ -1,13 +1,8 @@ // depth_image_processing.cpp -// Depth-to-3D back-projection, point cloud extraction, and color-to-depth pixel -// projection. +// Depth-to-3D back-projection and color-to-depth pixel projection. #include "valve_detection/depth_image_processing.hpp" #include -#include -#include #include -#include -#include namespace valve_detection { @@ -30,362 +25,6 @@ void project_pixel_to_point(const int u, out.z = depth; } -// Extracts depth points that fall inside an elliptic annulus around the bbox -// center (no alignment). -void extract_annulus_pcl(const cv::Mat& depth_image, - const BoundingBox& bbox, - const ImageProperties& img_props, - const float annulus_radius_ratio, - pcl::PointCloud::Ptr& out) { - out->clear(); - - const float cx = bbox.center_x; - const float cy = bbox.center_y; - // Outer ellipse half-extents match the bounding box. - const float outer_rx = bbox.size_x * 0.5f; - const float outer_ry = bbox.size_y * 0.5f; - - // Require at least 4 pixels in each dimension (2 px half-extent) before - // sampling — smaller boxes contain no usable depth points. - if (outer_rx < 2.0f || outer_ry < 2.0f) - return; - - // Inner ellipse is scaled down by annulus_radius_ratio, creating a ring - // that avoids the central hub and samples only the valve rim. - const float inner_rx = outer_rx * annulus_radius_ratio; - const float inner_ry = outer_ry * annulus_radius_ratio; - - // Bounding rectangle of the outer ellipse in pixel space. - const int u0 = static_cast(std::floor(cx - outer_rx)); - const int u1 = static_cast(std::ceil(cx + outer_rx)); - const int v0 = static_cast(std::floor(cy - outer_ry)); - const int v1 = static_cast(std::ceil(cy + outer_ry)); - - for (int v = v0; v <= v1; ++v) { - if (v < 0 || v >= depth_image.rows) - continue; - for (int u = u0; u <= u1; ++u) { - if (u < 0 || u >= depth_image.cols) - continue; - - // Normalise pixel offset by the outer half-extents; the point is - // inside the outer ellipse when the sum of squares ≤ 1. - const float dxo = (u - cx) / outer_rx; - const float dyo = (v - cy) / outer_ry; - const bool inside_outer = (dxo * dxo + dyo * dyo) <= 1.0f; - - // Same test for the inner ellipse; keep only points outside it - // to form the annular ring. - const float dxi = (u - cx) / inner_rx; - const float dyi = (v - cy) / inner_ry; - const bool outside_inner = (dxi * dxi + dyi * dyi) > 1.0f; - - if (!inside_outer || !outside_inner) - continue; - - // Back-project the depth pixel to a 3-D point in camera space. - const float z = depth_image.at(v, u); - pcl::PointXYZ p; - project_pixel_to_point(u, v, z, img_props.intr.fx, - img_props.intr.fy, img_props.intr.cx, - img_props.intr.cy, p); - - // Discard invalid points (zero/NaN depth produces NaN coords). - if (!std::isnan(p.x) && !std::isnan(p.y) && !std::isnan(p.z)) { - out->points.push_back(p); - } - } - } - - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; -} - -// Iterates depth pixels, back-projects each with depth intrinsics, applies -// the extrinsic transform, and checks whether the resulting color-frame -// projection falls inside the elliptic annulus defined by color_bbox. -// Output points are in the color camera frame. -void extract_annulus_pcl_aligned(const cv::Mat& depth_image, - const BoundingBox& color_bbox, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr, - const float annulus_radius_ratio, - pcl::PointCloud::Ptr& out) { - out->clear(); - - // Annulus defined in color-image coordinates. - const float cx_c = color_bbox.center_x; - const float cy_c = color_bbox.center_y; - const float outer_rx = color_bbox.size_x * 0.5f; - const float outer_ry = color_bbox.size_y * 0.5f; - - // Require at least 4 pixels in each dimension (2 px half-extent) before - // sampling — smaller boxes contain no usable depth points. - if (outer_rx < 2.0f || outer_ry < 2.0f) - return; - - // Inner ellipse scaled down by annulus_radius_ratio, forming the ring. - const float inner_rx = outer_rx * annulus_radius_ratio; - const float inner_ry = outer_ry * annulus_radius_ratio; - - // The depth and color images have different resolutions and focal lengths. - // Scale color-space bbox bounds into depth-image coordinates to get a - // coarse candidate region, then add a margin to cover the lateral shift - // introduced by the depth-to-color translation. - const float scale = - (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) - ? static_cast(depth_props.intr.fx / color_props.intr.fx) - : 1.0f; - constexpr int kMargin = 30; - - const int u0_d = - std::max(0, static_cast((cx_c - outer_rx) * scale) - kMargin); - const int u1_d = - std::min(depth_image.cols - 1, - static_cast((cx_c + outer_rx) * scale) + kMargin); - const int v0_d = - std::max(0, static_cast((cy_c - outer_ry) * scale) - kMargin); - const int v1_d = - std::min(depth_image.rows - 1, - static_cast((cy_c + outer_ry) * scale) + kMargin); - - for (int v_d = v0_d; v_d <= v1_d; ++v_d) { - for (int u_d = u0_d; u_d <= u1_d; ++u_d) { - const float z_d = depth_image.at(v_d, u_d); - if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) - continue; - - // Back-project depth pixel to 3-D point in depth camera frame. - Eigen::Vector3f P_d; - P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / - depth_props.intr.fx); - P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / - depth_props.intr.fy); - P_d.z() = z_d; - - // Apply extrinsic to move the point into the color camera frame, - // where the annulus is defined. - const Eigen::Vector3f P_c = extr.R * P_d + extr.t; - if (P_c.z() <= 0.0f) - continue; - - // Project the color-frame point onto the color image plane. - const float u_c = static_cast( - color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); - const float v_c = static_cast( - color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); - - // Normalise offset by outer half-extents; keep only points inside - // the outer ellipse (sum of squares ≤ 1). - const float dxo = (u_c - cx_c) / outer_rx; - const float dyo = (v_c - cy_c) / outer_ry; - if (dxo * dxo + dyo * dyo > 1.0f) - continue; // outside outer ellipse - - // Discard points inside the inner ellipse to form the ring. - const float dxi = (u_c - cx_c) / inner_rx; - const float dyi = (v_c - cy_c) / inner_ry; - if (dxi * dxi + dyi * dyi <= 1.0f) - continue; // inside inner ellipse - - // Store the point in the color camera frame. - pcl::PointXYZ p; - p.x = P_c.x(); - p.y = P_c.y(); - p.z = P_c.z(); - out->points.push_back(p); - } - } - - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; -} - -// Extracts depth points whose color-frame projection falls inside the oriented -// bounding box. -void extract_bbox_pcl_aligned(const cv::Mat& depth_image, - const BoundingBox& color_bbox, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr, - pcl::PointCloud::Ptr& out) { - out->clear(); - - // Pre-compute the OBB axes in color-image space for a fast rotated-rect - // test. - const float cos_t = std::cos(color_bbox.theta); - const float sin_t = std::sin(color_bbox.theta); - const float half_w = color_bbox.size_x * 0.5f; - const float half_h = color_bbox.size_y * 0.5f; - - // Approximate search region in depth-image space. - const float scale = - (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) - ? static_cast(depth_props.intr.fx / color_props.intr.fx) - : 1.0f; - const float r = std::sqrt(half_w * half_w + half_h * half_h); - constexpr int kMargin = 30; - - const int u0_d = std::max( - 0, static_cast((color_bbox.center_x - r) * scale) - kMargin); - const int u1_d = - std::min(depth_image.cols - 1, - static_cast((color_bbox.center_x + r) * scale) + kMargin); - const int v0_d = std::max( - 0, static_cast((color_bbox.center_y - r) * scale) - kMargin); - const int v1_d = - std::min(depth_image.rows - 1, - static_cast((color_bbox.center_y + r) * scale) + kMargin); - - for (int v_d = v0_d; v_d <= v1_d; ++v_d) { - for (int u_d = u0_d; u_d <= u1_d; ++u_d) { - const float z_d = depth_image.at(v_d, u_d); - if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) - continue; - - // Back-project to depth camera frame. - Eigen::Vector3f P_d; - P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / - depth_props.intr.fx); - P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / - depth_props.intr.fy); - P_d.z() = z_d; - - // Transform to color camera frame. - const Eigen::Vector3f P_c = extr.R * P_d + extr.t; - if (P_c.z() <= 0.0f) - continue; - - // Project onto color image plane. - const float u_c = static_cast( - color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); - const float v_c = static_cast( - color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); - - // Rotate into the OBB local frame and test against the - // half-extents. - const float dx = u_c - color_bbox.center_x; - const float dy = v_c - color_bbox.center_y; - const float local_x = cos_t * dx + sin_t * dy; - const float local_y = -sin_t * dx + cos_t * dy; - - if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) - continue; - - out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); - } - } - - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; -} - -// Projects the 4 corners of the color OBB into depth image space once, fits -// an OBB to those projected corners, then tests depth pixels directly against -// that depth-image OBB — no per-pixel matrix multiply needed. Output points -// are stored in the depth camera frame. -void extract_bbox_pcl_depth(const cv::Mat& depth_image, - const BoundingBox& color_bbox, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr, - pcl::PointCloud::Ptr& out) { - out->clear(); - - // Sample a representative depth near the bbox center to project the color - // OBB corners into depth image space. The valve is roughly flat, so a - // single Z gives a good approximation for all 4 corners. - const float scale = - (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) - ? static_cast(depth_props.intr.fx / color_props.intr.fx) - : 1.0f; - const int u_c_d = std::clamp(static_cast(color_bbox.center_x * scale), - 0, depth_image.cols - 1); - const int v_c_d = std::clamp(static_cast(color_bbox.center_y * scale), - 0, depth_image.rows - 1); - - float Z_est = 0.0f; - constexpr int kSampleRadius = 5; - for (int dv = -kSampleRadius; dv <= kSampleRadius && Z_est <= 0.0f; ++dv) { - for (int du = -kSampleRadius; du <= kSampleRadius && Z_est <= 0.0f; - ++du) { - const int u = std::clamp(u_c_d + du, 0, depth_image.cols - 1); - const int v = std::clamp(v_c_d + dv, 0, depth_image.rows - 1); - const float z = depth_image.at(v, u); - if (z > 0.0f && !std::isnan(z) && !std::isinf(z)) - Z_est = z; - } - } - if (Z_est <= 0.0f) - return; - - // Project the 4 color OBB corners into depth image coordinates. - const float angle_deg = - color_bbox.theta * 180.0f / static_cast(M_PI); - cv::RotatedRect color_rrect( - cv::Point2f(color_bbox.center_x, color_bbox.center_y), - cv::Size2f(color_bbox.size_x, color_bbox.size_y), angle_deg); - cv::Point2f color_corners[4]; - color_rrect.points(color_corners); - - std::vector depth_corners(4); - for (int i = 0; i < 4; ++i) { - depth_corners[i] = - project_color_pixel_to_depth(color_corners[i].x, color_corners[i].y, - Z_est, color_props, depth_props, extr); - } - - // Fit a rotated rect to the 4 projected corners in depth image space. - const cv::RotatedRect depth_rrect = cv::minAreaRect(depth_corners); - const float angle_d = depth_rrect.angle * static_cast(M_PI) / 180.0f; - const float cos_d = std::cos(angle_d); - const float sin_d = std::sin(angle_d); - const float half_w_d = depth_rrect.size.width * 0.5f; - const float half_h_d = depth_rrect.size.height * 0.5f; - const float cx_d = depth_rrect.center.x; - const float cy_d = depth_rrect.center.y; - - // Bounding rectangle of the depth OBB. - const float r_d = std::sqrt(half_w_d * half_w_d + half_h_d * half_h_d); - const int u0_d = std::max(0, static_cast(cx_d - r_d) - 1); - const int u1_d = - std::min(depth_image.cols - 1, static_cast(cx_d + r_d) + 1); - const int v0_d = std::max(0, static_cast(cy_d - r_d) - 1); - const int v1_d = - std::min(depth_image.rows - 1, static_cast(cy_d + r_d) + 1); - - for (int v_d = v0_d; v_d <= v1_d; ++v_d) { - for (int u_d = u0_d; u_d <= u1_d; ++u_d) { - // Test against the depth-image OBB — no matrix multiply. - const float dx = u_d - cx_d; - const float dy = v_d - cy_d; - if (std::abs(cos_d * dx + sin_d * dy) > half_w_d || - std::abs(-sin_d * dx + cos_d * dy) > half_h_d) - continue; - - const float z_d = depth_image.at(v_d, u_d); - if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) - continue; - - // Back-project to 3D depth frame. - out->points.push_back( - {static_cast((u_d - depth_props.intr.cx) * z_d / - depth_props.intr.fx), - static_cast((v_d - depth_props.intr.cy) * z_d / - depth_props.intr.fy), - z_d}); - } - } - - out->width = static_cast(out->points.size()); - out->height = 1; - out->is_dense = false; -} - // Projects a color image pixel to depth image coordinates using the full // intrinsic + extrinsic pipeline. cv::Point2f project_color_pixel_to_depth(const float u_c, @@ -417,99 +56,4 @@ cv::Point2f project_color_pixel_to_depth(const float u_c, return {u_d, v_d}; } -// Corrects the bbox center for lens distortion using the given intrinsics. -BoundingBox undistort_bbox(const BoundingBox& bbox, - const CameraIntrinsics& intr) { - const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, - intr.fy, intr.cy, 0, 0, 1); - const cv::Mat D = (cv::Mat_(5, 1) << intr.dist[0], intr.dist[1], - intr.dist[2], intr.dist[3], intr.dist[4]); - // Build a RotatedRect and extract all 4 corners. - const float angle_deg = bbox.theta * 180.0f / static_cast(M_PI); - cv::RotatedRect rrect(cv::Point2f(bbox.center_x, bbox.center_y), - cv::Size2f(bbox.size_x, bbox.size_y), angle_deg); - cv::Point2f corners[4]; - rrect.points(corners); - - // Undistort all 4 corners. - std::vector pts(corners, corners + 4); - std::vector undistorted; - cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); - - // Refit an OBB to the undistorted corners. - cv::RotatedRect fitted = cv::minAreaRect(undistorted); - - BoundingBox result = bbox; - result.center_x = fitted.center.x; - result.center_y = fitted.center.y; - result.size_x = fitted.size.width; - result.size_y = fitted.size.height; - result.theta = fitted.angle * static_cast(M_PI) / 180.0f; - return result; -} - -// Greedy NMS: sorts by score descending, keeps at most 2 non-overlapping boxes. -std::vector filter_duplicate_detections( - const std::vector>& scored_boxes, - float iou_duplicate_threshold) { - const size_t n = scored_boxes.size(); - if (n == 0) - return {}; - - std::vector> order; - order.reserve(n); - for (size_t i = 0; i < n; ++i) - order.emplace_back(scored_boxes[i].first, i); - std::sort(order.begin(), order.end(), - [](const auto& a, const auto& b) { return a.first > b.first; }); - - std::vector kept; - std::vector suppressed(n, false); - - for (size_t si = 0; si < order.size() && kept.size() < 2; ++si) { - const size_t i = order[si].second; - if (suppressed[i]) - continue; - - kept.push_back(i); - - const BoundingBox& bi = scored_boxes[i].second; - const float ai = bi.size_x * bi.size_y; - const float bx1i = bi.center_x - bi.size_x * 0.5f; - const float by1i = bi.center_y - bi.size_y * 0.5f; - const float bx2i = bi.center_x + bi.size_x * 0.5f; - const float by2i = bi.center_y + bi.size_y * 0.5f; - - for (size_t sj = si + 1; sj < order.size(); ++sj) { - const size_t j = order[sj].second; - if (suppressed[j]) - continue; - - const BoundingBox& bj = scored_boxes[j].second; - const float aj = bj.size_x * bj.size_y; - const float bx1j = bj.center_x - bj.size_x * 0.5f; - const float by1j = bj.center_y - bj.size_y * 0.5f; - const float bx2j = bj.center_x + bj.size_x * 0.5f; - const float by2j = bj.center_y + bj.size_y * 0.5f; - - const float ix1 = std::max(bx1i, bx1j); - const float iy1 = std::max(by1i, by1j); - const float ix2 = std::min(bx2i, bx2j); - const float iy2 = std::min(by2i, by2j); - - if (ix2 <= ix1 || iy2 <= iy1) - continue; - - const float inter = (ix2 - ix1) * (iy2 - iy1); - const float iou = inter / (ai + aj - inter); - const float iom = inter / std::min(ai, aj); - - if (iou > iou_duplicate_threshold || iom > 0.7f) - suppressed[j] = true; - } - } - - return kept; -} - } // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp new file mode 100644 index 0000000..c379e8d --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp @@ -0,0 +1,110 @@ +#include "valve_detection/detection_utils.hpp" + +#include +#include +#include +#include + +namespace valve_detection { + +BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, + 0, intr.fy, intr.cy, + 0, 0, 1); + const cv::Mat D = (cv::Mat_(5, 1) << intr.dist_k1, intr.dist_k2, + intr.dist_p1, intr.dist_p2, + intr.dist_k3); + + const float cos_t = std::cos(bbox.theta); + const float sin_t = std::sin(bbox.theta); + const float hx = bbox.size_x * 0.5f; + const float hy = bbox.size_y * 0.5f; + const cv::Point2f c(bbox.center_x, bbox.center_y); + + // 4 edge midpoints of the OBB + center to anchor the undistorted box. + std::vector pts = { + c, + c + cv::Point2f( hx * cos_t, hx * sin_t), + c + cv::Point2f(-hx * cos_t, -hx * sin_t), + c + cv::Point2f(-hy * sin_t, hy * cos_t), + c + cv::Point2f( hy * sin_t, -hy * cos_t), + }; + + std::vector undistorted; + cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); + + cv::RotatedRect fitted = cv::minAreaRect(undistorted); + + BoundingBox result = bbox; + result.center_x = fitted.center.x; + result.center_y = fitted.center.y; + result.size_x = fitted.size.width; + result.size_y = fitted.size.height; + result.theta = fitted.angle * static_cast(M_PI) / 180.0f; + return result; +} + +std::vector filter_duplicate_detections( + const std::vector>& scored_boxes, + float iou_duplicate_threshold) { + const size_t n = scored_boxes.size(); + if (n == 0) + return {}; + + std::vector> order; + order.reserve(n); + for (size_t i = 0; i < n; ++i) + order.emplace_back(scored_boxes[i].first, i); + std::sort(order.begin(), order.end(), + [](const auto& a, const auto& b) { return a.first > b.first; }); + + std::vector kept; + std::vector suppressed(n, false); + + for (size_t si = 0; si < order.size() && kept.size() < 2; ++si) { + const size_t i = order[si].second; + if (suppressed[i]) + continue; + + kept.push_back(i); + + const BoundingBox& bi = scored_boxes[i].second; + const float ai = bi.size_x * bi.size_y; + const float bx1i = bi.center_x - bi.size_x * 0.5f; + const float by1i = bi.center_y - bi.size_y * 0.5f; + const float bx2i = bi.center_x + bi.size_x * 0.5f; + const float by2i = bi.center_y + bi.size_y * 0.5f; + + for (size_t sj = si + 1; sj < order.size(); ++sj) { + const size_t j = order[sj].second; + if (suppressed[j]) + continue; + + const BoundingBox& bj = scored_boxes[j].second; + const float aj = bj.size_x * bj.size_y; + const float bx1j = bj.center_x - bj.size_x * 0.5f; + const float by1j = bj.center_y - bj.size_y * 0.5f; + const float bx2j = bj.center_x + bj.size_x * 0.5f; + const float by2j = bj.center_y + bj.size_y * 0.5f; + + const float ix1 = std::max(bx1i, bx1j); + const float iy1 = std::max(by1i, by1j); + const float ix2 = std::min(bx2i, bx2j); + const float iy2 = std::min(by2i, by2j); + + if (ix2 <= ix1 || iy2 <= iy1) + continue; + + const float inter = (ix2 - ix1) * (iy2 - iy1); + const float iou = inter / (ai + aj - inter); + const float iom = inter / std::min(ai, aj); + + if (iou > iou_duplicate_threshold || iom > 0.7f) + suppressed[j] = true; + } + } + + return kept; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp b/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp new file mode 100644 index 0000000..f0ff58f --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp @@ -0,0 +1,296 @@ +#include "valve_detection/pcl_extraction.hpp" +#include "valve_detection/depth_image_processing.hpp" + +#include +#include +#include +#include +#include + +namespace valve_detection { + +// Iterates depth pixels, back-projects each with depth intrinsics, applies +// the extrinsic transform, and checks whether the resulting color-frame +// projection falls inside the elliptic annulus defined by color_bbox. +// Output points are in the color camera frame. +void extract_annulus_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + const float annulus_radius_ratio, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Annulus defined in color-image coordinates. + const float cx_c = color_bbox.center_x; + const float cy_c = color_bbox.center_y; + const float outer_rx = color_bbox.size_x * 0.5f; + const float outer_ry = color_bbox.size_y * 0.5f; + + // Require at least 4 pixels in each dimension (2 px half-extent) before + // sampling — smaller boxes contain no usable depth points. + if (outer_rx < 2.0f || outer_ry < 2.0f) + return; + + // Inner ellipse scaled down by annulus_radius_ratio, forming the ring. + const float inner_rx = outer_rx * annulus_radius_ratio; + const float inner_ry = outer_ry * annulus_radius_ratio; + + // The depth and color images have different resolutions and focal lengths. + // Scale color-space bbox bounds into depth-image coordinates to get a + // coarse candidate region, then add a margin to cover the lateral shift + // introduced by the depth-to-color translation. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + constexpr int kMargin = 30; + + const int u0_d = + std::max(0, static_cast((cx_c - outer_rx) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((cx_c + outer_rx) * scale) + kMargin); + const int v0_d = + std::max(0, static_cast((cy_c - outer_ry) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((cy_c + outer_ry) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project depth pixel to 3-D point in depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Apply extrinsic to move the point into the color camera frame, + // where the annulus is defined. + const Eigen::Vector3f P_c = extrinsic.R * P_d + extrinsic.t; + if (P_c.z() <= 0.0f) + continue; + + // Project the color-frame point onto the color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Normalise offset by outer half-extents; keep only points inside + // the outer ellipse (sum of squares ≤ 1). + const float dxo = (u_c - cx_c) / outer_rx; + const float dyo = (v_c - cy_c) / outer_ry; + if (dxo * dxo + dyo * dyo > 1.0f) + continue; // outside outer ellipse + + // Discard points inside the inner ellipse to form the ring. + const float dxi = (u_c - cx_c) / inner_rx; + const float dyi = (v_c - cy_c) / inner_ry; + if (dxi * dxi + dyi * dyi <= 1.0f) + continue; // inside inner ellipse + + // Store the point in the color camera frame. + pcl::PointXYZ p; + p.x = P_c.x(); + p.y = P_c.y(); + p.z = P_c.z(); + out->points.push_back(p); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Extracts depth points whose color-frame projection falls inside the oriented +// bounding box. +void extract_bbox_pcl_aligned(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Pre-compute the OBB axes in color-image space for a fast rotated-rect + // test. + const float cos_t = std::cos(color_bbox.theta); + const float sin_t = std::sin(color_bbox.theta); + const float half_w = color_bbox.size_x * 0.5f; + const float half_h = color_bbox.size_y * 0.5f; + + // Approximate search region in depth-image space. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const float r = std::sqrt(half_w * half_w + half_h * half_h); + constexpr int kMargin = 30; + + const int u0_d = std::max( + 0, static_cast((color_bbox.center_x - r) * scale) - kMargin); + const int u1_d = + std::min(depth_image.cols - 1, + static_cast((color_bbox.center_x + r) * scale) + kMargin); + const int v0_d = std::max( + 0, static_cast((color_bbox.center_y - r) * scale) - kMargin); + const int v1_d = + std::min(depth_image.rows - 1, + static_cast((color_bbox.center_y + r) * scale) + kMargin); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to depth camera frame. + Eigen::Vector3f P_d; + P_d.x() = static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx); + P_d.y() = static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy); + P_d.z() = z_d; + + // Transform to color camera frame. + const Eigen::Vector3f P_c = extrinsic.R * P_d + extrinsic.t; + if (P_c.z() <= 0.0f) + continue; + + // Project onto color image plane. + const float u_c = static_cast( + color_props.intr.fx * P_c.x() / P_c.z() + color_props.intr.cx); + const float v_c = static_cast( + color_props.intr.fy * P_c.y() / P_c.z() + color_props.intr.cy); + + // Rotate into the OBB local frame and test against the + // half-extents. + const float dx = u_c - color_bbox.center_x; + const float dy = v_c - color_bbox.center_y; + const float local_x = cos_t * dx + sin_t * dy; + const float local_y = -sin_t * dx + cos_t * dy; + + if (std::abs(local_x) > half_w || std::abs(local_y) > half_h) + continue; + + out->points.push_back({P_c.x(), P_c.y(), P_c.z()}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +// Projects the 4 corners of the color OBB into depth image space once, fits +// an OBB to those projected corners, then tests depth pixels directly against +// that depth-image OBB — no per-pixel matrix multiply needed. Output points +// are stored in the depth camera frame. +void extract_bbox_pcl_depth(const cv::Mat& depth_image, + const BoundingBox& color_bbox, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extrinsic, + pcl::PointCloud::Ptr& out) { + out->clear(); + + // Sample a representative depth near the bbox center to project the color + // OBB corners into depth image space. The valve is roughly flat, so a + // single Z gives a good approximation for all 4 corners. + const float scale = + (depth_props.intr.fx > 0.0 && color_props.intr.fx > 0.0) + ? static_cast(depth_props.intr.fx / color_props.intr.fx) + : 1.0f; + const int u_c_d = std::clamp(static_cast(color_bbox.center_x * scale), + 0, depth_image.cols - 1); + const int v_c_d = std::clamp(static_cast(color_bbox.center_y * scale), + 0, depth_image.rows - 1); + + float Z_est = 0.0f; + constexpr int kSampleRadius = 5; + for (int dv = -kSampleRadius; dv <= kSampleRadius && Z_est <= 0.0f; ++dv) { + for (int du = -kSampleRadius; du <= kSampleRadius && Z_est <= 0.0f; + ++du) { + const int u = std::clamp(u_c_d + du, 0, depth_image.cols - 1); + const int v = std::clamp(v_c_d + dv, 0, depth_image.rows - 1); + const float z = depth_image.at(v, u); + if (z > 0.0f && !std::isnan(z) && !std::isinf(z)) + Z_est = z; + } + } + if (Z_est <= 0.0f) + return; + + // Project the 4 color OBB corners into depth image coordinates. + const float angle_deg = + color_bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect color_rrect( + cv::Point2f(color_bbox.center_x, color_bbox.center_y), + cv::Size2f(color_bbox.size_x, color_bbox.size_y), angle_deg); + cv::Point2f color_corners[4]; + color_rrect.points(color_corners); + + std::vector depth_corners(4); + for (int i = 0; i < 4; ++i) { + depth_corners[i] = + project_color_pixel_to_depth(color_corners[i].x, color_corners[i].y, + Z_est, color_props, depth_props, extrinsic); + } + + // Fit a rotated rect to the 4 projected corners in depth image space. + const cv::RotatedRect depth_rrect = cv::minAreaRect(depth_corners); + const float angle_d = depth_rrect.angle * static_cast(M_PI) / 180.0f; + const float cos_d = std::cos(angle_d); + const float sin_d = std::sin(angle_d); + const float half_w_d = depth_rrect.size.width * 0.5f; + const float half_h_d = depth_rrect.size.height * 0.5f; + const float cx_d = depth_rrect.center.x; + const float cy_d = depth_rrect.center.y; + + // Bounding rectangle of the depth OBB. + const float r_d = std::sqrt(half_w_d * half_w_d + half_h_d * half_h_d); + const int u0_d = std::max(0, static_cast(cx_d - r_d) - 1); + const int u1_d = + std::min(depth_image.cols - 1, static_cast(cx_d + r_d) + 1); + const int v0_d = std::max(0, static_cast(cy_d - r_d) - 1); + const int v1_d = + std::min(depth_image.rows - 1, static_cast(cy_d + r_d) + 1); + + for (int v_d = v0_d; v_d <= v1_d; ++v_d) { + for (int u_d = u0_d; u_d <= u1_d; ++u_d) { + // Test against the depth-image OBB — no matrix multiply. + const float dx = u_d - cx_d; + const float dy = v_d - cy_d; + if (std::abs(cos_d * dx + sin_d * dy) > half_w_d || + std::abs(-sin_d * dx + cos_d * dy) > half_h_d) + continue; + + const float z_d = depth_image.at(v_d, u_d); + if (z_d <= 0.0f || std::isnan(z_d) || std::isinf(z_d)) + continue; + + // Back-project to 3D depth frame. + out->points.push_back( + {static_cast((u_d - depth_props.intr.cx) * z_d / + depth_props.intr.fx), + static_cast((v_d - depth_props.intr.cy) * z_d / + depth_props.intr.fy), + z_d}); + } + } + + out->width = static_cast(out->points.size()); + out->height = 1; + out->is_dense = false; +} + +} // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp index bfd0b85..ea9bff4 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp @@ -2,6 +2,9 @@ // RANSAC plane fitting, normal/ray-plane intersection, and 3D pose computation. #include "valve_detection/pose_estimator.hpp" +#include +#include + namespace valve_detection { // Initializes YOLO dimensions, annulus ratio, RANSAC parameters, and handle @@ -27,7 +30,6 @@ void PoseEstimator::set_color_image_properties(const ImageProperties& props) { // Stores depth camera intrinsics and image dimensions. void PoseEstimator::set_depth_image_properties(const ImageProperties& props) { depth_image_properties_ = props; - has_depth_props_ = (props.intr.fx > 0.0); } // Stores the depth-to-color extrinsic transform. @@ -37,7 +39,7 @@ void PoseEstimator::set_depth_color_extrinsic(const DepthColorExtrinsic& extr) { // Computes letterbox scale factor and padding from the color image and YOLO // dimensions. -void PoseEstimator::calculate_letterbox_padding() { +void PoseEstimator::compute_letterbox_transform() { int org_image_width = color_image_properties_.dim.width; int org_image_height = color_image_properties_.dim.height; @@ -54,7 +56,7 @@ void PoseEstimator::calculate_letterbox_padding() { // Remaps a bounding box from YOLO letterbox space back to original image // coordinates. -BoundingBox PoseEstimator::transform_bounding_box( +BoundingBox PoseEstimator::letterbox_to_image_coords( const BoundingBox& bbox) const { BoundingBox transformed = bbox; transformed.center_x = @@ -87,19 +89,6 @@ bool PoseEstimator::segment_plane( return !inliers->indices.empty(); } -// Returns the normalized 3D ray direction from the camera origin through the -// bbox center. -Eigen::Vector3f PoseEstimator::get_ray_direction( - const BoundingBox& bbox) const { - const float xc = - (bbox.center_x - static_cast(color_image_properties_.intr.cx)) / - static_cast(color_image_properties_.intr.fx); - const float yc = - (bbox.center_y - static_cast(color_image_properties_.intr.cy)) / - static_cast(color_image_properties_.intr.fy); - return Eigen::Vector3f(xc, yc, 1.0f).normalized(); -} - // Extracts and normalizes the plane normal, flipping it to face the camera. Eigen::Vector3f PoseEstimator::compute_plane_normal( const pcl::ModelCoefficients::Ptr& coefficients, @@ -148,66 +137,6 @@ Eigen::Vector3f PoseEstimator::shift_point_along_normal( return intersection_point + (plane_normal * valve_handle_offset_); } -// Builds a 3×3 rotation matrix from the plane normal (Z) and the projected bbox -// angle (X). -Eigen::Matrix3f PoseEstimator::create_rotation_matrix( - const pcl::ModelCoefficients::Ptr& coefficients, - const Eigen::Vector3f& plane_normal, - float angle) const { - if (!coefficients || coefficients->values.size() < 4) - return Eigen::Matrix3f::Identity(); - - const Eigen::Vector3f z_axis = plane_normal; - const float D = coefficients->values[3]; - const float fx = static_cast(color_image_properties_.intr.fx); - const float fy = static_cast(color_image_properties_.intr.fy); - const float cx = static_cast(color_image_properties_.intr.cx); - const float cy = static_cast(color_image_properties_.intr.cy); - - Eigen::Matrix3f K; - K << fx, 0, cx, 0, fy, cy, 0, 0, 1; - const Eigen::Matrix3f Kinv = K.inverse(); - - // Two image points along the bbox angle through the principal point. - const float len = 50.0f; - const Eigen::Vector3f p1(cx, cy, 1.f); - const Eigen::Vector3f p2(cx + len * std::cos(angle), - cy + len * std::sin(angle), 1.f); - - // Back project points to rays. - const Eigen::Vector3f r1 = (Kinv * p1).normalized(); - const Eigen::Vector3f r2 = (Kinv * p2).normalized(); - - // Compute intersections of rays with the plane. - const float denom1 = z_axis.dot(r1); - const float denom2 = z_axis.dot(r2); - if (std::abs(denom1) < 1e-6f || std::abs(denom2) < 1e-6f) - return Eigen::Matrix3f::Identity(); - - const Eigen::Vector3f X1 = (-D / denom1) * r1; - const Eigen::Vector3f X2 = (-D / denom2) * r2; - - // Compute in-plane direction corresponding to the image line angle. - Eigen::Vector3f x_axis = (X2 - X1).normalized(); - - // Project onto the plane (for numerical stability). - x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); - - // Ensure consistent direction (avoid flipping frame between frames). - if (filter_direction_.dot(x_axis) < 0) - x_axis = -x_axis; - filter_direction_ = x_axis; - - const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); - x_axis = y_axis.cross(z_axis).normalized(); - - Eigen::Matrix3f rot; - rot.col(0) = x_axis; // X_obj: direction of the image line - rot.col(1) = y_axis; // Y_obj: perpendicular in-plane - rot.col(2) = z_axis; // Z_obj: plane normal - return rot; -} - // Builds a 3×3 rotation matrix from the plane normal (Z) and the projected // bbox angle (X), working entirely in the depth camera frame. Color-image // rays are rotated into depth frame before intersecting the plane, and @@ -217,7 +146,7 @@ Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( const Eigen::Vector3f& plane_normal, float angle, const Eigen::Vector3f& ray_origin, - const Eigen::Matrix3f& R_dc) const { + const Eigen::Matrix3f& R_depth_from_color) const { if (!coefficients || coefficients->values.size() < 4) return Eigen::Matrix3f::Identity(); @@ -239,8 +168,8 @@ Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( cy + len * std::sin(angle), 1.f); // Back-project color pixels to rays, then rotate into depth frame. - const Eigen::Vector3f r1 = (R_dc * (Kinv * p1)).normalized(); - const Eigen::Vector3f r2 = (R_dc * (Kinv * p2)).normalized(); + const Eigen::Vector3f r1 = (R_depth_from_color * (Kinv * p1)).normalized(); + const Eigen::Vector3f r2 = (R_depth_from_color * (Kinv * p2)).normalized(); // Intersect each ray (from color origin in depth frame) with the plane. const float denom1 = z_axis.dot(r1); @@ -273,102 +202,94 @@ Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( // Extracts a point cloud from the depth image, fits a plane, and returns the // valve pose. -PoseResult PoseEstimator::compute_pose_from_depth( +DetectionResult PoseEstimator::compute_pose_from_depth( const cv::Mat& depth_image, const BoundingBox& bbox_org, - pcl::PointCloud::Ptr annulus_dbg, - pcl::PointCloud::Ptr plane_dbg, - bool debug_visualize) const { + DetectorMode mode) const { + // Reject bounding boxes whose center projects entirely outside the depth + // image. The scale factor maps from color pixel space to depth pixel space. + const float scale = + (depth_image_properties_.intr.fx > 0.0 && color_image_properties_.intr.fx > 0.0) + ? static_cast(depth_image_properties_.intr.fx / color_image_properties_.intr.fx) + : 1.0f; + const float u_d = bbox_org.center_x * scale; + const float v_d = bbox_org.center_y * scale; + if (u_d < 0.0f || u_d >= static_cast(depth_image.cols) || + v_d < 0.0f || v_d >= static_cast(depth_image.rows)) + return {}; + pcl::PointCloud::Ptr cloud( new pcl::PointCloud); - if (has_depth_props_) { - // Extract points directly in the depth camera frame; only the OBB - // membership test is done in the color frame (a per-pixel projection - // that cannot be avoided without approximating the bbox in depth - // space). - extract_bbox_pcl_depth(depth_image, bbox_org, color_image_properties_, - depth_image_properties_, depth_color_extrinsic_, - cloud); - } else { - extract_annulus_pcl(depth_image, bbox_org, color_image_properties_, - annulus_radius_ratio_, cloud); - } + // Extract points directly in the depth camera frame; only the OBB + // membership test is done in the color frame (a per-pixel projection + // that cannot be avoided without approximating the bbox in depth space). + extract_bbox_pcl_depth(depth_image, bbox_org, color_image_properties_, + depth_image_properties_, depth_color_extrinsic_, + cloud); if (cloud->points.size() < 4) return {}; - if (debug_visualize && annulus_dbg) - *annulus_dbg += *cloud; + DetectionResult result; + + if (mode == DetectorMode::debug) { + result.annulus_cloud = + std::make_shared>(*cloud); + } pcl::ModelCoefficients::Ptr coeff(new pcl::ModelCoefficients); pcl::PointIndices::Ptr inliers(new pcl::PointIndices); if (!segment_plane(cloud, coeff, inliers)) return {}; - if (debug_visualize && plane_dbg) { + if (mode == DetectorMode::debug) { + result.plane_cloud = + std::make_shared>(); for (int idx : inliers->indices) - plane_dbg->points.push_back(cloud->points[idx]); - plane_dbg->width = static_cast(plane_dbg->points.size()); - plane_dbg->height = 1; - plane_dbg->is_dense = false; + result.plane_cloud->points.push_back(cloud->points[idx]); + result.plane_cloud->width = + static_cast(result.plane_cloud->points.size()); + result.plane_cloud->height = 1; + result.plane_cloud->is_dense = false; } - PoseResult out; - - if (has_depth_props_) { - // The color bbox center defines a ray that originates at the color - // camera, not the depth camera. Express that ray in depth frame: - // origin = -R^T * t (color camera origin in depth frame) - // direction = R^T * K_c⁻¹ * [cx, cy, 1]ᵀ (normalized) - const Eigen::Matrix3f R_dc = depth_color_extrinsic_.R.transpose(); - const Eigen::Vector3f O_d = -(R_dc * depth_color_extrinsic_.t); - - const float fx_c = static_cast(color_image_properties_.intr.fx); - const float fy_c = static_cast(color_image_properties_.intr.fy); - const float cx_c = static_cast(color_image_properties_.intr.cx); - const float cy_c = static_cast(color_image_properties_.intr.cy); - const Eigen::Vector3f r_c((bbox_org.center_x - cx_c) / fx_c, - (bbox_org.center_y - cy_c) / fy_c, 1.0f); - const Eigen::Vector3f ray = (R_dc * r_c).normalized(); - - const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); - if (normal.isZero()) - return {}; - - const Eigen::Vector3f pos = - find_ray_plane_intersection(coeff, ray, O_d); - if (pos.isZero()) - return {}; - - const Eigen::Vector3f pos_shifted = - shift_point_along_normal(pos, normal); - const Eigen::Matrix3f rot = create_rotation_matrix_depth( - coeff, normal, bbox_org.theta, O_d, R_dc); - out.result = Pose::from_eigen( - pos_shifted.cast(), - Eigen::Quaternionf(rot).normalized().cast()); - } else { - const Eigen::Vector3f ray = get_ray_direction(bbox_org); - const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); - if (normal.isZero()) - return {}; - - const Eigen::Vector3f pos = find_ray_plane_intersection(coeff, ray); - if (pos.isZero()) - return {}; - - const Eigen::Vector3f pos_shifted = - shift_point_along_normal(pos, normal); - const Eigen::Matrix3f rot = - create_rotation_matrix(coeff, normal, bbox_org.theta); - out.result = Pose::from_eigen( - pos_shifted.cast(), - Eigen::Quaternionf(rot).normalized().cast()); - } + // The color bbox center defines a ray that originates at the color + // camera, not the depth camera. Express that ray in depth frame: + // origin = -R^T * t (color camera origin in depth frame) + // direction = R^T * K_c⁻¹ * [cx, cy, 1]ᵀ (normalized) + const Eigen::Matrix3f R_depth_from_color = + depth_color_extrinsic_.R.transpose(); + const Eigen::Vector3f color_origin_in_depth_frame = + -(R_depth_from_color * depth_color_extrinsic_.t); + + const float fx_c = static_cast(color_image_properties_.intr.fx); + const float fy_c = static_cast(color_image_properties_.intr.fy); + const float cx_c = static_cast(color_image_properties_.intr.cx); + const float cy_c = static_cast(color_image_properties_.intr.cy); + const Eigen::Vector3f r_c((bbox_org.center_x - cx_c) / fx_c, + (bbox_org.center_y - cy_c) / fy_c, 1.0f); + const Eigen::Vector3f ray = (R_depth_from_color * r_c).normalized(); + + const Eigen::Vector3f normal = compute_plane_normal(coeff, ray); + if (normal.isZero()) + return {}; + + const Eigen::Vector3f pos = + find_ray_plane_intersection(coeff, ray, color_origin_in_depth_frame); + if (pos.isZero()) + return {}; - out.result_valid = true; - return out; + const Eigen::Vector3f pos_shifted = + shift_point_along_normal(pos, normal); + const Eigen::Matrix3f rot = create_rotation_matrix_depth( + coeff, normal, bbox_org.theta, color_origin_in_depth_frame, + R_depth_from_color); + result.pose = Pose::from_eigen( + pos_shifted.cast(), + Eigen::Quaternionf(rot).normalized().cast()); + result.valid = true; + return result; } } // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp index 7ecfb58..fc44bfd 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/ros_utils.cpp @@ -38,8 +38,7 @@ geometry_msgs::msg::PoseArray make_pose_array( vortex_msgs::msg::LandmarkArray make_landmark_array( const std::vector& poses, - const std_msgs::msg::Header& header, - int type) { + const std_msgs::msg::Header& header) { vortex_msgs::msg::LandmarkArray out; out.header = header; out.landmarks.reserve(poses.size()); @@ -47,8 +46,7 @@ vortex_msgs::msg::LandmarkArray make_landmark_array( vortex_msgs::msg::Landmark lm; lm.header = header; lm.id = static_cast(i); - lm.type.value = type; - lm.subtype.value = 0; // unset — resolved by valve_subtype_resolver + lm.type.value = vortex_msgs::msg::LandmarkType::VALVE; lm.pose.pose.position.x = poses[i].x; lm.pose.pose.position.y = poses[i].y; lm.pose.pose.position.z = poses[i].z; @@ -66,8 +64,7 @@ cv::Mat decode_depth_to_float( cv::Mat depth_img; // RealSense publishes depth as 16UC1 (uint16 millimetres). // cv_bridge type-casts without scaling, so we must divide by 1000. - if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1 || - depth->encoding == "16UC1") { + if (depth->encoding == sensor_msgs::image_encodings::TYPE_16UC1) { cv_bridge::CvImageConstPtr cv_depth = cv_bridge::toCvShare(depth, "16UC1"); cv_depth->image.convertTo(depth_img, CV_32FC1, 0.001); diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp index a684963..30f41b8 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -2,8 +2,11 @@ // ROS node: subscribes to depth and detections; publishes valve poses // and visualizations. #include "valve_detection_ros/valve_pose_ros.hpp" +#include "valve_detection/depth_image_processing.hpp" #include "valve_detection_ros/ros_utils.hpp" +#include + #include #include @@ -15,128 +18,130 @@ using std::placeholders::_2; ValvePoseNode::ValvePoseNode(const rclcpp::NodeOptions& options) : Node("valve_pose_node", options) { + declare_params(); + init_subscriptions(); +} + +void ValvePoseNode::declare_params() { debug_visualize_ = declare_parameter("debug_visualize"); iou_duplicate_threshold_ = static_cast( declare_parameter("iou_duplicate_threshold")); - const std::string drone = declare_parameter("drone", "moby"); - const std::string frame_base = - declare_parameter("output_frame_id"); - output_frame_id_ = frame_base.empty() ? "" : drone + "/" + frame_base; - landmark_type_ = declare_parameter("landmark_type"); - setup_estimator(); + const std::string frame_base = declare_parameter("output_frame_id"); + if (frame_base.empty()) { + throw std::runtime_error("output_frame_id must not be empty"); + } + const std::string drone = declare_parameter("drone", "nautilus"); + output_frame_id_ = drone + "/" + frame_base; + + // Estimator config params + yolo_w_ = declare_parameter("yolo_img_width"); + yolo_h_ = declare_parameter("yolo_img_height"); + annulus_ratio_ = declare_parameter("annulus_radius_ratio"); + ransac_thresh_ = declare_parameter("plane_ransac_threshold"); + ransac_iters_ = declare_parameter("plane_ransac_max_iterations"); + handle_offset_ = declare_parameter("valve_handle_offset"); + + // TF frame IDs for the depth-to-color extrinsic lookup. + const std::string depth_frame_base = declare_parameter("depth_frame_id"); + const std::string color_frame_base = declare_parameter("color_frame_id"); + depth_frame_id_ = drone + "/" + depth_frame_base; + color_frame_id_ = drone + "/" + color_frame_base; + + // TF2 buffer and listener for extrinsic lookup. + tf_buffer_ = std::make_shared(get_clock()); + tf_listener_ = std::make_shared(*tf_buffer_); + + // Periodically attempt to look up the static transform until it arrives. + extrinsic_timer_ = create_wall_timer( + std::chrono::milliseconds(500), + std::bind(&ValvePoseNode::lookup_extrinsic, this)); + + if (debug_visualize_) { + const auto pose_topic = declare_parameter("debug.valve_poses_pub_topic"); + const auto depth_cloud_topic = declare_parameter("debug.depth_cloud_pub_topic"); + const auto depth_color_topic = declare_parameter("debug.depth_colormap_pub_topic"); + const auto ann_topic = declare_parameter("debug.annulus_pub_topic"); + const auto pln_topic = declare_parameter("debug.plane_pub_topic"); + depth_colormap_vmin_ = static_cast(declare_parameter("debug.depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast(declare_parameter("debug.depth_colormap_value_max")); + + const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + pose_pub_ = create_publisher(pose_topic, qos); + depth_cloud_pub_ = create_publisher(depth_cloud_topic, qos); + depth_colormap_pub_ = create_publisher(depth_color_topic, qos); + annulus_pub_ = create_publisher(ann_topic, qos); + plane_pub_ = create_publisher(pln_topic, qos); + } + const auto lm_topic = declare_parameter("landmarks_pub_topic"); const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - setup_publishers(qos); - setup_subscribers(qos); -} + landmark_pub_ = create_publisher(lm_topic, qos); -void ValvePoseNode::setup_estimator() { - const int yolo_w = declare_parameter("yolo_img_width"); - const int yolo_h = declare_parameter("yolo_img_height"); - const float annulus_ratio = - declare_parameter("annulus_radius_ratio"); - const float ransac_thresh = - declare_parameter("plane_ransac_threshold"); - const int ransac_iters = - declare_parameter("plane_ransac_max_iterations"); - const float handle_off = declare_parameter("valve_handle_offset"); + try_activate_detector(); +} - detector_ = std::make_unique( - yolo_w, yolo_h, annulus_ratio, ransac_thresh, ransac_iters, handle_off); - - depth_color_extrinsic_.R = Eigen::Matrix3f::Identity(); - depth_color_extrinsic_.t.x() = - static_cast(declare_parameter("depth_to_color_tx")); - depth_color_extrinsic_.t.y() = - static_cast(declare_parameter("depth_to_color_ty")); - depth_color_extrinsic_.t.z() = - static_cast(declare_parameter("depth_to_color_tz")); - detector_->set_depth_color_extrinsic(depth_color_extrinsic_); +void ValvePoseNode::lookup_extrinsic() { + if (extrinsic_ready_) + return; - color_props_.intr.fx = declare_parameter("color_fx"); - color_props_.intr.fy = declare_parameter("color_fy"); - color_props_.intr.cx = declare_parameter("color_cx"); - color_props_.intr.cy = declare_parameter("color_cy"); - color_props_.dim.width = declare_parameter("color_image_width"); - color_props_.dim.height = declare_parameter("color_image_height"); - color_props_.intr.dist[0] = declare_parameter("color_d1"); - color_props_.intr.dist[1] = declare_parameter("color_d2"); - color_props_.intr.dist[2] = declare_parameter("color_d3"); - color_props_.intr.dist[3] = declare_parameter("color_d4"); - color_props_.intr.dist[4] = declare_parameter("color_d5"); - detector_->set_color_image_properties(color_props_); - detector_->calculate_letterbox_padding(); - RCLCPP_INFO(get_logger(), - "Color intrinsics loaded from config as fallback " - "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - color_props_.intr.fx, color_props_.intr.fy, - color_props_.intr.cx, color_props_.intr.cy); - - depth_props_.intr.fx = declare_parameter("depth_fx"); - depth_props_.intr.fy = declare_parameter("depth_fy"); - depth_props_.intr.cx = declare_parameter("depth_cx"); - depth_props_.intr.cy = declare_parameter("depth_cy"); - depth_props_.dim.width = declare_parameter("depth_image_width"); - depth_props_.dim.height = declare_parameter("depth_image_height"); - detector_->set_depth_image_properties(depth_props_); - RCLCPP_INFO(get_logger(), - "Depth intrinsics loaded from config as fallback " - "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - depth_props_.intr.fx, depth_props_.intr.fy, - depth_props_.intr.cx, depth_props_.intr.cy); + try { + const auto tf = tf_buffer_->lookupTransform( + color_frame_id_, depth_frame_id_, tf2::TimePointZero); + const auto& t = tf.transform.translation; + const auto& q = tf.transform.rotation; + + const Eigen::Quaternionf quat( + static_cast(q.w), static_cast(q.x), + static_cast(q.y), static_cast(q.z)); + depth_color_extrinsic_.R = quat.toRotationMatrix(); + depth_color_extrinsic_.t = Eigen::Vector3f( + static_cast(t.x), static_cast(t.y), + static_cast(t.z)); + + extrinsic_ready_ = true; + extrinsic_timer_->cancel(); + RCLCPP_INFO(get_logger(), + "Depth-to-color extrinsic loaded from TF (%s -> %s, t=[%.4f, %.4f, %.4f])", + depth_frame_id_.c_str(), color_frame_id_.c_str(), + t.x, t.y, t.z); + + if (detector_) { + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + } else { + try_activate_detector(); + } + } catch (const tf2::TransformException& ex) { + RCLCPP_WARN_THROTTLE(get_logger(), *get_clock(), 5000, + "Waiting for TF: %s -> %s: %s", + depth_frame_id_.c_str(), color_frame_id_.c_str(), + ex.what()); + } } -void ValvePoseNode::setup_publishers(const rclcpp::QoS& qos) { - const auto lm_topic = declare_parameter("landmarks_pub_topic"); - landmark_pub_ = - create_publisher(lm_topic, qos); - - if (!debug_visualize_) +void ValvePoseNode::try_activate_detector() { + if (!color_props_ready_ || !depth_props_ready_ || !extrinsic_ready_ || detector_) return; - const auto pose_topic = - declare_parameter("debug.valve_poses_pub_topic"); - const auto depth_cloud_topic = - declare_parameter("debug.depth_cloud_pub_topic"); - const auto depth_color_topic = - declare_parameter("debug.depth_colormap_pub_topic"); - const auto ann_topic = - declare_parameter("debug.annulus_pub_topic"); - const auto pln_topic = - declare_parameter("debug.plane_pub_topic"); - depth_colormap_vmin_ = static_cast( - declare_parameter("debug.depth_colormap_value_min")); - depth_colormap_vmax_ = static_cast( - declare_parameter("debug.depth_colormap_value_max")); - - pose_pub_ = - create_publisher(pose_topic, qos); - depth_cloud_pub_ = - create_publisher(depth_cloud_topic, qos); - depth_colormap_pub_ = - create_publisher(depth_color_topic, qos); - annulus_pub_ = - create_publisher(ann_topic, qos); - plane_pub_ = - create_publisher(pln_topic, qos); + detector_ = std::make_unique( + yolo_w_, yolo_h_, annulus_ratio_, ransac_thresh_, ransac_iters_, handle_offset_); + detector_->set_color_image_properties(color_props_); + detector_->set_depth_image_properties(depth_props_); + detector_->set_depth_color_extrinsic(depth_color_extrinsic_); + detector_->compute_letterbox_transform(); + RCLCPP_INFO(get_logger(), "Detector initialised"); } -void ValvePoseNode::setup_subscribers(const rclcpp::QoS& qos) { - const auto depth_topic = - declare_parameter("depth_image_sub_topic"); - const auto det_topic = - declare_parameter("detections_sub_topic"); - const auto depth_info_topic = - declare_parameter("depth_image_info_topic"); - const auto color_info_topic = - declare_parameter("color_image_info_topic"); - - const auto info_qos = - rclcpp::QoS(rclcpp::KeepLast(1)) - .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE) - .durability(RMW_QOS_POLICY_DURABILITY_TRANSIENT_LOCAL); +void ValvePoseNode::init_subscriptions() { + const auto depth_topic = declare_parameter("depth_image_sub_topic"); + const auto det_topic = declare_parameter("detections_sub_topic"); + const auto depth_info_topic = declare_parameter("depth_image_info_topic"); + const auto color_info_topic = declare_parameter("color_image_info_topic"); + + const auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)) + .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE); color_cam_info_sub_ = create_subscription( color_info_topic, info_qos, @@ -145,11 +150,16 @@ void ValvePoseNode::setup_subscribers(const rclcpp::QoS& qos) { depth_info_topic, info_qos, std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); - depth_sub_.subscribe(this, depth_topic, qos.get_rmw_qos_profile()); - det_sub_.subscribe(this, det_topic, qos.get_rmw_qos_profile()); + const auto data_qos = rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + depth_sub_.subscribe(this, depth_topic, data_qos.get_rmw_qos_profile()); + det_sub_.subscribe(this, det_topic, data_qos.get_rmw_qos_profile()); + // Slop set to 100 ms to accommodate the detection pipeline latency + // (~66 ms observed between depth and detection timestamps). sync_ = std::make_shared>( - SyncPolicy(10), depth_sub_, det_sub_); + SyncPolicy(20), depth_sub_, det_sub_); + sync_->setMaxIntervalDuration(rclcpp::Duration::from_seconds(0.15)); sync_->registerCallback(std::bind(&ValvePoseNode::sync_cb, this, _1, _2)); } @@ -163,21 +173,23 @@ void ValvePoseNode::color_camera_info_cb( color_props_.intr.cy = msg->k[5]; color_props_.dim.width = static_cast(msg->width); color_props_.dim.height = static_cast(msg->height); - if (msg->d.size() >= 5) { - for (size_t i = 0; i < 5; ++i) - color_props_.intr.dist[i] = msg->d[i]; + color_props_.intr.dist_k1 = msg->d[0]; + color_props_.intr.dist_k2 = msg->d[1]; + color_props_.intr.dist_p1 = msg->d[2]; + color_props_.intr.dist_p2 = msg->d[3]; + color_props_.intr.dist_k3 = msg->d[4]; + } + color_props_ready_ = true; + color_cam_info_sub_.reset(); + RCLCPP_INFO(get_logger(), "Color camera_info received (fx=%.2f fy=%.2f)", + color_props_.intr.fx, color_props_.intr.fy); + if (detector_) { + detector_->set_color_image_properties(color_props_); + detector_->compute_letterbox_transform(); + } else { + try_activate_detector(); } - - detector_->set_color_image_properties(color_props_); - detector_->calculate_letterbox_padding(); - - color_cam_info_sub_.reset(); // one-shot - RCLCPP_INFO(get_logger(), - "Color camera_info received, overriding config fallback " - "(fx=%.2f fy=%.2f cx=%.2f cy=%.2f)", - color_props_.intr.fx, color_props_.intr.fy, - color_props_.intr.cx, color_props_.intr.cy); } // One-shot callback that overrides depth intrinsics from the camera_info topic. @@ -189,12 +201,106 @@ void ValvePoseNode::depth_camera_info_cb( depth_props_.intr.cy = msg->k[5]; depth_props_.dim.width = static_cast(msg->width); depth_props_.dim.height = static_cast(msg->height); - - detector_->set_depth_image_properties(depth_props_); - - depth_cam_info_sub_.reset(); // one-shot + depth_props_ready_ = true; + depth_cam_info_sub_.reset(); RCLCPP_INFO(get_logger(), "Depth camera_info received (fx=%.1f fy=%.1f)", depth_props_.intr.fx, depth_props_.intr.fy); + if (detector_) { + detector_->set_depth_image_properties(depth_props_); + } else { + try_activate_detector(); + } +} + +std::vector> ValvePoseNode::collect_scored_boxes( + const vision_msgs::msg::Detection2DArray& det) const { + std::vector> boxes; + boxes.reserve(det.detections.size()); + for (const auto& d : det.detections) { + const float score = d.results.empty() + ? static_cast(d.bbox.size_x * d.bbox.size_y) + : static_cast(d.results[0].hypothesis.score); + boxes.emplace_back(score, to_bbox(d.bbox)); + } + return boxes; +} + +void ValvePoseNode::publish_empty_results( + const std_msgs::msg::Header& header) const { + if (debug_visualize_ && pose_pub_) + pose_pub_->publish(make_pose_array({}, header)); + landmark_pub_->publish(make_landmark_array({}, header)); +} + +cv::Mat ValvePoseNode::build_depth_colormap( + const sensor_msgs::msg::Image::ConstSharedPtr& depth) const { + cv_bridge::CvImageConstPtr cv_raw = cv_bridge::toCvShare(depth, "16UC1"); + cv::Mat depth_f; + cv_raw->image.convertTo(depth_f, CV_32FC1); + const float scale = 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); + cv::Mat depth_8u; + depth_f.convertTo(depth_8u, CV_8UC1, scale, -depth_colormap_vmin_ * scale); + cv::Mat colormap; + cv::applyColorMap(depth_8u, colormap, cv::COLORMAP_TURBO); + return colormap; +} + +void ValvePoseNode::publish_debug( + const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const std::vector& boxes, + const std::vector& poses, + const pcl::PointCloud& ann_cloud, + const pcl::PointCloud& pln_cloud) const { + std_msgs::msg::Header pcl_header = depth->header; + pcl_header.frame_id = output_frame_id_; + + if (depth_cloud_pub_ && depth_cloud_pub_->get_subscription_count() > 0) { + sensor_msgs::msg::PointCloud2 msg; + pcl::toROSMsg(ann_cloud, msg); + msg.header = pcl_header; + depth_cloud_pub_->publish(msg); + } + + if (depth_colormap_pub_) { + cv::Mat colormap = build_depth_colormap(depth); + for (size_t i = 0; i < boxes.size(); ++i) { + const auto& box = boxes[i]; + + // poses[i] is in the depth camera frame. project_color_pixel_to_depth + // needs Z in the color camera frame, so transform the 3-D point first. + const Eigen::Vector3f P_depth(static_cast(poses[i].x), + static_cast(poses[i].y), + static_cast(poses[i].z)); + const Eigen::Vector3f P_color = + depth_color_extrinsic_.R * P_depth + depth_color_extrinsic_.t; + const float Z_color = P_color.z(); + if (Z_color <= 0.0f) + continue; + + const float angle_deg = box.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), + cv::Size2f(box.size_x, box.size_y), angle_deg); + cv::Point2f corners[4]; + rrect.points(corners); + for (auto& c : corners) + c = project_color_pixel_to_depth(c.x, c.y, Z_color, color_props_, + depth_props_, depth_color_extrinsic_); + for (int j = 0; j < 4; ++j) + cv::line(colormap, corners[j], corners[(j + 1) % 4], cv::Scalar(0, 255, 0), 2); + } + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", colormap).toImageMsg()); + } + + if (annulus_pub_ && plane_pub_) { + sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; + pcl::toROSMsg(ann_cloud, ann_msg); + pcl::toROSMsg(pln_cloud, pln_msg); + ann_msg.header = pcl_header; + pln_msg.header = pcl_header; + annulus_pub_->publish(ann_msg); + plane_pub_->publish(pln_msg); + } } // Main synchronized callback: runs NMS, estimates poses, and publishes all @@ -202,133 +308,57 @@ void ValvePoseNode::depth_camera_info_cb( void ValvePoseNode::sync_cb( const sensor_msgs::msg::Image::ConstSharedPtr& depth, const vision_msgs::msg::Detection2DArray::ConstSharedPtr& det) { - if (!depth || !det) + if (!depth || !det || !detector_) return; - cv::Mat depth_color; - const bool publish_colormap = - debug_visualize_ && depth_colormap_pub_ && - depth_colormap_pub_->get_subscription_count() > 0; - if (publish_colormap) { - cv_bridge::CvImageConstPtr cv_raw = - cv_bridge::toCvShare(depth, "16UC1"); - cv::Mat depth_f; - cv_raw->image.convertTo(depth_f, CV_32FC1); - const float scale = - 255.0f / (depth_colormap_vmax_ - depth_colormap_vmin_); - cv::Mat depth_8u; - depth_f.convertTo(depth_8u, CV_8UC1, scale, - -depth_colormap_vmin_ * scale); - cv::applyColorMap(depth_8u, depth_color, cv::COLORMAP_TURBO); - } + const auto scored_boxes = collect_scored_boxes(*det); + const std::vector kept = + filter_duplicate_detections(scored_boxes, iou_duplicate_threshold_); + + std_msgs::msg::Header pose_header = depth->header; + pose_header.frame_id = output_frame_id_; if (det->detections.empty()) { - // Publish empty arrays to clear stale data from previous detections. - if (debug_visualize_ && pose_pub_) - pose_pub_->publish(make_pose_array({}, depth->header)); - landmark_pub_->publish( - make_landmark_array({}, depth->header, landmark_type_)); - if (publish_colormap) { - depth_colormap_pub_->publish( - *cv_bridge::CvImage(depth->header, "bgr8", depth_color) - .toImageMsg()); - } + publish_empty_results(pose_header); + if (debug_visualize_ && depth_colormap_pub_) + depth_colormap_pub_->publish(*cv_bridge::CvImage( + depth->header, "bgr8", build_depth_colormap(depth)).toImageMsg()); return; } - std::vector> scored_boxes; - scored_boxes.reserve(det->detections.size()); - for (const auto& d : det->detections) { - float score = 0.0f; - if (!d.results.empty()) { - score = static_cast(d.results[0].hypothesis.score); - } else { - score = static_cast(d.bbox.size_x * d.bbox.size_y); - } - scored_boxes.emplace_back(score, to_bbox(d.bbox)); - } - const std::vector kept = - filter_duplicate_detections(scored_boxes, iou_duplicate_threshold_); - const cv::Mat depth_img = decode_depth_to_float(depth); - - pcl::PointCloud::Ptr ann_dbg( - new pcl::PointCloud); - pcl::PointCloud::Ptr pln_dbg( - new pcl::PointCloud); + const DetectorMode mode = debug_visualize_ ? DetectorMode::debug : DetectorMode::standard; std::vector raw_boxes; std::vector poses; + pcl::PointCloud ann_dbg, pln_dbg; for (size_t idx : kept) { const BoundingBox& yolo_box = scored_boxes[idx].second; - BoundingBox org_box = undistort_bbox(yolo_box, color_props_.intr); - raw_boxes.push_back(yolo_box); - - const auto pose_result = detector_->compute_pose_from_depth( - depth_img, org_box, ann_dbg, pln_dbg, true); - if (pose_result.result_valid) - poses.push_back(pose_result.result); - } - - if (debug_visualize_ && depth_cloud_pub_ && - depth_cloud_pub_->get_subscription_count() > 0) { - sensor_msgs::msg::PointCloud2 cloud_msg; - pcl::toROSMsg(*ann_dbg, cloud_msg); - cloud_msg.header = depth->header; - depth_cloud_pub_->publish(cloud_msg); - } - - if (publish_colormap) { - for (size_t i = 0; i < raw_boxes.size(); ++i) { - const auto& box = raw_boxes[i]; - const float Z = (i < poses.size() && poses[i].z > 0.0) - ? static_cast(poses[i].z) - : 0.0f; - - const float angle_deg = - box.theta * 180.0f / static_cast(M_PI); - cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), - cv::Size2f(box.size_x, box.size_y), - angle_deg); - cv::Point2f corners[4]; - rrect.points(corners); - - if (Z > 0.0f) { - for (auto& c : corners) { - c = project_color_pixel_to_depth(c.x, c.y, Z, color_props_, - depth_props_, - depth_color_extrinsic_); - } - } - for (int j = 0; j < 4; ++j) { - cv::line(depth_color, corners[j], corners[(j + 1) % 4], - cv::Scalar(0, 255, 0), 2); - } + // YOLO outputs in 640×640 letterbox space — convert to color image space first, + // then undistort. Both pcl extraction and ray casting use color intrinsics. + const BoundingBox color_box = detector_->letterbox_to_image_coords(yolo_box); + const BoundingBox org_box = undistort_bbox(color_box, color_props_.intr); + + const auto result = detector_->compute_pose_from_depth(depth_img, org_box, mode); + if (!result.valid) + continue; + + // Keep raw_boxes and poses aligned: only push when pose succeeded. + raw_boxes.push_back(color_box); + poses.push_back(result.pose); + if (mode == DetectorMode::debug) { + if (result.annulus_cloud) ann_dbg += *result.annulus_cloud; + if (result.plane_cloud) pln_dbg += *result.plane_cloud; } - depth_colormap_pub_->publish( - *cv_bridge::CvImage(depth->header, "bgr8", depth_color) - .toImageMsg()); - } - - if (debug_visualize_ && annulus_pub_ && plane_pub_) { - sensor_msgs::msg::PointCloud2 ann_msg, pln_msg; - pcl::toROSMsg(*ann_dbg, ann_msg); - pcl::toROSMsg(*pln_dbg, pln_msg); - ann_msg.header = depth->header; - pln_msg.header = depth->header; - annulus_pub_->publish(ann_msg); - plane_pub_->publish(pln_msg); } - std_msgs::msg::Header pose_header = depth->header; - if (!output_frame_id_.empty()) - pose_header.frame_id = output_frame_id_; + if (debug_visualize_) + publish_debug(depth, raw_boxes, poses, ann_dbg, pln_dbg); if (debug_visualize_ && pose_pub_) pose_pub_->publish(make_pose_array(poses, pose_header)); - landmark_pub_->publish( - make_landmark_array(poses, pose_header, landmark_type_)); + landmark_pub_->publish(make_landmark_array(poses, pose_header)); } } // namespace valve_detection From 09079eaa940b4cb7ecd2e93643b2ac9f32cd2db2 Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Sat, 28 Mar 2026 03:30:23 +0100 Subject: [PATCH 3/7] refactor: improve code formatting and readability across multiple files --- .../depth_image_processing.hpp | 24 +-- .../valve_detection/pose_estimator.hpp | 7 +- .../valve_detection_ros/valve_pose_ros.hpp | 12 +- .../valve_detection/src/detection_utils.cpp | 17 +- .../valve_detection/src/pcl_extraction.cpp | 6 +- .../valve_detection/src/pose_estimator.cpp | 21 +-- .../valve_detection/src/valve_pose_ros.cpp | 166 +++++++++++------- 7 files changed, 147 insertions(+), 106 deletions(-) diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp index c51303b..e892f4d 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp @@ -15,23 +15,23 @@ namespace valve_detection { * typically rectified before publication by the camera driver. */ void project_pixel_to_point(int u, - int v, - float depth, - double fx, - double fy, - double cx, - double cy, - pcl::PointXYZ& out); + int v, + float depth, + double fx, + double fy, + double cx, + double cy, + pcl::PointXYZ& out); // Projects a color image pixel to depth image coordinates. // u_c, v_c: pixel coordinates in the color image. // Z: depth of the point in the color camera frame (metres). cv::Point2f project_color_pixel_to_depth(float u_c, - float v_c, - float Z, - const ImageProperties& color_props, - const ImageProperties& depth_props, - const DepthColorExtrinsic& extr); + float v_c, + float Z, + const ImageProperties& color_props, + const ImageProperties& depth_props, + const DepthColorExtrinsic& extr); } // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp index f5c739c..cddc091 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp @@ -44,11 +44,12 @@ class PoseEstimator { void set_depth_image_properties(const ImageProperties& props); void set_depth_color_extrinsic(const DepthColorExtrinsic& extr); - /// @brief Computes letterbox scale and padding from color image dimensions and YOLO input size. - /// Must be called after set_color_image_properties(). + /// @brief Computes letterbox scale and padding from color image dimensions + /// and YOLO input size. Must be called after set_color_image_properties(). void compute_letterbox_transform(); - /// @brief Remaps a bounding box from YOLO letterbox coordinates to original image coordinates. + /// @brief Remaps a bounding box from YOLO letterbox coordinates to original + /// image coordinates. BoundingBox letterbox_to_image_coords(const BoundingBox& bbox) const; DetectionResult compute_pose_from_depth( diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp index 0e9a939..c8b8ea0 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -62,12 +63,11 @@ class ValvePoseNode : public rclcpp::Node { void publish_empty_results(const std_msgs::msg::Header& header) const; cv::Mat build_depth_colormap( const sensor_msgs::msg::Image::ConstSharedPtr& depth) const; - void publish_debug( - const sensor_msgs::msg::Image::ConstSharedPtr& depth, - const std::vector& boxes, - const std::vector& poses, - const pcl::PointCloud& ann_cloud, - const pcl::PointCloud& pln_cloud) const; + void publish_debug(const sensor_msgs::msg::Image::ConstSharedPtr& depth, + const std::vector& boxes, + const std::vector& poses, + const pcl::PointCloud& ann_cloud, + const pcl::PointCloud& pln_cloud) const; using SyncPolicy = message_filters::sync_policies::ApproximateTime< sensor_msgs::msg::Image, diff --git a/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp index c379e8d..9f60854 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp @@ -7,13 +7,12 @@ namespace valve_detection { -BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr) { - const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, - 0, intr.fy, intr.cy, - 0, 0, 1); +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, + intr.fy, intr.cy, 0, 0, 1); const cv::Mat D = (cv::Mat_(5, 1) << intr.dist_k1, intr.dist_k2, - intr.dist_p1, intr.dist_p2, - intr.dist_k3); + intr.dist_p1, intr.dist_p2, intr.dist_k3); const float cos_t = std::cos(bbox.theta); const float sin_t = std::sin(bbox.theta); @@ -24,10 +23,10 @@ BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr // 4 edge midpoints of the OBB + center to anchor the undistorted box. std::vector pts = { c, - c + cv::Point2f( hx * cos_t, hx * sin_t), + c + cv::Point2f(hx * cos_t, hx * sin_t), c + cv::Point2f(-hx * cos_t, -hx * sin_t), - c + cv::Point2f(-hy * sin_t, hy * cos_t), - c + cv::Point2f( hy * sin_t, -hy * cos_t), + c + cv::Point2f(-hy * sin_t, hy * cos_t), + c + cv::Point2f(hy * sin_t, -hy * cos_t), }; std::vector undistorted; diff --git a/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp b/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp index f0ff58f..24e5ea6 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/pcl_extraction.cpp @@ -241,9 +241,9 @@ void extract_bbox_pcl_depth(const cv::Mat& depth_image, std::vector depth_corners(4); for (int i = 0; i < 4; ++i) { - depth_corners[i] = - project_color_pixel_to_depth(color_corners[i].x, color_corners[i].y, - Z_est, color_props, depth_props, extrinsic); + depth_corners[i] = project_color_pixel_to_depth( + color_corners[i].x, color_corners[i].y, Z_est, color_props, + depth_props, extrinsic); } // Fit a rotated rect to the 4 projected corners in depth image space. diff --git a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp index ea9bff4..7457674 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp @@ -207,10 +207,13 @@ DetectionResult PoseEstimator::compute_pose_from_depth( const BoundingBox& bbox_org, DetectorMode mode) const { // Reject bounding boxes whose center projects entirely outside the depth - // image. The scale factor maps from color pixel space to depth pixel space. + // image. The scale factor maps from color pixel space to depth pixel + // space. const float scale = - (depth_image_properties_.intr.fx > 0.0 && color_image_properties_.intr.fx > 0.0) - ? static_cast(depth_image_properties_.intr.fx / color_image_properties_.intr.fx) + (depth_image_properties_.intr.fx > 0.0 && + color_image_properties_.intr.fx > 0.0) + ? static_cast(depth_image_properties_.intr.fx / + color_image_properties_.intr.fx) : 1.0f; const float u_d = bbox_org.center_x * scale; const float v_d = bbox_org.center_y * scale; @@ -244,8 +247,7 @@ DetectionResult PoseEstimator::compute_pose_from_depth( return {}; if (mode == DetectorMode::debug) { - result.plane_cloud = - std::make_shared>(); + result.plane_cloud = std::make_shared>(); for (int idx : inliers->indices) result.plane_cloud->points.push_back(cloud->points[idx]); result.plane_cloud->width = @@ -280,14 +282,13 @@ DetectionResult PoseEstimator::compute_pose_from_depth( if (pos.isZero()) return {}; - const Eigen::Vector3f pos_shifted = - shift_point_along_normal(pos, normal); + const Eigen::Vector3f pos_shifted = shift_point_along_normal(pos, normal); const Eigen::Matrix3f rot = create_rotation_matrix_depth( coeff, normal, bbox_org.theta, color_origin_in_depth_frame, R_depth_from_color); - result.pose = Pose::from_eigen( - pos_shifted.cast(), - Eigen::Quaternionf(rot).normalized().cast()); + result.pose = + Pose::from_eigen(pos_shifted.cast(), + Eigen::Quaternionf(rot).normalized().cast()); result.valid = true; return result; } diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp index 30f41b8..7b99539 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -27,11 +27,13 @@ void ValvePoseNode::declare_params() { iou_duplicate_threshold_ = static_cast( declare_parameter("iou_duplicate_threshold")); - const std::string frame_base = declare_parameter("output_frame_id"); + const std::string frame_base = + declare_parameter("output_frame_id"); if (frame_base.empty()) { throw std::runtime_error("output_frame_id must not be empty"); } - const std::string drone = declare_parameter("drone", "nautilus"); + const std::string drone = + declare_parameter("drone", "nautilus"); output_frame_id_ = drone + "/" + frame_base; // Estimator config params @@ -43,8 +45,10 @@ void ValvePoseNode::declare_params() { handle_offset_ = declare_parameter("valve_handle_offset"); // TF frame IDs for the depth-to-color extrinsic lookup. - const std::string depth_frame_base = declare_parameter("depth_frame_id"); - const std::string color_frame_base = declare_parameter("color_frame_id"); + const std::string depth_frame_base = + declare_parameter("depth_frame_id"); + const std::string color_frame_base = + declare_parameter("color_frame_id"); depth_frame_id_ = drone + "/" + depth_frame_base; color_frame_id_ = drone + "/" + color_frame_base; @@ -53,32 +57,46 @@ void ValvePoseNode::declare_params() { tf_listener_ = std::make_shared(*tf_buffer_); // Periodically attempt to look up the static transform until it arrives. - extrinsic_timer_ = create_wall_timer( - std::chrono::milliseconds(500), - std::bind(&ValvePoseNode::lookup_extrinsic, this)); + extrinsic_timer_ = + create_wall_timer(std::chrono::milliseconds(500), + std::bind(&ValvePoseNode::lookup_extrinsic, this)); if (debug_visualize_) { - const auto pose_topic = declare_parameter("debug.valve_poses_pub_topic"); - const auto depth_cloud_topic = declare_parameter("debug.depth_cloud_pub_topic"); - const auto depth_color_topic = declare_parameter("debug.depth_colormap_pub_topic"); - const auto ann_topic = declare_parameter("debug.annulus_pub_topic"); - const auto pln_topic = declare_parameter("debug.plane_pub_topic"); - depth_colormap_vmin_ = static_cast(declare_parameter("debug.depth_colormap_value_min")); - depth_colormap_vmax_ = static_cast(declare_parameter("debug.depth_colormap_value_max")); - - const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - pose_pub_ = create_publisher(pose_topic, qos); - depth_cloud_pub_ = create_publisher(depth_cloud_topic, qos); - depth_colormap_pub_ = create_publisher(depth_color_topic, qos); - annulus_pub_ = create_publisher(ann_topic, qos); - plane_pub_ = create_publisher(pln_topic, qos); + const auto pose_topic = + declare_parameter("debug.valve_poses_pub_topic"); + const auto depth_cloud_topic = + declare_parameter("debug.depth_cloud_pub_topic"); + const auto depth_color_topic = + declare_parameter("debug.depth_colormap_pub_topic"); + const auto ann_topic = + declare_parameter("debug.annulus_pub_topic"); + const auto pln_topic = + declare_parameter("debug.plane_pub_topic"); + depth_colormap_vmin_ = static_cast( + declare_parameter("debug.depth_colormap_value_min")); + depth_colormap_vmax_ = static_cast( + declare_parameter("debug.depth_colormap_value_max")); + + const auto qos = + rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + pose_pub_ = + create_publisher(pose_topic, qos); + depth_cloud_pub_ = create_publisher( + depth_cloud_topic, qos); + depth_colormap_pub_ = + create_publisher(depth_color_topic, qos); + annulus_pub_ = + create_publisher(ann_topic, qos); + plane_pub_ = + create_publisher(pln_topic, qos); } const auto lm_topic = declare_parameter("landmarks_pub_topic"); const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - landmark_pub_ = create_publisher(lm_topic, qos); + landmark_pub_ = + create_publisher(lm_topic, qos); try_activate_detector(); } @@ -97,16 +115,17 @@ void ValvePoseNode::lookup_extrinsic() { static_cast(q.w), static_cast(q.x), static_cast(q.y), static_cast(q.z)); depth_color_extrinsic_.R = quat.toRotationMatrix(); - depth_color_extrinsic_.t = Eigen::Vector3f( - static_cast(t.x), static_cast(t.y), - static_cast(t.z)); + depth_color_extrinsic_.t = + Eigen::Vector3f(static_cast(t.x), static_cast(t.y), + static_cast(t.z)); extrinsic_ready_ = true; extrinsic_timer_->cancel(); RCLCPP_INFO(get_logger(), - "Depth-to-color extrinsic loaded from TF (%s -> %s, t=[%.4f, %.4f, %.4f])", - depth_frame_id_.c_str(), color_frame_id_.c_str(), - t.x, t.y, t.z); + "Depth-to-color extrinsic loaded from TF (%s -> %s, " + "t=[%.4f, %.4f, %.4f])", + depth_frame_id_.c_str(), color_frame_id_.c_str(), t.x, t.y, + t.z); if (detector_) { detector_->set_depth_color_extrinsic(depth_color_extrinsic_); @@ -114,19 +133,20 @@ void ValvePoseNode::lookup_extrinsic() { try_activate_detector(); } } catch (const tf2::TransformException& ex) { - RCLCPP_WARN_THROTTLE(get_logger(), *get_clock(), 5000, - "Waiting for TF: %s -> %s: %s", - depth_frame_id_.c_str(), color_frame_id_.c_str(), - ex.what()); + RCLCPP_WARN_THROTTLE( + get_logger(), *get_clock(), 5000, "Waiting for TF: %s -> %s: %s", + depth_frame_id_.c_str(), color_frame_id_.c_str(), ex.what()); } } void ValvePoseNode::try_activate_detector() { - if (!color_props_ready_ || !depth_props_ready_ || !extrinsic_ready_ || detector_) + if (!color_props_ready_ || !depth_props_ready_ || !extrinsic_ready_ || + detector_) return; - detector_ = std::make_unique( - yolo_w_, yolo_h_, annulus_ratio_, ransac_thresh_, ransac_iters_, handle_offset_); + detector_ = std::make_unique(yolo_w_, yolo_h_, + annulus_ratio_, ransac_thresh_, + ransac_iters_, handle_offset_); detector_->set_color_image_properties(color_props_); detector_->set_depth_image_properties(depth_props_); detector_->set_depth_color_extrinsic(depth_color_extrinsic_); @@ -135,10 +155,14 @@ void ValvePoseNode::try_activate_detector() { } void ValvePoseNode::init_subscriptions() { - const auto depth_topic = declare_parameter("depth_image_sub_topic"); - const auto det_topic = declare_parameter("detections_sub_topic"); - const auto depth_info_topic = declare_parameter("depth_image_info_topic"); - const auto color_info_topic = declare_parameter("color_image_info_topic"); + const auto depth_topic = + declare_parameter("depth_image_sub_topic"); + const auto det_topic = + declare_parameter("detections_sub_topic"); + const auto depth_info_topic = + declare_parameter("depth_image_info_topic"); + const auto color_info_topic = + declare_parameter("color_image_info_topic"); const auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)) .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE); @@ -150,8 +174,9 @@ void ValvePoseNode::init_subscriptions() { depth_info_topic, info_qos, std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); - const auto data_qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + const auto data_qos = + rclcpp::QoS(rclcpp::KeepLast(10)) + .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); depth_sub_.subscribe(this, depth_topic, data_qos.get_rmw_qos_profile()); det_sub_.subscribe(this, det_topic, data_qos.get_rmw_qos_profile()); @@ -217,9 +242,10 @@ std::vector> ValvePoseNode::collect_scored_boxes( std::vector> boxes; boxes.reserve(det.detections.size()); for (const auto& d : det.detections) { - const float score = d.results.empty() - ? static_cast(d.bbox.size_x * d.bbox.size_y) - : static_cast(d.results[0].hypothesis.score); + const float score = + d.results.empty() + ? static_cast(d.bbox.size_x * d.bbox.size_y) + : static_cast(d.results[0].hypothesis.score); boxes.emplace_back(score, to_bbox(d.bbox)); } return boxes; @@ -266,8 +292,9 @@ void ValvePoseNode::publish_debug( for (size_t i = 0; i < boxes.size(); ++i) { const auto& box = boxes[i]; - // poses[i] is in the depth camera frame. project_color_pixel_to_depth - // needs Z in the color camera frame, so transform the 3-D point first. + // poses[i] is in the depth camera frame. + // project_color_pixel_to_depth needs Z in the color camera frame, + // so transform the 3-D point first. const Eigen::Vector3f P_depth(static_cast(poses[i].x), static_cast(poses[i].y), static_cast(poses[i].z)); @@ -277,16 +304,20 @@ void ValvePoseNode::publish_debug( if (Z_color <= 0.0f) continue; - const float angle_deg = box.theta * 180.0f / static_cast(M_PI); + const float angle_deg = + box.theta * 180.0f / static_cast(M_PI); cv::RotatedRect rrect(cv::Point2f(box.center_x, box.center_y), - cv::Size2f(box.size_x, box.size_y), angle_deg); + cv::Size2f(box.size_x, box.size_y), + angle_deg); cv::Point2f corners[4]; rrect.points(corners); for (auto& c : corners) - c = project_color_pixel_to_depth(c.x, c.y, Z_color, color_props_, - depth_props_, depth_color_extrinsic_); + c = project_color_pixel_to_depth(c.x, c.y, Z_color, + color_props_, depth_props_, + depth_color_extrinsic_); for (int j = 0; j < 4; ++j) - cv::line(colormap, corners[j], corners[(j + 1) % 4], cv::Scalar(0, 255, 0), 2); + cv::line(colormap, corners[j], corners[(j + 1) % 4], + cv::Scalar(0, 255, 0), 2); } depth_colormap_pub_->publish( *cv_bridge::CvImage(depth->header, "bgr8", colormap).toImageMsg()); @@ -321,13 +352,16 @@ void ValvePoseNode::sync_cb( if (det->detections.empty()) { publish_empty_results(pose_header); if (debug_visualize_ && depth_colormap_pub_) - depth_colormap_pub_->publish(*cv_bridge::CvImage( - depth->header, "bgr8", build_depth_colormap(depth)).toImageMsg()); + depth_colormap_pub_->publish( + *cv_bridge::CvImage(depth->header, "bgr8", + build_depth_colormap(depth)) + .toImageMsg()); return; } const cv::Mat depth_img = decode_depth_to_float(depth); - const DetectorMode mode = debug_visualize_ ? DetectorMode::debug : DetectorMode::standard; + const DetectorMode mode = + debug_visualize_ ? DetectorMode::debug : DetectorMode::standard; std::vector raw_boxes; std::vector poses; @@ -335,12 +369,16 @@ void ValvePoseNode::sync_cb( for (size_t idx : kept) { const BoundingBox& yolo_box = scored_boxes[idx].second; - // YOLO outputs in 640×640 letterbox space — convert to color image space first, - // then undistort. Both pcl extraction and ray casting use color intrinsics. - const BoundingBox color_box = detector_->letterbox_to_image_coords(yolo_box); - const BoundingBox org_box = undistort_bbox(color_box, color_props_.intr); - - const auto result = detector_->compute_pose_from_depth(depth_img, org_box, mode); + // YOLO outputs in 640×640 letterbox space — convert to color image + // space first, then undistort. Both pcl extraction and ray casting use + // color intrinsics. + const BoundingBox color_box = + detector_->letterbox_to_image_coords(yolo_box); + const BoundingBox org_box = + undistort_bbox(color_box, color_props_.intr); + + const auto result = + detector_->compute_pose_from_depth(depth_img, org_box, mode); if (!result.valid) continue; @@ -348,8 +386,10 @@ void ValvePoseNode::sync_cb( raw_boxes.push_back(color_box); poses.push_back(result.pose); if (mode == DetectorMode::debug) { - if (result.annulus_cloud) ann_dbg += *result.annulus_cloud; - if (result.plane_cloud) pln_dbg += *result.plane_cloud; + if (result.annulus_cloud) + ann_dbg += *result.annulus_cloud; + if (result.plane_cloud) + pln_dbg += *result.plane_cloud; } } From 45105dd7cf75609ad9a49a0424f07748e5347b96 Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Sat, 28 Mar 2026 22:19:25 +0100 Subject: [PATCH 4/7] feat: add vortex_utils_ros dependency and improve valve detection parameters and scripts --- .../valve_detection/CMakeLists.txt | 3 + .../config/valve_detection_params.yaml | 55 +++++------ .../valve_detection/detection_utils.hpp | 11 --- .../valve_detection/pose_estimator.hpp | 2 +- .../include/valve_detection/types.hpp | 7 -- .../valve_detection_ros/valve_pose_ros.hpp | 2 +- .../valve_detection/package.xml | 1 + .../valve_detection/scripts/theta_monitor.py | 95 +++++++++++++++++++ .../src/depth_image_processing.cpp | 4 +- .../valve_detection/src/detection_utils.cpp | 55 ++++------- .../valve_detection/src/pose_estimator.cpp | 25 ++++- .../valve_detection/src/valve_pose_ros.cpp | 53 ++++------- 12 files changed, 184 insertions(+), 129 deletions(-) create mode 100755 mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py diff --git a/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt index 94069eb..34eff57 100644 --- a/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt +++ b/mission/tacc/visual_inspection/valve_detection/CMakeLists.txt @@ -29,6 +29,8 @@ find_package(tf2_ros REQUIRED) find_package(vortex_msgs REQUIRED) find_package(vortex_utils REQUIRED) +find_package(vortex_utils_ros REQUIRED) + include_directories(include) set(LIB_NAME "${PROJECT_NAME}_component") @@ -69,6 +71,7 @@ ament_target_dependencies(${LIB_NAME} PUBLIC tf2_ros vortex_msgs vortex_utils + vortex_utils_ros ) rclcpp_components_register_node( diff --git a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml index c68676d..981d96c 100644 --- a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml +++ b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml @@ -1,32 +1,37 @@ /**: ros__parameters: - # ── Inputs ──────────────────────────────────────────────────────────────── - # depth_image_sub_topic: "/realsense/D555_409122300281_Depth" + # Inputs depth_image_sub_topic: "/realsense_d555/depth/image" detections_sub_topic: "/obb_detections_output" - - # Depth camera-info topic (intrinsics loaded from this topic). depth_image_info_topic: "/realsense_d555/depth/camera_info" + color_image_info_topic: "/yolo_obb_encoder/internal/resize/camera_info" - # Color camera-info topic (intrinsics and distortion loaded from this topic). - color_image_info_topic: "/yolo_obb_encoder/internal/crop/camera_info" - - # ── Depth-to-color extrinsic ────────────────────────────────────────────── + # Depth-to-color extrinsic # TF frame names (without drone prefix) used to look up the depth-to-color # transform from /tf_static. The node will wait until this transform is # available before initialising the detector. + # NOTE: When launched via valve_intervention.launch.py these are overridden + # from cameras.yaml. Values here are standalone-launch defaults. depth_frame_id: "front_camera_depth_optical" color_frame_id: "front_camera_color_optical" - # ── Outputs ─────────────────────────────────────────────────────────────── - # Primary output consumed by downstream nodes (e.g. EKF, mission planner). + # Outputs landmarks_pub_topic: "/valve_landmarks" - # ── YOLO letterbox reference size ───────────────────────────────────────── + # TF frame used as the frame_id for all published poses and landmarks. + # Must match the depth optical frame in the robot's TF tree. + # NOTE: When launched via valve_intervention.launch.py this is set from + # cameras.yaml depth_frame_id. + output_frame_id: "front_camera_depth_optical" + + # Prepended to all TF frame names (e.g. "nautilus" -> "nautilus/front_camera_depth_optical"). + drone: "nautilus" + + # YOLO letterbox reference size yolo_img_width: 640 yolo_img_height: 640 - # ── Annulus and plane fit ───────────────────────────────────────────────── + # Annulus and plane fit # Fraction of the bounding-box half-size used as the annulus ring radius # (unitless, 0–1). At 0.8 the ring samples the outer 80 % of the box, # avoiding the central hub while staying inside the valve rim. @@ -38,38 +43,26 @@ # at ~0.5–1 m range without accepting gross outliers. plane_ransac_threshold: 0.01 - # Number of RANSAC iterations. 50 gives >99 % probability of finding a - # good plane when the inlier ratio is ≥ 50 %, while keeping CPU load low. - plane_ransac_max_iterations: 50 + # Number of RANSAC iterations. 150 gives >99 % probability of finding a + # good plane even when the inlier ratio drops to ~30 %. + plane_ransac_max_iterations: 150 - # ── Duplicate-detection suppression ────────────────────────────────────── + # Duplicate-detection suppression # Two bounding boxes are considered the same valve when their # axis-aligned IoU exceeds this threshold OR when the smaller box # overlaps the larger by more than 70 % (intersection-over-minimum). # Maximum 2 landmarks are published per frame. iou_duplicate_threshold: 0.5 - # ── Pose offset ─────────────────────────────────────────────────────────── + # Pose offset # Shift the estimated position along the plane normal by this amount (metres). # Value taken from the official valve CAD file: distance between the valve # face frame and the inside length of the handle is 0.065 m. valve_handle_offset: 0.065 - # ── Output frame ────────────────────────────────────────────────────────── - # TF frame used as the frame_id for all published poses and landmarks. - # Must match the depth optical frame in the robot's TF tree. - # Depth is from the realsense bag, pretending to be nautilus/depth_camera/image_depth. - # See: auv_setup/description/nautilus.urdf.xacro - output_frame_id: "front_camera_depth_optical" - - # Prepended to all TF frame names (e.g. "moby" → "moby/front_camera_depth_optical"). - # Override at launch time to switch between robots: drone:=orca - drone: "nautilus" - - # ── Behaviour toggles ───────────────────────────────────────────────────── - debug_visualize: true # enable all debug visualization topics + debug_visualize: true # enable all debug visualization topics - # ── Debug visualization (only active when debug_visualize: true) ────────── + # Debug visualization only active when debug_visualize: true debug: # All detected valve poses in one PoseArray – useful for visualising # multiple simultaneous detections in Foxglove. diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp index 27f0470..f849923 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/detection_utils.hpp @@ -7,17 +7,6 @@ namespace valve_detection { -/** - * @brief Returns an undistorted copy of bbox. - * - * Undistorts the 4 edge midpoints of the OBB and the center point using the - * given intrinsics, then refits an oriented bounding box to the result. Using - * edge midpoints (rather than corners) produces a better fit around circular - * objects such as valves. - */ -BoundingBox undistort_bbox(const BoundingBox& bbox, - const CameraIntrinsics& intr); - /** * @brief Greedy NMS: returns indices of kept detections (max 2). * diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp index cddc091..c8252a3 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/pose_estimator.hpp @@ -96,7 +96,7 @@ class PoseEstimator { double letterbox_pad_x_{0}; double letterbox_pad_y_{0}; - mutable Eigen::Vector3f filter_direction_{1, 0, 0}; + mutable Eigen::Vector3f filter_direction_{Eigen::Vector3f::Zero()}; }; } // namespace valve_detection diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp index 32a0cce..59d3b82 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp @@ -9,8 +9,6 @@ namespace valve_detection { struct CameraIntrinsics { double fx{0}, fy{0}, cx{0}, cy{0}; - // Plumb-bob distortion coefficients [k1, k2, p1, p2, k3]. - double dist_k1{0}, dist_k2{0}, dist_p1{0}, dist_p2{0}, dist_k3{0}; }; struct ImageDimensions { @@ -32,11 +30,6 @@ struct BoundingBox { using Pose = vortex::utils::types::Pose; -struct PoseResult { - Pose result; - bool result_valid{false}; -}; - // Rigid transform from depth camera frame to color camera frame. // Rotation R and translation t satisfy: P_color = R * P_depth + t struct DepthColorExtrinsic { diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp index c8b8ea0..471dc8f 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp @@ -47,7 +47,7 @@ class ValvePoseNode : public rclcpp::Node { void try_activate_detector(); void lookup_extrinsic(); - // Camera info callbacks (one-shot, override config fallback). + // Camera info callbacks (one-shot). void color_camera_info_cb( const sensor_msgs::msg::CameraInfo::SharedPtr msg); void depth_camera_info_cb( diff --git a/mission/tacc/visual_inspection/valve_detection/package.xml b/mission/tacc/visual_inspection/valve_detection/package.xml index 643f6ec..1cd75f4 100644 --- a/mission/tacc/visual_inspection/valve_detection/package.xml +++ b/mission/tacc/visual_inspection/valve_detection/package.xml @@ -23,6 +23,7 @@ tf2_ros vortex_msgs vortex_utils + vortex_utils_ros rosidl_default_runtime diff --git a/mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py b/mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py new file mode 100755 index 0000000..6767ce4 --- /dev/null +++ b/mission/tacc/visual_inspection/valve_detection/scripts/theta_monitor.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +import math + +import rclpy +from geometry_msgs.msg import PoseArray +from rclpy.node import Node +from scipy.spatial.transform import Rotation +from tf2_ros import Buffer, TransformListener +from vision_msgs.msg import Detection2DArray +from vortex_utils_ros.qos_profiles import sensor_data_profile + + +class ThetaMonitor(Node): + def __init__(self): + super().__init__("theta_monitor") + + # TF lookup for depth-to-color extrinsic + self.tf_buffer = Buffer() + self.tf_listener = TransformListener(self.tf_buffer, self) + self.extrinsic_printed = False + self.create_timer(1.0, self.print_extrinsic) + + self.create_subscription( + Detection2DArray, + "/obb_detections_output", + self.obb_cb, + sensor_data_profile(10), + ) + self.create_subscription( + PoseArray, "/valve_poses", self.pose_cb, sensor_data_profile(10) + ) + + def print_extrinsic(self): + if self.extrinsic_printed: + return + try: + tf = self.tf_buffer.lookup_transform( + "nautilus/front_camera_color_optical", + "nautilus/front_camera_depth_optical", + rclpy.time.Time(), + ) + q = tf.transform.rotation + t = tf.transform.translation + r = Rotation.from_quat([q.x, q.y, q.z, q.w]) + rot = r.as_matrix() + roll, pitch, yaw = r.as_euler("xyz", degrees=True) + + self.get_logger().info( + f"\n{'=' * 60}\n" + f"DEPTH-TO-COLOR EXTRINSIC (depth -> color)\n" + f" Translation: [{t.x:.6f}, {t.y:.6f}, {t.z:.6f}]\n" + f" RPY (xyz): roll={roll:.4f}° pitch={pitch:.4f}° yaw={yaw:.4f}°\n" + f" Rotation matrix:\n" + f" [{rot[0, 0]:9.6f} {rot[0, 1]:9.6f} {rot[0, 2]:9.6f}]\n" + f" [{rot[1, 0]:9.6f} {rot[1, 1]:9.6f} {rot[1, 2]:9.6f}]\n" + f" [{rot[2, 0]:9.6f} {rot[2, 1]:9.6f} {rot[2, 2]:9.6f}]\n" + f"\n" + f" R_depth_from_color (R^T, used in pose_estimator):\n" + f" [{rot[0, 0]:9.6f} {rot[1, 0]:9.6f} {rot[2, 0]:9.6f}]\n" + f" [{rot[0, 1]:9.6f} {rot[1, 1]:9.6f} {rot[2, 1]:9.6f}]\n" + f" [{rot[0, 2]:9.6f} {rot[1, 2]:9.6f} {rot[2, 2]:9.6f}]\n" + f"{'=' * 60}" + ) + self.extrinsic_printed = True + except Exception as e: + self.get_logger().warn(f"Waiting for TF: {e}") + + def obb_cb(self, msg: Detection2DArray): + for i, det in enumerate(msg.detections): + theta_rad = det.bbox.center.theta + theta_deg = math.degrees(theta_rad) + self.get_logger().info( + f"[OBB det {i}] theta: {theta_rad:.4f} rad ({theta_deg:.2f} deg)" + ) + + def pose_cb(self, msg: PoseArray): + for i, pose in enumerate(msg.poses): + q = pose.orientation + r = Rotation.from_quat([q.x, q.y, q.z, q.w]) + roll, pitch, yaw = r.as_euler("xyz", degrees=True) + self.get_logger().info( + f"[POSE {i}] " + f"roll: {roll:7.2f}° pitch: {pitch:7.2f}° yaw: {yaw:7.2f}° " + f"pos: ({pose.position.x:.3f}, {pose.position.y:.3f}, {pose.position.z:.3f})" + ) + + +def main(): + rclpy.init() + rclpy.spin(ThetaMonitor()) + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp index 3b9ec2a..77f3a4a 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp @@ -33,14 +33,14 @@ cv::Point2f project_color_pixel_to_depth(const float u_c, const ImageProperties& color_props, const ImageProperties& depth_props, const DepthColorExtrinsic& extr) { - // Back-project color pixel → 3-D point in color camera frame. + // Back-project color pixel -> 3-D point in color camera frame. const float Xc = (u_c - static_cast(color_props.intr.cx)) * Z / static_cast(color_props.intr.fx); const float Yc = (v_c - static_cast(color_props.intr.cy)) * Z / static_cast(color_props.intr.fy); const Eigen::Vector3f Pc(Xc, Yc, Z); - // Transform to depth camera frame: P_depth = R^T * (P_color - t) + // Transform to depth camera frame: P_depth = R^T * (P_color - t) const Eigen::Vector3f Pd = extr.R.transpose() * (Pc - extr.t); if (Pd.z() <= 0.0f) diff --git a/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp index 9f60854..c2ed083 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/detection_utils.cpp @@ -2,47 +2,20 @@ #include #include -#include -#include namespace valve_detection { -BoundingBox undistort_bbox(const BoundingBox& bbox, - const CameraIntrinsics& intr) { - const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, - intr.fy, intr.cy, 0, 0, 1); - const cv::Mat D = (cv::Mat_(5, 1) << intr.dist_k1, intr.dist_k2, - intr.dist_p1, intr.dist_p2, intr.dist_k3); - - const float cos_t = std::cos(bbox.theta); - const float sin_t = std::sin(bbox.theta); - const float hx = bbox.size_x * 0.5f; - const float hy = bbox.size_y * 0.5f; - const cv::Point2f c(bbox.center_x, bbox.center_y); - - // 4 edge midpoints of the OBB + center to anchor the undistorted box. - std::vector pts = { - c, - c + cv::Point2f(hx * cos_t, hx * sin_t), - c + cv::Point2f(-hx * cos_t, -hx * sin_t), - c + cv::Point2f(-hy * sin_t, hy * cos_t), - c + cv::Point2f(hy * sin_t, -hy * cos_t), - }; - - std::vector undistorted; - cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); - - cv::RotatedRect fitted = cv::minAreaRect(undistorted); - - BoundingBox result = bbox; - result.center_x = fitted.center.x; - result.center_y = fitted.center.y; - result.size_x = fitted.size.width; - result.size_y = fitted.size.height; - result.theta = fitted.angle * static_cast(M_PI) / 180.0f; - return result; -} - +// Greedy NMS (non-maximum suppression) that keeps at most 2 detections. +// +// Boxes are processed in descending confidence order. For each kept box, +// all remaining boxes that overlap it above the IoU threshold OR whose +// intersection-over-minimum (IoM) exceeds 70 % are suppressed. IoM +// catches the case where a small spurious detection sits inside a +// larger, higher-confidence one. +// +// Uses axis-aligned bounding rectangles for the overlap test (ignoring +// OBB rotation) since the YOLO OBB angles are small and the cost of +// exact OBB intersection is not justified here. std::vector filter_duplicate_detections( const std::vector>& scored_boxes, float iou_duplicate_threshold) { @@ -50,6 +23,7 @@ std::vector filter_duplicate_detections( if (n == 0) return {}; + // Sort indices by descending confidence score. std::vector> order; order.reserve(n); for (size_t i = 0; i < n; ++i) @@ -67,6 +41,7 @@ std::vector filter_duplicate_detections( kept.push_back(i); + // Compute AABB of the kept box for overlap tests. const BoundingBox& bi = scored_boxes[i].second; const float ai = bi.size_x * bi.size_y; const float bx1i = bi.center_x - bi.size_x * 0.5f; @@ -74,6 +49,7 @@ std::vector filter_duplicate_detections( const float bx2i = bi.center_x + bi.size_x * 0.5f; const float by2i = bi.center_y + bi.size_y * 0.5f; + // Suppress lower-confidence boxes that overlap too much. for (size_t sj = si + 1; sj < order.size(); ++sj) { const size_t j = order[sj].second; if (suppressed[j]) @@ -86,13 +62,14 @@ std::vector filter_duplicate_detections( const float bx2j = bj.center_x + bj.size_x * 0.5f; const float by2j = bj.center_y + bj.size_y * 0.5f; + // AABB intersection rectangle. const float ix1 = std::max(bx1i, bx1j); const float iy1 = std::max(by1i, by1j); const float ix2 = std::min(bx2i, bx2j); const float iy2 = std::min(by2i, by2j); if (ix2 <= ix1 || iy2 <= iy1) - continue; + continue; // no overlap const float inter = (ix2 - ix1) * (iy2 - iy1); const float iou = inter / (ai + aj - inter); diff --git a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp index 7457674..d8c4596 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/pose_estimator.cpp @@ -107,7 +107,7 @@ Eigen::Vector3f PoseEstimator::compute_plane_normal( // Finds the 3D point where a ray intersects the fitted plane. // ray_origin defaults to the camera origin (zero), which is correct for the -// color-frame path. Pass the actual origin when the ray does not start at +// color-frame path. Pass the actual origin when the ray does not start at // the frame origin (e.g. the color camera origin expressed in depth frame). Eigen::Vector3f PoseEstimator::find_ray_plane_intersection( const pcl::ModelCoefficients::Ptr& coefficients, @@ -141,6 +141,11 @@ Eigen::Vector3f PoseEstimator::shift_point_along_normal( // bbox angle (X), working entirely in the depth camera frame. Color-image // rays are rotated into depth frame before intersecting the plane, and // ray_origin is the color camera origin expressed in depth frame. +// +// Axis convention (valve body frame): +// X = handle dir -> roll = valve face tilt +// Y = cross product -> pitch = valve face tilt +// Z = plane normal -> yaw = handle rotation around normal Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( const pcl::ModelCoefficients::Ptr& coefficients, const Eigen::Vector3f& plane_normal, @@ -186,8 +191,12 @@ Eigen::Matrix3f PoseEstimator::create_rotation_matrix_depth( x_axis = (x_axis - x_axis.dot(z_axis) * z_axis).normalized(); // Ensure consistent direction (avoid flipping between frames). - if (filter_direction_.dot(x_axis) < 0) - x_axis = -x_axis; + // On the first valid computation filter_direction_ is zero; just accept + // whatever direction the geometry gives us. + if (!filter_direction_.isZero()) { + if (filter_direction_.dot(x_axis) < 0) + x_axis = -x_axis; + } filter_direction_ = x_axis; const Eigen::Vector3f y_axis = z_axis.cross(x_axis).normalized(); @@ -283,8 +292,16 @@ DetectionResult PoseEstimator::compute_pose_from_depth( return {}; const Eigen::Vector3f pos_shifted = shift_point_along_normal(pos, normal); + + // The OBB theta gives the direction of size_x. The valve handle + // aligns with the longer side of the bounding box, so rotate by + // 90° when size_y > size_x. + float handle_angle = bbox_org.theta; + if (bbox_org.size_y > bbox_org.size_x) + handle_angle += static_cast(M_PI / 2.0); + const Eigen::Matrix3f rot = create_rotation_matrix_depth( - coeff, normal, bbox_org.theta, color_origin_in_depth_frame, + coeff, normal, handle_angle, color_origin_in_depth_frame, R_depth_from_color); result.pose = Pose::from_eigen(pos_shifted.cast(), diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp index 7b99539..7a2e8d6 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -11,6 +11,8 @@ #include +#include + namespace valve_detection { using std::placeholders::_1; @@ -77,26 +79,23 @@ void ValvePoseNode::declare_params() { depth_colormap_vmax_ = static_cast( declare_parameter("debug.depth_colormap_value_max")); - const auto qos = - rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - pose_pub_ = - create_publisher(pose_topic, qos); + const auto sensor_qos = + vortex::utils::qos_profiles::reliable_profile(10); + pose_pub_ = create_publisher(pose_topic, + sensor_qos); depth_cloud_pub_ = create_publisher( - depth_cloud_topic, qos); - depth_colormap_pub_ = - create_publisher(depth_color_topic, qos); - annulus_pub_ = - create_publisher(ann_topic, qos); - plane_pub_ = - create_publisher(pln_topic, qos); + depth_cloud_topic, sensor_qos); + depth_colormap_pub_ = create_publisher( + depth_color_topic, sensor_qos); + annulus_pub_ = create_publisher( + ann_topic, sensor_qos); + plane_pub_ = create_publisher( + pln_topic, sensor_qos); } const auto lm_topic = declare_parameter("landmarks_pub_topic"); - const auto qos = rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); - landmark_pub_ = - create_publisher(lm_topic, qos); + landmark_pub_ = create_publisher( + lm_topic, vortex::utils::qos_profiles::reliable_profile(10)); try_activate_detector(); } @@ -164,8 +163,7 @@ void ValvePoseNode::init_subscriptions() { const auto color_info_topic = declare_parameter("color_image_info_topic"); - const auto info_qos = rclcpp::QoS(rclcpp::KeepLast(1)) - .reliability(RMW_QOS_POLICY_RELIABILITY_RELIABLE); + const auto info_qos = vortex::utils::qos_profiles::sensor_data_profile(1); color_cam_info_sub_ = create_subscription( color_info_topic, info_qos, @@ -174,9 +172,7 @@ void ValvePoseNode::init_subscriptions() { depth_info_topic, info_qos, std::bind(&ValvePoseNode::depth_camera_info_cb, this, _1)); - const auto data_qos = - rclcpp::QoS(rclcpp::KeepLast(10)) - .reliability(RMW_QOS_POLICY_RELIABILITY_BEST_EFFORT); + const auto data_qos = vortex::utils::qos_profiles::sensor_data_profile(10); depth_sub_.subscribe(this, depth_topic, data_qos.get_rmw_qos_profile()); det_sub_.subscribe(this, det_topic, data_qos.get_rmw_qos_profile()); @@ -198,13 +194,6 @@ void ValvePoseNode::color_camera_info_cb( color_props_.intr.cy = msg->k[5]; color_props_.dim.width = static_cast(msg->width); color_props_.dim.height = static_cast(msg->height); - if (msg->d.size() >= 5) { - color_props_.intr.dist_k1 = msg->d[0]; - color_props_.intr.dist_k2 = msg->d[1]; - color_props_.intr.dist_p1 = msg->d[2]; - color_props_.intr.dist_p2 = msg->d[3]; - color_props_.intr.dist_k3 = msg->d[4]; - } color_props_ready_ = true; color_cam_info_sub_.reset(); RCLCPP_INFO(get_logger(), "Color camera_info received (fx=%.2f fy=%.2f)", @@ -370,15 +359,13 @@ void ValvePoseNode::sync_cb( for (size_t idx : kept) { const BoundingBox& yolo_box = scored_boxes[idx].second; // YOLO outputs in 640×640 letterbox space — convert to color image - // space first, then undistort. Both pcl extraction and ray casting use - // color intrinsics. + // space. The input image is already undistorted upstream, so no + // lens distortion correction is needed here. const BoundingBox color_box = detector_->letterbox_to_image_coords(yolo_box); - const BoundingBox org_box = - undistort_bbox(color_box, color_props_.intr); const auto result = - detector_->compute_pose_from_depth(depth_img, org_box, mode); + detector_->compute_pose_from_depth(depth_img, color_box, mode); if (!result.valid) continue; From c19cd920c9a73e5f3c9aa08f658d9985ebda70fe Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Sat, 28 Mar 2026 22:24:54 +0100 Subject: [PATCH 5/7] fix: update valve_handle_offset to correct value based on CAD file --- .../valve_detection/config/valve_detection_params.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml index 981d96c..f773420 100644 --- a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml +++ b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml @@ -57,8 +57,8 @@ # Pose offset # Shift the estimated position along the plane normal by this amount (metres). # Value taken from the official valve CAD file: distance between the valve - # face frame and the inside length of the handle is 0.065 m. - valve_handle_offset: 0.065 + # face frame and the inside length of the handle is 0.02 m. + valve_handle_offset: 0.02 debug_visualize: true # enable all debug visualization topics From a5faa683334d3b709fa644d3e8d324947e42f460 Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Thu, 2 Apr 2026 02:25:07 +0200 Subject: [PATCH 6/7] feat: enhance valve detection with undistortion capabilities and updated launch parameters --- .../config/valve_detection_params.yaml | 10 +- .../depth_image_processing.hpp | 8 ++ .../include/valve_detection/types.hpp | 2 + .../valve_detection_ros/valve_pose_ros.hpp | 1 + .../launch/valve_detection.launch.py | 120 ++++++++++++++---- .../src/depth_image_processing.cpp | 58 +++++++++ .../valve_detection/src/valve_pose_ros.cpp | 10 +- 7 files changed, 178 insertions(+), 31 deletions(-) diff --git a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml index f773420..3833a88 100644 --- a/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml +++ b/mission/tacc/visual_inspection/valve_detection/config/valve_detection_params.yaml @@ -1,9 +1,9 @@ /**: ros__parameters: # Inputs - depth_image_sub_topic: "/realsense_d555/depth/image" + depth_image_sub_topic: "/camera/camera/depth/image_rect_raw" detections_sub_topic: "/obb_detections_output" - depth_image_info_topic: "/realsense_d555/depth/camera_info" + depth_image_info_topic: "/camera/camera/depth/camera_info" color_image_info_topic: "/yolo_obb_encoder/internal/resize/camera_info" # Depth-to-color extrinsic @@ -31,6 +31,12 @@ yolo_img_width: 640 yolo_img_height: 640 + # Undistort bounding-box detections using the color camera distortion + # coefficients from camera_info. Samples edge midpoints and center, + # undistorts them, and refits the OBB — gives a tighter fit than + # corner-based undistortion for round objects like valves. + undistort_detections: false + # Annulus and plane fit # Fraction of the bounding-box half-size used as the annulus ring radius # (unitless, 0–1). At 0.8 the ring samples the outer 80 % of the box, diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp index e892f4d..d1f8b54 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "valve_detection/types.hpp" @@ -23,6 +24,13 @@ void project_pixel_to_point(int u, double cy, pcl::PointXYZ& out); +/// @brief Undistorts a bounding box by undistorting the edge midpoints and +/// center, then refitting an oriented bounding box. +/// TODO: Had problems with this, when it undistorted the orientation would be off by a constant offset +/// Workaround is to just disable this and undistort the whole image instead (done upstream in perception_setup) +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr); + // Projects a color image pixel to depth image coordinates. // u_c, v_c: pixel coordinates in the color image. // Z: depth of the point in the color camera frame (metres). diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp index 59d3b82..4cc8e1f 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/types.hpp @@ -2,6 +2,7 @@ #define VALVE_DETECTION__TYPES_HPP_ #include +#include #include #include @@ -9,6 +10,7 @@ namespace valve_detection { struct CameraIntrinsics { double fx{0}, fy{0}, cx{0}, cy{0}; + std::array dist{0, 0, 0, 0, 0}; // k1, k2, p1, p2, k3 }; struct ImageDimensions { diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp index 471dc8f..4b69e9a 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection_ros/valve_pose_ros.hpp @@ -82,6 +82,7 @@ class ValvePoseNode : public rclcpp::Node { int yolo_w_, yolo_h_; float annulus_ratio_, ransac_thresh_, handle_offset_; int ransac_iters_; + bool undistort_detections_{false}; // camera data (owned by node, passed to estimator and depth functions) ImageProperties color_props_{}; diff --git a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py index bc8c08a..e4bee1d 100644 --- a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py +++ b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py @@ -15,31 +15,99 @@ def generate_launch_description(): 'valve_detection_params.yaml', ) - drone_arg = DeclareLaunchArgument( - 'drone', - default_value='nautilus', - description='Robot name, prepended to TF frame IDs (e.g. moby, orca)', - ) - - container = ComposableNodeContainer( - name='valve_detection_container', - namespace='', - package='rclcpp_components', - executable='component_container_mt', - composable_node_descriptions=[ - ComposableNode( - package='valve_detection', - plugin='valve_detection::ValvePoseNode', - name='valve_pose_node', - parameters=[ - cfg, - { - 'drone': LaunchConfiguration('drone'), - }, + return LaunchDescription( + [ + # Input topics + DeclareLaunchArgument( + 'depth_image_sub_topic', + default_value='/camera/camera/depth/image_rect_raw', + description='Depth image topic', + ), + DeclareLaunchArgument( + 'detections_sub_topic', + default_value='/obb_detections_output', + description='YOLO detections topic', + ), + DeclareLaunchArgument( + 'depth_image_info_topic', + default_value='/camera/camera/depth/camera_info', + description='Depth camera info topic', + ), + DeclareLaunchArgument( + 'color_image_info_topic', + default_value='/yolo_obb_encoder/internal/resize/camera_info', + description='Color camera info topic (from DNN encoder)', + ), + # Frame IDs + DeclareLaunchArgument( + 'depth_frame_id', + default_value='front_camera_depth_optical', + description='Depth camera optical frame ID (without drone prefix)', + ), + DeclareLaunchArgument( + 'color_frame_id', + default_value='front_camera_color_optical', + description='Color camera optical frame ID (without drone prefix)', + ), + # Output topic + DeclareLaunchArgument( + 'landmarks_pub_topic', + default_value='/valve_landmarks', + description='Output valve landmarks topic', + ), + DeclareLaunchArgument( + 'output_frame_id', + default_value='front_camera_depth_optical', + description='Output frame ID for published poses (without drone prefix)', + ), + # Drone identifier + DeclareLaunchArgument( + 'drone', + default_value='nautilus', + description='Robot name, prepended to TF frame IDs (e.g. moby, orca)', + ), + # Detection processing + DeclareLaunchArgument( + 'undistort_detections', + default_value='false', + description='Undistort bounding-box detections using color camera distortion', + ), + # Debug visualization + DeclareLaunchArgument( + 'debug_visualize', + default_value='true', + description='Enable all debug visualization topics', + ), + # Node container with parameters from launch arguments + ComposableNodeContainer( + name='valve_detection_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + ComposableNode( + package='valve_detection', + plugin='valve_detection::ValvePoseNode', + name='valve_pose_node', + parameters=[ + cfg, + { + 'depth_image_sub_topic': LaunchConfiguration('depth_image_sub_topic'), + 'detections_sub_topic': LaunchConfiguration('detections_sub_topic'), + 'depth_image_info_topic': LaunchConfiguration('depth_image_info_topic'), + 'color_image_info_topic': LaunchConfiguration('color_image_info_topic'), + 'depth_frame_id': LaunchConfiguration('depth_frame_id'), + 'color_frame_id': LaunchConfiguration('color_frame_id'), + 'landmarks_pub_topic': LaunchConfiguration('landmarks_pub_topic'), + 'output_frame_id': LaunchConfiguration('output_frame_id'), + 'drone': LaunchConfiguration('drone'), + 'undistort_detections': LaunchConfiguration('undistort_detections'), + 'debug_visualize': LaunchConfiguration('debug_visualize'), + }, + ], + ) ], - ) - ], - output='screen', + output='screen', + ), + ] ) - - return LaunchDescription([drone_arg, container]) diff --git a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp index 77f3a4a..d9e147a 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp @@ -2,10 +2,68 @@ // Depth-to-3D back-projection and color-to-depth pixel projection. #include "valve_detection/depth_image_processing.hpp" #include +#include +#include +#include #include +#include namespace valve_detection { +/// TODO: Had problems with this, when it undistorted the orientation would be off by a constant offset +/// Workaround is to just disable this and undistort the whole image instead (done upstream in perception_setup) +BoundingBox undistort_bbox(const BoundingBox& bbox, + const CameraIntrinsics& intr) { + const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, + intr.fy, intr.cy, 0, 0, 1); + const cv::Mat D = (cv::Mat_(5, 1) << intr.dist[0], intr.dist[1], + intr.dist[2], intr.dist[3], intr.dist[4]); + + const float angle_deg = bbox.theta * 180.0f / static_cast(M_PI); + cv::RotatedRect rrect(cv::Point2f(bbox.center_x, bbox.center_y), + cv::Size2f(bbox.size_x, bbox.size_y), angle_deg); + + // Extract 4 corners to compute edge midpoints. + cv::Point2f corners[4]; + rrect.points(corners); + + // 5 sample points: 4 edge midpoints + bbox center. + std::vector pts; + pts.reserve(5); + for (int i = 0; i < 4; ++i) + pts.emplace_back((corners[i] + corners[(i + 1) % 4]) * 0.5f); + pts.emplace_back(bbox.center_x, bbox.center_y); + + std::vector undistorted; + cv::undistortPoints(pts, undistorted, K, D, cv::noArray(), K); + + // Anchor result to the undistorted center (last point). + const cv::Point2f undist_center = undistorted.back(); + + // Refit an OBB to the 4 undistorted edge midpoints. + std::vector midpoints(undistorted.begin(), + undistorted.begin() + 4); + cv::RotatedRect fitted = cv::minAreaRect(midpoints); + + // minAreaRect returns angles in [-90, 0) and can flip which side it treats + // as the major axis, producing an angle ~90° off from the original for + // nearly-square boxes (e.g. circular valves). Normalise to stay within 90° + // of the input angle and swap width/height accordingly. + float fitted_angle_deg = fitted.angle; + float diff = fitted_angle_deg - angle_deg; + while (diff > 90.0f) { diff -= 180.0f; fitted_angle_deg -= 180.0f; } + while (diff < -90.0f) { diff += 180.0f; fitted_angle_deg += 180.0f; } + const bool swapped = std::abs(fitted_angle_deg - fitted.angle) > 45.0f; + + BoundingBox result = bbox; + result.center_x = undist_center.x; + result.center_y = undist_center.y; + result.size_x = swapped ? fitted.size.height : fitted.size.width; + result.size_y = swapped ? fitted.size.width : fitted.size.height; + result.theta = fitted_angle_deg * static_cast(M_PI) / 180.0f; + return result; +} + // Back-projects a depth pixel (u, v, depth) to a 3D point using camera // intrinsics. void project_pixel_to_point(const int u, diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp index 7a2e8d6..ca17ed0 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -45,6 +45,7 @@ void ValvePoseNode::declare_params() { ransac_thresh_ = declare_parameter("plane_ransac_threshold"); ransac_iters_ = declare_parameter("plane_ransac_max_iterations"); handle_offset_ = declare_parameter("valve_handle_offset"); + undistort_detections_ = declare_parameter("undistort_detections"); // TF frame IDs for the depth-to-color extrinsic lookup. const std::string depth_frame_base = @@ -192,6 +193,8 @@ void ValvePoseNode::color_camera_info_cb( color_props_.intr.fy = msg->k[4]; color_props_.intr.cx = msg->k[2]; color_props_.intr.cy = msg->k[5]; + for (size_t i = 0; i < 5 && i < msg->d.size(); ++i) + color_props_.intr.dist[i] = msg->d[i]; color_props_.dim.width = static_cast(msg->width); color_props_.dim.height = static_cast(msg->height); color_props_ready_ = true; @@ -359,10 +362,11 @@ void ValvePoseNode::sync_cb( for (size_t idx : kept) { const BoundingBox& yolo_box = scored_boxes[idx].second; // YOLO outputs in 640×640 letterbox space — convert to color image - // space. The input image is already undistorted upstream, so no - // lens distortion correction is needed here. - const BoundingBox color_box = + // space, then optionally correct for lens distortion. + BoundingBox color_box = detector_->letterbox_to_image_coords(yolo_box); + if (undistort_detections_) + color_box = undistort_bbox(color_box, color_props_.intr); const auto result = detector_->compute_pose_from_depth(depth_img, color_box, mode); From b8833a32eed175105bc0638e0ca3c2d2a75ec7df Mon Sep 17 00:00:00 2001 From: Andreas Kluge Svendsrud <89779148+kluge7@users.noreply.github.com> Date: Thu, 2 Apr 2026 02:39:19 +0200 Subject: [PATCH 7/7] fix: improve code formatting and comments in depth image processing and valve pose ROS files --- .../depth_image_processing.hpp | 5 +-- .../launch/valve_detection.launch.py | 32 ++++++++++++++----- .../src/depth_image_processing.cpp | 21 ++++++++---- .../valve_detection/src/valve_pose_ros.cpp | 3 +- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp index d1f8b54..ad757e8 100644 --- a/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp +++ b/mission/tacc/visual_inspection/valve_detection/include/valve_detection/depth_image_processing.hpp @@ -26,8 +26,9 @@ void project_pixel_to_point(int u, /// @brief Undistorts a bounding box by undistorting the edge midpoints and /// center, then refitting an oriented bounding box. -/// TODO: Had problems with this, when it undistorted the orientation would be off by a constant offset -/// Workaround is to just disable this and undistort the whole image instead (done upstream in perception_setup) +/// TODO: Had problems with this, when it undistorted the orientation would be +/// off by a constant offset Workaround is to just disable this and undistort +/// the whole image instead (done upstream in perception_setup) BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr); diff --git a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py index e4bee1d..3cbba5b 100644 --- a/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py +++ b/mission/tacc/visual_inspection/valve_detection/launch/valve_detection.launch.py @@ -92,17 +92,33 @@ def generate_launch_description(): parameters=[ cfg, { - 'depth_image_sub_topic': LaunchConfiguration('depth_image_sub_topic'), - 'detections_sub_topic': LaunchConfiguration('detections_sub_topic'), - 'depth_image_info_topic': LaunchConfiguration('depth_image_info_topic'), - 'color_image_info_topic': LaunchConfiguration('color_image_info_topic'), + 'depth_image_sub_topic': LaunchConfiguration( + 'depth_image_sub_topic' + ), + 'detections_sub_topic': LaunchConfiguration( + 'detections_sub_topic' + ), + 'depth_image_info_topic': LaunchConfiguration( + 'depth_image_info_topic' + ), + 'color_image_info_topic': LaunchConfiguration( + 'color_image_info_topic' + ), 'depth_frame_id': LaunchConfiguration('depth_frame_id'), 'color_frame_id': LaunchConfiguration('color_frame_id'), - 'landmarks_pub_topic': LaunchConfiguration('landmarks_pub_topic'), - 'output_frame_id': LaunchConfiguration('output_frame_id'), + 'landmarks_pub_topic': LaunchConfiguration( + 'landmarks_pub_topic' + ), + 'output_frame_id': LaunchConfiguration( + 'output_frame_id' + ), 'drone': LaunchConfiguration('drone'), - 'undistort_detections': LaunchConfiguration('undistort_detections'), - 'debug_visualize': LaunchConfiguration('debug_visualize'), + 'undistort_detections': LaunchConfiguration( + 'undistort_detections' + ), + 'debug_visualize': LaunchConfiguration( + 'debug_visualize' + ), }, ], ) diff --git a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp index d9e147a..70802aa 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/depth_image_processing.cpp @@ -2,16 +2,17 @@ // Depth-to-3D back-projection and color-to-depth pixel projection. #include "valve_detection/depth_image_processing.hpp" #include -#include -#include #include #include +#include +#include #include namespace valve_detection { -/// TODO: Had problems with this, when it undistorted the orientation would be off by a constant offset -/// Workaround is to just disable this and undistort the whole image instead (done upstream in perception_setup) +/// TODO: Had problems with this, when it undistorted the orientation would be +/// off by a constant offset Workaround is to just disable this and undistort +/// the whole image instead (done upstream in perception_setup) BoundingBox undistort_bbox(const BoundingBox& bbox, const CameraIntrinsics& intr) { const cv::Mat K = (cv::Mat_(3, 3) << intr.fx, 0, intr.cx, 0, @@ -51,15 +52,21 @@ BoundingBox undistort_bbox(const BoundingBox& bbox, // of the input angle and swap width/height accordingly. float fitted_angle_deg = fitted.angle; float diff = fitted_angle_deg - angle_deg; - while (diff > 90.0f) { diff -= 180.0f; fitted_angle_deg -= 180.0f; } - while (diff < -90.0f) { diff += 180.0f; fitted_angle_deg += 180.0f; } + while (diff > 90.0f) { + diff -= 180.0f; + fitted_angle_deg -= 180.0f; + } + while (diff < -90.0f) { + diff += 180.0f; + fitted_angle_deg += 180.0f; + } const bool swapped = std::abs(fitted_angle_deg - fitted.angle) > 45.0f; BoundingBox result = bbox; result.center_x = undist_center.x; result.center_y = undist_center.y; result.size_x = swapped ? fitted.size.height : fitted.size.width; - result.size_y = swapped ? fitted.size.width : fitted.size.height; + result.size_y = swapped ? fitted.size.width : fitted.size.height; result.theta = fitted_angle_deg * static_cast(M_PI) / 180.0f; return result; } diff --git a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp index ca17ed0..1ce850f 100644 --- a/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp +++ b/mission/tacc/visual_inspection/valve_detection/src/valve_pose_ros.cpp @@ -363,8 +363,7 @@ void ValvePoseNode::sync_cb( const BoundingBox& yolo_box = scored_boxes[idx].second; // YOLO outputs in 640×640 letterbox space — convert to color image // space, then optionally correct for lens distortion. - BoundingBox color_box = - detector_->letterbox_to_image_coords(yolo_box); + BoundingBox color_box = detector_->letterbox_to_image_coords(yolo_box); if (undistort_detections_) color_box = undistort_bbox(color_box, color_props_.intr);