From 833ade73fd4ee957d3b81dc4e7ee3e3d7ef82afd Mon Sep 17 00:00:00 2001 From: "hufei.hofee" Date: Fri, 20 Mar 2026 23:05:18 +0800 Subject: [PATCH] Enhance benchmark configuration and gripper handling: Added 'ee_link_name' and 'action_frequency' to benchmark.yaml, introduced gripper width mapping in policy, and updated inference server to reflect gripper width in actions. --- benchmark.yaml | 6 +- gripper_width_robotiq_2f85_fixed.json | 407 ++++++++++++++++++++++++++ starvla_inference_server.py | 2 +- starvla_policy.py | 77 +++-- 4 files changed, 467 insertions(+), 25 deletions(-) create mode 100644 gripper_width_robotiq_2f85_fixed.json diff --git a/benchmark.yaml b/benchmark.yaml index d66088f..e245ad2 100644 --- a/benchmark.yaml +++ b/benchmark.yaml @@ -73,6 +73,7 @@ scene: - 0.04983056883903615 stereotype: single_gripper_arm_robot source: local + ee_link_name: panda_link8 init_joint_position: panda_joint2: -0.1633 panda_joint4: -1.07 @@ -178,7 +179,6 @@ extension: - stereotype: robot_observer name: Franka observe_ee_pose: true - observe_gripper_state: true observe_gripper_drive_state: true - stereotype: sensor_observer name: Hand_Camera @@ -194,6 +194,7 @@ extension: enable: true stereotype: benchmark data_collector_name: benchmark_data_collect + action_frequency: 15.0 goals: - name: cola on top of book description: check if the cola bottle is on the book @@ -204,8 +205,9 @@ extension: stereotype: starvla robot_name: Franka sensor_names: [Hand_Camera, Left_Camera, Right_Camera] - prompt: pick up the white plug + prompt: pick up the can run_trunk_size: 16 + gripper_width_mapper_file: ./gripper_width_robotiq_2f85_fixed.json recorder: enable: false # set to true to record the data diff --git a/gripper_width_robotiq_2f85_fixed.json b/gripper_width_robotiq_2f85_fixed.json new file mode 100644 index 0000000..08e700e --- /dev/null +++ b/gripper_width_robotiq_2f85_fixed.json @@ -0,0 +1,407 @@ +[ + { + "angel": 0.0, + "width": 0.084621108, + "depth": 0.0 + }, + { + "angel": 0.01, + "width": 0.083915558, + "depth": 0.0002994539999999 + }, + { + "angel": 0.02, + "width": 0.083037448, + "depth": 0.0006653029999999 + }, + { + "angel": 0.03, + "width": 0.082152198, + "depth": 0.0010267499999999 + }, + { + "angel": 0.04, + "width": 0.081259338, + "depth": 0.0013839 + }, + { + "angel": 0.05, + "width": 0.080359768, + "depth": 0.0017362829999999 + }, + { + "angel": 0.06, + "width": 0.079453358, + "depth": 0.002084374 + }, + { + "angel": 0.07, + "width": 0.078539798, + "depth": 0.0024278159999999 + }, + { + "angel": 0.08, + "width": 0.077619328, + "depth": 0.0027666099999999 + }, + { + "angel": 0.09, + "width": 0.076692448, + "depth": 0.0031008719999999 + }, + { + "angel": 0.1, + "width": 0.075758648, + "depth": 0.0034304859999999 + }, + { + "angel": 0.11, + "width": 0.074818208, + "depth": 0.00375545 + }, + { + "angel": 0.12, + "width": 0.0738714219999999, + "depth": 0.0040756459999999 + }, + { + "angel": 0.13, + "width": 0.072918278, + "depth": 0.0043910739999999 + }, + { + "angel": 0.14, + "width": 0.071958938, + "depth": 0.0047017339999999 + }, + { + "angel": 0.15, + "width": 0.0709934479999999, + "depth": 0.0050076249999999 + }, + { + "angel": 0.16, + "width": 0.070021638, + "depth": 0.0053085099999999 + }, + { + "angel": 0.17, + "width": 0.0690442179999999, + "depth": 0.0056048629999999 + }, + { + "angel": 0.18, + "width": 0.068060692, + "depth": 0.0058959759999999 + }, + { + "angel": 0.19, + "width": 0.067071678, + "depth": 0.006182198 + }, + { + "angel": 0.2, + "width": 0.066076608, + "depth": 0.0064635279999999 + }, + { + "angel": 0.21, + "width": 0.065075923, + "depth": 0.006739978 + }, + { + "angel": 0.22, + "width": 0.064070358, + "depth": 0.0070112979999999 + }, + { + "angel": 0.23, + "width": 0.063058788, + "depth": 0.0072776079999999 + }, + { + "angel": 0.24, + "width": 0.062042173, + "depth": 0.0075387999999999 + }, + { + "angel": 0.25, + "width": 0.061020542, + "depth": 0.00779498 + }, + { + "angel": 0.26, + "width": 0.059993598, + "depth": 0.008045916 + }, + { + "angel": 0.27, + "width": 0.058961984, + "depth": 0.0082919639999999 + }, + { + "angel": 0.28, + "width": 0.057924888, + "depth": 0.0085325279999999 + }, + { + "angel": 0.29, + "width": 0.0568837379999999, + "depth": 0.0087680849999999 + }, + { + "angel": 0.3, + "width": 0.055837578, + "depth": 0.0089983979999999 + }, + { + "angel": 0.31, + "width": 0.054786574, + "depth": 0.009223346 + }, + { + "angel": 0.32, + "width": 0.053731928, + "depth": 0.0094431679999999 + }, + { + "angel": 0.33, + "width": 0.052672238, + "depth": 0.0096576249999999 + }, + { + "angel": 0.34, + "width": 0.051608544, + "depth": 0.0098668369999999 + }, + { + "angel": 0.35, + "width": 0.050540848, + "depth": 0.0100706849999999 + }, + { + "angel": 0.36, + "width": 0.0494688929999999, + "depth": 0.010269173 + }, + { + "angel": 0.37, + "width": 0.048393398, + "depth": 0.010462288 + }, + { + "angel": 0.38, + "width": 0.0473134329999999, + "depth": 0.0106499229999999 + }, + { + "angel": 0.39, + "width": 0.046230048, + "depth": 0.010832317 + }, + { + "angel": 0.4, + "width": 0.045143388, + "depth": 0.01100922 + }, + { + "angel": 0.41, + "width": 0.044053133, + "depth": 0.0111807619999999 + }, + { + "angel": 0.42, + "width": 0.0429593079999999, + "depth": 0.0113467 + }, + { + "angel": 0.43, + "width": 0.0418623579999999, + "depth": 0.0115071569999999 + }, + { + "angel": 0.44, + "width": 0.040761988, + "depth": 0.0116622499999999 + }, + { + "angel": 0.45, + "width": 0.0396591779999999, + "depth": 0.0118118559999999 + }, + { + "angel": 0.46, + "width": 0.038553038, + "depth": 0.01195574 + }, + { + "angel": 0.47, + "width": 0.037443828, + "depth": 0.012094144 + }, + { + "angel": 0.48, + "width": 0.036332168, + "depth": 0.0122271849999999 + }, + { + "angel": 0.49, + "width": 0.035218018, + "depth": 0.0123544969999999 + }, + { + "angel": 0.5, + "width": 0.034100888, + "depth": 0.01247633 + }, + { + "angel": 0.51, + "width": 0.032982008, + "depth": 0.0125925599999999 + }, + { + "angel": 0.52, + "width": 0.0318607879999999, + "depth": 0.012702946 + }, + { + "angel": 0.53, + "width": 0.0307372019999999, + "depth": 0.01280809 + }, + { + "angel": 0.54, + "width": 0.0296111279999999, + "depth": 0.0129073929999999 + }, + { + "angel": 0.55, + "width": 0.028483868, + "depth": 0.01300109 + }, + { + "angel": 0.56, + "width": 0.027354508, + "depth": 0.0130891839999999 + }, + { + "angel": 0.57, + "width": 0.0262235439999999, + "depth": 0.01317144 + }, + { + "angel": 0.58, + "width": 0.025090638, + "depth": 0.0132483299999999 + }, + { + "angel": 0.59, + "width": 0.023956728, + "depth": 0.0133193799999999 + }, + { + "angel": 0.6, + "width": 0.022820918, + "depth": 0.0133848269999999 + }, + { + "angel": 0.61, + "width": 0.0216845179999999, + "depth": 0.01344455 + }, + { + "angel": 0.62, + "width": 0.0205465179999999, + "depth": 0.0134985519999999 + }, + { + "angel": 0.63, + "width": 0.0194076179999999, + "depth": 0.0135469499999999 + }, + { + "angel": 0.64, + "width": 0.0182676429999999, + "depth": 0.0135895099999999 + }, + { + "angel": 0.65, + "width": 0.0171270279999999, + "depth": 0.0136265829999999 + }, + { + "angel": 0.66, + "width": 0.015985858, + "depth": 0.0136578159999999 + }, + { + "angel": 0.67, + "width": 0.014844108, + "depth": 0.0136833269999999 + }, + { + "angel": 0.68, + "width": 0.0137018479999999, + "depth": 0.0137031149999999 + }, + { + "angel": 0.69, + "width": 0.012558958, + "depth": 0.013717182 + }, + { + "angel": 0.7, + "width": 0.011416068, + "depth": 0.013725765 + }, + { + "angel": 0.71, + "width": 0.010273298, + "depth": 0.013728388 + }, + { + "angel": 0.72, + "width": 0.009130154, + "depth": 0.013725527 + }, + { + "angel": 0.73, + "width": 0.007987261, + "depth": 0.0137165859999999 + }, + { + "angel": 0.74, + "width": 0.0068444979999999, + "depth": 0.0137022799999999 + }, + { + "angel": 0.75, + "width": 0.0057022379999999, + "depth": 0.0136820149999999 + }, + { + "angel": 0.76, + "width": 0.004560568, + "depth": 0.013656266 + }, + { + "angel": 0.77, + "width": 0.0034195709999999, + "depth": 0.0136245559999999 + }, + { + "angel": 0.78, + "width": 0.0022787859999999, + "depth": 0.0135873629999999 + }, + { + "angel": 0.79, + "width": 0.001139223, + "depth": 0.013544448 + }, + { + "angel": 0.8, + "width": 0.0, + "depth": 0.0134955719999999 + } +] \ No newline at end of file diff --git a/starvla_inference_server.py b/starvla_inference_server.py index 0f432de..a91acea 100644 --- a/starvla_inference_server.py +++ b/starvla_inference_server.py @@ -114,7 +114,7 @@ class StarvlaInferenceServer: actions = actions[0] # (16, 10) return {"ee_delta_position_chunks": actions[:, :3].tolist(), "ee_delta_rot6d_chunks": actions[:, 3:9].tolist(), - "gripper_chunks": actions[:, 9:10].tolist()} + "gripper_width_chunks": actions[:, 9:10].tolist()} def register_routes(self): diff --git a/starvla_policy.py b/starvla_policy.py index 3f03a98..e7844f5 100644 --- a/starvla_policy.py +++ b/starvla_policy.py @@ -1,8 +1,13 @@ import pickle +import time +import json +import numpy as np +import requests from joysim.annotations.config_class import configclass, field from joysim.annotations.stereotype import stereotype -from joysim.controllers.motion_plan_controller import MotionPlanController +from joysim.controllers.spawnable_controller import SpawnableController +from joysim.core.robots.configs.actuator_configs.grippers import GripperDriveJointConfig from joysim.extensions.benchmark.action import RobotAction from joysim.extensions.benchmark.benchmark import ( BenchmarkAction, @@ -12,15 +17,13 @@ from joysim.extensions.benchmark.benchmark import ( from joysim.extensions.benchmark.policy import Policy, PolicyConfig from joysim.utils.log import Log from joysim.utils.pose import Pose -import numpy as np -import requests -import time @configclass @stereotype.register_config("starvla") class StarvlaPolicyConfig(PolicyConfig): robot_name: str = field(default="None", required=True, comment="The name of the robot") + gripper_width_mapper_file: str = field(default="", required=True, comment="The file path to the gripper width mapper") sensor_names: list[str] = field( default=["Hand_Camera", "Left_Camera", "Right_Camera"], required=True, @@ -55,14 +58,24 @@ class StarvlaPolicy(Policy): self.sensor_names = config.sensor_names self.server_url = config.server_url self.prompt = config.prompt - + self.gripper_width_mapper = json.load(open(config.gripper_width_mapper_file, "r")) def reset(self) -> None: self.current_ee_position_state = None self.current_ee_rot6d_state = None - self.current_gripper_state = None + self.current_gripper_width = None self.current_chunk_id = 0 self.current_chunk_result = None self.run_trunk_size = self.config.run_trunk_size + self.drive_joints: dict[str, GripperDriveJointConfig] = SpawnableController.control_robot(self.robot_name, "get_gripper_drive_joints").unwrap() + for joint_name, joint_config in self.drive_joints.items(): + SpawnableController.control_robot(self.robot_name, "set_joint_stiffness", parameters={"joint_names": [joint_name], "stiffness": joint_config.position_control_stiffness}).unwrap() + SpawnableController.control_robot(self.robot_name, "set_joint_damping", parameters={"joint_names": [joint_name], "damping": joint_config.position_control_damping}).unwrap() + SpawnableController.control_robot(self.robot_name, "set_joint_effort_limit", parameters={"joint_names": [joint_name], "effort_limit": joint_config.position_control_effort_limit}).unwrap() + self.max_width = float("-inf") + self.min_width = float("inf") + for entry in self.gripper_width_mapper: + self.max_width = max(self.max_width, entry["width"]) + self.min_width = min(self.min_width, entry["width"]) def warmup(self, benchmark_observation: BenchmarkObservation) -> None: Log.info(f"Waiting for StarVLA inference server to be ready...") @@ -87,10 +100,10 @@ class StarvlaPolicy(Policy): def preprocess_observation(self, benchmark_observation: BenchmarkObservation) -> dict: robot_obs = benchmark_observation.get_robot_observations(self.robot_name)["robot_data"] - ee_pose_base = robot_obs["ee_pose_base"] + ee_pose_base = robot_obs["ee_pose"]["base_frame"] ee_position, ee_rot6d = ee_pose_base["position"],ee_pose_base["rot6d"] - gripper = 0.0 if robot_obs["gripper_state"]["opened"] else 1.0 - state = np.concatenate([ee_position,ee_rot6d,np.array([gripper])]) + normalized_gripper_width = self.__map_joint_position_to_normalized_width(robot_obs["gripper_drive_state"]["position"][0]) + state = np.concatenate([ee_position,ee_rot6d,np.array([normalized_gripper_width])]) rgb_data = {} for sensor_name in self.sensor_names: sensor_obs = benchmark_observation.get_sensor_observations(sensor_name) @@ -103,7 +116,7 @@ class StarvlaPolicy(Policy): if self.current_chunk_result is None: self.current_ee_position_state = np.array(observation["state"][:3]).astype(np.float64) self.current_ee_rot6d_state = np.array(observation["state"][3:9]).astype(np.float64) - self.current_gripper_state = np.array([observation["state"][9]]) + self.current_gripper_width = np.array([observation["state"][9]]) payload = pickle.dumps(observation) response = requests.post( f"{self.server_url}/inference", @@ -122,6 +135,26 @@ class StarvlaPolicy(Policy): result = self.current_chunk_result return result + def __map_joint_position_to_normalized_width(self, joint_position: float) -> float: + for entry in self.gripper_width_mapper: + if round(entry["angel"], 2) == round(joint_position, 2): + return 1-(entry["width"] - self.min_width) / (self.max_width - self.min_width) + + raise ValueError(f"Joint position {joint_position} not found in gripper width mapper") + + def __map_gripper_joint_position(self, normalized_gripper_width: float) -> float: + + joint_positions = [] + joint_names = [] + if normalized_gripper_width > 0.5: + for joint_name, joint_config in self.drive_joints.items(): + joint_positions.append(joint_config.close_position) + joint_names.append(joint_name) + else: + for joint_name, joint_config in self.drive_joints.items(): + joint_positions.append(joint_config.open_position) + joint_names.append(joint_name) + return joint_positions, joint_names def postprocess_action(self, action: dict) -> BenchmarkAction: benchmark_action = BenchmarkAction() @@ -129,24 +162,24 @@ class StarvlaPolicy(Policy): # get base frame end-effector pose delta_ee_pose = Pose(position=action["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action["ee_delta_rot6d_chunks"][self.current_chunk_id]) curr_state_ee_pose = Pose(position=self.current_ee_position_state, rot6d=self.current_ee_rot6d_state) - Log.debug(f"trunck_id: {self.current_chunk_id}, curr_state_ee_pose: {curr_state_ee_pose}") curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state - ik_result = MotionPlanController.solve_ik( - robot_name=self.robot_name, - base_frame_ee_pose=curr_action_ee_pose, - ).unwrap() - if not ik_result["success"]: - Log.error(f"IK failed. Ignore this action.") - return benchmark_action + curr_action_gripper_width = action["gripper_width_chunks"][self.current_chunk_id] - joint_names = ik_result["result"]["plannable_joint_names"] - joint_positions = ik_result["result"]["plannable_joint_positions"][0] + gripper_joint_positions, gripper_joint_names = self.__map_gripper_joint_position(curr_action_gripper_width[0]) + Log.debug(f"action_gripper_joint_positions: {gripper_joint_positions}, action_normalized_gripper_width: {round(curr_action_gripper_width[0], 2)}") benchmark_action.add_robot_action( RobotAction( control_mode=ControlMode.POSITION, robot_name=self.robot_name, - joint_names=joint_names, - joint_positions=joint_positions + joint_names=gripper_joint_names, + joint_positions=gripper_joint_positions + ) + ) + benchmark_action.add_robot_action( + RobotAction( + control_mode=ControlMode.EE_POSE, + robot_name=self.robot_name, + ee_pose=curr_action_ee_pose ) ) self.current_chunk_id += 1