Enhance benchmark configuration and gripper handling: Added 'ee_link_name' and 'action_frequency' to benchmark.yaml, introduced gripper width mapping in policy, and updated inference server to reflect gripper width in actions.

2026-03-20 23:05:18 +08:00
parent 457c26b868
commit 833ade73fd
4 changed files with 467 additions and 25 deletions
--- a/benchmark.yaml
+++ b/benchmark.yaml
@@ -73,6 +73,7 @@ scene:
      - 0.04983056883903615
      stereotype: single_gripper_arm_robot
      source: local
      ee_link_name: panda_link8
      init_joint_position:
        panda_joint2: -0.1633
        panda_joint4: -1.07
@@ -178,7 +179,6 @@ extension:
        - stereotype: robot_observer
          name: Franka
          observe_ee_pose: true
          observe_gripper_state: true
          observe_gripper_drive_state: true
        - stereotype: sensor_observer
          name: Hand_Camera
@@ -194,6 +194,7 @@ extension:
      enable: true
      stereotype: benchmark
      data_collector_name: benchmark_data_collect
      action_frequency: 15.0
      goals:
        - name: cola on top of book
          description: check if the cola bottle is on the book
@@ -204,8 +205,9 @@ extension:
        stereotype: starvla
        robot_name: Franka
        sensor_names: [Hand_Camera, Left_Camera, Right_Camera]
-        prompt: pick up the white plug
+        prompt: pick up the can
        run_trunk_size: 16
        gripper_width_mapper_file: ./gripper_width_robotiq_2f85_fixed.json
    recorder:
      enable: false # set to true to record the data
--- a/gripper_width_robotiq_2f85_fixed.json
+++ b/gripper_width_robotiq_2f85_fixed.json
@@ -0,0 +1,407 @@
 [
    {
        "angel": 0.0,
        "width": 0.084621108,
        "depth": 0.0
    },
    {
        "angel": 0.01,
        "width": 0.083915558,
        "depth": 0.0002994539999999
    },
    {
        "angel": 0.02,
        "width": 0.083037448,
        "depth": 0.0006653029999999
    },
    {
        "angel": 0.03,
        "width": 0.082152198,
        "depth": 0.0010267499999999
    },
    {
        "angel": 0.04,
        "width": 0.081259338,
        "depth": 0.0013839
    },
    {
        "angel": 0.05,
        "width": 0.080359768,
        "depth": 0.0017362829999999
    },
    {
        "angel": 0.06,
        "width": 0.079453358,
        "depth": 0.002084374
    },
    {
        "angel": 0.07,
        "width": 0.078539798,
        "depth": 0.0024278159999999
    },
    {
        "angel": 0.08,
        "width": 0.077619328,
        "depth": 0.0027666099999999
    },
    {
        "angel": 0.09,
        "width": 0.076692448,
        "depth": 0.0031008719999999
    },
    {
        "angel": 0.1,
        "width": 0.075758648,
        "depth": 0.0034304859999999
    },
    {
        "angel": 0.11,
        "width": 0.074818208,
        "depth": 0.00375545
    },
    {
        "angel": 0.12,
        "width": 0.0738714219999999,
        "depth": 0.0040756459999999
    },
    {
        "angel": 0.13,
        "width": 0.072918278,
        "depth": 0.0043910739999999
    },
    {
        "angel": 0.14,
        "width": 0.071958938,
        "depth": 0.0047017339999999
    },
    {
        "angel": 0.15,
        "width": 0.0709934479999999,
        "depth": 0.0050076249999999
    },
    {
        "angel": 0.16,
        "width": 0.070021638,
        "depth": 0.0053085099999999
    },
    {
        "angel": 0.17,
        "width": 0.0690442179999999,
        "depth": 0.0056048629999999
    },
    {
        "angel": 0.18,
        "width": 0.068060692,
        "depth": 0.0058959759999999
    },
    {
        "angel": 0.19,
        "width": 0.067071678,
        "depth": 0.006182198
    },
    {
        "angel": 0.2,
        "width": 0.066076608,
        "depth": 0.0064635279999999
    },
    {
        "angel": 0.21,
        "width": 0.065075923,
        "depth": 0.006739978
    },
    {
        "angel": 0.22,
        "width": 0.064070358,
        "depth": 0.0070112979999999
    },
    {
        "angel": 0.23,
        "width": 0.063058788,
        "depth": 0.0072776079999999
    },
    {
        "angel": 0.24,
        "width": 0.062042173,
        "depth": 0.0075387999999999
    },
    {
        "angel": 0.25,
        "width": 0.061020542,
        "depth": 0.00779498
    },
    {
        "angel": 0.26,
        "width": 0.059993598,
        "depth": 0.008045916
    },
    {
        "angel": 0.27,
        "width": 0.058961984,
        "depth": 0.0082919639999999
    },
    {
        "angel": 0.28,
        "width": 0.057924888,
        "depth": 0.0085325279999999
    },
    {
        "angel": 0.29,
        "width": 0.0568837379999999,
        "depth": 0.0087680849999999
    },
    {
        "angel": 0.3,
        "width": 0.055837578,
        "depth": 0.0089983979999999
    },
    {
        "angel": 0.31,
        "width": 0.054786574,
        "depth": 0.009223346
    },
    {
        "angel": 0.32,
        "width": 0.053731928,
        "depth": 0.0094431679999999
    },
    {
        "angel": 0.33,
        "width": 0.052672238,
        "depth": 0.0096576249999999
    },
    {
        "angel": 0.34,
        "width": 0.051608544,
        "depth": 0.0098668369999999
    },
    {
        "angel": 0.35,
        "width": 0.050540848,
        "depth": 0.0100706849999999
    },
    {
        "angel": 0.36,
        "width": 0.0494688929999999,
        "depth": 0.010269173
    },
    {
        "angel": 0.37,
        "width": 0.048393398,
        "depth": 0.010462288
    },
    {
        "angel": 0.38,
        "width": 0.0473134329999999,
        "depth": 0.0106499229999999
    },
    {
        "angel": 0.39,
        "width": 0.046230048,
        "depth": 0.010832317
    },
    {
        "angel": 0.4,
        "width": 0.045143388,
        "depth": 0.01100922
    },
    {
        "angel": 0.41,
        "width": 0.044053133,
        "depth": 0.0111807619999999
    },
    {
        "angel": 0.42,
        "width": 0.0429593079999999,
        "depth": 0.0113467
    },
    {
        "angel": 0.43,
        "width": 0.0418623579999999,
        "depth": 0.0115071569999999
    },
    {
        "angel": 0.44,
        "width": 0.040761988,
        "depth": 0.0116622499999999
    },
    {
        "angel": 0.45,
        "width": 0.0396591779999999,
        "depth": 0.0118118559999999
    },
    {
        "angel": 0.46,
        "width": 0.038553038,
        "depth": 0.01195574
    },
    {
        "angel": 0.47,
        "width": 0.037443828,
        "depth": 0.012094144
    },
    {
        "angel": 0.48,
        "width": 0.036332168,
        "depth": 0.0122271849999999
    },
    {
        "angel": 0.49,
        "width": 0.035218018,
        "depth": 0.0123544969999999
    },
    {
        "angel": 0.5,
        "width": 0.034100888,
        "depth": 0.01247633
    },
    {
        "angel": 0.51,
        "width": 0.032982008,
        "depth": 0.0125925599999999
    },
    {
        "angel": 0.52,
        "width": 0.0318607879999999,
        "depth": 0.012702946
    },
    {
        "angel": 0.53,
        "width": 0.0307372019999999,
        "depth": 0.01280809
    },
    {
        "angel": 0.54,
        "width": 0.0296111279999999,
        "depth": 0.0129073929999999
    },
    {
        "angel": 0.55,
        "width": 0.028483868,
        "depth": 0.01300109
    },
    {
        "angel": 0.56,
        "width": 0.027354508,
        "depth": 0.0130891839999999
    },
    {
        "angel": 0.57,
        "width": 0.0262235439999999,
        "depth": 0.01317144
    },
    {
        "angel": 0.58,
        "width": 0.025090638,
        "depth": 0.0132483299999999
    },
    {
        "angel": 0.59,
        "width": 0.023956728,
        "depth": 0.0133193799999999
    },
    {
        "angel": 0.6,
        "width": 0.022820918,
        "depth": 0.0133848269999999
    },
    {
        "angel": 0.61,
        "width": 0.0216845179999999,
        "depth": 0.01344455
    },
    {
        "angel": 0.62,
        "width": 0.0205465179999999,
        "depth": 0.0134985519999999
    },
    {
        "angel": 0.63,
        "width": 0.0194076179999999,
        "depth": 0.0135469499999999
    },
    {
        "angel": 0.64,
        "width": 0.0182676429999999,
        "depth": 0.0135895099999999
    },
    {
        "angel": 0.65,
        "width": 0.0171270279999999,
        "depth": 0.0136265829999999
    },
    {
        "angel": 0.66,
        "width": 0.015985858,
        "depth": 0.0136578159999999
    },
    {
        "angel": 0.67,
        "width": 0.014844108,
        "depth": 0.0136833269999999
    },
    {
        "angel": 0.68,
        "width": 0.0137018479999999,
        "depth": 0.0137031149999999
    },
    {
        "angel": 0.69,
        "width": 0.012558958,
        "depth": 0.013717182
    },
    {
        "angel": 0.7,
        "width": 0.011416068,
        "depth": 0.013725765
    },
    {
        "angel": 0.71,
        "width": 0.010273298,
        "depth": 0.013728388
    },
    {
        "angel": 0.72,
        "width": 0.009130154,
        "depth": 0.013725527
    },
    {
        "angel": 0.73,
        "width": 0.007987261,
        "depth": 0.0137165859999999
    },
    {
        "angel": 0.74,
        "width": 0.0068444979999999,
        "depth": 0.0137022799999999
    },
    {
        "angel": 0.75,
        "width": 0.0057022379999999,
        "depth": 0.0136820149999999
    },
    {
        "angel": 0.76,
        "width": 0.004560568,
        "depth": 0.013656266
    },
    {
        "angel": 0.77,
        "width": 0.0034195709999999,
        "depth": 0.0136245559999999
    },
    {
        "angel": 0.78,
        "width": 0.0022787859999999,
        "depth": 0.0135873629999999
    },
    {
        "angel": 0.79,
        "width": 0.001139223,
        "depth": 0.013544448
    },
    {
        "angel": 0.8,
        "width": 0.0,
        "depth": 0.0134955719999999
    }
 ]
--- a/starvla_inference_server.py
+++ b/starvla_inference_server.py
@@ -114,7 +114,7 @@ class StarvlaInferenceServer:
            actions = actions[0] # (16, 10)
        return {"ee_delta_position_chunks": actions[:, :3].tolist(), 
                "ee_delta_rot6d_chunks": actions[:, 3:9].tolist(),
-                "gripper_chunks": actions[:, 9:10].tolist()}
+                "gripper_width_chunks": actions[:, 9:10].tolist()}
    def register_routes(self):
--- a/starvla_policy.py
+++ b/starvla_policy.py
@@ -1,8 +1,13 @@
 import pickle
 import time
 import json
 import numpy as np
 import requests
 from joysim.annotations.config_class import configclass, field
 from joysim.annotations.stereotype import stereotype
-from joysim.controllers.motion_plan_controller import MotionPlanController
+from joysim.controllers.spawnable_controller import SpawnableController
 from joysim.core.robots.configs.actuator_configs.grippers import GripperDriveJointConfig
 from joysim.extensions.benchmark.action import RobotAction
 from joysim.extensions.benchmark.benchmark import (
    BenchmarkAction,
@@ -12,15 +17,13 @@ from joysim.extensions.benchmark.benchmark import (
 from joysim.extensions.benchmark.policy import Policy, PolicyConfig
 from joysim.utils.log import Log
 from joysim.utils.pose import Pose
 import numpy as np
 import requests
 import time
@configclass
@stereotype.register_config("starvla")
 class StarvlaPolicyConfig(PolicyConfig):
    robot_name: str = field(default="None", required=True, comment="The name of the robot")
    gripper_width_mapper_file: str = field(default="", required=True, comment="The file path to the gripper width mapper")
    sensor_names: list[str] = field(
        default=["Hand_Camera", "Left_Camera", "Right_Camera"],
        required=True,
@@ -55,14 +58,24 @@ class StarvlaPolicy(Policy):
        self.sensor_names = config.sensor_names
        self.server_url = config.server_url
        self.prompt = config.prompt
-
+        self.gripper_width_mapper = json.load(open(config.gripper_width_mapper_file, "r"))
    def reset(self) -> None:
        self.current_ee_position_state = None
        self.current_ee_rot6d_state = None
-        self.current_gripper_state = None
+        self.current_gripper_width = None
        self.current_chunk_id = 0
        self.current_chunk_result = None
        self.run_trunk_size = self.config.run_trunk_size
        self.drive_joints: dict[str, GripperDriveJointConfig] = SpawnableController.control_robot(self.robot_name, "get_gripper_drive_joints").unwrap()
        for joint_name, joint_config in self.drive_joints.items():
            SpawnableController.control_robot(self.robot_name, "set_joint_stiffness", parameters={"joint_names": [joint_name], "stiffness": joint_config.position_control_stiffness}).unwrap()
            SpawnableController.control_robot(self.robot_name, "set_joint_damping", parameters={"joint_names": [joint_name], "damping": joint_config.position_control_damping}).unwrap()
            SpawnableController.control_robot(self.robot_name, "set_joint_effort_limit", parameters={"joint_names": [joint_name], "effort_limit": joint_config.position_control_effort_limit}).unwrap()
        self.max_width = float("-inf")
        self.min_width = float("inf")
        for entry in self.gripper_width_mapper:
            self.max_width = max(self.max_width, entry["width"])
            self.min_width = min(self.min_width, entry["width"])
    def warmup(self, benchmark_observation: BenchmarkObservation) -> None:
        Log.info(f"Waiting for StarVLA inference server to be ready...")
@@ -87,10 +100,10 @@ class StarvlaPolicy(Policy):
    def preprocess_observation(self, benchmark_observation: BenchmarkObservation) -> dict:
        robot_obs = benchmark_observation.get_robot_observations(self.robot_name)["robot_data"]
-        ee_pose_base = robot_obs["ee_pose_base"]
+        ee_pose_base = robot_obs["ee_pose"]["base_frame"]
        ee_position, ee_rot6d = ee_pose_base["position"],ee_pose_base["rot6d"]
-        gripper = 0.0 if robot_obs["gripper_state"]["opened"] else 1.0
+        normalized_gripper_width = self.__map_joint_position_to_normalized_width(robot_obs["gripper_drive_state"]["position"][0])
-        state = np.concatenate([ee_position,ee_rot6d,np.array([gripper])])
+        state = np.concatenate([ee_position,ee_rot6d,np.array([normalized_gripper_width])])
        rgb_data = {}
        for sensor_name in self.sensor_names:
            sensor_obs = benchmark_observation.get_sensor_observations(sensor_name)
@@ -103,7 +116,7 @@ class StarvlaPolicy(Policy):
        if self.current_chunk_result is None:
            self.current_ee_position_state = np.array(observation["state"][:3]).astype(np.float64)
            self.current_ee_rot6d_state = np.array(observation["state"][3:9]).astype(np.float64)
-            self.current_gripper_state = np.array([observation["state"][9]])
+            self.current_gripper_width = np.array([observation["state"][9]])
            payload = pickle.dumps(observation)
            response = requests.post(
                f"{self.server_url}/inference",
@@ -122,6 +135,26 @@ class StarvlaPolicy(Policy):
            result = self.current_chunk_result
        return result
    def __map_joint_position_to_normalized_width(self, joint_position: float) -> float:
        for entry in self.gripper_width_mapper:
            if round(entry["angel"], 2) == round(joint_position, 2):
                return 1-(entry["width"] - self.min_width) / (self.max_width - self.min_width)
        raise ValueError(f"Joint position {joint_position} not found in gripper width mapper")
    def __map_gripper_joint_position(self, normalized_gripper_width: float) -> float:
        joint_positions = []
        joint_names = []
        if normalized_gripper_width > 0.5:
            for joint_name, joint_config in self.drive_joints.items():
                joint_positions.append(joint_config.close_position)
                joint_names.append(joint_name)
        else:
            for joint_name, joint_config in self.drive_joints.items():
                joint_positions.append(joint_config.open_position)
                joint_names.append(joint_name)
        return joint_positions, joint_names
    def postprocess_action(self, action: dict) -> BenchmarkAction:
        benchmark_action = BenchmarkAction()
@@ -129,24 +162,24 @@ class StarvlaPolicy(Policy):
        # get base frame end-effector pose
        delta_ee_pose = Pose(position=action["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action["ee_delta_rot6d_chunks"][self.current_chunk_id])
        curr_state_ee_pose = Pose(position=self.current_ee_position_state, rot6d=self.current_ee_rot6d_state)
        Log.debug(f"trunck_id: {self.current_chunk_id}, curr_state_ee_pose: {curr_state_ee_pose}")
        curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state
-        ik_result = MotionPlanController.solve_ik(
+        curr_action_gripper_width = action["gripper_width_chunks"][self.current_chunk_id]
            robot_name=self.robot_name,
            base_frame_ee_pose=curr_action_ee_pose,
        ).unwrap()
        if not ik_result["success"]:
            Log.error(f"IK failed. Ignore this action.")
            return benchmark_action
-        joint_names = ik_result["result"]["plannable_joint_names"]
+        gripper_joint_positions, gripper_joint_names = self.__map_gripper_joint_position(curr_action_gripper_width[0])
-        joint_positions = ik_result["result"]["plannable_joint_positions"][0]
+        Log.debug(f"action_gripper_joint_positions: {gripper_joint_positions}, action_normalized_gripper_width: {round(curr_action_gripper_width[0], 2)}")
        benchmark_action.add_robot_action(
            RobotAction(
                control_mode=ControlMode.POSITION,
                robot_name=self.robot_name,
-                joint_names=joint_names,
+                joint_names=gripper_joint_names,
-                joint_positions=joint_positions
+                joint_positions=gripper_joint_positions
            )
        )
        benchmark_action.add_robot_action(
            RobotAction(
                control_mode=ControlMode.EE_POSE,
                robot_name=self.robot_name,
                ee_pose=curr_action_ee_pose
            )
        )
        self.current_chunk_id += 1