From 833ade73fd4ee957d3b81dc4e7ee3e3d7ef82afd Mon Sep 17 00:00:00 2001
From: "hufei.hofee" <hufei.hofee@jd.com>
Date: Fri, 20 Mar 2026 23:05:18 +0800
Subject: [PATCH] Enhance benchmark configuration and gripper handling: Added
 'ee_link_name' and 'action_frequency' to benchmark.yaml, introduced gripper
 width mapping in policy, and updated inference server to reflect gripper
 width in actions.

---
 benchmark.yaml                        |   6 +-
 gripper_width_robotiq_2f85_fixed.json | 407 ++++++++++++++++++++++++++
 starvla_inference_server.py           |   2 +-
 starvla_policy.py                     |  77 +++--
 4 files changed, 467 insertions(+), 25 deletions(-)
 create mode 100644 gripper_width_robotiq_2f85_fixed.json

diff --git a/benchmark.yaml b/benchmark.yaml
index d66088f..e245ad2 100644
--- a/benchmark.yaml
+++ b/benchmark.yaml
@@ -73,6 +73,7 @@ scene:
       - 0.04983056883903615
       stereotype: single_gripper_arm_robot
       source: local
+      ee_link_name: panda_link8
       init_joint_position:
         panda_joint2: -0.1633
         panda_joint4: -1.07
@@ -178,7 +179,6 @@ extension:
         - stereotype: robot_observer
           name: Franka
           observe_ee_pose: true
-          observe_gripper_state: true
           observe_gripper_drive_state: true
         - stereotype: sensor_observer
           name: Hand_Camera
@@ -194,6 +194,7 @@ extension:
       enable: true
       stereotype: benchmark
       data_collector_name: benchmark_data_collect
+      action_frequency: 15.0
       goals:
         - name: cola on top of book
           description: check if the cola bottle is on the book
@@ -204,8 +205,9 @@ extension:
         stereotype: starvla
         robot_name: Franka
         sensor_names: [Hand_Camera, Left_Camera, Right_Camera]
-        prompt: pick up the white plug
+        prompt: pick up the can
         run_trunk_size: 16
+        gripper_width_mapper_file: ./gripper_width_robotiq_2f85_fixed.json
 
     recorder:
       enable: false # set to true to record the data
diff --git a/gripper_width_robotiq_2f85_fixed.json b/gripper_width_robotiq_2f85_fixed.json
new file mode 100644
index 0000000..08e700e
--- /dev/null
+++ b/gripper_width_robotiq_2f85_fixed.json
@@ -0,0 +1,407 @@
+[
+    {
+        "angel": 0.0,
+        "width": 0.084621108,
+        "depth": 0.0
+    },
+    {
+        "angel": 0.01,
+        "width": 0.083915558,
+        "depth": 0.0002994539999999
+    },
+    {
+        "angel": 0.02,
+        "width": 0.083037448,
+        "depth": 0.0006653029999999
+    },
+    {
+        "angel": 0.03,
+        "width": 0.082152198,
+        "depth": 0.0010267499999999
+    },
+    {
+        "angel": 0.04,
+        "width": 0.081259338,
+        "depth": 0.0013839
+    },
+    {
+        "angel": 0.05,
+        "width": 0.080359768,
+        "depth": 0.0017362829999999
+    },
+    {
+        "angel": 0.06,
+        "width": 0.079453358,
+        "depth": 0.002084374
+    },
+    {
+        "angel": 0.07,
+        "width": 0.078539798,
+        "depth": 0.0024278159999999
+    },
+    {
+        "angel": 0.08,
+        "width": 0.077619328,
+        "depth": 0.0027666099999999
+    },
+    {
+        "angel": 0.09,
+        "width": 0.076692448,
+        "depth": 0.0031008719999999
+    },
+    {
+        "angel": 0.1,
+        "width": 0.075758648,
+        "depth": 0.0034304859999999
+    },
+    {
+        "angel": 0.11,
+        "width": 0.074818208,
+        "depth": 0.00375545
+    },
+    {
+        "angel": 0.12,
+        "width": 0.0738714219999999,
+        "depth": 0.0040756459999999
+    },
+    {
+        "angel": 0.13,
+        "width": 0.072918278,
+        "depth": 0.0043910739999999
+    },
+    {
+        "angel": 0.14,
+        "width": 0.071958938,
+        "depth": 0.0047017339999999
+    },
+    {
+        "angel": 0.15,
+        "width": 0.0709934479999999,
+        "depth": 0.0050076249999999
+    },
+    {
+        "angel": 0.16,
+        "width": 0.070021638,
+        "depth": 0.0053085099999999
+    },
+    {
+        "angel": 0.17,
+        "width": 0.0690442179999999,
+        "depth": 0.0056048629999999
+    },
+    {
+        "angel": 0.18,
+        "width": 0.068060692,
+        "depth": 0.0058959759999999
+    },
+    {
+        "angel": 0.19,
+        "width": 0.067071678,
+        "depth": 0.006182198
+    },
+    {
+        "angel": 0.2,
+        "width": 0.066076608,
+        "depth": 0.0064635279999999
+    },
+    {
+        "angel": 0.21,
+        "width": 0.065075923,
+        "depth": 0.006739978
+    },
+    {
+        "angel": 0.22,
+        "width": 0.064070358,
+        "depth": 0.0070112979999999
+    },
+    {
+        "angel": 0.23,
+        "width": 0.063058788,
+        "depth": 0.0072776079999999
+    },
+    {
+        "angel": 0.24,
+        "width": 0.062042173,
+        "depth": 0.0075387999999999
+    },
+    {
+        "angel": 0.25,
+        "width": 0.061020542,
+        "depth": 0.00779498
+    },
+    {
+        "angel": 0.26,
+        "width": 0.059993598,
+        "depth": 0.008045916
+    },
+    {
+        "angel": 0.27,
+        "width": 0.058961984,
+        "depth": 0.0082919639999999
+    },
+    {
+        "angel": 0.28,
+        "width": 0.057924888,
+        "depth": 0.0085325279999999
+    },
+    {
+        "angel": 0.29,
+        "width": 0.0568837379999999,
+        "depth": 0.0087680849999999
+    },
+    {
+        "angel": 0.3,
+        "width": 0.055837578,
+        "depth": 0.0089983979999999
+    },
+    {
+        "angel": 0.31,
+        "width": 0.054786574,
+        "depth": 0.009223346
+    },
+    {
+        "angel": 0.32,
+        "width": 0.053731928,
+        "depth": 0.0094431679999999
+    },
+    {
+        "angel": 0.33,
+        "width": 0.052672238,
+        "depth": 0.0096576249999999
+    },
+    {
+        "angel": 0.34,
+        "width": 0.051608544,
+        "depth": 0.0098668369999999
+    },
+    {
+        "angel": 0.35,
+        "width": 0.050540848,
+        "depth": 0.0100706849999999
+    },
+    {
+        "angel": 0.36,
+        "width": 0.0494688929999999,
+        "depth": 0.010269173
+    },
+    {
+        "angel": 0.37,
+        "width": 0.048393398,
+        "depth": 0.010462288
+    },
+    {
+        "angel": 0.38,
+        "width": 0.0473134329999999,
+        "depth": 0.0106499229999999
+    },
+    {
+        "angel": 0.39,
+        "width": 0.046230048,
+        "depth": 0.010832317
+    },
+    {
+        "angel": 0.4,
+        "width": 0.045143388,
+        "depth": 0.01100922
+    },
+    {
+        "angel": 0.41,
+        "width": 0.044053133,
+        "depth": 0.0111807619999999
+    },
+    {
+        "angel": 0.42,
+        "width": 0.0429593079999999,
+        "depth": 0.0113467
+    },
+    {
+        "angel": 0.43,
+        "width": 0.0418623579999999,
+        "depth": 0.0115071569999999
+    },
+    {
+        "angel": 0.44,
+        "width": 0.040761988,
+        "depth": 0.0116622499999999
+    },
+    {
+        "angel": 0.45,
+        "width": 0.0396591779999999,
+        "depth": 0.0118118559999999
+    },
+    {
+        "angel": 0.46,
+        "width": 0.038553038,
+        "depth": 0.01195574
+    },
+    {
+        "angel": 0.47,
+        "width": 0.037443828,
+        "depth": 0.012094144
+    },
+    {
+        "angel": 0.48,
+        "width": 0.036332168,
+        "depth": 0.0122271849999999
+    },
+    {
+        "angel": 0.49,
+        "width": 0.035218018,
+        "depth": 0.0123544969999999
+    },
+    {
+        "angel": 0.5,
+        "width": 0.034100888,
+        "depth": 0.01247633
+    },
+    {
+        "angel": 0.51,
+        "width": 0.032982008,
+        "depth": 0.0125925599999999
+    },
+    {
+        "angel": 0.52,
+        "width": 0.0318607879999999,
+        "depth": 0.012702946
+    },
+    {
+        "angel": 0.53,
+        "width": 0.0307372019999999,
+        "depth": 0.01280809
+    },
+    {
+        "angel": 0.54,
+        "width": 0.0296111279999999,
+        "depth": 0.0129073929999999
+    },
+    {
+        "angel": 0.55,
+        "width": 0.028483868,
+        "depth": 0.01300109
+    },
+    {
+        "angel": 0.56,
+        "width": 0.027354508,
+        "depth": 0.0130891839999999
+    },
+    {
+        "angel": 0.57,
+        "width": 0.0262235439999999,
+        "depth": 0.01317144
+    },
+    {
+        "angel": 0.58,
+        "width": 0.025090638,
+        "depth": 0.0132483299999999
+    },
+    {
+        "angel": 0.59,
+        "width": 0.023956728,
+        "depth": 0.0133193799999999
+    },
+    {
+        "angel": 0.6,
+        "width": 0.022820918,
+        "depth": 0.0133848269999999
+    },
+    {
+        "angel": 0.61,
+        "width": 0.0216845179999999,
+        "depth": 0.01344455
+    },
+    {
+        "angel": 0.62,
+        "width": 0.0205465179999999,
+        "depth": 0.0134985519999999
+    },
+    {
+        "angel": 0.63,
+        "width": 0.0194076179999999,
+        "depth": 0.0135469499999999
+    },
+    {
+        "angel": 0.64,
+        "width": 0.0182676429999999,
+        "depth": 0.0135895099999999
+    },
+    {
+        "angel": 0.65,
+        "width": 0.0171270279999999,
+        "depth": 0.0136265829999999
+    },
+    {
+        "angel": 0.66,
+        "width": 0.015985858,
+        "depth": 0.0136578159999999
+    },
+    {
+        "angel": 0.67,
+        "width": 0.014844108,
+        "depth": 0.0136833269999999
+    },
+    {
+        "angel": 0.68,
+        "width": 0.0137018479999999,
+        "depth": 0.0137031149999999
+    },
+    {
+        "angel": 0.69,
+        "width": 0.012558958,
+        "depth": 0.013717182
+    },
+    {
+        "angel": 0.7,
+        "width": 0.011416068,
+        "depth": 0.013725765
+    },
+    {
+        "angel": 0.71,
+        "width": 0.010273298,
+        "depth": 0.013728388
+    },
+    {
+        "angel": 0.72,
+        "width": 0.009130154,
+        "depth": 0.013725527
+    },
+    {
+        "angel": 0.73,
+        "width": 0.007987261,
+        "depth": 0.0137165859999999
+    },
+    {
+        "angel": 0.74,
+        "width": 0.0068444979999999,
+        "depth": 0.0137022799999999
+    },
+    {
+        "angel": 0.75,
+        "width": 0.0057022379999999,
+        "depth": 0.0136820149999999
+    },
+    {
+        "angel": 0.76,
+        "width": 0.004560568,
+        "depth": 0.013656266
+    },
+    {
+        "angel": 0.77,
+        "width": 0.0034195709999999,
+        "depth": 0.0136245559999999
+    },
+    {
+        "angel": 0.78,
+        "width": 0.0022787859999999,
+        "depth": 0.0135873629999999
+    },
+    {
+        "angel": 0.79,
+        "width": 0.001139223,
+        "depth": 0.013544448
+    },
+    {
+        "angel": 0.8,
+        "width": 0.0,
+        "depth": 0.0134955719999999
+    }
+]
\ No newline at end of file
diff --git a/starvla_inference_server.py b/starvla_inference_server.py
index 0f432de..a91acea 100644
--- a/starvla_inference_server.py
+++ b/starvla_inference_server.py
@@ -114,7 +114,7 @@ class StarvlaInferenceServer:
             actions = actions[0] # (16, 10)
         return {"ee_delta_position_chunks": actions[:, :3].tolist(), 
                 "ee_delta_rot6d_chunks": actions[:, 3:9].tolist(),
-                "gripper_chunks": actions[:, 9:10].tolist()}
+                "gripper_width_chunks": actions[:, 9:10].tolist()}
 
     def register_routes(self):
 
diff --git a/starvla_policy.py b/starvla_policy.py
index 3f03a98..e7844f5 100644
--- a/starvla_policy.py
+++ b/starvla_policy.py
@@ -1,8 +1,13 @@
 import pickle
+import time
+import json
+import numpy as np
+import requests
 
 from joysim.annotations.config_class import configclass, field
 from joysim.annotations.stereotype import stereotype
-from joysim.controllers.motion_plan_controller import MotionPlanController
+from joysim.controllers.spawnable_controller import SpawnableController
+from joysim.core.robots.configs.actuator_configs.grippers import GripperDriveJointConfig
 from joysim.extensions.benchmark.action import RobotAction
 from joysim.extensions.benchmark.benchmark import (
     BenchmarkAction,
@@ -12,15 +17,13 @@ from joysim.extensions.benchmark.benchmark import (
 from joysim.extensions.benchmark.policy import Policy, PolicyConfig
 from joysim.utils.log import Log
 from joysim.utils.pose import Pose
-import numpy as np
-import requests
-import time
 
 @configclass
 @stereotype.register_config("starvla")
 class StarvlaPolicyConfig(PolicyConfig):
 
     robot_name: str = field(default="None", required=True, comment="The name of the robot")
+    gripper_width_mapper_file: str = field(default="", required=True, comment="The file path to the gripper width mapper")
     sensor_names: list[str] = field(
         default=["Hand_Camera", "Left_Camera", "Right_Camera"],
         required=True,
@@ -55,14 +58,24 @@ class StarvlaPolicy(Policy):
         self.sensor_names = config.sensor_names
         self.server_url = config.server_url
         self.prompt = config.prompt
-
+        self.gripper_width_mapper = json.load(open(config.gripper_width_mapper_file, "r"))
     def reset(self) -> None:
         self.current_ee_position_state = None
         self.current_ee_rot6d_state = None
-        self.current_gripper_state = None
+        self.current_gripper_width = None
         self.current_chunk_id = 0
         self.current_chunk_result = None
         self.run_trunk_size = self.config.run_trunk_size
+        self.drive_joints: dict[str, GripperDriveJointConfig] = SpawnableController.control_robot(self.robot_name, "get_gripper_drive_joints").unwrap()
+        for joint_name, joint_config in self.drive_joints.items():
+            SpawnableController.control_robot(self.robot_name, "set_joint_stiffness", parameters={"joint_names": [joint_name], "stiffness": joint_config.position_control_stiffness}).unwrap()
+            SpawnableController.control_robot(self.robot_name, "set_joint_damping", parameters={"joint_names": [joint_name], "damping": joint_config.position_control_damping}).unwrap()
+            SpawnableController.control_robot(self.robot_name, "set_joint_effort_limit", parameters={"joint_names": [joint_name], "effort_limit": joint_config.position_control_effort_limit}).unwrap()
+        self.max_width = float("-inf")
+        self.min_width = float("inf")
+        for entry in self.gripper_width_mapper:
+            self.max_width = max(self.max_width, entry["width"])
+            self.min_width = min(self.min_width, entry["width"])
 
     def warmup(self, benchmark_observation: BenchmarkObservation) -> None:
         Log.info(f"Waiting for StarVLA inference server to be ready...")
@@ -87,10 +100,10 @@ class StarvlaPolicy(Policy):
 
     def preprocess_observation(self, benchmark_observation: BenchmarkObservation) -> dict:
         robot_obs = benchmark_observation.get_robot_observations(self.robot_name)["robot_data"]
-        ee_pose_base = robot_obs["ee_pose_base"]
+        ee_pose_base = robot_obs["ee_pose"]["base_frame"]
         ee_position, ee_rot6d = ee_pose_base["position"],ee_pose_base["rot6d"]
-        gripper = 0.0 if robot_obs["gripper_state"]["opened"] else 1.0
-        state = np.concatenate([ee_position,ee_rot6d,np.array([gripper])])
+        normalized_gripper_width = self.__map_joint_position_to_normalized_width(robot_obs["gripper_drive_state"]["position"][0])
+        state = np.concatenate([ee_position,ee_rot6d,np.array([normalized_gripper_width])])
         rgb_data = {}
         for sensor_name in self.sensor_names:
             sensor_obs = benchmark_observation.get_sensor_observations(sensor_name)
@@ -103,7 +116,7 @@ class StarvlaPolicy(Policy):
         if self.current_chunk_result is None:
             self.current_ee_position_state = np.array(observation["state"][:3]).astype(np.float64)
             self.current_ee_rot6d_state = np.array(observation["state"][3:9]).astype(np.float64)
-            self.current_gripper_state = np.array([observation["state"][9]])
+            self.current_gripper_width = np.array([observation["state"][9]])
             payload = pickle.dumps(observation)
             response = requests.post(
                 f"{self.server_url}/inference",
@@ -122,6 +135,26 @@ class StarvlaPolicy(Policy):
             result = self.current_chunk_result
 
         return result
+    def __map_joint_position_to_normalized_width(self, joint_position: float) -> float:
+        for entry in self.gripper_width_mapper:
+            if round(entry["angel"], 2) == round(joint_position, 2):
+                return 1-(entry["width"] - self.min_width) / (self.max_width - self.min_width)
+            
+        raise ValueError(f"Joint position {joint_position} not found in gripper width mapper")
+
+    def __map_gripper_joint_position(self, normalized_gripper_width: float) -> float:
+        
+        joint_positions = []
+        joint_names = []
+        if normalized_gripper_width > 0.5:
+            for joint_name, joint_config in self.drive_joints.items():
+                joint_positions.append(joint_config.close_position)
+                joint_names.append(joint_name)
+        else:
+            for joint_name, joint_config in self.drive_joints.items():
+                joint_positions.append(joint_config.open_position)
+                joint_names.append(joint_name)
+        return joint_positions, joint_names
 
     def postprocess_action(self, action: dict) -> BenchmarkAction:
         benchmark_action = BenchmarkAction()
@@ -129,24 +162,24 @@ class StarvlaPolicy(Policy):
         # get base frame end-effector pose
         delta_ee_pose = Pose(position=action["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action["ee_delta_rot6d_chunks"][self.current_chunk_id])
         curr_state_ee_pose = Pose(position=self.current_ee_position_state, rot6d=self.current_ee_rot6d_state)
-        Log.debug(f"trunck_id: {self.current_chunk_id}, curr_state_ee_pose: {curr_state_ee_pose}")
         curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state
-        ik_result = MotionPlanController.solve_ik(
-            robot_name=self.robot_name,
-            base_frame_ee_pose=curr_action_ee_pose,
-        ).unwrap()
-        if not ik_result["success"]:
-            Log.error(f"IK failed. Ignore this action.")
-            return benchmark_action
+        curr_action_gripper_width = action["gripper_width_chunks"][self.current_chunk_id]
         
-        joint_names = ik_result["result"]["plannable_joint_names"]
-        joint_positions = ik_result["result"]["plannable_joint_positions"][0]
+        gripper_joint_positions, gripper_joint_names = self.__map_gripper_joint_position(curr_action_gripper_width[0])
+        Log.debug(f"action_gripper_joint_positions: {gripper_joint_positions}, action_normalized_gripper_width: {round(curr_action_gripper_width[0], 2)}")
         benchmark_action.add_robot_action(
             RobotAction(
                 control_mode=ControlMode.POSITION,
                 robot_name=self.robot_name,
-                joint_names=joint_names,
-                joint_positions=joint_positions
+                joint_names=gripper_joint_names,
+                joint_positions=gripper_joint_positions
+            )
+        )
+        benchmark_action.add_robot_action(
+            RobotAction(
+                control_mode=ControlMode.EE_POSE,
+                robot_name=self.robot_name,
+                ee_pose=curr_action_ee_pose
             )
         )
         self.current_chunk_id += 1