add debug code for replay

update the image size for inference
2026-06-15 14:58:28 +08:00 · 2026-06-12 14:36:13 +08:00
3 changed files with 51 additions and 13 deletions
--- a/benchmark.yaml
+++ b/benchmark.yaml
@@ -63,9 +63,9 @@ scene:
      source: local
      init_joint_position:
-        torso_joint1: -1.1
+        torso_joint1: 0.0
-        torso_joint2: 2.4
+        torso_joint2: 0.0
-        torso_joint3: 1.0
+        torso_joint3: 0.0
        torso_joint4: 0.0
        left_arm_joint1: -0.2
        left_arm_joint2: 0.05
@@ -160,10 +160,10 @@ scene:
    front_camera:
      name: front_camera
      stereotype: camera
-      position: [2, -4.1, 1.5]
+      position: [2, -4.1, 1.8]
      look_at:
        is_point: true
-        look_at_point: [0.0, -4.1, 0.6]
+        look_at_point: [0.0, -4.1, 1.2]
      data_types: [rgb]
      width: 1280
      height: 720
@@ -172,10 +172,10 @@ scene:
    left_camera:
      name: left_camera
      stereotype: camera
-      position: [-0.58554, -2.0, 1.2]
+      position: [-0.58554, -2.0, 1.8]
      look_at:
        is_point: true
-        look_at_point: [0.0, -4.1, 0.6]
+        look_at_point: [0.0, -4.1, 1.2]
      data_types: [rgb]
      width: 1280
      height: 720
@@ -184,10 +184,10 @@ scene:
    right_camera:
      name: right_camera
      stereotype: camera
-      position: [0.36816, -5.36, 1.53]
+      position: [0.36816, -5.36, 1.8]
      look_at:
        is_point: true
-        look_at_point: [0.0, -4.1, 0.6]
+        look_at_point: [0.0, -4.1, 1.2]
      data_types: [rgb]
      width: 1280
      height: 720
--- a/starvla_inference_server.py
+++ b/starvla_inference_server.py
@@ -129,7 +129,7 @@ class StarvlaInferenceServer:
    def inference(self, observation: dict) -> dict:
        img_head, state_vec, prompt = \
-            self.parse_observation(observation)
+            self.parse_observation(observation, target_size=(410, 224))
        vla_input = {
            # "batch_images": [[img_left, img_right, img_wrist]],
            "image": [img_head],
--- a/starvla_policy.py
+++ b/starvla_policy.py
@@ -2,6 +2,7 @@ import pickle
 import time
 import json
 import numpy as np
 from scipy.spatial.transform import Rotation as R
 import requests
 from fastsim.annotations.config_class import configclass, field
@@ -70,6 +71,11 @@ class StarvlaPolicy(Policy):
        self.visualize_action_ee_pose = config.visualize_action_ee_pose
        self.visualize_state_ee_pose = config.visualize_state_ee_pose
        self.visualize_bounding_box_targets = list(config.visualize_bounding_box_targets or [])
        # prevent circular import
        import pandas as pd
        df_data = pd.read_parquet("/home/zhiyuan/zhujuan/datasets/add_remove_lid_15fps_10epi/data/chunk-000/file-000.parquet")
        self.dummy_data = np.array(df_data.groupby('episode_index')['observation.state'].apply(list).to_dict()[0])
        self.dummy_data_idx = 0
    def reset(self) -> None:
        self.current_state = {}
@@ -167,26 +173,58 @@ class StarvlaPolicy(Policy):
    def postprocess_action(self, action: dict) -> BenchmarkAction:
        benchmark_action = BenchmarkAction()
        read_chunk_size = 1
        dummy_action = self.dummy_data[self.dummy_data_idx:(self.dummy_data_idx + read_chunk_size)]
        if self.dummy_data_idx + read_chunk_size >= self.dummy_data.shape[0]:
            self.dummy_data_idx = 0
            exit(0)
        else:
            self.dummy_data_idx += read_chunk_size
        read_chunk_id = 0
        print(f'{self.current_chunk_id=}, {self.dummy_data_idx = }, {read_chunk_id=}')
        time.sleep(1.0)
        left_rpy_state = dummy_action[:, 3:6]         # (3,)
        right_rpy_state = dummy_action[:, 31:34]         # (3,)
        left_rot_state = R.from_euler('xyz', left_rpy_state).as_matrix()
        right_rot_state = R.from_euler('xyz', right_rpy_state).as_matrix()
        left_state_rot6d = np.concatenate([left_rot_state[:, 0], left_rot_state[:, 1]], axis=-1) # (6,)
        right_state_rot6d = np.concatenate([right_rot_state[:, 0], right_rot_state[:, 1]], axis=-1) # (6,)
        read_state = {"left_arm": {
                    "ee_position_chunks": dummy_action[:, :3].tolist(),
                    "ee_rot6d_chunks": left_state_rot6d.tolist(),
                    "finger_chunks": dummy_action[:, 6:28].tolist()},
                "right_arm": {
                    "ee_position_chunks": dummy_action[:, 28:31].tolist(),
                    "ee_rot6d_chunks": right_state_rot6d.tolist(),
                    "finger_chunks": dummy_action[:, 34:56].tolist()}
                }
        for arm_key in self.robot['arms'].keys():
            action_arm = action[arm_key]
            delta_ee_pose = Pose(position=action_arm["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action_arm["ee_delta_rot6d_chunks"][self.current_chunk_id])
            curr_state_ee_pose = Pose(position=self.current_state[arm_key]["ee_pos"], rot6d=self.current_state[arm_key]["ee_rot6d"])
            curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state
            finger_joint_qpos = action_arm["finger_chunks"][self.current_chunk_id] + self.current_state[arm_key]["finger_qpos"] 
-            joint_names = self.left_hand_joints if arm_key == "left_arm" else self.right_hand_joints    
+            joint_names = self.left_hand_joints if arm_key == "left_arm" else self.right_hand_joints
            state_arm = read_state[arm_key]
            benchmark_action.add_robot_action(
                RobotAction(
                    control_mode=ControlMode.POSITION,
                    robot_name=self.robot_name,
                    joint_names=joint_names,
-                    joint_positions=finger_joint_qpos
+                    # joint_positions=finger_joint_qpos
                    joint_positions=state_arm["finger_chunks"][read_chunk_id]
                )
            )
            benchmark_action.add_robot_action(
                RobotAction(
                    control_mode=ControlMode.EE_POSE,
                    robot_name=self.robot_name,
-                    ee_pose=curr_action_ee_pose,
+                    # ee_pose=curr_action_ee_pose,
                    ee_pose=Pose(position=state_arm["ee_position_chunks"][read_chunk_id], rot6d=state_arm["ee_rot6d_chunks"][read_chunk_id]),
                    arm_name=arm_key
                )
            )
Author	SHA1	Message	Date
Zhu Juan	e99f6a0589	add debug code for replay	2026-06-15 14:58:28 +08:00
Zhu Juan	cbf0edfcae	update the image size for inference	2026-06-12 14:36:13 +08:00