2 Commits

Author SHA1 Message Date
Zhu Juan
e99f6a0589 add debug code for replay 2026-06-15 14:58:28 +08:00
Zhu Juan
cbf0edfcae update the image size for inference 2026-06-12 14:36:13 +08:00
3 changed files with 51 additions and 13 deletions

View File

@@ -63,9 +63,9 @@ scene:
source: local source: local
init_joint_position: init_joint_position:
torso_joint1: -1.1 torso_joint1: 0.0
torso_joint2: 2.4 torso_joint2: 0.0
torso_joint3: 1.0 torso_joint3: 0.0
torso_joint4: 0.0 torso_joint4: 0.0
left_arm_joint1: -0.2 left_arm_joint1: -0.2
left_arm_joint2: 0.05 left_arm_joint2: 0.05
@@ -160,10 +160,10 @@ scene:
front_camera: front_camera:
name: front_camera name: front_camera
stereotype: camera stereotype: camera
position: [2, -4.1, 1.5] position: [2, -4.1, 1.8]
look_at: look_at:
is_point: true is_point: true
look_at_point: [0.0, -4.1, 0.6] look_at_point: [0.0, -4.1, 1.2]
data_types: [rgb] data_types: [rgb]
width: 1280 width: 1280
height: 720 height: 720
@@ -172,10 +172,10 @@ scene:
left_camera: left_camera:
name: left_camera name: left_camera
stereotype: camera stereotype: camera
position: [-0.58554, -2.0, 1.2] position: [-0.58554, -2.0, 1.8]
look_at: look_at:
is_point: true is_point: true
look_at_point: [0.0, -4.1, 0.6] look_at_point: [0.0, -4.1, 1.2]
data_types: [rgb] data_types: [rgb]
width: 1280 width: 1280
height: 720 height: 720
@@ -184,10 +184,10 @@ scene:
right_camera: right_camera:
name: right_camera name: right_camera
stereotype: camera stereotype: camera
position: [0.36816, -5.36, 1.53] position: [0.36816, -5.36, 1.8]
look_at: look_at:
is_point: true is_point: true
look_at_point: [0.0, -4.1, 0.6] look_at_point: [0.0, -4.1, 1.2]
data_types: [rgb] data_types: [rgb]
width: 1280 width: 1280
height: 720 height: 720

View File

@@ -129,7 +129,7 @@ class StarvlaInferenceServer:
def inference(self, observation: dict) -> dict: def inference(self, observation: dict) -> dict:
img_head, state_vec, prompt = \ img_head, state_vec, prompt = \
self.parse_observation(observation) self.parse_observation(observation, target_size=(410, 224))
vla_input = { vla_input = {
# "batch_images": [[img_left, img_right, img_wrist]], # "batch_images": [[img_left, img_right, img_wrist]],
"image": [img_head], "image": [img_head],

View File

@@ -2,6 +2,7 @@ import pickle
import time import time
import json import json
import numpy as np import numpy as np
from scipy.spatial.transform import Rotation as R
import requests import requests
from fastsim.annotations.config_class import configclass, field from fastsim.annotations.config_class import configclass, field
@@ -70,6 +71,11 @@ class StarvlaPolicy(Policy):
self.visualize_action_ee_pose = config.visualize_action_ee_pose self.visualize_action_ee_pose = config.visualize_action_ee_pose
self.visualize_state_ee_pose = config.visualize_state_ee_pose self.visualize_state_ee_pose = config.visualize_state_ee_pose
self.visualize_bounding_box_targets = list(config.visualize_bounding_box_targets or []) self.visualize_bounding_box_targets = list(config.visualize_bounding_box_targets or [])
# prevent circular import
import pandas as pd
df_data = pd.read_parquet("/home/zhiyuan/zhujuan/datasets/add_remove_lid_15fps_10epi/data/chunk-000/file-000.parquet")
self.dummy_data = np.array(df_data.groupby('episode_index')['observation.state'].apply(list).to_dict()[0])
self.dummy_data_idx = 0
def reset(self) -> None: def reset(self) -> None:
self.current_state = {} self.current_state = {}
@@ -167,26 +173,58 @@ class StarvlaPolicy(Policy):
def postprocess_action(self, action: dict) -> BenchmarkAction: def postprocess_action(self, action: dict) -> BenchmarkAction:
benchmark_action = BenchmarkAction() benchmark_action = BenchmarkAction()
read_chunk_size = 1
dummy_action = self.dummy_data[self.dummy_data_idx:(self.dummy_data_idx + read_chunk_size)]
if self.dummy_data_idx + read_chunk_size >= self.dummy_data.shape[0]:
self.dummy_data_idx = 0
exit(0)
else:
self.dummy_data_idx += read_chunk_size
read_chunk_id = 0
print(f'{self.current_chunk_id=}, {self.dummy_data_idx = }, {read_chunk_id=}')
time.sleep(1.0)
left_rpy_state = dummy_action[:, 3:6] # (3,)
right_rpy_state = dummy_action[:, 31:34] # (3,)
left_rot_state = R.from_euler('xyz', left_rpy_state).as_matrix()
right_rot_state = R.from_euler('xyz', right_rpy_state).as_matrix()
left_state_rot6d = np.concatenate([left_rot_state[:, 0], left_rot_state[:, 1]], axis=-1) # (6,)
right_state_rot6d = np.concatenate([right_rot_state[:, 0], right_rot_state[:, 1]], axis=-1) # (6,)
read_state = {"left_arm": {
"ee_position_chunks": dummy_action[:, :3].tolist(),
"ee_rot6d_chunks": left_state_rot6d.tolist(),
"finger_chunks": dummy_action[:, 6:28].tolist()},
"right_arm": {
"ee_position_chunks": dummy_action[:, 28:31].tolist(),
"ee_rot6d_chunks": right_state_rot6d.tolist(),
"finger_chunks": dummy_action[:, 34:56].tolist()}
}
for arm_key in self.robot['arms'].keys(): for arm_key in self.robot['arms'].keys():
action_arm = action[arm_key] action_arm = action[arm_key]
delta_ee_pose = Pose(position=action_arm["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action_arm["ee_delta_rot6d_chunks"][self.current_chunk_id]) delta_ee_pose = Pose(position=action_arm["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action_arm["ee_delta_rot6d_chunks"][self.current_chunk_id])
curr_state_ee_pose = Pose(position=self.current_state[arm_key]["ee_pos"], rot6d=self.current_state[arm_key]["ee_rot6d"]) curr_state_ee_pose = Pose(position=self.current_state[arm_key]["ee_pos"], rot6d=self.current_state[arm_key]["ee_rot6d"])
curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state
finger_joint_qpos = action_arm["finger_chunks"][self.current_chunk_id] + self.current_state[arm_key]["finger_qpos"] finger_joint_qpos = action_arm["finger_chunks"][self.current_chunk_id] + self.current_state[arm_key]["finger_qpos"]
joint_names = self.left_hand_joints if arm_key == "left_arm" else self.right_hand_joints joint_names = self.left_hand_joints if arm_key == "left_arm" else self.right_hand_joints
state_arm = read_state[arm_key]
benchmark_action.add_robot_action( benchmark_action.add_robot_action(
RobotAction( RobotAction(
control_mode=ControlMode.POSITION, control_mode=ControlMode.POSITION,
robot_name=self.robot_name, robot_name=self.robot_name,
joint_names=joint_names, joint_names=joint_names,
joint_positions=finger_joint_qpos # joint_positions=finger_joint_qpos
joint_positions=state_arm["finger_chunks"][read_chunk_id]
) )
) )
benchmark_action.add_robot_action( benchmark_action.add_robot_action(
RobotAction( RobotAction(
control_mode=ControlMode.EE_POSE, control_mode=ControlMode.EE_POSE,
robot_name=self.robot_name, robot_name=self.robot_name,
ee_pose=curr_action_ee_pose, # ee_pose=curr_action_ee_pose,
ee_pose=Pose(position=state_arm["ee_position_chunks"][read_chunk_id], rot6d=state_arm["ee_rot6d_chunks"][read_chunk_id]),
arm_name=arm_key arm_name=arm_key
) )
) )