Compare commits

...

3 Commits

Author SHA1 Message Date
Zhu Juan
e99f6a0589 add debug code for replay 2026-06-15 14:58:28 +08:00
Zhu Juan
cbf0edfcae update the image size for inference 2026-06-12 14:36:13 +08:00
Zhu Juan
87f2a2abfc update benchmark view point 2026-05-25 18:12:37 +08:00
3 changed files with 69 additions and 20 deletions

View File

@@ -29,7 +29,7 @@ scene:
- 0.001
- 0.001
position:
- 0.419859
- 0.15
- -4.02430000000001
- 0.510259093
rotation:
@@ -47,7 +47,7 @@ scene:
- 0.001
- 0.001
position:
- 0.419859
- 0.15
- -4.152430000000001
- 0.510259093
quaternion: [1, 0, 0, 0]
@@ -57,7 +57,7 @@ scene:
r1pro_dex:
name: r1pro_dex
asset_path: asset://robots/r1pro/r1pro_dex.usd
position: [-0.5, -4.0, 0.0]
position: [-0.2, -4.1, 0.0]
rotation: [1, 0, 0, 0]
stereotype: modular_robot
source: local
@@ -67,18 +67,18 @@ scene:
torso_joint2: 0.0
torso_joint3: 0.0
torso_joint4: 0.0
left_arm_joint1: 0.0
left_arm_joint2: 0.5
left_arm_joint1: -0.2
left_arm_joint2: 0.05
left_arm_joint3: 0.0
left_arm_joint4: -1.0
left_arm_joint5: 0.0
left_arm_joint6: 0.0
left_arm_joint7: 0.0
right_arm_joint1: 0.0
right_arm_joint2: -0.5
right_arm_joint1: -0.2
right_arm_joint2: -0.05
right_arm_joint3: 0.0
right_arm_joint4: -1.0
right_arm_joint5: 0.0
right_arm_joint5: 0.1
right_arm_joint6: 0.0
right_arm_joint7: 0.0
@@ -137,22 +137,33 @@ scene:
head_camera:
name: head_camera
stereotype: camera
position: [-0.4, -4.0, 1.2]
look_at:
is_point: true
look_at_point: [0.4200, -4.1530, 0.4885]
data_types: [rgb]
width: 1280
height: 720
camera_model: pinhole
fix_camera: true
focal_length: 2.8
horizontal_aperture: 4.890881131191918
vertical_aperture: 2.7608816125932627
convention: opengl
attach_to:
target_name: r1pro_dex
is_articulation_part: true
articulation_part_name: zed_link
create_fixed_joint: true
local_position: [0.0, 0.0, 0.0]
local_rotation:
- 0.33
- 1.0
- -0.0
- 0.0
front_camera:
name: front_camera
stereotype: camera
position: [1.0, -4.0, 1.5]
position: [2, -4.1, 1.8]
look_at:
is_point: true
look_at_point: [-1.0, -4.0, 1.2]
look_at_point: [0.0, -4.1, 1.2]
data_types: [rgb]
width: 1280
height: 720
@@ -161,7 +172,7 @@ scene:
left_camera:
name: left_camera
stereotype: camera
position: [-1.0, -1.0, 1.2]
position: [-0.58554, -2.0, 1.8]
look_at:
is_point: true
look_at_point: [0.0, -4.1, 1.2]
@@ -173,7 +184,7 @@ scene:
right_camera:
name: right_camera
stereotype: camera
position: [-1.0, -6.5, 1.2]
position: [0.36816, -5.36, 1.8]
look_at:
is_point: true
look_at_point: [0.0, -4.1, 1.2]

View File

@@ -129,7 +129,7 @@ class StarvlaInferenceServer:
def inference(self, observation: dict) -> dict:
img_head, state_vec, prompt = \
self.parse_observation(observation)
self.parse_observation(observation, target_size=(410, 224))
vla_input = {
# "batch_images": [[img_left, img_right, img_wrist]],
"image": [img_head],

View File

@@ -2,6 +2,7 @@ import pickle
import time
import json
import numpy as np
from scipy.spatial.transform import Rotation as R
import requests
from fastsim.annotations.config_class import configclass, field
@@ -70,6 +71,11 @@ class StarvlaPolicy(Policy):
self.visualize_action_ee_pose = config.visualize_action_ee_pose
self.visualize_state_ee_pose = config.visualize_state_ee_pose
self.visualize_bounding_box_targets = list(config.visualize_bounding_box_targets or [])
# prevent circular import
import pandas as pd
df_data = pd.read_parquet("/home/zhiyuan/zhujuan/datasets/add_remove_lid_15fps_10epi/data/chunk-000/file-000.parquet")
self.dummy_data = np.array(df_data.groupby('episode_index')['observation.state'].apply(list).to_dict()[0])
self.dummy_data_idx = 0
def reset(self) -> None:
self.current_state = {}
@@ -167,6 +173,35 @@ class StarvlaPolicy(Policy):
def postprocess_action(self, action: dict) -> BenchmarkAction:
benchmark_action = BenchmarkAction()
read_chunk_size = 1
dummy_action = self.dummy_data[self.dummy_data_idx:(self.dummy_data_idx + read_chunk_size)]
if self.dummy_data_idx + read_chunk_size >= self.dummy_data.shape[0]:
self.dummy_data_idx = 0
exit(0)
else:
self.dummy_data_idx += read_chunk_size
read_chunk_id = 0
print(f'{self.current_chunk_id=}, {self.dummy_data_idx = }, {read_chunk_id=}')
time.sleep(1.0)
left_rpy_state = dummy_action[:, 3:6] # (3,)
right_rpy_state = dummy_action[:, 31:34] # (3,)
left_rot_state = R.from_euler('xyz', left_rpy_state).as_matrix()
right_rot_state = R.from_euler('xyz', right_rpy_state).as_matrix()
left_state_rot6d = np.concatenate([left_rot_state[:, 0], left_rot_state[:, 1]], axis=-1) # (6,)
right_state_rot6d = np.concatenate([right_rot_state[:, 0], right_rot_state[:, 1]], axis=-1) # (6,)
read_state = {"left_arm": {
"ee_position_chunks": dummy_action[:, :3].tolist(),
"ee_rot6d_chunks": left_state_rot6d.tolist(),
"finger_chunks": dummy_action[:, 6:28].tolist()},
"right_arm": {
"ee_position_chunks": dummy_action[:, 28:31].tolist(),
"ee_rot6d_chunks": right_state_rot6d.tolist(),
"finger_chunks": dummy_action[:, 34:56].tolist()}
}
for arm_key in self.robot['arms'].keys():
action_arm = action[arm_key]
delta_ee_pose = Pose(position=action_arm["ee_delta_position_chunks"][self.current_chunk_id], rot6d=action_arm["ee_delta_rot6d_chunks"][self.current_chunk_id])
@@ -174,19 +209,22 @@ class StarvlaPolicy(Policy):
curr_action_ee_pose = curr_state_ee_pose * delta_ee_pose # action2base = state2base * action2state
finger_joint_qpos = action_arm["finger_chunks"][self.current_chunk_id] + self.current_state[arm_key]["finger_qpos"]
joint_names = self.left_hand_joints if arm_key == "left_arm" else self.right_hand_joints
state_arm = read_state[arm_key]
benchmark_action.add_robot_action(
RobotAction(
control_mode=ControlMode.POSITION,
robot_name=self.robot_name,
joint_names=joint_names,
joint_positions=finger_joint_qpos
# joint_positions=finger_joint_qpos
joint_positions=state_arm["finger_chunks"][read_chunk_id]
)
)
benchmark_action.add_robot_action(
RobotAction(
control_mode=ControlMode.EE_POSE,
robot_name=self.robot_name,
ee_pose=curr_action_ee_pose,
# ee_pose=curr_action_ee_pose,
ee_pose=Pose(position=state_arm["ee_position_chunks"][read_chunk_id], rot6d=state_arm["ee_rot6d_chunks"][read_chunk_id]),
arm_name=arm_key
)
)