constrained planning, robot segmentation

This commit is contained in:
Balakumar Sundaralingam
2024-02-22 21:45:47 -08:00
parent 88eac64edc
commit bafdf80c05
102 changed files with 12440 additions and 8112 deletions

View File

@@ -9,7 +9,22 @@
# its affiliates is strictly prohibited.
#
"""CuRobo package."""
"""
cuRobo package is split into several modules:
- :mod:`curobo.opt` contains optimization solvers.
- :mod:`curobo.cuda_robot_model` contains robot kinematics.
- :mod:`curobo.curobolib` contains the cuda kernels and python bindings for them.
- :mod:`curobo.geom` contains geometry processing, collision checking and frame transforms.
- :mod:`curobo.graph` contains geometric planning with graph search methods.
- :mod:`curobo.rollout` contains methods that map actions to costs. This class wraps instances of
:mod:`curobo.cuda_robot_model` and :mod:`geom` to compute costs given trajectory of actions.
- :mod:`curobo.util` contains utility methods.
- :mod:`curobo.wrap` adds the user-level api for task programming. Includes implementation of
collision-free reacher and batched robot world collision checking.
- :mod:`curobo.types` contains custom dataclasses for common data types in robotics, including
:meth:`types.state.JointState`, :meth:`types.camera.CameraObservation`, :meth:`types.math.Pose`.
"""
# NOTE (roflaherty): This is inspired by how matplotlib does creates its version value.

View File

@@ -172,7 +172,7 @@
<dynamics damping="10.0"/>
<limit effort="12" lower="-2.8973" upper="2.8973" velocity="2.6100"/>
</joint>
<!--
<link name="panda_link8"/>
<joint name="panda_joint8" type="fixed">
<origin rpy="0 0 0" xyz="0 0 0.107"/>
@@ -180,18 +180,11 @@
<child link="panda_link8"/>
<axis xyz="0 0 0"/>
</joint>
Removing this joint seems to help with some stability things
-->
<joint name="panda_hand_joint" type="fixed">
<!--
<parent link="panda_link8"/>
-->
<parent link="panda_link7"/>
<child link="panda_hand"/>
<origin rpy="0 0 -0.785398163397" xyz="0 0 0.107"/>
<!--
<origin rpy="0 0 -0.785398163397" xyz="0 0 0"/>
-->
</joint>
<link name="panda_hand">
<visual>

View File

@@ -13,7 +13,7 @@ robot_cfg:
kinematics:
use_usd_kinematics: False
isaac_usd_path: "/Isaac/Robots/Franka/franka.usd"
usd_path: "robot/non_shipping/franka/franka_panda_meters.usda"
usd_path: "robot/franka/franka_panda.usda"
usd_robot_root: "/panda"
usd_flip_joints: ["panda_joint1","panda_joint2","panda_joint3","panda_joint4", "panda_joint5",
"panda_joint6","panda_joint7","panda_finger_joint1", "panda_finger_joint2"]
@@ -32,7 +32,7 @@ robot_cfg:
usd_flip_joint_limits: ["panda_finger_joint2"]
urdf_path: "robot/franka_description/franka_panda.urdf"
asset_root_path: "robot/franka_description"
base_link: "base_link"
base_link: "panda_link0"
ee_link: "panda_hand"
collision_link_names:
[
@@ -50,7 +50,7 @@ robot_cfg:
"attached_object",
]
collision_spheres: "spheres/franka_mesh.yml"
collision_sphere_buffer: 0.0025
collision_sphere_buffer: 0.004 # 0.0025
extra_collision_spheres: {"attached_object": 4}
use_global_cumul: True
self_collision_ignore:

View File

@@ -109,7 +109,7 @@ robot_cfg:
"base_x", "base_y", "base_z",
"panda_joint1","panda_joint2","panda_joint3","panda_joint4", "panda_joint5",
"panda_joint6","panda_joint7","panda_finger_joint1", "panda_finger_joint2"]
retract_config: [0,0,0,0.0, -1.3, 0.0, -2.5, 0.0, 1.0, 0., 0.04, 0.04]
retract_config: [0,0,0,0, -1.3, 0.0, -2.5, 0.0, 1.0, 0., 0.04, 0.04]
null_space_weight: [1,1,1,1,1,1,1,1,1,1,1,1]
cspace_distance_weight: [1,1,1,1,1,1,1,1,1,1,1,1]
max_acceleration: 15.0 #15.0

View File

@@ -12,7 +12,7 @@
robot_cfg:
kinematics:
use_usd_kinematics: False
usd_path: "robot/non_shipping/techman/tm12.usd"
usd_path: "robot/techman/tm12.usd"
usd_robot_root: "/tm12"
usd_flip_joints:
{

View File

@@ -49,7 +49,7 @@ robot_cfg:
- "center": [-0.055, 0.008, 0.14]
"radius": 0.07
- "center": [-0.001, -0.002, 0.143]
"radius": 0.076
"radius": 0.08
forearm_link:
- "center": [-0.01, 0.002, 0.031]
"radius": 0.072

View File

@@ -31,22 +31,23 @@ model:
cost:
pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] # orientation, position for all timesteps
run_vec_weight: [1.00,1.00,1.00,1.0,1.0,1.0] # running weight orientation, position
weight: [2000,500000.0,30,50] #[150.0, 2000.0, 30, 40]
run_vec_weight: [0.00,0.00,0.00,0.0,0.0,0.0] # running weight orientation, position
weight: [2000,500000.0,30,50]
vec_convergence: [0.0,0.0] # orientation, position, orientation metric activation, position metric activation
terminal: True
run_weight: 0.0 #0.05
run_weight: 1.0
use_metric: True
link_pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] # orientation, position for all timesteps
run_vec_weight: [1.00,1.00,1.00,1.0,1.0,1.0] # running weight orientation, position
run_vec_weight: [0.00,0.00,0.00,0.0,0.0,0.0] # running weight orientation, position
weight: [2000,500000.0,30,50] #[150.0, 2000.0, 30, 40]
vec_convergence: [0.0,0.0] # orientation, position, orientation metric activation, position metric activation
terminal: True
run_weight: 0.0
run_weight: 0.001
use_metric: True
cspace_cfg:
weight: 10000.0
terminal: True
@@ -54,7 +55,7 @@ cost:
bound_cfg:
weight: [5000.0, 50000.0, 50000.0, 50000.0] # needs to be 3 values
smooth_weight: [0.0,2000.0, 50.0, 0.0] # [vel, acc, jerk,]
smooth_weight: [0.0,5000.0, 50.0, 0.0] # [vel, acc, jerk,]
run_weight_velocity: 0.0
run_weight_acceleration: 1.0
run_weight_jerk: 1.0
@@ -78,11 +79,11 @@ cost:
lbfgs:
n_iters: 400
n_iters: 400 # 400
inner_iters: 25
cold_start_n_iters: 200
cold_start_n_iters: null
min_iters: 25
line_search_scale: [0.01, 0.3,0.7,1.0] # #
line_search_scale: [0.01, 0.3, 0.7,1.0] # #
fixed_iters: True
cost_convergence: 0.01
cost_delta_threshold: 1.0
@@ -90,7 +91,7 @@ lbfgs:
epsilon: 0.01
history: 15 #15
use_cuda_graph: True
n_envs: 1
n_problems: 1
store_debug: False
use_cuda_kernel: True
stable_mode: True

View File

@@ -0,0 +1,109 @@
##
## Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
##
## NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
## property and proprietary rights in and to this material, related
## documentation and any modifications thereto. Any use, reproduction,
## disclosure or distribution of this material and related documentation
## without an express license agreement from NVIDIA CORPORATION or
## its affiliates is strictly prohibited.
##
model:
horizon: 32
state_filter_cfg:
filter_coeff:
position: 1.0
velocity: 1.0
acceleration: 1.0
enable: False
dt_traj_params:
base_dt: 0.2
base_ratio: 1.0
max_dt: 0.2
vel_scale: 1.0
control_space: 'POSITION'
state_finite_difference_mode: "CENTRAL"
teleport_mode: False
return_full_act_buffer: True
cost:
pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] # orientation, position for all timesteps
run_vec_weight: [1.00,1.00,1.00,1.0,1.0,1.0] # running weight orientation, position
weight: [2000,500000.0,30,50] #[150.0, 2000.0, 30, 40]
vec_convergence: [0.0,0.0] # orientation, position, orientation metric activation, position metric activation
terminal: True
run_weight: 0.0 #0.05
use_metric: True
link_pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] # orientation, position for all timesteps
run_vec_weight: [1.00,1.00,1.00,1.0,1.0,1.0] # running weight orientation, position
weight: [2000,500000.0,30,50] #[150.0, 2000.0, 30, 40]
vec_convergence: [0.0,0.0] # orientation, position, orientation metric activation, position metric activation
terminal: True
run_weight: 0.001
use_metric: True
cspace_cfg:
weight: 10000.0
terminal: True
run_weight: 0.00 #1
bound_cfg:
weight: [5000.0, 50000.0, 50000.0, 50000.0] # needs to be 3 values
smooth_weight: [0.0,50000.0, 500.0, 0.0] # [vel, acc, jerk,]
run_weight_velocity: 0.0
run_weight_acceleration: 1.0
run_weight_jerk: 1.0
activation_distance: [0.1,0.1,0.1,10.0] # for position, velocity, acceleration and jerk
null_space_weight: [0.0]
primitive_collision_cfg:
weight: 1000000.0
use_sweep: True
sweep_steps: 6
classify: False
use_sweep_kernel: True
use_speed_metric: True
speed_dt: 0.01 # used only for speed metric
activation_distance: 0.025
self_collision_cfg:
weight: 5000.0
classify: False
lbfgs:
n_iters: 400 # 400
inner_iters: 25
cold_start_n_iters: 200
min_iters: 25
line_search_scale: [0.01, 0.3, 0.7,1.0] # #
fixed_iters: True
cost_convergence: 0.01
cost_delta_threshold: 1.0
cost_relative_threshold: 0.999 #0.999
epsilon: 0.01
history: 15 #15
use_cuda_graph: True
n_problems: 1
store_debug: False
use_cuda_kernel: True
stable_mode: True
line_search_type: "approx_wolfe"
use_cuda_line_search_kernel: True
use_cuda_update_best_kernel: True
use_temporal_smooth: False
sync_cuda_time: True
step_scale: 1.0
last_best: 10
use_coo_sparse: True
debug_info:
visual_traj : null #'ee_pos_seq'

View File

@@ -32,6 +32,7 @@ cost:
vec_convergence: [0.0, 0.00]
terminal: False
use_metric: True
run_weight: 1.0
link_pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
weight: [1000, 20000, 30, 50]
@@ -58,7 +59,7 @@ cost:
lbfgs:
n_iters: 100 #60
inner_iters: 25
cold_start_n_iters: 100
cold_start_n_iters: null
min_iters: 20
line_search_scale: [0.01, 0.3, 0.7, 1.0]
fixed_iters: True
@@ -69,7 +70,7 @@ lbfgs:
history: 6
horizon: 1
use_cuda_graph: True
n_envs: 1
n_problems: 1
store_debug: False
use_cuda_kernel: True
stable_mode: True

View File

@@ -83,7 +83,7 @@ lbfgs:
epsilon: 0.01
history: 15 #15
use_cuda_graph: True
n_envs: 1
n_problems: 1
store_debug: False
use_cuda_kernel: True
stable_mode: True

View File

@@ -31,11 +31,11 @@ model:
cost:
pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] # orientation, position for all timesteps
run_vec_weight: [1.00,1.00,1.00,1.0,1.0,1.0] # running weight orientation, position
run_vec_weight: [0.00,0.00,0.00,0.0,0.0,0.0] # running weight orientation, position
weight: [2000,50000.0,30,60] #[150.0, 2000.0, 30, 40]
vec_convergence: [0.0,0.0] # orientation, position, orientation metric activation, position metric activation
terminal: True
run_weight: 0.00
run_weight: 1.0
use_metric: True
link_pose_cfg:
@@ -44,7 +44,7 @@ cost:
weight: [2000,50000.0,30,50] #[150.0, 2000.0, 30, 40]
vec_convergence: [0.0,0.0] # orientation, position, orientation metric activation, position metric activation
terminal: True
run_weight: 0.0
run_weight: 0.001
use_metric: True
cspace_cfg:
@@ -54,7 +54,9 @@ cost:
bound_cfg:
weight: [5000.0, 50000.0, 50000.0,50000.0] # needs to be 3 values
#weight: [000.0, 000.0, 000.0,000.0]
smooth_weight: [0.0,10000.0,10.0, 0.0] # [vel, acc, jerk, alpha_v-not-used]
#smooth_weight: [0.0,0000.0,0.0, 0.0] # [vel, acc, jerk, alpha_v-not-used]
run_weight_velocity: 0.00
run_weight_acceleration: 1.0
run_weight_jerk: 1.0
@@ -79,9 +81,9 @@ cost:
lbfgs:
n_iters: 175 #175
n_iters: 125 #175
inner_iters: 25
cold_start_n_iters: 150
cold_start_n_iters: null
min_iters: 25
line_search_scale: [0.01,0.3,0.7,1.0] #[0.01,0.2, 0.3,0.5,0.7,0.9, 1.0] #
fixed_iters: True
@@ -91,7 +93,7 @@ lbfgs:
epsilon: 0.01
history: 15
use_cuda_graph: True
n_envs: 1
n_problems: 1
store_debug: False
use_cuda_kernel: True
stable_mode: True

View File

@@ -31,6 +31,7 @@ cost:
vec_convergence: [0.00, 0.000] # orientation, position
terminal: False
use_metric: True
run_weight: 1.0
link_pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
weight: [30, 50, 10, 10] #[20.0, 100.0]
@@ -56,7 +57,7 @@ mppi:
init_cov : 1.0 #0.15 #.5 #.5
gamma : 1.0
n_iters : 4
cold_start_n_iters: 4
cold_start_n_iters: null
step_size_mean : 0.9
step_size_cov : 0.1
beta : 0.01
@@ -69,12 +70,12 @@ mppi:
sample_mode : 'BEST'
base_action : 'REPEAT'
squash_fn : 'CLAMP'
n_envs : 1
n_problems : 1
use_cuda_graph : True
seed : 0
store_debug : False
random_mean : False
sample_per_env : True
sample_per_problem: True
sync_cuda_time : False
use_coo_sparse : True
sample_params:

View File

@@ -89,12 +89,12 @@ mppi:
sample_mode : 'BEST'
base_action : 'REPEAT'
squash_fn : 'CLAMP'
n_envs : 1
n_problems : 1
use_cuda_graph : True
seed : 0
store_debug : False
random_mean : True
sample_per_env : False
sample_per_problem: False
sync_cuda_time : True
use_coo_sparse : True
sample_params:

View File

@@ -30,17 +30,17 @@ model:
cost:
pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
run_vec_weight: [1.0,1.0,1.0,1.0,1.0,1.0] # running weight
run_vec_weight: [0.0,0.0,0.0,0.0,0.0,0.0] # running weight
weight: [250.0, 5000.0, 20, 20]
vec_convergence: [0.0,0.0,1000.0,1000.0]
terminal: True
run_weight: 0.00
run_weight: 1.0
use_metric: True
link_pose_cfg:
vec_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
run_vec_weight: [1.0,1.0,1.0,1.0,1.0,1.0] # running weight
weight: [250.0, 5000.0, 40, 40]
run_vec_weight: [0.00,0.00,0.00,0.0,0.0,0.0] # running weight orientation, position
weight: [0.0, 5000.0, 40, 40]
vec_convergence: [0.0,0.0,1000.0,1000.0]
terminal: True
run_weight: 0.00
@@ -54,10 +54,12 @@ cost:
bound_cfg:
weight: [0.1, 0.1,0.0,0.0]
activation_distance: [0.0,0.0,0.0,0.0] #-#0.01
#smooth_weight: [0.0,100.0,1.0,0.0]
smooth_weight: [0.0,20.0,0.0,0.0]
run_weight_velocity: 0.0
run_weight_acceleration: 1.0
run_weight_jerk: 1.0
null_space_weight: [0.00]
primitive_collision_cfg:
weight: 5000.0
@@ -80,7 +82,7 @@ mppi:
init_cov : 0.5
gamma : 1.0
n_iters : 2
cold_start_n_iters: 2
cold_start_n_iters: null
step_size_mean : 0.9
step_size_cov : 0.01
beta : 0.01
@@ -93,12 +95,12 @@ mppi:
sample_mode : 'BEST'
base_action : 'REPEAT'
squash_fn : 'CLAMP'
n_envs : 1
n_problems : 1
use_cuda_graph : True
seed : 0
store_debug : False
random_mean : True
sample_per_env : True
sample_per_problem: True
sync_cuda_time : False
use_coo_sparse : True
sample_params:

View File

@@ -11,4 +11,4 @@
cuboid:
table:
dims: [5.0, 5.0, 0.2] # x, y, z
pose: [0.0, 0.0, -0.1, 1, 0, 0, 0.0] # x, y, z, qx, qy, qz, qw
pose: [0.0, 0.0, -0.1, 1, 0, 0, 0.0] # x, y, z, qx, qy, qz, qw

View File

@@ -145,6 +145,9 @@ class CudaRobotGeneratorConfig:
#: cspace config
cspace: Union[None, CSpaceConfig, Dict[str, List[Any]]] = None
#: Enable loading meshes from kinematics parser.
load_meshes: bool = False
def __post_init__(self):
# add root path:
# Check if an external asset path is provided:
@@ -283,7 +286,10 @@ class CudaRobotGenerator(CudaRobotGeneratorConfig):
)
else:
self._kinematics_parser = UrdfKinematicsParser(
self.urdf_path, mesh_root=self.asset_root_path, extra_links=self.extra_links
self.urdf_path,
mesh_root=self.asset_root_path,
extra_links=self.extra_links,
load_meshes=self.load_meshes,
)
if self.lock_joints is None:

View File

@@ -30,7 +30,7 @@ from curobo.cuda_robot_model.types import (
SelfCollisionKinematicsConfig,
)
from curobo.curobolib.kinematics import get_cuda_kinematics
from curobo.geom.types import Sphere
from curobo.geom.types import Mesh, Sphere
from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.util.logger import log_error
@@ -390,4 +390,8 @@ class CudaRobotModel(CudaRobotModelConfig):
return self.kinematics_config.base_link
def update_kinematics_config(self, new_kin_config: KinematicsTensorConfig):
self.kinematics_config.copy(new_kin_config)
self.kinematics_config.copy_(new_kin_config)
@property
def retract_config(self):
return self.kinematics_config.cspace.retract_config

View File

@@ -23,7 +23,7 @@ from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.types.state import JointState
from curobo.types.tensor import T_DOF
from curobo.util.tensor_util import copy_if_not_none
from curobo.util.tensor_util import clone_if_not_none, copy_if_not_none
class JointType(Enum):
@@ -75,20 +75,28 @@ class JointLimits:
self.tensor_args,
)
def copy_(self, new_jl: JointLimits):
self.joint_names = new_jl.joint_names.copy()
self.position.copy_(new_jl.position)
self.velocity.copy_(new_jl.velocity)
self.acceleration.copy_(new_jl.acceleration)
self.effort = copy_if_not_none(new_jl.effort, self.effort)
return self
@dataclass
class CSpaceConfig:
joint_names: List[str]
retract_config: Optional[T_DOF] = None
cspace_distance_weight: Optional[T_DOF] = None
null_space_weight: Optional[T_DOF] = None # List[float]
null_space_weight: Optional[T_DOF] = None
tensor_args: TensorDeviceType = TensorDeviceType()
max_acceleration: Union[float, List[float]] = 10.0
max_jerk: Union[float, List[float]] = 500.0
velocity_scale: Union[float, List[float]] = 1.0
acceleration_scale: Union[float, List[float]] = 1.0
jerk_scale: Union[float, List[float]] = 1.0
position_limit_clip: Union[float, List[float]] = 0.01
position_limit_clip: Union[float, List[float]] = 0.0
def __post_init__(self):
if self.retract_config is not None:
@@ -147,12 +155,31 @@ class CSpaceConfig:
joint_names = [self.joint_names[n] for n in new_index]
self.joint_names = joint_names
def copy_(self, new_config: CSpaceConfig):
self.joint_names = new_config.joint_names.copy()
self.retract_config = copy_if_not_none(new_config.retract_config, self.retract_config)
self.null_space_weight = copy_if_not_none(
new_config.null_space_weight, self.null_space_weight
)
self.cspace_distance_weight = copy_if_not_none(
new_config.cspace_distance_weight, self.cspace_distance_weight
)
self.tensor_args = self.tensor_args
self.max_jerk = copy_if_not_none(new_config.max_jerk, self.max_jerk)
self.max_acceleration = copy_if_not_none(new_config.max_acceleration, self.max_acceleration)
self.velocity_scale = copy_if_not_none(new_config.velocity_scale, self.velocity_scale)
self.acceleration_scale = copy_if_not_none(
new_config.acceleration_scale, self.acceleration_scale
)
self.jerk_scale = copy_if_not_none(new_config.jerk_scale, self.jerk_scale)
return self
def clone(self) -> CSpaceConfig:
return CSpaceConfig(
joint_names=self.joint_names.copy(),
retract_config=copy_if_not_none(self.retract_config),
null_space_weight=copy_if_not_none(self.null_space_weight),
cspace_distance_weight=copy_if_not_none(self.cspace_distance_weight),
retract_config=clone_if_not_none(self.retract_config),
null_space_weight=clone_if_not_none(self.null_space_weight),
cspace_distance_weight=clone_if_not_none(self.cspace_distance_weight),
tensor_args=self.tensor_args,
max_jerk=self.max_jerk.clone(),
max_acceleration=self.max_acceleration.clone(),
@@ -215,12 +242,48 @@ class KinematicsTensorConfig:
cspace: Optional[CSpaceConfig] = None
base_link: str = "base_link"
ee_link: str = "ee_link"
reference_link_spheres: Optional[torch.Tensor] = None
def __post_init__(self):
if self.cspace is None and self.joint_limits is not None:
self.load_cspace_cfg_from_kinematics()
if self.joint_limits is not None and self.cspace is not None:
self.joint_limits = self.cspace.scale_joint_limits(self.joint_limits)
if self.link_spheres is not None and self.reference_link_spheres is None:
self.reference_link_spheres = self.link_spheres.clone()
def copy_(self, new_config: KinematicsTensorConfig):
self.fixed_transforms.copy_(new_config.fixed_transforms)
self.link_map.copy_(new_config.link_map)
self.joint_map.copy_(new_config.joint_map)
self.joint_map_type.copy_(new_config.joint_map_type)
self.store_link_map.copy_(new_config.store_link_map)
self.link_chain_map.copy_(new_config.link_chain_map)
self.joint_limits.copy_(new_config.joint_limits)
if new_config.link_spheres is not None and self.link_spheres is not None:
self.link_spheres.copy_(new_config.link_spheres)
if new_config.link_sphere_idx_map is not None and self.link_sphere_idx_map is not None:
self.link_sphere_idx_map.copy_(new_config.link_sphere_idx_map)
if new_config.link_name_to_idx_map is not None and self.link_name_to_idx_map is not None:
self.link_name_to_idx_map = new_config.link_name_to_idx_map.copy()
if (
new_config.reference_link_spheres is not None
and self.reference_link_spheres is not None
):
self.reference_link_spheres.copy_(new_config.reference_link_spheres)
self.base_link = new_config.base_link
self.ee_idx = new_config.ee_idx
self.ee_link = new_config.ee_link
self.debug = new_config.debug
self.cspace.copy_(new_config.cspace)
self.n_dof = new_config.n_dof
self.non_fixed_joint_names = new_config.non_fixed_joint_names
self.joint_names = new_config.joint_names
self.link_names = new_config.link_names
self.mesh_link_names = new_config.mesh_link_names
self.total_spheres = new_config.total_spheres
return self
def load_cspace_cfg_from_kinematics(self):
retract_config = (
@@ -270,6 +333,13 @@ class KinematicsTensorConfig:
link_sphere_index = self.get_sphere_index_from_link_name(link_name)
return self.link_spheres[link_sphere_index, :]
def get_reference_link_spheres(
self,
link_name: str,
):
link_sphere_index = self.get_sphere_index_from_link_name(link_name)
return self.reference_link_spheres[link_sphere_index, :]
def attach_object(
self,
sphere_radius: Optional[float] = None,
@@ -332,6 +402,19 @@ class KinematicsTensorConfig:
"""
return self.get_link_spheres(link_name).shape[0]
def disable_link_spheres(self, link_name: str):
if link_name not in self.link_name_to_idx_map.keys():
raise ValueError(link_name + " not found in spheres")
curr_spheres = self.get_link_spheres(link_name)
curr_spheres[:, 3] = -100.0
self.update_link_spheres(link_name, curr_spheres)
def enable_link_spheres(self, link_name: str):
if link_name not in self.link_name_to_idx_map.keys():
raise ValueError(link_name + " not found in spheres")
curr_spheres = self.get_reference_link_spheres(link_name)
self.update_link_spheres(link_name, curr_spheres)
@dataclass
class SelfCollisionKinematicsConfig:

View File

@@ -183,6 +183,7 @@ class UrdfKinematicsParser(KinematicsParser):
else:
# convert to list:
mesh_pose = Pose.from_matrix(mesh_pose).to_list()
return CuroboMesh(
name=link_name,
pose=mesh_pose,

View File

@@ -13,7 +13,6 @@ from typing import Dict, List, Optional, Tuple
# Third Party
import numpy as np
from pxr import Usd, UsdPhysics
# CuRobo
from curobo.cuda_robot_model.kinematics_parser import KinematicsParser, LinkParams
@@ -22,6 +21,15 @@ from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.util.logger import log_error
try:
# Third Party
from pxr import Usd, UsdPhysics
except ImportError:
raise ImportError(
"usd-core failed to import, install with pip install usd-core"
+ " NOTE: Do not install this if using with ISAAC SIM."
)
class UsdKinematicsParser(KinematicsParser):
"""An experimental kinematics parser from USD.

View File

@@ -16,43 +16,55 @@
// CUDA forward declarations
std::vector<torch::Tensor> self_collision_distance(
torch::Tensor out_distance, torch::Tensor out_vec,
torch::Tensor sparse_index,
const torch::Tensor robot_spheres, // batch_size x n_spheres x 4
const torch::Tensor collision_offset, // n_spheres x n_spheres
const torch::Tensor weight,
const torch::Tensor collision_matrix, // n_spheres x n_spheres
const torch::Tensor thread_locations, const int locations_size,
const int batch_size, const int nspheres, const bool compute_grad = false,
const int ndpt = 8, // Does this need to match template?
const bool debug = false);
std::vector<torch::Tensor>self_collision_distance(
torch::Tensor out_distance,
torch::Tensor out_vec,
torch::Tensor sparse_index,
const torch::Tensor robot_spheres, // batch_size x n_spheres x 4
const torch::Tensor collision_offset, // n_spheres x n_spheres
const torch::Tensor weight,
const torch::Tensor collision_matrix, // n_spheres x n_spheres
const torch::Tensor thread_locations,
const int locations_size,
const int batch_size,
const int nspheres,
const bool compute_grad = false,
const int ndpt = 8, // Does this need to match template?
const bool debug = false);
// CUDA forward declarations
std::vector<torch::Tensor> swept_sphere_obb_clpt(
const torch::Tensor sphere_position, // batch_size, 3
torch::Tensor distance, // batch_size, 1
torch::Tensor
closest_point, // batch size, 4 -> written out as x,y,z,0 for gradient
torch::Tensor sparsity_idx, const torch::Tensor weight,
const torch::Tensor activation_distance, const torch::Tensor speed_dt,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
const torch::Tensor obb_pose, // n_boxes, 4, 4
const torch::Tensor obb_enable, // n_boxes, 4,
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs, const int batch_size, const int horizon,
const int n_spheres, const int sweep_steps, const bool enable_speed_metric,
const bool transform_back, const bool compute_distance,
const bool use_batch_env);
std::vector<torch::Tensor>swept_sphere_obb_clpt(
const torch::Tensor sphere_position, // batch_size, 3
torch::Tensor distance, // batch_size, 1
torch::Tensor
closest_point, // batch size, 4 -> written out as x,y,z,0 for gradient
torch::Tensor sparsity_idx,
const torch::Tensor weight,
const torch::Tensor activation_distance,
const torch::Tensor speed_dt,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
const torch::Tensor obb_pose, // n_boxes, 4, 4
const torch::Tensor obb_enable, // n_boxes, 4,
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs,
const int batch_size,
const int horizon,
const int n_spheres,
const int sweep_steps,
const bool enable_speed_metric,
const bool transform_back,
const bool compute_distance,
const bool use_batch_env);
std::vector<torch::Tensor>
sphere_obb_clpt(const torch::Tensor sphere_position, // batch_size, 4
torch::Tensor distance,
torch::Tensor closest_point, // batch size, 3
torch::Tensor sparsity_idx, const torch::Tensor weight,
torch::Tensor distance,
torch::Tensor closest_point, // batch size, 3
torch::Tensor sparsity_idx,
const torch::Tensor weight,
const torch::Tensor activation_distance,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
@@ -60,58 +72,75 @@ sphere_obb_clpt(const torch::Tensor sphere_position, // batch_size, 4
const torch::Tensor obb_enable, // n_boxes, 4, 4
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs, const int batch_size, const int horizon,
const int n_spheres, const bool transform_back,
const bool compute_distance, const bool use_batch_env);
const int max_nobs,
const int batch_size,
const int horizon,
const int n_spheres,
const bool transform_back,
const bool compute_distance,
const bool use_batch_env);
std::vector<torch::Tensor> pose_distance(
torch::Tensor out_distance, torch::Tensor out_position_distance,
torch::Tensor out_rotation_distance,
torch::Tensor distance_p_vector, // batch size, 3
torch::Tensor distance_q_vector, // batch size, 4
torch::Tensor out_gidx,
const torch::Tensor current_position, // batch_size, 3
const torch::Tensor goal_position, // n_boxes, 3
const torch::Tensor current_quat, const torch::Tensor goal_quat,
const torch::Tensor vec_weight, // n_boxes, 4, 4
const torch::Tensor weight, // n_boxes, 4, 4
const torch::Tensor vec_convergence, const torch::Tensor run_weight,
const torch::Tensor run_vec_weight, const torch::Tensor batch_pose_idx,
const int batch_size, const int horizon, const int mode,
const int num_goals = 1, const bool compute_grad = false,
const bool write_distance = true, const bool use_metric = false);
std::vector<torch::Tensor>pose_distance(
torch::Tensor out_distance,
torch::Tensor out_position_distance,
torch::Tensor out_rotation_distance,
torch::Tensor distance_p_vector, // batch size, 3
torch::Tensor distance_q_vector, // batch size, 4
torch::Tensor out_gidx,
const torch::Tensor current_position, // batch_size, 3
const torch::Tensor goal_position, // n_boxes, 3
const torch::Tensor current_quat,
const torch::Tensor goal_quat,
const torch::Tensor vec_weight, // n_boxes, 4, 4
const torch::Tensor weight, // n_boxes, 4, 4
const torch::Tensor vec_convergence,
const torch::Tensor run_weight,
const torch::Tensor run_vec_weight,
const torch::Tensor offset_waypoint,
const torch::Tensor offset_tstep_fraction,
const torch::Tensor batch_pose_idx,
const int batch_size,
const int horizon,
const int mode,
const int num_goals = 1,
const bool compute_grad = false,
const bool write_distance = true,
const bool use_metric = false,
const bool project_distance = true);
std::vector<torch::Tensor>
backward_pose_distance(torch::Tensor out_grad_p, torch::Tensor out_grad_q,
backward_pose_distance(torch::Tensor out_grad_p,
torch::Tensor out_grad_q,
const torch::Tensor grad_distance, // batch_size, 3
const torch::Tensor grad_p_distance, // n_boxes, 3
const torch::Tensor grad_q_distance,
const torch::Tensor pose_weight,
const torch::Tensor grad_p_vec, // n_boxes, 4, 4
const torch::Tensor grad_q_vec, const int batch_size,
const bool use_distance = false);
const torch::Tensor grad_p_vec, // n_boxes, 4, 4
const torch::Tensor grad_q_vec,
const int batch_size,
const bool use_distance = false);
// C++ interface
// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), # x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), # x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
std::vector<torch::Tensor> self_collision_distance_wrapper(
torch::Tensor out_distance, torch::Tensor out_vec,
torch::Tensor sparse_index,
const torch::Tensor robot_spheres, // batch_size x n_spheres x 4
const torch::Tensor collision_offset, // n_spheres
const torch::Tensor weight,
const torch::Tensor collision_matrix, // n_spheres
const torch::Tensor thread_locations, const int thread_locations_size,
const int batch_size, const int nspheres, const bool compute_grad = false,
const int ndpt = 8, const bool debug = false) {
std::vector<torch::Tensor>self_collision_distance_wrapper(
torch::Tensor out_distance, torch::Tensor out_vec,
torch::Tensor sparse_index,
const torch::Tensor robot_spheres, // batch_size x n_spheres x 4
const torch::Tensor collision_offset, // n_spheres
const torch::Tensor weight,
const torch::Tensor collision_matrix, // n_spheres
const torch::Tensor thread_locations, const int thread_locations_size,
const int batch_size, const int nspheres, const bool compute_grad = false,
const int ndpt = 8, const bool debug = false)
{
CHECK_INPUT(out_distance);
CHECK_INPUT(out_vec);
CHECK_INPUT(robot_spheres);
@@ -123,29 +152,30 @@ std::vector<torch::Tensor> self_collision_distance_wrapper(
const at::cuda::OptionalCUDAGuard guard(robot_spheres.device());
return self_collision_distance(
out_distance, out_vec, sparse_index, robot_spheres,
collision_offset, weight, collision_matrix, thread_locations,
thread_locations_size, batch_size, nspheres, compute_grad, ndpt, debug);
out_distance, out_vec, sparse_index, robot_spheres,
collision_offset, weight, collision_matrix, thread_locations,
thread_locations_size, batch_size, nspheres, compute_grad, ndpt, debug);
}
std::vector<torch::Tensor> sphere_obb_clpt_wrapper(
const torch::Tensor sphere_position, // batch_size, 4
torch::Tensor distance,
torch::Tensor closest_point, // batch size, 3
torch::Tensor sparsity_idx, const torch::Tensor weight,
const torch::Tensor activation_distance,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
const torch::Tensor obb_pose, // n_boxes, 4, 4
const torch::Tensor obb_enable, // n_boxes, 4, 4
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs, const int batch_size, const int horizon,
const int n_spheres, const bool transform_back, const bool compute_distance,
const bool use_batch_env) {
std::vector<torch::Tensor>sphere_obb_clpt_wrapper(
const torch::Tensor sphere_position, // batch_size, 4
torch::Tensor distance,
torch::Tensor closest_point, // batch size, 3
torch::Tensor sparsity_idx, const torch::Tensor weight,
const torch::Tensor activation_distance,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
const torch::Tensor obb_pose, // n_boxes, 4, 4
const torch::Tensor obb_enable, // n_boxes, 4, 4
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs, const int batch_size, const int horizon,
const int n_spheres, const bool transform_back, const bool compute_distance,
const bool use_batch_env)
{
const at::cuda::OptionalCUDAGuard guard(sphere_position.device());
CHECK_INPUT(distance);
CHECK_INPUT(closest_point);
CHECK_INPUT(sphere_position);
@@ -154,55 +184,61 @@ std::vector<torch::Tensor> sphere_obb_clpt_wrapper(
CHECK_INPUT(activation_distance);
CHECK_INPUT(obb_accel);
return sphere_obb_clpt(
sphere_position, distance, closest_point, sparsity_idx, weight,
activation_distance, obb_accel, obb_bounds, obb_pose, obb_enable,
n_env_obb, env_query_idx, max_nobs, batch_size, horizon, n_spheres,
transform_back, compute_distance, use_batch_env);
sphere_position, distance, closest_point, sparsity_idx, weight,
activation_distance, obb_accel, obb_bounds, obb_pose, obb_enable,
n_env_obb, env_query_idx, max_nobs, batch_size, horizon, n_spheres,
transform_back, compute_distance, use_batch_env);
}
std::vector<torch::Tensor> swept_sphere_obb_clpt_wrapper(
const torch::Tensor sphere_position, // batch_size, 4
torch::Tensor distance,
torch::Tensor closest_point, // batch size, 3
torch::Tensor sparsity_idx, const torch::Tensor weight,
const torch::Tensor activation_distance, const torch::Tensor speed_dt,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
const torch::Tensor obb_pose, // n_boxes, 4, 4
const torch::Tensor obb_enable, // n_boxes, 4, 4
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs, const int batch_size, const int horizon,
const int n_spheres, const int sweep_steps, const bool enable_speed_metric,
const bool transform_back, const bool compute_distance,
const bool use_batch_env) {
std::vector<torch::Tensor>swept_sphere_obb_clpt_wrapper(
const torch::Tensor sphere_position, // batch_size, 4
torch::Tensor distance,
torch::Tensor closest_point, // batch size, 3
torch::Tensor sparsity_idx, const torch::Tensor weight,
const torch::Tensor activation_distance, const torch::Tensor speed_dt,
const torch::Tensor obb_accel, // n_boxes, 4, 4
const torch::Tensor obb_bounds, // n_boxes, 3
const torch::Tensor obb_pose, // n_boxes, 4, 4
const torch::Tensor obb_enable, // n_boxes, 4, 4
const torch::Tensor n_env_obb, // n_boxes, 4, 4
const torch::Tensor env_query_idx, // n_boxes, 4, 4
const int max_nobs, const int batch_size, const int horizon,
const int n_spheres, const int sweep_steps, const bool enable_speed_metric,
const bool transform_back, const bool compute_distance,
const bool use_batch_env)
{
const at::cuda::OptionalCUDAGuard guard(sphere_position.device());
CHECK_INPUT(distance);
CHECK_INPUT(closest_point);
CHECK_INPUT(sphere_position);
return swept_sphere_obb_clpt(
sphere_position,
distance, closest_point, sparsity_idx, weight, activation_distance,
speed_dt, obb_accel, obb_bounds, obb_pose, obb_enable, n_env_obb,
env_query_idx, max_nobs, batch_size, horizon, n_spheres, sweep_steps,
enable_speed_metric, transform_back, compute_distance, use_batch_env);
sphere_position,
distance, closest_point, sparsity_idx, weight, activation_distance,
speed_dt, obb_accel, obb_bounds, obb_pose, obb_enable, n_env_obb,
env_query_idx, max_nobs, batch_size, horizon, n_spheres, sweep_steps,
enable_speed_metric, transform_back, compute_distance, use_batch_env);
}
std::vector<torch::Tensor> pose_distance_wrapper(
torch::Tensor out_distance, torch::Tensor out_position_distance,
torch::Tensor out_rotation_distance,
torch::Tensor distance_p_vector, // batch size, 3
torch::Tensor distance_q_vector, // batch size, 4
torch::Tensor out_gidx,
const torch::Tensor current_position, // batch_size, 3
const torch::Tensor goal_position, // n_boxes, 3
const torch::Tensor current_quat, const torch::Tensor goal_quat,
const torch::Tensor vec_weight, // n_boxes, 4, 4
const torch::Tensor weight, const torch::Tensor vec_convergence,
const torch::Tensor run_weight, const torch::Tensor run_vec_weight,
const torch::Tensor batch_pose_idx, const int batch_size, const int horizon,
const int mode, const int num_goals = 1, const bool compute_grad = false,
const bool write_distance = false, const bool use_metric = false) {
std::vector<torch::Tensor>pose_distance_wrapper(
torch::Tensor out_distance, torch::Tensor out_position_distance,
torch::Tensor out_rotation_distance,
torch::Tensor distance_p_vector, // batch size, 3
torch::Tensor distance_q_vector, // batch size, 4
torch::Tensor out_gidx,
const torch::Tensor current_position, // batch_size, 3
const torch::Tensor goal_position, // n_boxes, 3
const torch::Tensor current_quat, const torch::Tensor goal_quat,
const torch::Tensor vec_weight, // n_boxes, 4, 4
const torch::Tensor weight, const torch::Tensor vec_convergence,
const torch::Tensor run_weight, const torch::Tensor run_vec_weight,
const torch::Tensor offset_waypoint, const torch::Tensor offset_tstep_fraction,
const torch::Tensor batch_pose_idx, const int batch_size, const int horizon,
const int mode, const int num_goals = 1, const bool compute_grad = false,
const bool write_distance = false, const bool use_metric = false,
const bool project_distance = true)
{
// at::cuda::DeviceGuard guard(angle.device());
CHECK_INPUT(out_distance);
CHECK_INPUT(out_position_distance);
@@ -214,24 +250,30 @@ std::vector<torch::Tensor> pose_distance_wrapper(
CHECK_INPUT(current_quat);
CHECK_INPUT(goal_quat);
CHECK_INPUT(batch_pose_idx);
CHECK_INPUT(offset_waypoint);
CHECK_INPUT(offset_tstep_fraction);
const at::cuda::OptionalCUDAGuard guard(current_position.device());
return pose_distance(
out_distance, out_position_distance, out_rotation_distance,
distance_p_vector, distance_q_vector, out_gidx, current_position,
goal_position, current_quat, goal_quat, vec_weight, weight,
vec_convergence, run_weight, run_vec_weight, batch_pose_idx, batch_size,
horizon, mode, num_goals, compute_grad, write_distance, use_metric);
out_distance, out_position_distance, out_rotation_distance,
distance_p_vector, distance_q_vector, out_gidx, current_position,
goal_position, current_quat, goal_quat, vec_weight, weight,
vec_convergence, run_weight, run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx, batch_size,
horizon, mode, num_goals, compute_grad, write_distance, use_metric, project_distance);
}
std::vector<torch::Tensor> backward_pose_distance_wrapper(
torch::Tensor out_grad_p, torch::Tensor out_grad_q,
const torch::Tensor grad_distance, // batch_size, 3
const torch::Tensor grad_p_distance, // n_boxes, 3
const torch::Tensor grad_q_distance, const torch::Tensor pose_weight,
const torch::Tensor grad_p_vec, // n_boxes, 4, 4
const torch::Tensor grad_q_vec, const int batch_size,
const bool use_distance) {
std::vector<torch::Tensor>backward_pose_distance_wrapper(
torch::Tensor out_grad_p, torch::Tensor out_grad_q,
const torch::Tensor grad_distance, // batch_size, 3
const torch::Tensor grad_p_distance, // n_boxes, 3
const torch::Tensor grad_q_distance, const torch::Tensor pose_weight,
const torch::Tensor grad_p_vec, // n_boxes, 4, 4
const torch::Tensor grad_q_vec, const int batch_size,
const bool use_distance)
{
CHECK_INPUT(out_grad_p);
CHECK_INPUT(out_grad_q);
CHECK_INPUT(grad_distance);
@@ -241,18 +283,19 @@ std::vector<torch::Tensor> backward_pose_distance_wrapper(
const at::cuda::OptionalCUDAGuard guard(grad_distance.device());
return backward_pose_distance(
out_grad_p, out_grad_q, grad_distance, grad_p_distance, grad_q_distance,
pose_weight, grad_p_vec, grad_q_vec, batch_size, use_distance);
out_grad_p, out_grad_q, grad_distance, grad_p_distance, grad_q_distance,
pose_weight, grad_p_vec, grad_q_vec, batch_size, use_distance);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("pose_distance", &pose_distance_wrapper, "Pose Distance (curobolib)");
m.def("pose_distance_backward", &backward_pose_distance_wrapper,
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
m.def("pose_distance", &pose_distance_wrapper, "Pose Distance (curobolib)");
m.def("pose_distance_backward", &backward_pose_distance_wrapper,
"Pose Distance Backward (curobolib)");
m.def("closest_point", &sphere_obb_clpt_wrapper,
m.def("closest_point", &sphere_obb_clpt_wrapper,
"Closest Point OBB(curobolib)");
m.def("swept_closest_point", &swept_sphere_obb_clpt_wrapper,
m.def("swept_closest_point", &swept_sphere_obb_clpt_wrapper,
"Swept Closest Point OBB(curobolib)");
m.def("self_collision_distance", &self_collision_distance_wrapper,

View File

@@ -16,91 +16,112 @@
// CUDA forward declarations
std::vector<torch::Tensor>
matrix_to_quaternion(torch::Tensor out_quat,
matrix_to_quaternion(torch::Tensor out_quat,
const torch::Tensor in_rot // batch_size, 3
);
);
std::vector<torch::Tensor> kin_fused_forward(
torch::Tensor link_pos, torch::Tensor link_quat,
torch::Tensor batch_robot_spheres, torch::Tensor global_cumul_mat,
const torch::Tensor joint_vec, const torch::Tensor fixed_transform,
const torch::Tensor robot_spheres, const torch::Tensor link_map,
const torch::Tensor joint_map, const torch::Tensor joint_map_type,
const torch::Tensor store_link_map, const torch::Tensor link_sphere_map,
const int batch_size, const int n_spheres,
const bool use_global_cumul = false);
std::vector<torch::Tensor>kin_fused_forward(
torch::Tensor link_pos,
torch::Tensor link_quat,
torch::Tensor batch_robot_spheres,
torch::Tensor global_cumul_mat,
const torch::Tensor joint_vec,
const torch::Tensor fixed_transform,
const torch::Tensor robot_spheres,
const torch::Tensor link_map,
const torch::Tensor joint_map,
const torch::Tensor joint_map_type,
const torch::Tensor store_link_map,
const torch::Tensor link_sphere_map,
const int batch_size,
const int n_spheres,
const bool use_global_cumul = false);
std::vector<torch::Tensor>kin_fused_backward_16t(
torch::Tensor grad_out,
const torch::Tensor grad_nlinks_pos,
const torch::Tensor grad_nlinks_quat,
const torch::Tensor grad_spheres,
const torch::Tensor global_cumul_mat,
const torch::Tensor joint_vec,
const torch::Tensor fixed_transform,
const torch::Tensor robot_spheres,
const torch::Tensor link_map,
const torch::Tensor joint_map,
const torch::Tensor joint_map_type,
const torch::Tensor store_link_map,
const torch::Tensor link_sphere_map,
const torch::Tensor link_chain_map,
const int batch_size,
const int n_spheres,
const bool sparsity_opt = true,
const bool use_global_cumul = false);
std::vector<torch::Tensor> kin_fused_backward_16t(
torch::Tensor grad_out, const torch::Tensor grad_nlinks_pos,
const torch::Tensor grad_nlinks_quat, const torch::Tensor grad_spheres,
const torch::Tensor global_cumul_mat, const torch::Tensor joint_vec,
const torch::Tensor fixed_transform, const torch::Tensor robot_spheres,
const torch::Tensor link_map, const torch::Tensor joint_map,
const torch::Tensor joint_map_type, const torch::Tensor store_link_map,
const torch::Tensor link_sphere_map, const torch::Tensor link_chain_map,
const int batch_size, const int n_spheres, const bool sparsity_opt = true,
const bool use_global_cumul = false);
// C++ interface
// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), # x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), # x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
std::vector<torch::Tensor> kin_forward_wrapper(
torch::Tensor link_pos, torch::Tensor link_quat,
torch::Tensor batch_robot_spheres, torch::Tensor global_cumul_mat,
const torch::Tensor joint_vec, const torch::Tensor fixed_transform,
const torch::Tensor robot_spheres, const torch::Tensor link_map,
const torch::Tensor joint_map, const torch::Tensor joint_map_type,
const torch::Tensor store_link_map, const torch::Tensor link_sphere_map,
const int batch_size, const int n_spheres,
const bool use_global_cumul = false) {
std::vector<torch::Tensor>kin_forward_wrapper(
torch::Tensor link_pos, torch::Tensor link_quat,
torch::Tensor batch_robot_spheres, torch::Tensor global_cumul_mat,
const torch::Tensor joint_vec, const torch::Tensor fixed_transform,
const torch::Tensor robot_spheres, const torch::Tensor link_map,
const torch::Tensor joint_map, const torch::Tensor joint_map_type,
const torch::Tensor store_link_map, const torch::Tensor link_sphere_map,
const int batch_size, const int n_spheres,
const bool use_global_cumul = false)
{
const at::cuda::OptionalCUDAGuard guard(joint_vec.device());
// TODO: add check input
return kin_fused_forward(
link_pos, link_quat, batch_robot_spheres, global_cumul_mat, joint_vec,
fixed_transform, robot_spheres, link_map, joint_map, joint_map_type,
store_link_map, link_sphere_map, batch_size, n_spheres, use_global_cumul);
link_pos, link_quat, batch_robot_spheres, global_cumul_mat, joint_vec,
fixed_transform, robot_spheres, link_map, joint_map, joint_map_type,
store_link_map, link_sphere_map, batch_size, n_spheres, use_global_cumul);
}
std::vector<torch::Tensor> kin_backward_wrapper(
torch::Tensor grad_out, const torch::Tensor grad_nlinks_pos,
const torch::Tensor grad_nlinks_quat, const torch::Tensor grad_spheres,
const torch::Tensor global_cumul_mat, const torch::Tensor joint_vec,
const torch::Tensor fixed_transform, const torch::Tensor robot_spheres,
const torch::Tensor link_map, const torch::Tensor joint_map,
const torch::Tensor joint_map_type, const torch::Tensor store_link_map,
const torch::Tensor link_sphere_map, const torch::Tensor link_chain_map,
const int batch_size, const int n_spheres, const bool sparsity_opt = true,
const bool use_global_cumul = false) {
std::vector<torch::Tensor>kin_backward_wrapper(
torch::Tensor grad_out, const torch::Tensor grad_nlinks_pos,
const torch::Tensor grad_nlinks_quat, const torch::Tensor grad_spheres,
const torch::Tensor global_cumul_mat, const torch::Tensor joint_vec,
const torch::Tensor fixed_transform, const torch::Tensor robot_spheres,
const torch::Tensor link_map, const torch::Tensor joint_map,
const torch::Tensor joint_map_type, const torch::Tensor store_link_map,
const torch::Tensor link_sphere_map, const torch::Tensor link_chain_map,
const int batch_size, const int n_spheres, const bool sparsity_opt = true,
const bool use_global_cumul = false)
{
const at::cuda::OptionalCUDAGuard guard(joint_vec.device());
return kin_fused_backward_16t(
grad_out, grad_nlinks_pos, grad_nlinks_quat, grad_spheres,
global_cumul_mat, joint_vec, fixed_transform, robot_spheres, link_map,
joint_map, joint_map_type, store_link_map, link_sphere_map,
link_chain_map, batch_size, n_spheres, sparsity_opt, use_global_cumul);
grad_out, grad_nlinks_pos, grad_nlinks_quat, grad_spheres,
global_cumul_mat, joint_vec, fixed_transform, robot_spheres, link_map,
joint_map, joint_map_type, store_link_map, link_sphere_map,
link_chain_map, batch_size, n_spheres, sparsity_opt, use_global_cumul);
}
std::vector<torch::Tensor>
matrix_to_quaternion_wrapper(torch::Tensor out_quat,
matrix_to_quaternion_wrapper(torch::Tensor out_quat,
const torch::Tensor in_rot // batch_size, 3
) {
)
{
const at::cuda::OptionalCUDAGuard guard(in_rot.device());
CHECK_INPUT(in_rot);
CHECK_INPUT(out_quat);
return matrix_to_quaternion(out_quat, in_rot);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &kin_forward_wrapper, "Kinematics fused forward (CUDA)");
m.def("backward", &kin_backward_wrapper, "Kinematics fused backward (CUDA)");
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
m.def("forward", &kin_forward_wrapper, "Kinematics fused forward (CUDA)");
m.def("backward", &kin_backward_wrapper, "Kinematics fused backward (CUDA)");
m.def("matrix_to_quaternion", &matrix_to_quaternion_wrapper,
"Rotation Matrix to Quaternion (CUDA)");
}

File diff suppressed because it is too large Load Diff

View File

@@ -15,45 +15,68 @@
#include <c10/cuda/CUDAGuard.h>
// CUDA forward declarations
std::vector<torch::Tensor> reduce_cuda(torch::Tensor vec, torch::Tensor vec2,
torch::Tensor rho_buffer,
torch::Tensor sum, const int batch_size,
const int v_dim, const int m);
std::vector<torch::Tensor>reduce_cuda(torch::Tensor vec,
torch::Tensor vec2,
torch::Tensor rho_buffer,
torch::Tensor sum,
const int batch_size,
const int v_dim,
const int m);
std::vector<torch::Tensor>
lbfgs_step_cuda(torch::Tensor step_vec, torch::Tensor rho_buffer,
torch::Tensor y_buffer, torch::Tensor s_buffer,
torch::Tensor grad_q, const float epsilon, const int batch_size,
const int m, const int v_dim);
lbfgs_step_cuda(torch::Tensor step_vec,
torch::Tensor rho_buffer,
torch::Tensor y_buffer,
torch::Tensor s_buffer,
torch::Tensor grad_q,
const float epsilon,
const int batch_size,
const int m,
const int v_dim);
std::vector<torch::Tensor>
lbfgs_update_cuda(torch::Tensor rho_buffer, torch::Tensor y_buffer,
torch::Tensor s_buffer, torch::Tensor q, torch::Tensor grad_q,
torch::Tensor x_0, torch::Tensor grad_0, const int batch_size,
const int m, const int v_dim);
lbfgs_update_cuda(torch::Tensor rho_buffer,
torch::Tensor y_buffer,
torch::Tensor s_buffer,
torch::Tensor q,
torch::Tensor grad_q,
torch::Tensor x_0,
torch::Tensor grad_0,
const int batch_size,
const int m,
const int v_dim);
std::vector<torch::Tensor>
lbfgs_cuda_fuse(torch::Tensor step_vec, torch::Tensor rho_buffer,
torch::Tensor y_buffer, torch::Tensor s_buffer, torch::Tensor q,
torch::Tensor grad_q, torch::Tensor x_0, torch::Tensor grad_0,
const float epsilon, const int batch_size, const int m,
const int v_dim, const bool stable_mode);
lbfgs_cuda_fuse(torch::Tensor step_vec,
torch::Tensor rho_buffer,
torch::Tensor y_buffer,
torch::Tensor s_buffer,
torch::Tensor q,
torch::Tensor grad_q,
torch::Tensor x_0,
torch::Tensor grad_0,
const float epsilon,
const int batch_size,
const int m,
const int v_dim,
const bool stable_mode);
// C++ interface
// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), # x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), # x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
std::vector<torch::Tensor>
lbfgs_step_call(torch::Tensor step_vec, torch::Tensor rho_buffer,
torch::Tensor y_buffer, torch::Tensor s_buffer,
torch::Tensor grad_q, const float epsilon, const int batch_size,
const int m, const int v_dim) {
const int m, const int v_dim)
{
CHECK_INPUT(step_vec);
CHECK_INPUT(rho_buffer);
CHECK_INPUT(y_buffer);
@@ -69,7 +92,8 @@ std::vector<torch::Tensor>
lbfgs_update_call(torch::Tensor rho_buffer, torch::Tensor y_buffer,
torch::Tensor s_buffer, torch::Tensor q, torch::Tensor grad_q,
torch::Tensor x_0, torch::Tensor grad_0, const int batch_size,
const int m, const int v_dim) {
const int m, const int v_dim)
{
CHECK_INPUT(rho_buffer);
CHECK_INPUT(y_buffer);
CHECK_INPUT(s_buffer);
@@ -86,7 +110,8 @@ lbfgs_update_call(torch::Tensor rho_buffer, torch::Tensor y_buffer,
std::vector<torch::Tensor>
reduce_cuda_call(torch::Tensor vec, torch::Tensor vec2,
torch::Tensor rho_buffer, torch::Tensor sum,
const int batch_size, const int v_dim, const int m) {
const int batch_size, const int v_dim, const int m)
{
const at::cuda::OptionalCUDAGuard guard(sum.device());
return reduce_cuda(vec, vec2, rho_buffer, sum, batch_size, v_dim, m);
@@ -97,7 +122,8 @@ lbfgs_call(torch::Tensor step_vec, torch::Tensor rho_buffer,
torch::Tensor y_buffer, torch::Tensor s_buffer, torch::Tensor q,
torch::Tensor grad_q, torch::Tensor x_0, torch::Tensor grad_0,
const float epsilon, const int batch_size, const int m,
const int v_dim, const bool stable_mode) {
const int v_dim, const bool stable_mode)
{
CHECK_INPUT(step_vec);
CHECK_INPUT(rho_buffer);
CHECK_INPUT(y_buffer);
@@ -113,9 +139,10 @@ lbfgs_call(torch::Tensor step_vec, torch::Tensor rho_buffer,
stable_mode);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("step", &lbfgs_step_call, "L-BFGS step (CUDA)");
m.def("update", &lbfgs_update_call, "L-BFGS Update (CUDA)");
m.def("forward", &lbfgs_call, "L-BFGS Update + Step (CUDA)");
m.def("debug_reduce", &reduce_cuda_call, "L-BFGS Debug");
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
m.def("step", &lbfgs_step_call, "L-BFGS step (CUDA)");
m.def("update", &lbfgs_update_call, "L-BFGS Update (CUDA)");
m.def("forward", &lbfgs_call, "L-BFGS Update + Step (CUDA)");
m.def("debug_reduce", &reduce_cuda_call, "L-BFGS Debug");
}

File diff suppressed because it is too large Load Diff

View File

@@ -18,49 +18,67 @@
// CUDA forward declarations
std::vector<torch::Tensor>
update_best_cuda(torch::Tensor best_cost, torch::Tensor best_q,
torch::Tensor best_iteration,
torch::Tensor current_iteration,
update_best_cuda(torch::Tensor best_cost,
torch::Tensor best_q,
torch::Tensor best_iteration,
torch::Tensor current_iteration,
const torch::Tensor cost,
const torch::Tensor q, const int d_opt, const int cost_s1,
const int cost_s2, const int iteration,
const float delta_threshold,
const float relative_threshold = 0.999);
const torch::Tensor q,
const int d_opt,
const int cost_s1,
const int cost_s2,
const int iteration,
const float delta_threshold,
const float relative_threshold = 0.999);
std::vector<torch::Tensor>line_search_cuda(
// torch::Tensor m,
torch::Tensor best_x,
torch::Tensor best_c,
torch::Tensor best_grad,
const torch::Tensor g_x,
const torch::Tensor x_set,
const torch::Tensor step_vec,
const torch::Tensor c_0,
const torch::Tensor alpha_list,
const torch::Tensor c_idx,
const float c_1,
const float c_2,
const bool strong_wolfe,
const bool approx_wolfe,
const int l1,
const int l2,
const int batchsize);
std::vector<torch::Tensor> line_search_cuda(
// torch::Tensor m,
torch::Tensor best_x, torch::Tensor best_c, torch::Tensor best_grad,
const torch::Tensor g_x, const torch::Tensor x_set,
const torch::Tensor step_vec, const torch::Tensor c_0,
const torch::Tensor alpha_list, const torch::Tensor c_idx, const float c_1,
const float c_2, const bool strong_wolfe, const bool approx_wolfe,
const int l1, const int l2, const int batchsize);
// C++ interface
// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), # x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), # x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
std::vector<torch::Tensor> line_search_call(
// torch::Tensor m,
torch::Tensor best_x, torch::Tensor best_c, torch::Tensor best_grad,
const torch::Tensor g_x, const torch::Tensor x_set,
const torch::Tensor step_vec, const torch::Tensor c_0,
const torch::Tensor alpha_list, const torch::Tensor c_idx, const float c_1,
const float c_2, const bool strong_wolfe, const bool approx_wolfe,
const int l1, const int l2, const int batchsize) {
std::vector<torch::Tensor>line_search_call(
// torch::Tensor m,
torch::Tensor best_x, torch::Tensor best_c, torch::Tensor best_grad,
const torch::Tensor g_x, const torch::Tensor x_set,
const torch::Tensor step_vec, const torch::Tensor c_0,
const torch::Tensor alpha_list, const torch::Tensor c_idx, const float c_1,
const float c_2, const bool strong_wolfe, const bool approx_wolfe,
const int l1, const int l2, const int batchsize)
{
CHECK_INPUT(g_x);
CHECK_INPUT(x_set);
CHECK_INPUT(step_vec);
CHECK_INPUT(c_0);
CHECK_INPUT(alpha_list);
CHECK_INPUT(c_idx);
// CHECK_INPUT(m);
CHECK_INPUT(best_x);
CHECK_INPUT(best_c);
@@ -76,14 +94,14 @@ std::vector<torch::Tensor> line_search_call(
std::vector<torch::Tensor>
update_best_call(torch::Tensor best_cost, torch::Tensor best_q,
torch::Tensor best_iteration,
torch::Tensor best_iteration,
torch::Tensor current_iteration,
const torch::Tensor cost,
const torch::Tensor q, const int d_opt, const int cost_s1,
const int cost_s2, const int iteration,
const float delta_threshold,
const float relative_threshold=0.999) {
const float relative_threshold = 0.999)
{
CHECK_INPUT(best_cost);
CHECK_INPUT(best_q);
CHECK_INPUT(cost);
@@ -96,8 +114,8 @@ update_best_call(torch::Tensor best_cost, torch::Tensor best_q,
cost_s1, cost_s2, iteration, delta_threshold, relative_threshold);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
m.def("update_best", &update_best_call, "Update Best (CUDA)");
m.def("line_search", &line_search_call, "Line search (CUDA)");
}
}

View File

@@ -37,362 +37,431 @@
#define FULL_MASK 0xffffffff
namespace Curobo {
namespace Curobo
{
namespace Optimization
{
template<typename scalar_t, typename psum_t>
__inline__ __device__ void reduce(scalar_t v, int m, unsigned mask,
psum_t *data, scalar_t *result)
{
psum_t val = v;
namespace Optimization {
val += __shfl_down_sync(mask, val, 1);
val += __shfl_down_sync(mask, val, 2);
val += __shfl_down_sync(mask, val, 4);
val += __shfl_down_sync(mask, val, 8);
val += __shfl_down_sync(mask, val, 16);
template <typename scalar_t, typename psum_t>
__inline__ __device__ void reduce(scalar_t v, int m, unsigned mask,
psum_t *data, scalar_t *result) {
psum_t val = v;
val += __shfl_down_sync(mask, val, 1);
val += __shfl_down_sync(mask, val, 2);
val += __shfl_down_sync(mask, val, 4);
val += __shfl_down_sync(mask, val, 8);
val += __shfl_down_sync(mask, val, 16);
// int leader = __ffs(mask) 1; // select a leader lane
int leader = 0;
if (threadIdx.x % 32 == leader) {
if (m <= 32) {
result[0] = (scalar_t)val;
} else {
data[(threadIdx.x + 1) / 32] = val;
}
}
if (m > 32) {
__syncthreads();
int elems = (m + 31) / 32;
assert(elems <= 32);
unsigned mask2 = __ballot_sync(FULL_MASK, threadIdx.x < elems);
if (threadIdx.x < elems) { // only the first warp will do this work
psum_t val2 = data[threadIdx.x % 32];
int shift = 1;
for (int i = elems - 1; i > 0; i /= 2) {
val2 += __shfl_down_sync(mask2, val2, shift);
shift *= 2;
}
// int leader = __ffs(mask2) 1; // select a leader lane
// int leader = __ffs(mask) 1; // select a leader lane
int leader = 0;
if (threadIdx.x % 32 == leader) {
result[0] = (scalar_t)val2;
if (threadIdx.x % 32 == leader)
{
if (m <= 32)
{
result[0] = (scalar_t)val;
}
else
{
data[(threadIdx.x + 1) / 32] = val;
}
}
if (m > 32)
{
__syncthreads();
int elems = (m + 31) / 32;
assert(elems <= 32);
unsigned mask2 = __ballot_sync(FULL_MASK, threadIdx.x < elems);
if (threadIdx.x < elems) // only the first warp will do this work
{
psum_t val2 = data[threadIdx.x % 32];
int shift = 1;
for (int i = elems - 1; i > 0; i /= 2)
{
val2 += __shfl_down_sync(mask2, val2, shift);
shift *= 2;
}
// int leader = __ffs(mask2) 1; // select a leader lane
int leader = 0;
if (threadIdx.x % 32 == leader)
{
result[0] = (scalar_t)val2;
}
}
}
__syncthreads();
}
// Launched with l2 threads/block and batchsize blocks
template<typename scalar_t, typename psum_t>
__global__ void line_search_kernel(
// int64_t *m_idx, // 4x1x1
scalar_t *best_x, // 4x280
scalar_t *best_c, // 4x1
scalar_t *best_grad, // 4x280
const scalar_t *g_x, // 4x6x280
const scalar_t *x_set, // 4x6x280
const scalar_t *step_vec, // 4x280x1
const scalar_t *c, // 4x6x1
const scalar_t *alpha_list, // 4x6x1
const int64_t *c_idx, // 4x1x1
const float c_1, const float c_2, const bool strong_wolfe,
const bool approx_wolfe,
const int l1, // 6
const int l2, // 280
const int batchsize) // 4
{
int batch = blockIdx.x;
__shared__ psum_t data[32];
__shared__ scalar_t result[32];
assert(l1 <= 32);
unsigned mask = __ballot_sync(FULL_MASK, threadIdx.x < l2);
if (threadIdx.x >= l2)
{
return;
}
scalar_t sv_elem = step_vec[batch * l2 + threadIdx.x];
// g_step = g0 @ step_vec_T
// g_x @ step_vec_T
for (int i = 0; i < l1; i++)
{
reduce(g_x[batch * l1 * l2 + l2 * i + threadIdx.x] * sv_elem, l2, mask,
&data[0], &result[i]);
}
__shared__ scalar_t step_success[32];
__shared__ scalar_t step_success_w1[32];
assert(blockDim.x >= l1);
bool wolfe_1 = false;
bool wolfe = false;
bool condition = threadIdx.x < l1;
if (condition)
{
// scalar_t alpha_list_elem = alpha_list[batch*l1 + threadIdx.x];
scalar_t alpha_list_elem = alpha_list[threadIdx.x];
// condition 1:
wolfe_1 = c[batch * l1 + threadIdx.x] <=
(c[batch * l1] + c_1 * alpha_list_elem * result[0]);
// condition 2:
bool wolfe_2;
if (strong_wolfe)
{
wolfe_2 = abs(result[threadIdx.x]) <= c_2 *abs(result[0]);
}
else
{
wolfe_2 = result[threadIdx.x] >= c_2 * result[0];
}
wolfe = wolfe_1 & wolfe_2;
step_success[threadIdx.x] = wolfe * (alpha_list_elem + 0.1);
step_success_w1[threadIdx.x] = wolfe_1 * (alpha_list_elem + 0.1);
}
__syncthreads();
__shared__ int idx_shared;
if (threadIdx.x == 0)
{
int m_id = 0;
int m1_id = 0;
scalar_t max1 = step_success[0];
scalar_t max2 = step_success_w1[0];
for (int i = 1; i < l1; i++)
{
if (max1 < step_success[i])
{
max1 = step_success[i];
m_id = i;
}
if (max2 < step_success_w1[i])
{
max2 = step_success_w1[i];
m1_id = i;
}
}
if (!approx_wolfe)
{
// m_idx = torch.where(m_idx == 0, m1_idx, m_idx)
if (m_id == 0)
{
m_id = m1_id;
}
// m_idx[m_idx == 0] = 1
if (m_id == 0)
{
m_id = 1;
}
}
idx_shared = m_id + c_idx[batch];
}
////////////////////////////////////
// write outputs using the computed index.
// one index per batch is computed
////////////////////////////////////
// l2 is d_opt, l1 is line_search n.
// idx_shared contains index in l1
//
__syncthreads();
if (threadIdx.x < l2)
{
if (threadIdx.x == 0)
{
// printf("block: %d, idx_shared: %d\n", batch, idx_shared);
}
best_x[batch * l2 + threadIdx.x] = x_set[idx_shared * l2 + threadIdx.x];
best_grad[batch * l2 + threadIdx.x] = g_x[idx_shared * l2 + threadIdx.x];
}
if (threadIdx.x == 0)
{
best_c[batch] = c[idx_shared];
}
}
}
__syncthreads();
}
// Launched with l2 threads/block and batchsize blocks
template <typename scalar_t, typename psum_t>
__global__ void line_search_kernel(
// int64_t *m_idx, // 4x1x1
scalar_t *best_x, // 4x280
scalar_t *best_c, // 4x1
scalar_t *best_grad, // 4x280
const scalar_t *g_x, // 4x6x280
const scalar_t *x_set, // 4x6x280
const scalar_t *step_vec, // 4x280x1
const scalar_t *c, // 4x6x1
const scalar_t *alpha_list, // 4x6x1
const int64_t *c_idx, // 4x1x1
const float c_1, const float c_2, const bool strong_wolfe,
const bool approx_wolfe,
const int l1, // 6
const int l2, // 280
const int batchsize) // 4
// Launched with l2 threads/block and #blocks = batchsize
template<typename scalar_t, typename psum_t>
__global__ void line_search_kernel_mask(
// int64_t *m_idx, // 4x1x1
scalar_t *best_x, // 4x280
scalar_t *best_c, // 4x1
scalar_t *best_grad, // 4x280
const scalar_t *g_x, // 4x6x280
const scalar_t *x_set, // 4x6x280
const scalar_t *step_vec, // 4x280x1
const scalar_t *c, // 4x6x1
const scalar_t *alpha_list, // 4x6x1
const int64_t *c_idx, // 4x1x1
const float c_1, const float c_2, const bool strong_wolfe,
const bool approx_wolfe,
const int l1, // 6
const int l2, // 280
const int batchsize) // 4
{
int batch = blockIdx.x;
__shared__ psum_t data[32];
__shared__ scalar_t result[32];
assert(l1 <= 32);
unsigned mask = __ballot_sync(FULL_MASK, threadIdx.x < l2);
if (threadIdx.x >= l2)
{
return;
}
scalar_t sv_elem = step_vec[batch * l2 + threadIdx.x];
// g_step = g0 @ step_vec_T
// g_x @ step_vec_T
for (int i = 0; i < l1; i++)
{
reduce(g_x[batch * l1 * l2 + l2 * i + threadIdx.x] * sv_elem, l2, mask,
&data[0], &result[i]);
}
// __shared__ scalar_t step_success[32];
// __shared__ scalar_t step_success_w1[32];
assert(blockDim.x >= l1);
bool wolfe_1 = false;
bool wolfe = false;
bool condition = threadIdx.x < l1;
if (condition)
{
scalar_t alpha_list_elem = alpha_list[threadIdx.x];
// scalar_t alpha_list_elem = alpha_list[batch*l1 + threadIdx.x];
// condition 1:
wolfe_1 = c[batch * l1 + threadIdx.x] <=
(c[batch * l1] + c_1 * alpha_list_elem * result[0]);
// condition 2:
bool wolfe_2;
if (strong_wolfe)
{
wolfe_2 = abs(result[threadIdx.x]) <= c_2 *abs(result[0]);
}
else
{
wolfe_2 = result[threadIdx.x] >= c_2 * result[0];
}
// wolfe = torch.logical_and(wolfe_1, wolfe_2)
wolfe = wolfe_1 & wolfe_2;
// // step_success = wolfe * (self.alpha_list[:, :, 0:1] + 0.1)
// // step_success_w1 = wolfe_1 * (self.alpha_list[:, :, 0:1] + 0.1)
// step_success[threadIdx.x] = wolfe * (alpha_list_elem + 0.1);
// step_success_w1[threadIdx.x] = wolfe_1 * (alpha_list_elem + 0.1);
}
unsigned msk1 = __ballot_sync(FULL_MASK, wolfe_1 & condition);
unsigned msk = __ballot_sync(FULL_MASK, wolfe & condition);
// get the index of the last occurance of true
unsigned msk1_brev = __brev(msk1);
unsigned msk_brev = __brev(msk);
int id1 = 32 - __ffs(msk1_brev); // position of least signficant bit set to 1
int id = 32 - __ffs(msk_brev); // position of least signficant bit set to 1
__syncthreads();
__shared__ int idx_shared;
if (threadIdx.x == 0)
{
if (!approx_wolfe)
{
if (id == 32) // msk is zero
{
id = id1;
}
if (id == 0) // bit 0 is set
{
id = id1;
}
if (id == 32) // msk is zero
{
id = 1;
}
if (id == 0)
{
id = 1;
}
}
else
{
if (id == 32) // msk is zero
{
id = 0;
}
}
// // _, m_idx = torch.max(step_success, dim=-2)
// // _, m1_idx = torch.max(step_success_w1, dim=-2)
// int m_id = 0;
// int m1_id = 0;
// scalar_t max1 = step_success[0];
// scalar_t max2 = step_success_w1[0];
// for (int i=1; i<l1; i++) {
// if (max1<step_success[i]) {
// max1 = step_success[i];
// m_id = i;
// }
// if (max2<step_success_w1[i]) {
// max2 = step_success_w1[i];
// m1_id = i;
// }
// }
// // m_idx = torch.where(m_idx == 0, m1_idx, m_idx)
// if (m_id == 0) {
// m_id = m1_id;
// }
// // m_idx[m_idx == 0] = 1
// if (m_id == 0) {
// m_id = 1;
// }
// if (id != m_id) {
// printf("id=%d, m_id=%d\n", id, m_id);
// printf("msk1=%x, msk=%x, raw id1=%d, raw id=%d\n", msk1, msk,
// 32-__ffs(msk1_brev), 32-__ffs(msk_brev));
// }
// m_idx[batch] = m_id;
// m_idx[batch] = id;
idx_shared = id + c_idx[batch];
}
////////////////////////////////////
// write outputs using the computed index.
// one index per batch is computed
////////////////////////////////////
__syncthreads();
if (threadIdx.x < l2)
{
if (threadIdx.x == 0)
{
// printf("block: %d, idx_shared: %d\n", batch, idx_shared);
}
best_x[batch * l2 + threadIdx.x] = x_set[idx_shared * l2 + threadIdx.x];
best_grad[batch * l2 + threadIdx.x] = g_x[idx_shared * l2 + threadIdx.x];
}
if (threadIdx.x == 0)
{
best_c[batch] = c[idx_shared];
}
}
} // namespace Optimization
} // namespace Curobo
std::vector<torch::Tensor>line_search_cuda(
// torch::Tensor m_idx,
torch::Tensor best_x, torch::Tensor best_c, torch::Tensor best_grad,
const torch::Tensor g_x, const torch::Tensor x_set,
const torch::Tensor step_vec, const torch::Tensor c_0,
const torch::Tensor alpha_list, const torch::Tensor c_idx, const float c_1,
const float c_2, const bool strong_wolfe, const bool approx_wolfe,
const int l1, const int l2, const int batchsize)
{
int batch = blockIdx.x;
__shared__ psum_t data[32];
__shared__ scalar_t result[32];
assert(l1 <= 32);
unsigned mask = __ballot_sync(FULL_MASK, threadIdx.x < l2);
if (threadIdx.x >= l2) {
return;
}
scalar_t sv_elem = step_vec[batch * l2 + threadIdx.x];
// g_step = g0 @ step_vec_T
// g_x @ step_vec_T
for (int i = 0; i < l1; i++) {
reduce(g_x[batch * l1 * l2 + l2 * i + threadIdx.x] * sv_elem, l2, mask,
&data[0], &result[i]);
}
__shared__ scalar_t step_success[32];
__shared__ scalar_t step_success_w1[32];
assert(blockDim.x >= l1);
bool wolfe_1 = false;
bool wolfe = false;
bool condition = threadIdx.x < l1;
if (condition) {
// scalar_t alpha_list_elem = alpha_list[batch*l1 + threadIdx.x];
scalar_t alpha_list_elem = alpha_list[threadIdx.x];
// condition 1:
wolfe_1 = c[batch * l1 + threadIdx.x] <=
(c[batch * l1] + c_1 * alpha_list_elem * result[0]);
// condition 2:
bool wolfe_2;
if (strong_wolfe) {
wolfe_2 = abs(result[threadIdx.x]) <= c_2 * abs(result[0]);
} else {
wolfe_2 = result[threadIdx.x] >= c_2 * result[0];
}
wolfe = wolfe_1 & wolfe_2;
step_success[threadIdx.x] = wolfe * (alpha_list_elem + 0.1);
step_success_w1[threadIdx.x] = wolfe_1 * (alpha_list_elem + 0.1);
}
__syncthreads();
__shared__ int idx_shared;
if (threadIdx.x == 0) {
int m_id = 0;
int m1_id = 0;
scalar_t max1 = step_success[0];
scalar_t max2 = step_success_w1[0];
for (int i = 1; i < l1; i++) {
if (max1 < step_success[i]) {
max1 = step_success[i];
m_id = i;
}
if (max2 < step_success_w1[i]) {
max2 = step_success_w1[i];
m1_id = i;
}
}
if (!approx_wolfe) {
// m_idx = torch.where(m_idx == 0, m1_idx, m_idx)
if (m_id == 0) {
m_id = m1_id;
}
// m_idx[m_idx == 0] = 1
if (m_id == 0) {
m_id = 1;
}
}
idx_shared = m_id + c_idx[batch];
}
////////////////////////////////////
// write outputs using the computed index.
// one index per batch is computed
////////////////////////////////////
// l2 is d_opt, l1 is line_search n.
// idx_shared contains index in l1
//
__syncthreads();
if (threadIdx.x < l2) {
if (threadIdx.x == 0) {
// printf("block: %d, idx_shared: %d\n", batch, idx_shared);
}
best_x[batch * l2 + threadIdx.x] = x_set[idx_shared * l2 + threadIdx.x];
best_grad[batch * l2 + threadIdx.x] = g_x[idx_shared * l2 + threadIdx.x];
}
if (threadIdx.x == 0) {
best_c[batch] = c[idx_shared];
}
}
// Launched with l2 threads/block and #blocks = batchsize
template <typename scalar_t, typename psum_t>
__global__ void line_search_kernel_mask(
// int64_t *m_idx, // 4x1x1
scalar_t *best_x, // 4x280
scalar_t *best_c, // 4x1
scalar_t *best_grad, // 4x280
const scalar_t *g_x, // 4x6x280
const scalar_t *x_set, // 4x6x280
const scalar_t *step_vec, // 4x280x1
const scalar_t *c, // 4x6x1
const scalar_t *alpha_list, // 4x6x1
const int64_t *c_idx, // 4x1x1
const float c_1, const float c_2, const bool strong_wolfe,
const bool approx_wolfe,
const int l1, // 6
const int l2, // 280
const int batchsize) // 4
{
int batch = blockIdx.x;
__shared__ psum_t data[32];
__shared__ scalar_t result[32];
assert(l1 <= 32);
unsigned mask = __ballot_sync(FULL_MASK, threadIdx.x < l2);
if (threadIdx.x >= l2) {
return;
}
scalar_t sv_elem = step_vec[batch * l2 + threadIdx.x];
// g_step = g0 @ step_vec_T
// g_x @ step_vec_T
for (int i = 0; i < l1; i++) {
reduce(g_x[batch * l1 * l2 + l2 * i + threadIdx.x] * sv_elem, l2, mask,
&data[0], &result[i]);
}
// __shared__ scalar_t step_success[32];
// __shared__ scalar_t step_success_w1[32];
assert(blockDim.x >= l1);
bool wolfe_1 = false;
bool wolfe = false;
bool condition = threadIdx.x < l1;
if (condition) {
scalar_t alpha_list_elem = alpha_list[threadIdx.x];
// scalar_t alpha_list_elem = alpha_list[batch*l1 + threadIdx.x];
// condition 1:
wolfe_1 = c[batch * l1 + threadIdx.x] <=
(c[batch * l1] + c_1 * alpha_list_elem * result[0]);
// condition 2:
bool wolfe_2;
if (strong_wolfe) {
wolfe_2 = abs(result[threadIdx.x]) <= c_2 * abs(result[0]);
} else {
wolfe_2 = result[threadIdx.x] >= c_2 * result[0];
}
// wolfe = torch.logical_and(wolfe_1, wolfe_2)
wolfe = wolfe_1 & wolfe_2;
// // step_success = wolfe * (self.alpha_list[:, :, 0:1] + 0.1)
// // step_success_w1 = wolfe_1 * (self.alpha_list[:, :, 0:1] + 0.1)
// step_success[threadIdx.x] = wolfe * (alpha_list_elem + 0.1);
// step_success_w1[threadIdx.x] = wolfe_1 * (alpha_list_elem + 0.1);
}
unsigned msk1 = __ballot_sync(FULL_MASK, wolfe_1 & condition);
unsigned msk = __ballot_sync(FULL_MASK, wolfe & condition);
// get the index of the last occurance of true
unsigned msk1_brev = __brev(msk1);
unsigned msk_brev = __brev(msk);
int id1 = 32 - __ffs(msk1_brev); // position of least signficant bit set to 1
int id = 32 - __ffs(msk_brev); // position of least signficant bit set to 1
__syncthreads();
__shared__ int idx_shared;
if (threadIdx.x == 0) {
if (!approx_wolfe) {
if (id == 32) { // msk is zero
id = id1;
}
if (id == 0) { // bit 0 is set
id = id1;
}
if (id == 32) { // msk is zero
id = 1;
}
if (id == 0) {
id = 1;
}
} else {
if (id == 32) { // msk is zero
id = 0;
}
}
// // _, m_idx = torch.max(step_success, dim=-2)
// // _, m1_idx = torch.max(step_success_w1, dim=-2)
// int m_id = 0;
// int m1_id = 0;
// scalar_t max1 = step_success[0];
// scalar_t max2 = step_success_w1[0];
// for (int i=1; i<l1; i++) {
// if (max1<step_success[i]) {
// max1 = step_success[i];
// m_id = i;
// }
// if (max2<step_success_w1[i]) {
// max2 = step_success_w1[i];
// m1_id = i;
// }
// }
// // m_idx = torch.where(m_idx == 0, m1_idx, m_idx)
// if (m_id == 0) {
// m_id = m1_id;
// }
// // m_idx[m_idx == 0] = 1
// if (m_id == 0) {
// m_id = 1;
// }
// if (id != m_id) {
// printf("id=%d, m_id=%d\n", id, m_id);
// printf("msk1=%x, msk=%x, raw id1=%d, raw id=%d\n", msk1, msk,
// 32-__ffs(msk1_brev), 32-__ffs(msk_brev));
// }
// m_idx[batch] = m_id;
// m_idx[batch] = id;
idx_shared = id + c_idx[batch];
}
////////////////////////////////////
// write outputs using the computed index.
// one index per batch is computed
////////////////////////////////////
__syncthreads();
if (threadIdx.x < l2) {
if (threadIdx.x == 0) {
// printf("block: %d, idx_shared: %d\n", batch, idx_shared);
}
best_x[batch * l2 + threadIdx.x] = x_set[idx_shared * l2 + threadIdx.x];
best_grad[batch * l2 + threadIdx.x] = g_x[idx_shared * l2 + threadIdx.x];
}
if (threadIdx.x == 0) {
best_c[batch] = c[idx_shared];
}
}
} // namespace Optimization
} // namespace Curobo
std::vector<torch::Tensor> line_search_cuda(
// torch::Tensor m_idx,
torch::Tensor best_x, torch::Tensor best_c, torch::Tensor best_grad,
const torch::Tensor g_x, const torch::Tensor x_set,
const torch::Tensor step_vec, const torch::Tensor c_0,
const torch::Tensor alpha_list, const torch::Tensor c_idx, const float c_1,
const float c_2, const bool strong_wolfe, const bool approx_wolfe,
const int l1, const int l2, const int batchsize) {
using namespace Curobo::Optimization;
assert(l2 <= 1024);
// multiple of 32
const int threadsPerBlock = 32 * ((l2 + 31) / 32); // l2;
const int blocksPerGrid = batchsize;
const int blocksPerGrid = batchsize;
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(
g_x.scalar_type(), "line_search_cu", ([&] {
line_search_kernel_mask<scalar_t, scalar_t>
<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(
// m_idx.data_ptr<int>(),
best_x.data_ptr<scalar_t>(), best_c.data_ptr<scalar_t>(),
best_grad.data_ptr<scalar_t>(), g_x.data_ptr<scalar_t>(),
x_set.data_ptr<scalar_t>(), step_vec.data_ptr<scalar_t>(),
c_0.data_ptr<scalar_t>(), alpha_list.data_ptr<scalar_t>(),
c_idx.data_ptr<int64_t>(), c_1, c_2, strong_wolfe, approx_wolfe,
l1, l2, batchsize);
}));
g_x.scalar_type(), "line_search_cu", ([&] {
line_search_kernel_mask<scalar_t, scalar_t>
<< < blocksPerGrid, threadsPerBlock, 0, stream >> > (
// m_idx.data_ptr<int>(),
best_x.data_ptr<scalar_t>(), best_c.data_ptr<scalar_t>(),
best_grad.data_ptr<scalar_t>(), g_x.data_ptr<scalar_t>(),
x_set.data_ptr<scalar_t>(), step_vec.data_ptr<scalar_t>(),
c_0.data_ptr<scalar_t>(), alpha_list.data_ptr<scalar_t>(),
c_idx.data_ptr<int64_t>(), c_1, c_2, strong_wolfe, approx_wolfe,
l1, l2, batchsize);
}));
C10_CUDA_KERNEL_LAUNCH_CHECK();
return {best_x, best_c, best_grad};
}
return { best_x, best_c, best_grad };
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -13,72 +13,120 @@
#include <c10/cuda/CUDAGuard.h>
#include <vector>
std::vector<torch::Tensor> step_position_clique(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof);
std::vector<torch::Tensor> step_position_clique2(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof, const int mode);
std::vector<torch::Tensor> step_position_clique2_idx(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor start_idx, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof, const int mode);
std::vector<torch::Tensor>step_position_clique(
torch::Tensor out_position,
torch::Tensor out_velocity,
torch::Tensor out_acceleration,
torch::Tensor out_jerk,
const torch::Tensor u_position,
const torch::Tensor start_position,
const torch::Tensor start_velocity,
const torch::Tensor start_acceleration,
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof);
std::vector<torch::Tensor>step_position_clique2(
torch::Tensor out_position,
torch::Tensor out_velocity,
torch::Tensor out_acceleration,
torch::Tensor out_jerk,
const torch::Tensor u_position,
const torch::Tensor start_position,
const torch::Tensor start_velocity,
const torch::Tensor start_acceleration,
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof,
const int mode);
std::vector<torch::Tensor>step_position_clique2_idx(
torch::Tensor out_position,
torch::Tensor out_velocity,
torch::Tensor out_acceleration,
torch::Tensor out_jerk,
const torch::Tensor u_position,
const torch::Tensor start_position,
const torch::Tensor start_velocity,
const torch::Tensor start_acceleration,
const torch::Tensor start_idx,
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof,
const int mode);
std::vector<torch::Tensor> backward_step_position_clique(
torch::Tensor out_grad_position, const torch::Tensor grad_position,
const torch::Tensor grad_velocity, const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof);
std::vector<torch::Tensor> backward_step_position_clique2(
torch::Tensor out_grad_position, const torch::Tensor grad_position,
const torch::Tensor grad_velocity, const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof, const int mode);
std::vector<torch::Tensor>backward_step_position_clique(
torch::Tensor out_grad_position,
const torch::Tensor grad_position,
const torch::Tensor grad_velocity,
const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk,
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof);
std::vector<torch::Tensor>backward_step_position_clique2(
torch::Tensor out_grad_position,
const torch::Tensor grad_position,
const torch::Tensor grad_velocity,
const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk,
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof,
const int mode);
std::vector<torch::Tensor>
step_acceleration(torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_acc, const torch::Tensor start_position,
step_acceleration(torch::Tensor out_position,
torch::Tensor out_velocity,
torch::Tensor out_acceleration,
torch::Tensor out_jerk,
const torch::Tensor u_acc,
const torch::Tensor start_position,
const torch::Tensor start_velocity,
const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size,
const int horizon, const int dof, const bool use_rk2 = true);
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof,
const bool use_rk2 = true);
std::vector<torch::Tensor>step_acceleration_idx(
torch::Tensor out_position,
torch::Tensor out_velocity,
torch::Tensor out_acceleration,
torch::Tensor out_jerk,
const torch::Tensor u_acc,
const torch::Tensor start_position,
const torch::Tensor start_velocity,
const torch::Tensor start_acceleration,
const torch::Tensor start_idx,
const torch::Tensor traj_dt,
const int batch_size,
const int horizon,
const int dof,
const bool use_rk2 = true);
std::vector<torch::Tensor> step_acceleration_idx(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_acc, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor start_idx, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const bool use_rk2 = true);
// NOTE: AT_ASSERT has become AT_CHECK on master after 0.4.
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
#define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), # x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERTM(x.is_contiguous(), # x " must be contiguous")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
std::vector<torch::Tensor> step_position_clique_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof) {
std::vector<torch::Tensor>step_position_clique_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof)
{
const at::cuda::OptionalCUDAGuard guard(u_position.device());
assert(false); // not supported
CHECK_INPUT(u_position);
CHECK_INPUT(out_position);
@@ -96,14 +144,15 @@ std::vector<torch::Tensor> step_position_clique_wrapper(
batch_size, horizon, dof);
}
std::vector<torch::Tensor> step_position_clique2_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof,
const int mode) {
std::vector<torch::Tensor>step_position_clique2_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof,
const int mode)
{
const at::cuda::OptionalCUDAGuard guard(u_position.device());
CHECK_INPUT(u_position);
@@ -122,14 +171,15 @@ std::vector<torch::Tensor> step_position_clique2_wrapper(
batch_size, horizon, dof, mode);
}
std::vector<torch::Tensor> step_position_clique2_idx_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor start_idx, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const int mode) {
std::vector<torch::Tensor>step_position_clique2_idx_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_position, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor start_idx, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const int mode)
{
const at::cuda::OptionalCUDAGuard guard(u_position.device());
CHECK_INPUT(u_position);
@@ -144,17 +194,19 @@ std::vector<torch::Tensor> step_position_clique2_idx_wrapper(
CHECK_INPUT(start_idx);
return step_position_clique2_idx(
out_position, out_velocity, out_acceleration, out_jerk, u_position,
start_position, start_velocity, start_acceleration, start_idx, traj_dt,
batch_size, horizon, dof, mode);
out_position, out_velocity, out_acceleration, out_jerk, u_position,
start_position, start_velocity, start_acceleration, start_idx, traj_dt,
batch_size, horizon, dof, mode);
}
std::vector<torch::Tensor> backward_step_position_clique_wrapper(
torch::Tensor out_grad_position, const torch::Tensor grad_position,
const torch::Tensor grad_velocity, const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof) {
std::vector<torch::Tensor>backward_step_position_clique_wrapper(
torch::Tensor out_grad_position, const torch::Tensor grad_position,
const torch::Tensor grad_velocity, const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof)
{
const at::cuda::OptionalCUDAGuard guard(grad_position.device());
assert(false); // not supported
CHECK_INPUT(out_grad_position);
CHECK_INPUT(grad_position);
@@ -164,15 +216,17 @@ std::vector<torch::Tensor> backward_step_position_clique_wrapper(
CHECK_INPUT(traj_dt);
return backward_step_position_clique(
out_grad_position, grad_position, grad_velocity, grad_acceleration,
grad_jerk, traj_dt, batch_size, horizon, dof);
out_grad_position, grad_position, grad_velocity, grad_acceleration,
grad_jerk, traj_dt, batch_size, horizon, dof);
}
std::vector<torch::Tensor> backward_step_position_clique2_wrapper(
torch::Tensor out_grad_position, const torch::Tensor grad_position,
const torch::Tensor grad_velocity, const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const int mode) {
std::vector<torch::Tensor>backward_step_position_clique2_wrapper(
torch::Tensor out_grad_position, const torch::Tensor grad_position,
const torch::Tensor grad_velocity, const torch::Tensor grad_acceleration,
const torch::Tensor grad_jerk, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const int mode)
{
const at::cuda::OptionalCUDAGuard guard(grad_position.device());
CHECK_INPUT(out_grad_position);
@@ -183,17 +237,18 @@ std::vector<torch::Tensor> backward_step_position_clique2_wrapper(
CHECK_INPUT(traj_dt);
return backward_step_position_clique2(
out_grad_position, grad_position, grad_velocity, grad_acceleration,
grad_jerk, traj_dt, batch_size, horizon, dof, mode);
out_grad_position, grad_position, grad_velocity, grad_acceleration,
grad_jerk, traj_dt, batch_size, horizon, dof, mode);
}
std::vector<torch::Tensor> step_acceleration_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_acc, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof, const bool use_rk2 = true) {
std::vector<torch::Tensor>step_acceleration_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_acc, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor traj_dt, const int batch_size, const int horizon,
const int dof, const bool use_rk2 = true)
{
const at::cuda::OptionalCUDAGuard guard(u_acc.device());
CHECK_INPUT(u_acc);
@@ -212,14 +267,15 @@ std::vector<torch::Tensor> step_acceleration_wrapper(
dof, use_rk2);
}
std::vector<torch::Tensor> step_acceleration_idx_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_acc, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor start_idx, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const bool use_rk2 = true) {
std::vector<torch::Tensor>step_acceleration_idx_wrapper(
torch::Tensor out_position, torch::Tensor out_velocity,
torch::Tensor out_acceleration, torch::Tensor out_jerk,
const torch::Tensor u_acc, const torch::Tensor start_position,
const torch::Tensor start_velocity, const torch::Tensor start_acceleration,
const torch::Tensor start_idx, const torch::Tensor traj_dt,
const int batch_size, const int horizon, const int dof,
const bool use_rk2 = true)
{
const at::cuda::OptionalCUDAGuard guard(u_acc.device());
CHECK_INPUT(u_acc);
@@ -239,19 +295,20 @@ std::vector<torch::Tensor> step_acceleration_idx_wrapper(
batch_size, horizon, dof, use_rk2);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("step_position", &step_position_clique_wrapper,
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
m.def("step_position", &step_position_clique_wrapper,
"Tensor Step Position (curobolib)");
m.def("step_position2", &step_position_clique2_wrapper,
m.def("step_position2", &step_position_clique2_wrapper,
"Tensor Step Position (curobolib)");
m.def("step_idx_position2", &step_position_clique2_idx_wrapper,
m.def("step_idx_position2", &step_position_clique2_idx_wrapper,
"Tensor Step Position (curobolib)");
m.def("step_position_backward", &backward_step_position_clique_wrapper,
m.def("step_position_backward", &backward_step_position_clique_wrapper,
"Tensor Step Position (curobolib)");
m.def("step_position_backward2", &backward_step_position_clique2_wrapper,
"Tensor Step Position (curobolib)");
m.def("step_acceleration", &step_acceleration_wrapper,
m.def("step_acceleration", &step_acceleration_wrapper,
"Tensor Step Acceleration (curobolib)");
m.def("step_acceleration_idx", &step_acceleration_idx_wrapper,
m.def("step_acceleration_idx", &step_acceleration_idx_wrapper,
"Tensor Step Acceleration (curobolib)");
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -26,101 +26,108 @@
#include <cub/cub.cuh>
#include <math.h>
namespace Curobo {
namespace Optimization {
// We launch with d_opt*cost_s1 threads.
// We assume that cost_s2 is always 1.
template <typename scalar_t>
__global__ void update_best_kernel(scalar_t *best_cost, // 200x1
scalar_t *best_q, // 200x7
int16_t *best_iteration, // 200 x 1
int16_t *current_iteration, // 1
const scalar_t *cost, // 200x1
const scalar_t *q, // 200x7
const int d_opt, // 7
const int cost_s1, // 200
const int cost_s2,
const int iteration,
const float delta_threshold,
const float relative_threshold) // 1
namespace Curobo
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int size = cost_s1 * d_opt; // size of best_q
if (tid >= size) {
return;
}
const int cost_idx = tid / d_opt;
const float cost_new = cost[cost_idx];
const float best_cost_in = best_cost[cost_idx];
const bool change = (best_cost_in - cost_new) > delta_threshold && cost_new < best_cost_in * relative_threshold;
if (change) {
best_q[tid] = q[tid]; // update best_q
if (tid % d_opt == 0) {
best_cost[cost_idx] = cost_new ; // update best_cost
//best_iteration[cost_idx] = curr_iter + iteration; //
// this tensor keeps track of whether the cost reduced by at least
// delta_threshold.
// here iteration is the last_best parameter.
}
}
if (tid % d_opt == 0)
namespace Optimization
{
if (change)
// We launch with d_opt*cost_s1 threads.
// We assume that cost_s2 is always 1.
template<typename scalar_t>
__global__ void update_best_kernel(scalar_t *best_cost, // 200x1
scalar_t *best_q, // 200x7
int16_t *best_iteration, // 200 x 1
int16_t *current_iteration, // 1
const scalar_t *cost, // 200x1
const scalar_t *q, // 200x7
const int d_opt, // 7
const int cost_s1, // 200
const int cost_s2,
const int iteration,
const float delta_threshold,
const float relative_threshold) // 1
{
best_iteration[cost_idx] = 0;
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int size = cost_s1 * d_opt; // size of best_q
if (tid >= size)
{
return;
}
const int cost_idx = tid / d_opt;
const float cost_new = cost[cost_idx];
const float best_cost_in = best_cost[cost_idx];
const bool change = (best_cost_in - cost_new) > delta_threshold &&
cost_new < best_cost_in * relative_threshold;
if (change)
{
best_q[tid] = q[tid]; // update best_q
if (tid % d_opt == 0)
{
best_cost[cost_idx] = cost_new; // update best_cost
// best_iteration[cost_idx] = curr_iter + iteration; //
// this tensor keeps track of whether the cost reduced by at least
// delta_threshold.
// here iteration is the last_best parameter.
}
}
if (tid % d_opt == 0)
{
if (change)
{
best_iteration[cost_idx] = 0;
}
else
{
best_iteration[cost_idx] -= 1;
}
}
// .if (tid == 0)
// {
// curr_iter += 1;
// current_iteration[0] = curr_iter;
// }
}
else
{
best_iteration[cost_idx] -= 1;
}
}
//.if (tid == 0)
//{
// curr_iter += 1;
// current_iteration[0] = curr_iter;
//}
}
} // namespace Optimization
} // namespace Curobo
} // namespace Optimization
} // namespace Curobo
std::vector<torch::Tensor>
update_best_cuda(torch::Tensor best_cost, torch::Tensor best_q,
torch::Tensor best_iteration,
torch::Tensor current_iteration,
const torch::Tensor cost,
const torch::Tensor cost,
const torch::Tensor q, const int d_opt, const int cost_s1,
const int cost_s2, const int iteration,
const float delta_threshold,
const float relative_threshold = 0.999) {
const float relative_threshold = 0.999)
{
using namespace Curobo::Optimization;
const int threadsPerBlock = 128;
const int cost_size = cost_s1 * d_opt;
const int cost_size = cost_s1 * d_opt;
assert(cost_s2 == 1); // assumption
const int blocksPerGrid = (cost_size + threadsPerBlock - 1) / threadsPerBlock;
// printf("cost_s1=%d, d_opt=%d, blocksPerGrid=%d\n", cost_s1, d_opt,
// blocksPerGrid);
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(
cost.scalar_type(), "update_best_cu", ([&] {
update_best_kernel<scalar_t>
<<<blocksPerGrid, threadsPerBlock, 0, stream>>>(
best_cost.data_ptr<scalar_t>(), best_q.data_ptr<scalar_t>(),
best_iteration.data_ptr<int16_t>(),
current_iteration.data_ptr<int16_t>(),
cost.data_ptr<scalar_t>(),
q.data_ptr<scalar_t>(), d_opt, cost_s1, cost_s2, iteration,
delta_threshold, relative_threshold);
}));
cost.scalar_type(), "update_best_cu", ([&] {
update_best_kernel<scalar_t>
<< < blocksPerGrid, threadsPerBlock, 0, stream >> > (
best_cost.data_ptr<scalar_t>(), best_q.data_ptr<scalar_t>(),
best_iteration.data_ptr<int16_t>(),
current_iteration.data_ptr<int16_t>(),
cost.data_ptr<scalar_t>(),
q.data_ptr<scalar_t>(), d_opt, cost_s1, cost_s2, iteration,
delta_threshold, relative_threshold);
}));
C10_CUDA_KERNEL_LAUNCH_CHECK();
return {best_cost, best_q, best_iteration};
}
return { best_cost, best_q, best_iteration };
}

View File

@@ -153,6 +153,8 @@ def get_pose_distance(
vec_convergence,
run_weight,
run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx,
batch_size,
horizon,
@@ -161,6 +163,7 @@ def get_pose_distance(
write_grad=False,
write_distance=False,
use_metric=False,
project_distance=True,
):
if batch_pose_idx.shape[0] != batch_size:
raise ValueError("Index buffer size is different from batch size")
@@ -181,6 +184,8 @@ def get_pose_distance(
vec_convergence,
run_weight,
run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx,
batch_size,
horizon,
@@ -189,6 +194,7 @@ def get_pose_distance(
write_grad,
write_distance,
use_metric,
project_distance,
)
out_distance = r[0]
@@ -229,6 +235,331 @@ def get_pose_distance_backward(
return r[0], r[1]
@torch.jit.script
def backward_PoseError_jit(grad_g_dist, grad_out_distance, weight, g_vec):
grad_vec = grad_g_dist + (grad_out_distance * weight)
grad = 1.0 * (grad_vec).unsqueeze(-1) * g_vec
return grad
# full method:
@torch.jit.script
def backward_full_PoseError_jit(
grad_out_distance, grad_g_dist, grad_r_err, p_w, q_w, g_vec_p, g_vec_q
):
p_grad = (grad_g_dist + (grad_out_distance * p_w)).unsqueeze(-1) * g_vec_p
q_grad = (grad_r_err + (grad_out_distance * q_w)).unsqueeze(-1) * g_vec_q
# p_grad = ((grad_out_distance * p_w)).unsqueeze(-1) * g_vec_p
# q_grad = ((grad_out_distance * q_w)).unsqueeze(-1) * g_vec_q
return p_grad, q_grad
class PoseErrorDistance(torch.autograd.Function):
@staticmethod
def forward(
ctx,
current_position,
goal_position,
current_quat,
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx,
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
out_p_grad,
out_q_grad,
batch_size,
horizon,
mode, # =PoseErrorType.BATCH_GOAL.value,
num_goals,
use_metric, # =False,
project_distance, # =True,
):
# out_distance = current_position[..., 0].detach().clone() * 0.0
# out_position_distance = out_distance.detach().clone()
# out_rotation_distance = out_distance.detach().clone()
# out_vec = (
# torch.cat((current_position.detach().clone(), current_quat.detach().clone()), dim=-1)
# * 0.0
# )
# out_idx = out_distance.clone().to(dtype=torch.long)
(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
) = get_pose_distance(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
current_position.contiguous(),
goal_position,
current_quat.contiguous(),
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx,
batch_size,
horizon,
mode,
num_goals,
current_position.requires_grad,
True,
use_metric,
project_distance,
)
ctx.save_for_backward(out_p_vec, out_r_vec, weight, out_p_grad, out_q_grad)
return out_distance, out_position_distance, out_rotation_distance, out_idx # .view(-1,1)
@staticmethod
def backward(ctx, grad_out_distance, grad_g_dist, grad_r_err, grad_out_idx):
(g_vec_p, g_vec_q, weight, out_grad_p, out_grad_q) = ctx.saved_tensors
pos_grad = None
quat_grad = None
batch_size = g_vec_p.shape[0] * g_vec_p.shape[1]
if ctx.needs_input_grad[0] and ctx.needs_input_grad[2]:
pos_grad, quat_grad = get_pose_distance_backward(
out_grad_p,
out_grad_q,
grad_out_distance.contiguous(),
grad_g_dist.contiguous(),
grad_r_err.contiguous(),
weight,
g_vec_p,
g_vec_q,
batch_size,
use_distance=True,
)
elif ctx.needs_input_grad[0]:
pos_grad = backward_PoseError_jit(grad_g_dist, grad_out_distance, weight[1], g_vec_p)
elif ctx.needs_input_grad[2]:
quat_grad = backward_PoseError_jit(grad_r_err, grad_out_distance, weight[0], g_vec_q)
return (
pos_grad,
None,
quat_grad,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
class PoseError(torch.autograd.Function):
@staticmethod
def forward(
ctx,
current_position: torch.Tensor,
goal_position: torch.Tensor,
current_quat: torch.Tensor,
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx,
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
out_p_grad,
out_q_grad,
batch_size,
horizon,
mode,
num_goals,
use_metric,
project_distance,
return_loss,
):
"""Compute error in pose
_extended_summary_
Args:
ctx: _description_
current_position: _description_
goal_position: _description_
current_quat: _description_
goal_quat: _description_
vec_weight: _description_
weight: _description_
vec_convergence: _description_
run_weight: _description_
run_vec_weight: _description_
offset_waypoint: _description_
offset_tstep_fraction: _description_
batch_pose_idx: _description_
out_distance: _description_
out_position_distance: _description_
out_rotation_distance: _description_
out_p_vec: _description_
out_r_vec: _description_
out_idx: _description_
out_p_grad: _description_
out_q_grad: _description_
batch_size: _description_
horizon: _description_
mode: _description_
num_goals: _description_
use_metric: _description_
project_distance: _description_
return_loss: _description_
Returns:
_description_
"""
# out_distance = current_position[..., 0].detach().clone() * 0.0
# out_position_distance = out_distance.detach().clone()
# out_rotation_distance = out_distance.detach().clone()
# out_vec = (
# torch.cat((current_position.detach().clone(), current_quat.detach().clone()), dim=-1)
# * 0.0
# )
# out_idx = out_distance.clone().to(dtype=torch.long)
ctx.return_loss = return_loss
(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
) = get_pose_distance(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
current_position.contiguous(),
goal_position,
current_quat.contiguous(),
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
offset_waypoint,
offset_tstep_fraction,
batch_pose_idx,
batch_size,
horizon,
mode,
num_goals,
current_position.requires_grad,
False,
use_metric,
project_distance,
)
ctx.save_for_backward(out_p_vec, out_r_vec)
return out_distance
@staticmethod
def backward(ctx, grad_out_distance): # , grad_g_dist, grad_r_err, grad_out_idx):
pos_grad = None
quat_grad = None
if ctx.needs_input_grad[0] and ctx.needs_input_grad[2]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
pos_grad = g_vec_p
quat_grad = g_vec_q
if ctx.return_loss:
pos_grad = pos_grad * grad_out_distance.unsqueeze(1)
quat_grad = quat_grad * grad_out_distance.unsqueeze(1)
elif ctx.needs_input_grad[0]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
pos_grad = g_vec_p
if ctx.return_loss:
pos_grad = pos_grad * grad_out_distance.unsqueeze(1)
elif ctx.needs_input_grad[2]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
quat_grad = g_vec_q
if ctx.return_loss:
quat_grad = quat_grad * grad_out_distance.unsqueeze(1)
return (
pos_grad,
None,
quat_grad,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
class SdfSphereOBB(torch.autograd.Function):
@staticmethod
def forward(

118
src/curobo/geom/cv.py Normal file
View File

@@ -0,0 +1,118 @@
#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Third Party
import torch
@torch.jit.script
def project_depth_to_pointcloud(depth_image: torch.Tensor, intrinsics_matrix: torch.Tensor):
"""Projects numpy depth image to point cloud.
Args:
np_depth_image: numpy array float, shape (h, w).
intrinsics array: numpy array float, 3x3 intrinsics matrix.
Returns:
array of float (h, w, 3)
"""
fx = intrinsics_matrix[0, 0]
fy = intrinsics_matrix[1, 1]
cx = intrinsics_matrix[0, 2]
cy = intrinsics_matrix[1, 2]
height = depth_image.shape[0]
width = depth_image.shape[1]
input_x = torch.arange(width, dtype=torch.float32, device=depth_image.device)
input_y = torch.arange(height, dtype=torch.float32, device=depth_image.device)
input_x, input_y = torch.meshgrid(input_x, input_y, indexing="ij")
input_x, input_y = input_x.T, input_y.T
input_z = depth_image
output_x = (input_x * input_z - cx * input_z) / fx
output_y = (input_y * input_z - cy * input_z) / fy
raw_pc = torch.stack([output_x, output_y, input_z], -1)
return raw_pc
@torch.jit.script
def get_projection_rays(height: int, width: int, intrinsics_matrix: torch.Tensor):
"""Projects numpy depth image to point cloud.
Args:
np_depth_image: numpy array float, shape (h, w).
intrinsics array: numpy array float, 3x3 intrinsics matrix.
Returns:
array of float (h, w, 3)
"""
fx = intrinsics_matrix[:, 0, 0]
fy = intrinsics_matrix[:, 1, 1]
cx = intrinsics_matrix[:, 0, 2]
cy = intrinsics_matrix[:, 1, 2]
input_x = torch.arange(width, dtype=torch.float32, device=intrinsics_matrix.device)
input_y = torch.arange(height, dtype=torch.float32, device=intrinsics_matrix.device)
input_x, input_y = torch.meshgrid(input_x, input_y, indexing="ij")
input_x, input_y = input_x.T, input_y.T
input_x = input_x.unsqueeze(0).repeat(intrinsics_matrix.shape[0], 1, 1)
input_y = input_y.unsqueeze(0).repeat(intrinsics_matrix.shape[0], 1, 1)
input_z = torch.ones(
(intrinsics_matrix.shape[0], height, width),
device=intrinsics_matrix.device,
dtype=torch.float32,
)
output_x = (input_x - cx) / fx
output_y = (input_y - cy) / fy
rays = torch.stack([output_x, output_y, input_z], -1).reshape(
intrinsics_matrix.shape[0], width * height, 3
)
rays = rays * (1.0 / 1000.0)
return rays
@torch.jit.script
def project_pointcloud_to_depth(
pointcloud: torch.Tensor,
output_image: torch.Tensor,
):
"""Projects pointcloud to depth image
Args:
np_depth_image: numpy array float, shape (h, w).
intrinsics array: numpy array float, 3x3 intrinsics matrix.
Returns:
array of float (h, w)
"""
width, height = output_image.shape
output_image = output_image.view(-1)
output_image[:] = pointcloud[:, 2]
output_image = output_image.view(width, height)
return output_image
@torch.jit.script
def project_depth_using_rays(
depth_image: torch.Tensor, rays: torch.Tensor, filter_origin: bool = False
):
if filter_origin:
depth_image = torch.where(depth_image < 0.01, 0, depth_image)
depth_image = depth_image.view(depth_image.shape[0], -1, 1).contiguous()
points = depth_image * rays
return points

View File

@@ -364,8 +364,12 @@ class WorldPrimitiveCollision(WorldCollision):
if self.cache is not None and "obb" in self.cache and self.cache["obb"] not in [None, 0]:
self._create_obb_cache(self.cache["obb"])
def load_collision_model(self, world_config: WorldConfig, env_idx=0):
self._load_collision_model_in_cache(world_config, env_idx)
def load_collision_model(
self, world_config: WorldConfig, env_idx=0, fix_cache_reference: bool = False
):
self._load_collision_model_in_cache(
world_config, env_idx, fix_cache_reference=fix_cache_reference
)
def load_batch_collision_model(self, world_config_list: List[WorldConfig]):
"""Load a batch of collision environments from a list of world configs.
@@ -436,7 +440,9 @@ class WorldPrimitiveCollision(WorldCollision):
)
self.collision_types["primitive"] = True
def _load_collision_model_in_cache(self, world_config: WorldConfig, env_idx: int = 0):
def _load_collision_model_in_cache(
self, world_config: WorldConfig, env_idx: int = 0, fix_cache_reference: bool = False
):
cube_objs = world_config.cuboid
max_obb = len(cube_objs)
self.world_model = world_config
@@ -444,8 +450,11 @@ class WorldPrimitiveCollision(WorldCollision):
log_info("No OBB objs")
return
if self._cube_tensor_list is None or self._cube_tensor_list[0].shape[1] < max_obb:
log_info("Creating Obb cache" + str(max_obb))
self._create_obb_cache(max_obb)
if not fix_cache_reference:
log_info("Creating Obb cache" + str(max_obb))
self._create_obb_cache(max_obb)
else:
log_error("number of OBB is larger than collision cache, create larger cache.")
# load as a batch:
pose_batch = [c.pose for c in cube_objs]
@@ -835,7 +844,9 @@ class WorldPrimitiveCollision(WorldCollision):
return dist
def clear_cache(self):
pass
if self._cube_tensor_list is not None:
self._cube_tensor_list[2][:] = 1
self._env_n_obbs[:] = 0
def get_voxels_in_bounding_box(
self,

View File

@@ -55,7 +55,7 @@ class WorldBloxCollision(WorldMeshCollision):
self._blox_voxel_sizes = [0.02]
super().__init__(config)
def load_collision_model(self, world_model: WorldConfig):
def load_collision_model(self, world_model: WorldConfig, fix_cache_reference: bool = False):
# load nvblox mesh
if len(world_model.blox) > 0:
# check if there is a mapper instance:
@@ -109,15 +109,17 @@ class WorldBloxCollision(WorldMeshCollision):
self._blox_names = names
self.collision_types["blox"] = True
return super().load_collision_model(world_model)
return super().load_collision_model(world_model, fix_cache_reference=fix_cache_reference)
def clear_cache(self):
self._blox_mapper.clear()
self._blox_mapper.update_hashmaps()
super().clear_cache()
def clear_blox_layer(self, layer_name: str):
index = self._blox_names.index(layer_name)
self._blox_mapper.clear(index)
self._blox_mapper.update_hashmaps()
def _get_blox_sdf(
self,
@@ -147,6 +149,7 @@ class WorldBloxCollision(WorldMeshCollision):
speed_dt,
sweep_steps,
enable_speed_metric,
return_loss=False,
):
d = self._blox_mapper.query_sphere_trajectory_sdf_cost(
query_spheres,
@@ -160,6 +163,8 @@ class WorldBloxCollision(WorldMeshCollision):
self._blox_tensor_list[1],
sweep_steps,
enable_speed_metric,
return_loss,
use_experimental=False,
)
return d
@@ -279,6 +284,7 @@ class WorldBloxCollision(WorldMeshCollision):
speed_dt=speed_dt,
sweep_steps=sweep_steps,
enable_speed_metric=enable_speed_metric,
return_loss=return_loss,
)
if ("primitive" not in self.collision_types or not self.collision_types["primitive"]) and (
@@ -332,6 +338,7 @@ class WorldBloxCollision(WorldMeshCollision):
speed_dt=speed_dt,
sweep_steps=sweep_steps,
enable_speed_metric=enable_speed_metric,
return_loss=return_loss,
)
if ("primitive" not in self.collision_types or not self.collision_types["primitive"]) and (

View File

@@ -72,7 +72,11 @@ class WorldMeshCollision(WorldPrimitiveCollision):
return super()._init_cache()
def load_collision_model(
self, world_model: WorldConfig, env_idx: int = 0, load_obb_obs: bool = True
self,
world_model: WorldConfig,
env_idx: int = 0,
load_obb_obs: bool = True,
fix_cache_reference: bool = False,
):
max_nmesh = len(world_model.mesh)
if max_nmesh > 0:
@@ -91,14 +95,16 @@ class WorldMeshCollision(WorldPrimitiveCollision):
self.collision_types["mesh"] = True
if load_obb_obs:
super().load_collision_model(world_model, env_idx)
super().load_collision_model(
world_model, env_idx, fix_cache_reference=fix_cache_reference
)
else:
self.world_model = world_model
def load_batch_collision_model(self, world_config_list: List[WorldConfig]):
max_nmesh = max([len(x.mesh) for x in world_config_list])
if self._mesh_tensor_list is None or self._mesh_tensor_list[0].shape[1] < max_nmesh:
log_info("Creating new Mesh cache: " + str(max_nmesh))
log_warn("Creating new Mesh cache: " + str(max_nmesh))
self._create_mesh_cache(max_nmesh)
for env_idx, world_model in enumerate(world_config_list):

View File

@@ -228,6 +228,61 @@ def pose_inverse(
return out_position, out_quaternion
@wp.kernel
def compute_pose_inverse(
position: wp.array(dtype=wp.vec3),
quat: wp.array(dtype=wp.vec4),
out_position: wp.array(dtype=wp.vec3),
out_quat: wp.array(dtype=wp.vec4),
): # b pose_1 and b pose_2, compute pose_1 * pose_2
b_idx = wp.tid()
# read data:
in_position = position[b_idx]
in_quat = quat[b_idx]
# read point
# create a transform from a vector/quaternion:
t_1 = wp.transform(in_position, wp.quaternion(in_quat[1], in_quat[2], in_quat[3], in_quat[0]))
t_3 = wp.transform_inverse(t_1)
# write pt:
out_q = wp.transform_get_rotation(t_3)
out_v = wp.vec4()
out_v[0] = out_q[3] # out_q[3]
out_v[1] = out_q[0] # [0]
out_v[2] = out_q[1] # wp.extract(out_q, 1)
out_v[3] = out_q[2] # wp.extract(out_q, 2)
out_position[b_idx] = wp.transform_get_translation(t_3)
out_quat[b_idx] = out_v
@wp.kernel
def compute_matrix_to_quat(
in_mat: wp.array(dtype=wp.mat33),
out_quat: wp.array(dtype=wp.vec4),
):
# b pose_1 and b pose_2, compute pose_1 * pose_2
b_idx = wp.tid()
# read data:
in_m = in_mat[b_idx]
# read point
# create a transform from a vector/quaternion:
out_q = wp.quat_from_matrix(in_m)
out_v = wp.vec4()
out_v[0] = out_q[3] # wp.extract(out_q, 3)
out_v[1] = out_q[0] # wp.extract(out_q, 0)
out_v[2] = out_q[1] # wp.extract(out_q, 1)
out_v[3] = out_q[2] # wp.extract(out_q, 2)
# write pt:
out_quat[b_idx] = out_v
@wp.kernel
def compute_transform_point(
position: wp.array(dtype=wp.vec3),
@@ -331,37 +386,6 @@ def compute_batch_pose_multipy(
out_quat[b_idx] = out_v
@wp.kernel
def compute_pose_inverse(
position: wp.array(dtype=wp.vec3),
quat: wp.array(dtype=wp.vec4),
out_position: wp.array(dtype=wp.vec3),
out_quat: wp.array(dtype=wp.vec4),
): # b pose_1 and b pose_2, compute pose_1 * pose_2
b_idx = wp.tid()
# read data:
in_position = position[b_idx]
in_quat = quat[b_idx]
# read point
# create a transform from a vector/quaternion:
t_1 = wp.transform(in_position, wp.quaternion(in_quat[1], in_quat[2], in_quat[3], in_quat[0]))
t_3 = wp.transform_inverse(t_1)
# write pt:
out_q = wp.transform_get_rotation(t_3)
out_v = wp.vec4()
out_v[0] = out_q[3]
out_v[1] = out_q[0]
out_v[2] = out_q[1]
out_v[3] = out_q[2]
out_position[b_idx] = wp.transform_get_translation(t_3)
out_quat[b_idx] = out_v
@wp.kernel
def compute_quat_to_matrix(
quat: wp.array(dtype=wp.vec4),
@@ -382,30 +406,6 @@ def compute_quat_to_matrix(
out_mat[b_idx] = m_1
@wp.kernel
def compute_matrix_to_quat(
in_mat: wp.array(dtype=wp.mat33),
out_quat: wp.array(dtype=wp.vec4),
):
# b pose_1 and b pose_2, compute pose_1 * pose_2
b_idx = wp.tid()
# read data:
in_m = in_mat[b_idx]
# read point
# create a transform from a vector/quaternion:
out_q = wp.quat_from_matrix(in_m)
out_v = wp.vec4()
out_v[0] = out_q[3]
out_v[1] = out_q[0]
out_v[2] = out_q[1]
out_v[3] = out_q[2]
# write pt:
out_quat[b_idx] = out_v
@wp.kernel
def compute_pose_multipy(
position: wp.array(dtype=wp.vec3),
@@ -962,6 +962,7 @@ class PoseInverse(torch.autograd.Function):
adj_quaternion,
)
ctx.b = b
wp.launch(
kernel=compute_pose_inverse,
dim=b,
@@ -1071,6 +1072,7 @@ class QuatToMatrix(torch.autograd.Function):
adj_quaternion,
)
ctx.b = b
wp.launch(
kernel=compute_quat_to_matrix,
dim=b,
@@ -1153,6 +1155,7 @@ class MatrixToQuaternion(torch.autograd.Function):
adj_mat,
)
ctx.b = b
wp.launch(
kernel=compute_matrix_to_quat,
dim=b,

View File

@@ -12,7 +12,7 @@ from __future__ import annotations
# Standard Library
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence, Union
# Third Party
import numpy as np
@@ -22,6 +22,7 @@ import trimesh
# CuRobo
from curobo.geom.sphere_fit import SphereFitType, fit_spheres_to_mesh
from curobo.types.base import TensorDeviceType
from curobo.types.camera import CameraObservation
from curobo.types.math import Pose
from curobo.util.logger import log_error, log_warn
from curobo.util_file import get_assets_path, join_path
@@ -60,7 +61,7 @@ class Obstacle:
tensor_args: TensorDeviceType = TensorDeviceType()
def get_trimesh_mesh(self, process: bool = True) -> trimesh.Trimesh:
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True) -> trimesh.Trimesh:
"""Create a trimesh instance from the obstacle representation.
Args:
@@ -74,8 +75,11 @@ class Obstacle:
"""
raise NotImplementedError
def save_as_mesh(self, file_path: str):
def save_as_mesh(self, file_path: str, transform_with_pose: bool = False):
mesh_scene = self.get_trimesh_mesh()
if transform_with_pose:
mesh_scene.apply_transform(self.get_transform_matrix())
mesh_scene.export(file_path)
def get_cuboid(self) -> Cuboid:
@@ -238,7 +242,7 @@ class Cuboid(Obstacle):
if self.pose is None:
log_error("Cuboid Obstacle requires Pose")
def get_trimesh_mesh(self, process: bool = True):
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
m = trimesh.creation.box(extents=self.dims)
if self.color is not None:
color_visual = trimesh.visual.color.ColorVisuals(
@@ -254,7 +258,7 @@ class Capsule(Obstacle):
base: List[float] = field(default_factory=lambda: [0.0, 0.0, 0.0])
tip: List[float] = field(default_factory=lambda: [0.0, 0.0, 0.0])
def get_trimesh_mesh(self, process: bool = True):
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
height = self.tip[2] - self.base[2]
if (
height < 0
@@ -280,7 +284,7 @@ class Cylinder(Obstacle):
radius: float = 0.0
height: float = 0.0
def get_trimesh_mesh(self, process: bool = True):
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
m = trimesh.creation.cylinder(radius=self.radius, height=self.height)
if self.color is not None:
color_visual = trimesh.visual.color.ColorVisuals(
@@ -304,7 +308,7 @@ class Sphere(Obstacle):
if self.pose is not None:
self.position = self.pose[:3]
def get_trimesh_mesh(self, process: bool = True):
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
m = trimesh.creation.icosphere(radius=self.radius)
if self.color is not None:
color_visual = trimesh.visual.color.ColorVisuals(
@@ -356,9 +360,9 @@ class Sphere(Obstacle):
@dataclass
class Mesh(Obstacle):
file_path: Optional[str] = None
file_string: Optional[
str
] = None # storing full mesh as a string, loading from this is not implemented yet.
file_string: Optional[str] = (
None # storing full mesh as a string, loading from this is not implemented yet.
)
urdf_path: Optional[str] = None # useful for visualization in isaac gym.
vertices: Optional[List[List[float]]] = None
faces: Optional[List[int]] = None
@@ -375,13 +379,13 @@ class Mesh(Obstacle):
self.vertices = np.ravel(self.scale) * self.vertices
self.scale = None
def get_trimesh_mesh(self, process: bool = True):
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
# load mesh from filepath or verts and faces:
if self.file_path is not None:
m = trimesh.load(self.file_path, process=process, force="mesh")
if isinstance(m, trimesh.Scene):
m = m.dump(concatenate=True)
if isinstance(m.visual, trimesh.visual.texture.TextureVisuals):
if process_color and isinstance(m.visual, trimesh.visual.texture.TextureVisuals):
m.visual = m.visual.to_color()
else:
m = trimesh.Trimesh(
@@ -467,7 +471,7 @@ class BloxMap(Obstacle):
name=self.name + "_mesh", file_path=self.mesh_file_path, pose=self.pose
)
def get_trimesh_mesh(self, process: bool = True):
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
if self.mesh is not None:
return self.mesh.get_trimesh_mesh(process)
else:
@@ -475,6 +479,91 @@ class BloxMap(Obstacle):
return None
@dataclass
class PointCloud(Obstacle):
points: Union[torch.Tensor, np.ndarray, List[List[float]]] = None
points_features: Union[torch.Tensor, np.ndarray, List[List[float]], None] = None
def __post_init__(self):
if self.scale is not None and self.points is not None:
self.points = np.ravel(self.scale) * self.points
self.scale = None
def get_trimesh_mesh(self, process: bool = True, process_color: bool = True):
points = self.points
if isinstance(points, torch.Tensor):
points = points.view(-1, 3).cpu().numpy()
if isinstance(points, list):
points = np.ndarray(points)
mesh = Mesh.from_pointcloud(points, pose=self.pose)
return mesh.get_trimesh_mesh()
def get_mesh_data(self, process: bool = True):
verts = faces = None
m = self.get_trimesh_mesh(process=process)
verts = m.vertices.view(np.ndarray)
faces = m.faces
return verts, faces
@staticmethod
def from_camera_observation(
camera_obs: CameraObservation, name: str = "pc_obstacle", pose: Optional[List[float]] = None
):
return PointCloud(name=name, pose=pose, points=camera_obs.get_pointcloud())
def get_bounding_spheres(
self,
n_spheres: int = 1,
surface_sphere_radius: float = 0.002,
fit_type: SphereFitType = SphereFitType.VOXEL_VOLUME_SAMPLE_SURFACE,
voxelize_method: str = "ray",
pre_transform_pose: Optional[Pose] = None,
tensor_args: TensorDeviceType = TensorDeviceType(),
) -> List[Sphere]:
"""Compute n spheres that fits in the volume of the object.
Args:
n: number of spheres
Returns:
spheres
"""
# sample points in pointcloud:
# mesh = self.get_trimesh_mesh()
# pts, n_radius = fit_spheres_to_mesh(
# mesh, n_spheres, surface_sphere_radius, fit_type, voxelize_method=voxelize_method
# )
obj_pose = Pose.from_list(self.pose, tensor_args)
# transform object:
# transform points:
if pre_transform_pose is not None:
obj_pose = pre_transform_pose.multiply(obj_pose) # convert object pose to another frame
if pts is None or len(pts) == 0:
log_warn("spheres could not be fit!, adding one sphere at origin")
pts = np.zeros((1, 3))
pts[0, :] = mesh.centroid
n_radius = [0.02]
obj_pose = Pose.from_list([0, 0, 0, 1, 0, 0, 0], tensor_args)
points_cuda = tensor_args.to_device(pts)
pts = obj_pose.transform_points(points_cuda).cpu().view(-1, 3).numpy()
new_spheres = [
Sphere(
name="sph_" + str(i),
pose=[pts[i, 0], pts[i, 1], pts[i, 2], 1, 0, 0, 0],
radius=n_radius[i],
)
for i in range(pts.shape[0])
]
return new_spheres
@dataclass
class WorldConfig(Sequence):
"""Representation of World for use in CuRobo."""
@@ -664,23 +753,25 @@ class WorldConfig(Sequence):
)
@staticmethod
def get_scene_graph(current_world: WorldConfig):
def get_scene_graph(current_world: WorldConfig, process_color: bool = True):
m_world = WorldConfig.create_mesh_world(current_world)
mesh_scene = trimesh.scene.scene.Scene(base_frame="world")
mesh_scene = trimesh.scene.scene.Scene(base_frame="world_origin")
mesh_list = m_world
for m in mesh_list:
mesh_scene.add_geometry(
m.get_trimesh_mesh(),
m.get_trimesh_mesh(process_color=process_color),
geom_name=m.name,
parent_node_name="world",
parent_node_name="world_origin",
transform=m.get_transform_matrix(),
)
return mesh_scene
@staticmethod
def create_merged_mesh_world(current_world: WorldConfig, process: bool = True):
mesh_scene = WorldConfig.get_scene_graph(current_world)
def create_merged_mesh_world(
current_world: WorldConfig, process: bool = True, process_color: bool = True
):
mesh_scene = WorldConfig.get_scene_graph(current_world, process_color=process_color)
mesh_scene = mesh_scene.dump(concatenate=True)
new_mesh = Mesh(
vertices=mesh_scene.vertices.view(np.ndarray),
@@ -702,8 +793,10 @@ class WorldConfig(Sequence):
def get_collision_check_world(self, mesh_process: bool = False):
return WorldConfig.create_collision_support_world(self, process=mesh_process)
def save_world_as_mesh(self, file_path: str, save_as_scene_graph=False):
mesh_scene = WorldConfig.get_scene_graph(self)
def save_world_as_mesh(
self, file_path: str, save_as_scene_graph=False, process_color: bool = True
):
mesh_scene = WorldConfig.get_scene_graph(self, process_color=process_color)
if save_as_scene_graph:
mesh_scene = mesh_scene.dump(concatenate=True)

View File

@@ -10,10 +10,19 @@
#
# Standard Library
import random
# Third Party
import networkx as nx
import numpy as np
import torch
# This is needed to get deterministic results from networkx.
# Note: it has to be set in global space.
np.random.seed(2)
random.seed(2)
class NetworkxGraph(object):
def __init__(self):

View File

@@ -8,3 +8,15 @@
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
""" Optimization module containing several numerical solvers.
Base for an opimization solver is at :class:`opt_base.Optimizer`. cuRobo provides two base classes
for implementing two popular ways to optimize, (1) using particles
with :class:`particle.particle_opt_base.ParticleOptBase` and (2) using Newton/Quasi-Newton solvers
with :class:`newton.newton_base.NewtonOptBase`. :class:`newton.newton_base.NewtonOptBase` contains
implementations of several line search schemes. Note that these line search schemes are approximate
as cuRobo tries different line search magnitudes in parallel and chooses the largest that satisfies
line search conditions.
"""

View File

@@ -10,5 +10,5 @@
#
"""
This module contains code for cuda accelerated kinematics.
This module contains Newton/Quasi-Newton solvers.
"""

View File

@@ -95,9 +95,9 @@ class LBFGSOpt(NewtonOptBase, LBFGSOptConfig):
self.step_q_buffer[:] = 0.0
return super().reset()
def update_nenvs(self, n_envs):
self.init_hessian(b=n_envs)
return super().update_nenvs(n_envs)
def update_nproblems(self, n_problems):
self.init_hessian(b=n_problems)
return super().update_nproblems(n_problems)
def init_hessian(self, b=1):
self.x_0 = torch.zeros(

View File

@@ -79,7 +79,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
self.outer_iters = math.ceil(self.n_iters / self.inner_iters)
# create line search
self.update_nenvs(self.n_envs)
self.update_nproblems(self.n_problems)
self.reset()
@@ -135,7 +135,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
if self.store_debug:
self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
with profiler.record_function("newton_base/init_opt"):
q = q.view(self.n_envs, self.action_horizon * self.d_action)
q = q.view(self.n_problems, self.action_horizon * self.d_action)
grad_q = q.detach() * 0.0
# run opt graph
if not self.cu_opt_init:
@@ -150,7 +150,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
if check_convergence(self.best_iteration, self.current_iteration, self.last_best):
break
best_q = best_q.view(self.n_envs, self.action_horizon, self.d_action)
best_q = best_q.view(self.n_problems, self.action_horizon, self.d_action)
return best_q
def reset(self):
@@ -171,9 +171,6 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
if self.store_debug:
self.debug.append(self.best_q.view(-1, self.action_horizon, self.d_action).clone())
self.debug_cost.append(self.best_cost.detach().view(-1, 1).clone())
# self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
# self.debug_cost.append(cost_n.detach().view(-1, 1).clone())
# print(grad_q)
return self.best_q.detach(), self.best_cost.detach(), q.detach(), grad_q.detach()
@@ -222,11 +219,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
def _compute_cost_gradient(self, x):
x_n = x.detach().requires_grad_(True)
x_in = x_n.view(
self.n_envs * self.num_particles, self.action_horizon, self.rollout_fn.d_action
self.n_problems * self.num_particles, self.action_horizon, self.rollout_fn.d_action
)
trajectories = self.rollout_fn(x_in) # x_n = (batch*line_search_scale) x horizon x d_action
cost = torch.sum(
trajectories.costs.view(self.n_envs, self.num_particles, self.horizon),
trajectories.costs.view(self.n_problems, self.num_particles, self.horizon),
dim=-1,
keepdim=True,
)
@@ -235,7 +232,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
return (
cost,
g_x,
) # cost: [n_envs, n_particles, 1], g_x: [n_envs, n_particles, horizon*d_action]
) # cost: [n_problems, n_particles, 1], g_x: [n_problems, n_particles, horizon*d_action]
def _wolfe_line_search(self, x, step_direction):
# x_set = get_x_set_jit(step_direction, x, self.alpha_list, self.action_lows, self.action_highs)
@@ -455,36 +452,40 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
mask_q = mask.unsqueeze(-1).expand(-1, self.d_opt)
self.best_q.copy_(torch.where(mask_q, q, self.best_q))
def update_nenvs(self, n_envs):
def update_nproblems(self, n_problems):
self.l_vec = torch.ones(
(n_envs, self.num_particles, 1),
(n_problems, self.num_particles, 1),
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
)
self.best_cost = (
torch.ones((n_envs, 1), device=self.tensor_args.device, dtype=self.tensor_args.dtype)
torch.ones(
(n_problems, 1), device=self.tensor_args.device, dtype=self.tensor_args.dtype
)
* 5000000.0
)
self.best_q = torch.zeros(
(n_envs, self.d_opt), device=self.tensor_args.device, dtype=self.tensor_args.dtype
(n_problems, self.d_opt), device=self.tensor_args.device, dtype=self.tensor_args.dtype
)
self.best_grad_q = torch.zeros(
(n_envs, 1, self.d_opt), device=self.tensor_args.device, dtype=self.tensor_args.dtype
(n_problems, 1, self.d_opt),
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
)
# create list:
self.alpha_list = self.line_scale.repeat(n_envs, 1, 1)
self.alpha_list = self.line_scale.repeat(n_problems, 1, 1)
self.zero_alpha_list = self.alpha_list[:, :, 0:1].contiguous()
h = self.alpha_list.shape[1]
self.c_idx = torch.arange(
0, n_envs * h, step=(h), device=self.tensor_args.device, dtype=torch.long
0, n_problems * h, step=(h), device=self.tensor_args.device, dtype=torch.long
)
self.best_iteration = torch.zeros(
(n_envs), device=self.tensor_args.device, dtype=torch.int16
(n_problems), device=self.tensor_args.device, dtype=torch.int16
)
self.current_iteration = torch.zeros((1), device=self.tensor_args.device, dtype=torch.int16)
self.cu_opt_init = False
super().update_nenvs(n_envs)
super().update_nproblems(n_problems)
def _initialize_opt_iters_graph(self, q, grad_q, shift_steps):
if self.use_cuda_graph:

View File

@@ -8,6 +8,9 @@
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
""" Base module for Optimization. """
from __future__ import annotations
# Standard Library
import time
from abc import abstractmethod
@@ -28,31 +31,75 @@ from curobo.util.torch_utils import is_cuda_graph_available
@dataclass
class OptimizerConfig:
"""Configuration for an :meth:`Optimizer`."""
#: Number of optimization variables per timestep.
d_action: int
#: Lower bound for optimization variables.
action_lows: List[float]
#: Higher bound for optimization variables
action_highs: List[float]
horizon: int
n_iters: int
rollout_fn: RolloutBase
tensor_args: TensorDeviceType
use_cuda_graph: bool
store_debug: bool
debug_info: Any
cold_start_n_iters: int
num_particles: Union[int, None]
n_envs: int
sync_cuda_time: bool
use_coo_sparse: bool
#:
action_horizon: int
#: Number of timesteps in optimization state, total variables = d_action * horizon
horizon: int
#: Number of iterations to run optimization
n_iters: int
#: Number of iterations to run optimization during the first instance. Setting to None will
#: use n_iters. This parameter is useful in MPC like settings where we need to run many
#: iterations during initialization (cold start) and then run only few iterations (warm start).
cold_start_n_iters: Union[int, None]
#: Rollout function to use for computing cost, given optimization variables.
rollout_fn: RolloutBase
#: Tensor device to use for optimization.
tensor_args: TensorDeviceType
#: Capture optimization iteration in a cuda graph and replay graph instead of eager execution.
#: Enabling this can make optimization 10x faster. But changing control flow, tensor
#: shapes, or problem type is not allowed.
use_cuda_graph: bool
#: Record debugging data such as optimization variables, and cost at every iteration. Enabling
#: this will disable cuda graph.
store_debug: bool
#: Use this to record additional attributes from rollouts.
debug_info: Any
#: Number of parallel problems to optimize.
n_problems: int
#: Number of particles to use per problem. Common optimization solvers use many particles to
#: optimize a single problem. E.g., MPPI rolls out many parallel samples and computes a weighted
#: mean. In cuRobo, Quasi-Newton solvers use particles to run many line search magnitudes.
#: Total optimization batch size = n_problems * num_particles.
num_particles: Union[int, None]
#: Synchronize device before computing solver time.
sync_cuda_time: bool
#: Matmul with a Sparse tensor is used to create particles for each problem index to save memory
#: and compute. Some versions of pytorch do not support coo sparse, specifically during
#: torch profile runs. Set this to False to use a standard tensor.
use_coo_sparse: bool
def __post_init__(self):
object.__setattr__(self, "action_highs", self.tensor_args.to_device(self.action_highs))
object.__setattr__(self, "action_lows", self.tensor_args.to_device(self.action_lows))
# check cuda graph version:
if self.use_cuda_graph:
self.use_cuda_graph = is_cuda_graph_available()
if self.num_particles is None:
self.num_particles = 1
if self.cold_start_n_iters is None:
self.cold_start_n_iters = self.n_iters
@staticmethod
def create_data_dict(
@@ -61,6 +108,17 @@ class OptimizerConfig:
tensor_args: TensorDeviceType = TensorDeviceType(),
child_dict: Optional[Dict] = None,
):
"""Helper function to create dictionary from optimizer parameters and rollout class.
Args:
data_dict: optimizer parameters dictionary.
rollout_fn: rollout function.
tensor_args: tensor cuda device.
child_dict: new dictionary where parameters will be stored.
Returns:
Dictionary with parameters to create a :meth:`OptimizerConfig`
"""
if child_dict is None:
child_dict = deepcopy(data_dict)
child_dict["d_action"] = rollout_fn.d_action
@@ -78,17 +136,33 @@ class OptimizerConfig:
class Optimizer(OptimizerConfig):
def __init__(self, config: Optional[OptimizerConfig] = None) -> None:
"""Base optimization solver class
Args:
config: Initialized with parameters from a dataclass.
"""
if config is not None:
super().__init__(**vars(config))
self.opt_dt = 0.0
self.COLD_START = True
self.update_nenvs(self.n_envs)
self.update_nproblems(self.n_problems)
self._batch_goal = None
self._rollout_list = None
self.debug = []
self.debug_cost = []
def optimize(self, opt_tensor: torch.Tensor, shift_steps=0, n_iters=None) -> torch.Tensor:
"""Find a solution through optimization given the initial values for variables.
Args:
opt_tensor: Initial value of optimization variables.
Shape: [n_problems, action_horizon, d_action]
shift_steps: Shift variables along action_horizon. Useful in mpc warm-start setting.
n_iters: Override number of iterations to run optimization.
Returns:
Optimized values returned as a tensor of shape [n_problems, action_horizon, d_action].
"""
if self.COLD_START:
n_iters = self.cold_start_n_iters
self.COLD_START = False
@@ -99,17 +173,12 @@ class Optimizer(OptimizerConfig):
self.opt_dt = time.time() - st_time
return out
@abstractmethod
def _optimize(self, opt_tensor: torch.Tensor, shift_steps=0, n_iters=None) -> torch.Tensor:
pass
def _shift(self, shift_steps=0):
"""
Shift the variables in the solver to hotstart the next timestep
"""
return
def update_params(self, goal: Goal):
"""Update parameters in the :meth:`curobo.rollout.rollout_base.RolloutBase` instance.
Args:
goal: parameters to update rollout instance.
"""
with profiler.record_function("OptBase/batch_goal"):
if self._batch_goal is not None:
self._batch_goal.copy_(goal, update_idx_buffers=True) # why True?
@@ -118,80 +187,37 @@ class Optimizer(OptimizerConfig):
self.rollout_fn.update_params(self._batch_goal)
def reset(self):
"""
Reset the controller
"""
"""Reset optimizer."""
self.rollout_fn.reset()
self.debug = []
self.debug_cost = []
# self.COLD_START = True
def update_nenvs(self, n_envs):
assert n_envs > 0
self._update_env_kernel(n_envs, self.num_particles)
self.n_envs = n_envs
def update_nproblems(self, n_problems: int):
"""Update the number of problems that need to be optimized.
def _update_env_kernel(self, n_envs, num_particles):
log_info(
"Updating env kernel [n_envs: "
+ str(n_envs)
+ " , num_particles: "
+ str(num_particles)
+ " ]"
)
Args:
n_problems: number of problems.
"""
assert n_problems > 0
self._update_problem_kernel(n_problems, self.num_particles)
self.n_problems = n_problems
self.env_col = torch.arange(
0, n_envs, step=1, dtype=torch.long, device=self.tensor_args.device
)
self.n_select_ = torch.tensor(
[x * n_envs + x for x in range(n_envs)],
device=self.tensor_args.device,
dtype=torch.long,
)
# create sparse tensor:
sparse_indices = []
for i in range(n_envs):
sparse_indices.extend([[i * num_particles + x, i] for x in range(num_particles)])
sparse_values = torch.ones(len(sparse_indices))
sparse_indices = torch.tensor(sparse_indices)
if self.use_coo_sparse:
self.env_kernel_ = torch.sparse_coo_tensor(
sparse_indices.t(),
sparse_values,
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
)
else:
self.env_kernel_ = torch.sparse_coo_tensor(
sparse_indices.t(),
sparse_values,
device="cpu",
dtype=self.tensor_args.dtype,
)
self.env_kernel_ = self.env_kernel_.to_dense().to(device=self.tensor_args.device)
self._env_seeds = self.num_particles
def get_nenv_tensor(self, x):
"""This function takes an input tensor of shape (n_env,....) and converts it into
(n_particles,...)
def get_nproblem_tensor(self, x):
"""This function takes an input tensor of shape (n_problem,....) and converts it into
(n_particles,...).
"""
# if x.shape[0] != self.n_envs:
# x_env = x.unsqueeze(0).repeat(self.n_envs, 1)
# else:
# x_env = x
# create a tensor
nx_env = self.env_kernel_ @ x
nx_problem = self.problem_kernel_ @ x
return nx_env
return nx_problem
def reset_seed(self):
"""Reset seeds."""
return True
def reset_cuda_graph(self):
"""Reset CUDA Graphs. This does not work, workaround is to create a new instance."""
if self.use_cuda_graph:
self.cu_opt_init = False
else:
@@ -199,7 +225,87 @@ class Optimizer(OptimizerConfig):
self._batch_goal = None
self.rollout_fn.reset_cuda_graph()
def reset_shape(self):
"""Reset any flags in rollout class. Useful to reinitialize tensors for a new shape."""
self.rollout_fn.reset_shape()
self._batch_goal = None
def get_all_rollout_instances(self) -> List[RolloutBase]:
"""Get all instances of Rollout class in the optimizer."""
if self._rollout_list is None:
self._rollout_list = [self.rollout_fn]
return self._rollout_list
@abstractmethod
def _optimize(self, opt_tensor: torch.Tensor, shift_steps=0, n_iters=None) -> torch.Tensor:
"""Implement this function in a derived class containing the solver.
Args:
opt_tensor: Initial value of optimization variables.
Shape: [n_problems, action_horizon, d_action]
shift_steps: Shift variables along action_horizon. Useful in mpc warm-start setting.
n_iters: Override number of iterations to run optimization.
Returns:
Optimized variables in tensor shape [action_horizon, d_action].
"""
return opt_tensor
@abstractmethod
def _shift(self, shift_steps=0):
"""Shift the variables in the solver to hotstart the next timestep.
Args:
shift_steps: Number of timesteps to shift.
"""
return
def _update_problem_kernel(self, n_problems: int, num_particles: int):
"""Update matrix used to map problem index to number of particles.
Args:
n_problems: Number of optimization problems.
num_particles: Number of particles per problem.
"""
log_info(
"Updating problem kernel [n_problems: "
+ str(n_problems)
+ " , num_particles: "
+ str(num_particles)
+ " ]"
)
self.problem_col = torch.arange(
0, n_problems, step=1, dtype=torch.long, device=self.tensor_args.device
)
self.n_select_ = torch.tensor(
[x * n_problems + x for x in range(n_problems)],
device=self.tensor_args.device,
dtype=torch.long,
)
# create sparse tensor:
sparse_indices = []
for i in range(n_problems):
sparse_indices.extend([[i * num_particles + x, i] for x in range(num_particles)])
sparse_values = torch.ones(len(sparse_indices))
sparse_indices = torch.tensor(sparse_indices)
if self.use_coo_sparse:
self.problem_kernel_ = torch.sparse_coo_tensor(
sparse_indices.t(),
sparse_values,
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
)
else:
self.problem_kernel_ = torch.sparse_coo_tensor(
sparse_indices.t(),
sparse_values,
device="cpu",
dtype=self.tensor_args.dtype,
)
self.problem_kernel_ = self.problem_kernel_.to_dense().to(
device=self.tensor_args.device
)
self._problem_seeds = self.num_particles

View File

@@ -65,7 +65,7 @@ class ParallelMPPIConfig(ParticleOptConfig):
alpha: float
gamma: float
kappa: float
sample_per_env: bool
sample_per_problem: bool
def __post_init__(self):
self.init_cov = self.tensor_args.to_device(self.init_cov).unsqueeze(0)
@@ -264,7 +264,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
actions = trajectories.actions
total_costs = self._compute_total_cost(costs)
# Let's reshape to n_envs now:
# Let's reshape to n_problems now:
# first find the means before doing exponential utility:
w = self._exp_util(total_costs)
@@ -272,7 +272,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
# Update best action
if self.sample_mode == SampleMode.BEST:
best_idx = torch.argmax(w, dim=-1)
self.best_traj.copy_(actions[self.env_col, best_idx])
self.best_traj.copy_(actions[self.problem_col, best_idx])
if self.store_rollouts and self.visual_traj is not None:
vis_seq = getattr(trajectories.state, self.visual_traj)
@@ -281,7 +281,9 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
self.top_idx = top_idx
top_trajs = torch.index_select(vis_seq, 0, top_idx[0])
for i in range(1, top_idx.shape[0]):
trajs = torch.index_select(vis_seq, 0, top_idx[i] + (self.particles_per_env * i))
trajs = torch.index_select(
vis_seq, 0, top_idx[i] + (self.particles_per_problem * i)
)
top_trajs = torch.cat((top_trajs, trajs), dim=0)
if self.top_trajs is None or top_trajs.shape != self.top_trajs:
self.top_trajs = top_trajs
@@ -317,13 +319,15 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
act_seq = self.mean_action.unsqueeze(-3) + scaled_delta
cat_list = [act_seq]
if self.neg_per_env > 0:
if self.neg_per_problem > 0:
neg_action = -1.0 * self.mean_action
neg_act_seqs = neg_action.unsqueeze(-3).expand(-1, self.neg_per_env, -1, -1)
neg_act_seqs = neg_action.unsqueeze(-3).expand(-1, self.neg_per_problem, -1, -1)
cat_list.append(neg_act_seqs)
if self.null_per_env > 0:
if self.null_per_problem > 0:
cat_list.append(
self.null_act_seqs[: self.null_per_env].unsqueeze(0).expand(self.n_envs, -1, -1, -1)
self.null_act_seqs[: self.null_per_problem]
.unsqueeze(0)
.expand(self.n_problems, -1, -1, -1)
)
act_seq = torch.cat(
@@ -343,8 +347,8 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
def update_init_mean(self, init_mean):
# update mean:
# init_mean = init_mean.clone()
if init_mean.shape[0] != self.n_envs:
init_mean = init_mean.expand(self.n_envs, -1, -1)
if init_mean.shape[0] != self.n_problems:
init_mean = init_mean.expand(self.n_problems, -1, -1)
if not copy_tensor(init_mean, self.mean_action):
self.mean_action = init_mean.clone()
if not copy_tensor(init_mean, self.best_traj):
@@ -353,27 +357,27 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
def reset_mean(self):
with profiler.record_function("mppi/reset_mean"):
if self.random_mean:
mean = self.mean_lib.get_samples([self.n_envs])
mean = self.mean_lib.get_samples([self.n_problems])
self.update_init_mean(mean)
else:
self.update_init_mean(self.init_mean)
def reset_covariance(self):
with profiler.record_function("mppi/reset_cov"):
# init_cov can either be a single value, or n_envs x 1 or n_envs x d_action
# init_cov can either be a single value, or n_problems x 1 or n_problems x d_action
if self.cov_type == CovType.SIGMA_I:
# init_cov can either be a single value, or n_envs x 1
# init_cov can either be a single value, or n_problems x 1
self.cov_action = self.init_cov
if self.init_cov.shape[0] != self.n_envs:
self.cov_action = self.init_cov.unsqueeze(0).expand(self.n_envs, -1)
if self.init_cov.shape[0] != self.n_problems:
self.cov_action = self.init_cov.unsqueeze(0).expand(self.n_problems, -1)
self.inv_cov_action = 1.0 / self.cov_action
a = torch.sqrt(self.cov_action)
if not copy_tensor(a, self.scale_tril):
self.scale_tril = a
elif self.cov_type == CovType.DIAG_A:
# init_cov can either be a single value, or n_envs x 1 or n_envs x 7
# init_cov can either be a single value, or n_problems x 1 or n_problems x 7
init_cov = self.init_cov.clone()
# if(init_cov.shape[-1] != self.d_action):
@@ -382,8 +386,8 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
if len(init_cov.shape) == 2 and init_cov.shape[-1] != self.d_action:
init_cov = init_cov.expand(-1, self.d_action)
init_cov = init_cov.unsqueeze(1)
if init_cov.shape[0] != self.n_envs:
init_cov = init_cov.expand(self.n_envs, -1, -1)
if init_cov.shape[0] != self.n_problems:
init_cov = init_cov.expand(self.n_problems, -1, -1)
if not copy_tensor(init_cov.clone(), self.cov_action):
self.cov_action = init_cov.clone()
self.inv_cov_action = 1.0 / self.cov_action
@@ -523,16 +527,18 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
n_iters = 1
else:
n_iters = self.n_iters
if self.sample_per_env:
if self.sample_per_problem:
s_set = (
self.sample_lib.get_samples(
sample_shape=[self.sampled_particles_per_env * self.n_envs * n_iters],
sample_shape=[
self.sampled_particles_per_problem * self.n_problems * n_iters
],
base_seed=self.seed,
)
.view(
n_iters,
self.n_envs,
self.sampled_particles_per_env,
self.n_problems,
self.sampled_particles_per_problem,
self.action_horizon,
self.d_action,
)
@@ -540,13 +546,17 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
)
else:
s_set = self.sample_lib.get_samples(
sample_shape=[n_iters * (self.sampled_particles_per_env)],
sample_shape=[n_iters * (self.sampled_particles_per_problem)],
base_seed=self.seed,
)
s_set = s_set.view(
n_iters, 1, self.sampled_particles_per_env, self.action_horizon, self.d_action
n_iters,
1,
self.sampled_particles_per_problem,
self.action_horizon,
self.d_action,
)
s_set = s_set.repeat(1, self.n_envs, 1, 1, 1).clone()
s_set = s_set.repeat(1, self.n_problems, 1, 1, 1).clone()
s_set[:, :, -1, :, :] = 0.0
if not copy_tensor(s_set, self._sample_set):
log_info("ParallelMPPI: Updating sample set")
@@ -575,7 +585,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
Parameters
----------
state : dict or np.ndarray
Initial state to set the simulation env to
Initial state to set the simulation problem to
"""
return super().generate_rollouts(init_act)

View File

@@ -1,4 +1,3 @@
#!/usr/bin/env python
#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
@@ -95,7 +94,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
self.trajectories = None
self.cu_opt_init = False
self.info = ParticleOptInfo()
self.update_num_particles_per_env(self.num_particles)
self.update_num_particles_per_problem(self.num_particles)
@abstractmethod
def _get_action_seq(self, mode=SampleMode):
@@ -172,7 +171,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
Parameters
----------
state : dict or np.ndarray
Initial state to set the simulation env to
Initial state to set the simulation problem to
"""
act_seq = self.sample_actions(init_act)
@@ -259,10 +258,10 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
# generate random simulated trajectories
trajectory = self.generate_rollouts()
trajectory.actions = trajectory.actions.view(
self.n_envs, self.particles_per_env, self.action_horizon, self.d_action
self.n_problems, self.particles_per_problem, self.action_horizon, self.d_action
)
trajectory.costs = trajectory.costs.view(
self.n_envs, self.particles_per_env, self.horizon
self.n_problems, self.particles_per_problem, self.horizon
)
with profiler.record_function("mpc/mppi/update_distribution"):
self._update_distribution(trajectory)
@@ -275,26 +274,26 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
curr_action_seq = self._get_action_seq(mode=self.sample_mode)
return curr_action_seq
def update_nenvs(self, n_envs):
assert n_envs > 0
self.total_num_particles = n_envs * self.num_particles
def update_nproblems(self, n_problems):
assert n_problems > 0
self.total_num_particles = n_problems * self.num_particles
self.cu_opt_init = False
super().update_nenvs(n_envs)
super().update_nproblems(n_problems)
def update_num_particles_per_env(self, num_particles_per_env):
self.null_per_env = round(int(self.null_act_frac * num_particles_per_env * 0.5))
def update_num_particles_per_problem(self, num_particles_per_problem):
self.null_per_problem = round(int(self.null_act_frac * num_particles_per_problem * 0.5))
self.neg_per_env = (
round(int(self.null_act_frac * num_particles_per_env)) - self.null_per_env
self.neg_per_problem = (
round(int(self.null_act_frac * num_particles_per_problem)) - self.null_per_problem
)
self.sampled_particles_per_env = (
num_particles_per_env - self.null_per_env - self.neg_per_env
self.sampled_particles_per_problem = (
num_particles_per_problem - self.null_per_problem - self.neg_per_problem
)
self.particles_per_env = num_particles_per_env
if self.null_per_env > 0:
self.particles_per_problem = num_particles_per_problem
if self.null_per_problem > 0:
self.null_act_seqs = torch.zeros(
self.null_per_env,
self.null_per_problem,
self.action_horizon,
self.d_action,
device=self.tensor_args.device,

View File

@@ -121,7 +121,7 @@ def get_stomp_cov_jit(
else:
A[k * horizon + i, k * horizon + index] = fd_array[j + 3]
R = torch.matmul(A.transpose(-2, -1), A)
R = torch.matmul(A.transpose(-2, -1).clone(), A.clone())
M = torch.inverse(R)
scaled_M = (1 / horizon) * M / (torch.max(torch.abs(M), dim=1)[0].unsqueeze(0))
cov = M / torch.max(torch.abs(M))
@@ -132,7 +132,6 @@ def get_stomp_cov_jit(
scale_tril = torch.linalg.cholesky(cov)
else:
scale_tril = cov
"""
k = 0
act_cov_matrix = cov[k * horizon:k * horizon + horizon, k * horizon:k * horizon + horizon]

View File

@@ -39,7 +39,7 @@ from curobo.rollout.rollout_base import Goal, RolloutBase, RolloutConfig, Rollou
from curobo.types.base import TensorDeviceType
from curobo.types.robot import CSpaceConfig, RobotConfig
from curobo.types.state import JointState
from curobo.util.logger import log_info, log_warn
from curobo.util.logger import log_error, log_info, log_warn
from curobo.util.tensor_util import cat_sum
@@ -366,6 +366,7 @@ class ArmBase(RolloutBase, ArmBaseConfig):
)
cost_list.append(coll_cost)
if return_list:
return cost_list
cost = cat_sum(cost_list)
return cost
@@ -424,6 +425,7 @@ class ArmBase(RolloutBase, ArmBaseConfig):
out_metrics = self.constraint_fn(state)
out_metrics.state = state
out_metrics = self.convergence_fn(state, out_metrics)
out_metrics.cost = self.cost_fn(state)
return out_metrics
def get_metrics_cuda_graph(self, state: JointState):
@@ -451,6 +453,8 @@ class ArmBase(RolloutBase, ArmBaseConfig):
with torch.cuda.graph(self.cu_metrics_graph, stream=s):
self._cu_out_metrics = self.get_metrics(self._cu_metrics_state_in)
self._metrics_cuda_graph_init = True
if self._cu_metrics_state_in.position.shape != state.position.shape:
log_error("cuda graph changed")
self._cu_metrics_state_in.copy_(state)
self.cu_metrics_graph.replay()
out_metrics = self._cu_out_metrics
@@ -462,17 +466,6 @@ class ArmBase(RolloutBase, ArmBaseConfig):
):
if out_metrics is None:
out_metrics = RolloutMetrics()
if (
self.convergence_cfg.null_space_cfg is not None
and self.null_convergence.enabled
and self._goal_buffer.batch_retract_state_idx is not None
):
out_metrics.cost = self.null_convergence.forward_target_idx(
self._goal_buffer.retract_state,
state.state_seq.position,
self._goal_buffer.batch_retract_state_idx,
)
return out_metrics
def _get_augmented_state(self, state: JointState) -> KinematicModelState:
@@ -688,9 +681,11 @@ class ArmBase(RolloutBase, ArmBaseConfig):
act_seq = self.dynamics_model.init_action_mean.unsqueeze(0).repeat(self.batch_size, 1, 1)
return act_seq
def reset_cuda_graph(self):
def reset_shape(self):
self._goal_idx_update = True
super().reset_shape()
def reset_cuda_graph(self):
super().reset_cuda_graph()
def get_action_from_state(self, state: JointState):

View File

@@ -20,16 +20,16 @@ import torch.autograd.profiler as profiler
from curobo.geom.sdf.world import WorldCollision
from curobo.rollout.cost.cost_base import CostConfig
from curobo.rollout.cost.dist_cost import DistCost, DistCostConfig
from curobo.rollout.cost.pose_cost import PoseCost, PoseCostConfig
from curobo.rollout.cost.pose_cost import PoseCost, PoseCostConfig, PoseCostMetric
from curobo.rollout.cost.straight_line_cost import StraightLineCost
from curobo.rollout.cost.zero_cost import ZeroCost
from curobo.rollout.dynamics_model.kinematic_model import KinematicModelState
from curobo.rollout.rollout_base import Goal, RolloutMetrics
from curobo.types.base import TensorDeviceType
from curobo.types.robot import RobotConfig
from curobo.types.tensor import T_BValue_float
from curobo.types.tensor import T_BValue_float, T_BValue_int
from curobo.util.helpers import list_idx_if_not_none
from curobo.util.logger import log_info
from curobo.util.logger import log_error, log_info
from curobo.util.tensor_util import cat_max, cat_sum
# Local Folder
@@ -42,6 +42,8 @@ class ArmReacherMetrics(RolloutMetrics):
position_error: Optional[T_BValue_float] = None
rotation_error: Optional[T_BValue_float] = None
pose_error: Optional[T_BValue_float] = None
goalset_index: Optional[T_BValue_int] = None
null_space_error: Optional[T_BValue_float] = None
def __getitem__(self, idx):
d_list = [
@@ -53,6 +55,8 @@ class ArmReacherMetrics(RolloutMetrics):
self.position_error,
self.rotation_error,
self.pose_error,
self.goalset_index,
self.null_space_error,
]
idx_vals = list_idx_if_not_none(d_list, idx)
return ArmReacherMetrics(*idx_vals)
@@ -65,10 +69,14 @@ class ArmReacherMetrics(RolloutMetrics):
constraint=None if self.constraint is None else self.constraint.clone(),
feasible=None if self.feasible is None else self.feasible.clone(),
state=None if self.state is None else self.state,
cspace_error=None if self.cspace_error is None else self.cspace_error,
position_error=None if self.position_error is None else self.position_error,
rotation_error=None if self.rotation_error is None else self.rotation_error,
pose_error=None if self.pose_error is None else self.pose_error,
cspace_error=None if self.cspace_error is None else self.cspace_error.clone(),
position_error=None if self.position_error is None else self.position_error.clone(),
rotation_error=None if self.rotation_error is None else self.rotation_error.clone(),
pose_error=None if self.pose_error is None else self.pose_error.clone(),
goalset_index=None if self.goalset_index is None else self.goalset_index.clone(),
null_space_error=(
None if self.null_space_error is None else self.null_space_error.clone()
),
)
@@ -254,6 +262,7 @@ class ArmReacher(ArmBase, ArmReacherConfig):
goal_cost = self.goal_cost.forward(
ee_pos_batch, ee_quat_batch, self._goal_buffer
)
cost_list.append(goal_cost)
with profiler.record_function("cost/link_poses"):
if self._goal_buffer.links_goal_pose is not None and self.cost_cfg.pose_cfg is not None:
@@ -296,36 +305,21 @@ class ArmReacher(ArmBase, ArmReacherConfig):
g_dist,
)
# cost += z_acc
cost_list.append(z_acc)
# print(self.cost_cfg.zero_jerk_cfg)
if (
self.cost_cfg.zero_jerk_cfg is not None
and self.zero_jerk_cost.enabled
# and g_dist is not None
):
# jerk = self.dynamics_model._aux_matrix @ state_batch.acceleration
if self.cost_cfg.zero_jerk_cfg is not None and self.zero_jerk_cost.enabled:
z_jerk = self.zero_jerk_cost.forward(
state_batch.jerk,
g_dist,
)
cost_list.append(z_jerk)
# cost += z_jerk
if (
self.cost_cfg.zero_vel_cfg is not None
and self.zero_vel_cost.enabled
# and g_dist is not None
):
if self.cost_cfg.zero_vel_cfg is not None and self.zero_vel_cost.enabled:
z_vel = self.zero_vel_cost.forward(
state_batch.velocity,
g_dist,
)
# cost += z_vel
# print(z_vel.shape)
cost_list.append(z_vel)
cost = cat_sum(cost_list)
# print(cost[:].T)
return cost
def convergence_fn(
@@ -350,6 +344,7 @@ class ArmReacher(ArmBase, ArmReacherConfig):
) = self.pose_convergence.forward_out_distance(
state.ee_pos_seq, state.ee_quat_seq, self._goal_buffer
)
out_metrics.goalset_index = self.pose_convergence.goalset_index_buffer # .clone()
if (
self._goal_buffer.links_goal_pose is not None
and self.convergence_cfg.pose_cfg is not None
@@ -389,6 +384,17 @@ class ArmReacher(ArmBase, ArmReacherConfig):
True,
)
if (
self.convergence_cfg.null_space_cfg is not None
and self.null_convergence.enabled
and self._goal_buffer.batch_retract_state_idx is not None
):
out_metrics.null_space_error = self.null_convergence.forward_target_idx(
self._goal_buffer.retract_state,
state.state_seq.position,
self._goal_buffer.batch_retract_state_idx,
)
return out_metrics
def update_params(
@@ -420,3 +426,43 @@ class ArmReacher(ArmBase, ArmReacherConfig):
else:
self.dist_cost.disable_cost()
self.cspace_convergence.disable_cost()
def get_pose_costs(self, include_link_pose: bool = False, include_convergence: bool = True):
pose_costs = [self.goal_cost]
if include_convergence:
pose_costs += [self.pose_convergence]
if include_link_pose:
log_error("Not implemented yet")
return pose_costs
def update_pose_cost_metric(
self,
metric: PoseCostMetric,
):
pose_costs = self.get_pose_costs()
if metric.hold_partial_pose:
if metric.hold_vec_weight is None:
log_error("hold_vec_weight is required")
[x.hold_partial_pose(metric.hold_vec_weight) for x in pose_costs]
if metric.release_partial_pose:
[x.release_partial_pose() for x in pose_costs]
if metric.reach_partial_pose:
if metric.reach_vec_weight is None:
log_error("reach_vec_weight is required")
[x.reach_partial_pose(metric.reach_vec_weight) for x in pose_costs]
if metric.reach_full_pose:
[x.reach_full_pose() for x in pose_costs]
pose_costs = self.get_pose_costs(include_convergence=False)
if metric.remove_offset_waypoint:
[x.remove_offset_waypoint() for x in pose_costs]
if metric.offset_position is not None or metric.offset_rotation is not None:
[
x.update_offset_waypoint(
offset_position=metric.offset_position,
offset_rotation=metric.offset_rotation,
offset_tstep_fraction=metric.offset_tstep_fraction,
)
for x in pose_costs
]

View File

@@ -257,13 +257,13 @@ class BoundCost(CostBase, BoundCostConfig):
return cost
def update_dt(self, dt: Union[float, torch.Tensor]):
# return super().update_dt(dt)
if self.cost_type == BoundCostType.BOUNDS_SMOOTH:
v_scale = dt / self._dt
a_scale = v_scale**2
j_scale = v_scale**3
self.smooth_weight[1] *= a_scale
self.smooth_weight[2] *= j_scale
return super().update_dt(dt)

View File

@@ -8,19 +8,23 @@
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
from __future__ import annotations
# Standard Library
import math
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional
# Third Party
import torch
from torch.autograd import Function
# CuRobo
from curobo.curobolib.geom import get_pose_distance, get_pose_distance_backward
from curobo.curobolib.geom import PoseError, PoseErrorDistance
from curobo.rollout.rollout_base import Goal
from curobo.types.base import TensorDeviceType
from curobo.types.math import OrientationError, Pose
from curobo.util.logger import log_error
# Local Folder
from .cost_base import CostBase, CostConfig
@@ -37,7 +41,11 @@ class PoseErrorType(Enum):
class PoseCostConfig(CostConfig):
cost_type: PoseErrorType = PoseErrorType.BATCH_GOAL
use_metric: bool = False
project_distance: bool = True
run_vec_weight: Optional[List[float]] = None
use_projected_distance: bool = True
offset_waypoint: List[float] = None
offset_tstep_fraction: float = -1.0
def __post_init__(self):
if self.run_vec_weight is not None:
@@ -54,392 +62,85 @@ class PoseCostConfig(CostConfig):
self.vec_convergence = torch.zeros(
2, device=self.tensor_args.device, dtype=self.tensor_args.dtype
)
if self.offset_waypoint is None:
self.offset_waypoint = [0, 0, 0, 0, 0, 0]
if self.run_weight is None:
self.run_weight = 1
self.offset_waypoint = self.tensor_args.to_device(self.offset_waypoint)
if isinstance(self.offset_tstep_fraction, float):
self.offset_tstep_fraction = self.tensor_args.to_device([self.offset_tstep_fraction])
return super().__post_init__()
@torch.jit.script
def backward_PoseError_jit(grad_g_dist, grad_out_distance, weight, g_vec):
grad_vec = grad_g_dist + (grad_out_distance * weight)
grad = 1.0 * (grad_vec).unsqueeze(-1) * g_vec
return grad
@dataclass
class PoseCostMetric:
hold_partial_pose: bool = False
release_partial_pose: bool = False
hold_vec_weight: Optional[torch.Tensor] = None
reach_partial_pose: bool = False
reach_full_pose: bool = False
reach_vec_weight: Optional[torch.Tensor] = None
offset_position: Optional[torch.Tensor] = None
offset_rotation: Optional[torch.Tensor] = None
offset_tstep_fraction: float = -1.0
remove_offset_waypoint: bool = False
def clone(self):
# full method:
@torch.jit.script
def backward_full_PoseError_jit(
grad_out_distance, grad_g_dist, grad_r_err, p_w, q_w, g_vec_p, g_vec_q
):
p_grad = (grad_g_dist + (grad_out_distance * p_w)).unsqueeze(-1) * g_vec_p
q_grad = (grad_r_err + (grad_out_distance * q_w)).unsqueeze(-1) * g_vec_q
# p_grad = ((grad_out_distance * p_w)).unsqueeze(-1) * g_vec_p
# q_grad = ((grad_out_distance * q_w)).unsqueeze(-1) * g_vec_q
return p_grad, q_grad
class PoseErrorDistance(Function):
@staticmethod
def forward(
ctx,
current_position,
goal_position,
current_quat,
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
batch_pose_idx,
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
out_p_grad,
out_q_grad,
batch_size,
horizon,
mode=PoseErrorType.BATCH_GOAL.value,
num_goals=1,
use_metric=False,
):
# out_distance = current_position[..., 0].detach().clone() * 0.0
# out_position_distance = out_distance.detach().clone()
# out_rotation_distance = out_distance.detach().clone()
# out_vec = (
# torch.cat((current_position.detach().clone(), current_quat.detach().clone()), dim=-1)
# * 0.0
# )
# out_idx = out_distance.clone().to(dtype=torch.long)
(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
) = get_pose_distance(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
current_position.contiguous(),
goal_position,
current_quat.contiguous(),
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
batch_pose_idx,
batch_size,
horizon,
mode,
num_goals,
current_position.requires_grad,
True,
use_metric,
)
ctx.save_for_backward(out_p_vec, out_r_vec, weight, out_p_grad, out_q_grad)
return out_distance, out_position_distance, out_rotation_distance, out_idx # .view(-1,1)
@staticmethod
def backward(ctx, grad_out_distance, grad_g_dist, grad_r_err, grad_out_idx):
(g_vec_p, g_vec_q, weight, out_grad_p, out_grad_q) = ctx.saved_tensors
pos_grad = None
quat_grad = None
batch_size = g_vec_p.shape[0] * g_vec_p.shape[1]
if ctx.needs_input_grad[0] and ctx.needs_input_grad[2]:
pos_grad, quat_grad = get_pose_distance_backward(
out_grad_p,
out_grad_q,
grad_out_distance.contiguous(),
grad_g_dist.contiguous(),
grad_r_err.contiguous(),
weight,
g_vec_p,
g_vec_q,
batch_size,
use_distance=True,
)
# pos_grad, quat_grad = backward_full_PoseError_jit(
# grad_out_distance,
# grad_g_dist, grad_r_err, p_w, q_w, g_vec_p, g_vec_q
# )
elif ctx.needs_input_grad[0]:
pos_grad = backward_PoseError_jit(grad_g_dist, grad_out_distance, p_w, g_vec_p)
# grad_vec = grad_g_dist + (grad_out_distance * weight[1])
# pos_grad = 1.0 * (grad_vec).unsqueeze(-1) * g_vec[..., 4:]
elif ctx.needs_input_grad[2]:
quat_grad = backward_PoseError_jit(grad_r_err, grad_out_distance, q_w, g_vec_q)
# grad_vec = grad_r_err + (grad_out_distance * weight[0])
# quat_grad = 1.0 * (grad_vec).unsqueeze(-1) * g_vec[..., :4]
return (
pos_grad,
None,
quat_grad,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
return PoseCostMetric(
hold_partial_pose=self.hold_partial_pose,
release_partial_pose=self.release_partial_pose,
hold_vec_weight=None if self.hold_vec_weight is None else self.hold_vec_weight.clone(),
reach_partial_pose=self.reach_partial_pose,
reach_full_pose=self.reach_full_pose,
reach_vec_weight=(
None if self.reach_vec_weight is None else self.reach_vec_weight.clone()
),
offset_position=None if self.offset_position is None else self.offset_position.clone(),
offset_rotation=None if self.offset_rotation is None else self.offset_rotation.clone(),
offset_tstep_fraction=self.offset_tstep_fraction,
remove_offset_waypoint=self.remove_offset_waypoint,
)
@classmethod
def create_grasp_approach_metric(
cls,
offset_position: float = 0.5,
linear_axis: int = 2,
tstep_fraction: float = 0.6,
tensor_args: TensorDeviceType = TensorDeviceType(),
) -> PoseCostMetric:
"""Enables moving to a pregrasp and then locked orientation movement to final grasp.
class PoseLoss(Function):
@staticmethod
def forward(
ctx,
current_position,
goal_position,
current_quat,
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
batch_pose_idx,
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
out_p_grad,
out_q_grad,
batch_size,
horizon,
mode=PoseErrorType.BATCH_GOAL.value,
num_goals=1,
use_metric=False,
):
# out_distance = current_position[..., 0].detach().clone() * 0.0
# out_position_distance = out_distance.detach().clone()
# out_rotation_distance = out_distance.detach().clone()
# out_vec = (
# torch.cat((current_position.detach().clone(), current_quat.detach().clone()), dim=-1)
# * 0.0
# )
# out_idx = out_distance.clone().to(dtype=torch.long)
Since this is added as a cost, the trajectory will not reach the exact offset, instead it
will try to take a blended path to the final grasp without stopping at the offset.
(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
) = get_pose_distance(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
current_position.contiguous(),
goal_position,
current_quat.contiguous(),
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
batch_pose_idx,
batch_size,
horizon,
mode,
num_goals,
current_position.requires_grad,
False,
use_metric,
)
ctx.save_for_backward(out_p_vec, out_r_vec)
return out_distance
Args:
offset_position: offset in meters.
linear_axis: specifies the x y or z axis.
tstep_fraction: specifies the timestep fraction to start activating this constraint.
tensor_args: cuda device.
@staticmethod
def backward(ctx, grad_out_distance): # , grad_g_dist, grad_r_err, grad_out_idx):
pos_grad = None
quat_grad = None
if ctx.needs_input_grad[0] and ctx.needs_input_grad[2]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
pos_grad = g_vec_p * grad_out_distance.unsqueeze(1)
quat_grad = g_vec_q * grad_out_distance.unsqueeze(1)
pos_grad = pos_grad.unsqueeze(-2)
quat_grad = quat_grad.unsqueeze(-2)
elif ctx.needs_input_grad[0]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
pos_grad = g_vec_p * grad_out_distance.unsqueeze(1)
pos_grad = pos_grad.unsqueeze(-2)
elif ctx.needs_input_grad[2]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
quat_grad = g_vec_q * grad_out_distance.unsqueeze(1)
quat_grad = quat_grad.unsqueeze(-2)
return (
pos_grad,
None,
quat_grad,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
Returns:
cost metric.
"""
hold_vec_weight = tensor_args.to_device([1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
hold_vec_weight[3 + linear_axis] = 0.0
offset_position_vec = tensor_args.to_device([0.0, 0.0, 0.0])
offset_position_vec[linear_axis] = offset_position
return cls(
hold_partial_pose=True,
hold_vec_weight=hold_vec_weight,
offset_position=offset_position_vec,
offset_tstep_fraction=tstep_fraction,
)
class PoseError(Function):
@staticmethod
def forward(
ctx,
current_position,
goal_position,
current_quat,
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
batch_pose_idx,
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
out_p_grad,
out_q_grad,
batch_size,
horizon,
mode=PoseErrorType.BATCH_GOAL.value,
num_goals=1,
use_metric=False,
):
# out_distance = current_position[..., 0].detach().clone() * 0.0
# out_position_distance = out_distance.detach().clone()
# out_rotation_distance = out_distance.detach().clone()
# out_vec = (
# torch.cat((current_position.detach().clone(), current_quat.detach().clone()), dim=-1)
# * 0.0
# )
# out_idx = out_distance.clone().to(dtype=torch.long)
(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
) = get_pose_distance(
out_distance,
out_position_distance,
out_rotation_distance,
out_p_vec,
out_r_vec,
out_idx,
current_position.contiguous(),
goal_position,
current_quat.contiguous(),
goal_quat,
vec_weight,
weight,
vec_convergence,
run_weight,
run_vec_weight,
batch_pose_idx,
batch_size,
horizon,
mode,
num_goals,
current_position.requires_grad,
False,
use_metric,
)
ctx.save_for_backward(out_p_vec, out_r_vec)
return out_distance
@staticmethod
def backward(ctx, grad_out_distance): # , grad_g_dist, grad_r_err, grad_out_idx):
pos_grad = None
quat_grad = None
if ctx.needs_input_grad[0] and ctx.needs_input_grad[2]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
pos_grad = g_vec_p
quat_grad = g_vec_q
elif ctx.needs_input_grad[0]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
pos_grad = g_vec_p
elif ctx.needs_input_grad[2]:
(g_vec_p, g_vec_q) = ctx.saved_tensors
quat_grad = g_vec_q
return (
pos_grad,
None,
quat_grad,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
@classmethod
def reset_metric(cls) -> PoseCostMetric:
return PoseCostMetric(
remove_offset_waypoint=True,
reach_full_pose=True,
release_partial_pose=True,
)
@@ -449,13 +150,88 @@ class PoseCost(CostBase, PoseCostConfig):
CostBase.__init__(self)
self.rot_weight = self.vec_weight[0:3]
self.pos_weight = self.vec_weight[3:6]
self._vec_convergence = self.tensor_args.to_device(self.vec_convergence)
self._batch_size = 0
self._horizon = 0
def update_metric(self, metric: PoseCostMetric):
if metric.hold_partial_pose:
if metric.hold_vec_weight is None:
log_error("hold_vec_weight is required")
self.hold_partial_pose(metric.hold_vec_weight)
if metric.release_partial_pose:
self.release_partial_pose()
if metric.reach_partial_pose:
if metric.reach_vec_weight is None:
log_error("reach_vec_weight is required")
self.reach_partial_pose(metric.reach_vec_weight)
if metric.reach_full_pose:
self.reach_full_pose()
if metric.remove_offset_waypoint:
self.remove_offset_waypoint()
if metric.offset_position is not None or metric.offset_rotation is not None:
self.update_offset_waypoint(
offset_position=self.offset_position,
offset_rotation=self.offset_rotation,
offset_tstep_fraction=self.offset_tstep_fraction,
)
def hold_partial_pose(self, run_vec_weight: torch.Tensor):
self.run_vec_weight.copy_(run_vec_weight)
def release_partial_pose(self):
self.run_vec_weight[:] = 0.0
def reach_partial_pose(self, vec_weight: torch.Tensor):
self.vec_weight[:] = vec_weight
def reach_full_pose(self):
self.vec_weight[:] = 1.0
def update_offset_waypoint(
self,
offset_position: Optional[torch.Tensor] = None,
offset_rotation: Optional[torch.Tensor] = None,
offset_tstep_fraction: float = 0.75,
):
if offset_position is not None:
self.offset_waypoint[3:].copy_(offset_position)
if offset_rotation is not None:
self.offset_waypoint[:3].copy_(offset_rotation)
self.offset_tstep_fraction[:] = offset_tstep_fraction
if self._horizon <= 0:
print(self.weight)
log_error(
"Updating offset waypoint is only possible after initializing motion gen"
+ " run motion_gen.warmup() before adding offset_waypoint"
)
self.update_run_weight(run_tstep_fraction=offset_tstep_fraction)
def remove_offset_waypoint(self):
self.offset_tstep_fraction[:] = -1.0
self.update_run_weight()
def update_run_weight(
self, run_tstep_fraction: float = 0.0, run_weight: Optional[float] = None
):
if self._horizon == 1:
return
if run_weight is None:
run_weight = self.run_weight
active_steps = math.floor(self._horizon * run_tstep_fraction)
self._run_weight_vec[:, :active_steps] = 0
self._run_weight_vec[:, active_steps:-1] = run_weight
def update_batch_size(self, batch_size, horizon):
if batch_size != self._batch_size or horizon != self._horizon:
# print(self.weight)
# print(batch_size, horizon, self._batch_size, self._horizon)
# batch_size = b*h
self.out_distance = torch.zeros(
(batch_size, horizon), device=self.tensor_args.device, dtype=self.tensor_args.dtype
@@ -493,12 +269,16 @@ class PoseCost(CostBase, PoseCostConfig):
self._run_weight_vec = torch.ones(
(1, horizon), device=self.tensor_args.device, dtype=self.tensor_args.dtype
)
if self.terminal and self.run_weight is not None:
self._run_weight_vec[:, :-1] *= self.run_weight
if self.terminal and self.run_weight is not None and horizon > 1:
self._run_weight_vec[:, :-1] = self.run_weight
self._batch_size = batch_size
self._horizon = horizon
@property
def goalset_index_buffer(self):
return self.out_idx
def _forward_goal_distribution(self, ee_pos_batch, ee_rot_batch, ee_goal_pos, ee_goal_rot):
ee_goal_pos = ee_goal_pos.unsqueeze(1)
ee_goal_pos = ee_goal_pos.unsqueeze(1)
@@ -563,13 +343,13 @@ class PoseCost(CostBase, PoseCostConfig):
def _update_cost_type(self, ee_goal_pos, ee_pos_batch, num_goals):
d_g = len(ee_goal_pos.shape)
b_sze = ee_goal_pos.shape[0]
if d_g == 2 and b_sze == 1: # 1, 3
if d_g == 2 and b_sze == 1: # [1, 3]
self.cost_type = PoseErrorType.SINGLE_GOAL
elif d_g == 2 and b_sze == ee_pos_batch.shape[0]: # b, 3
elif d_g == 2 and b_sze > 1: # [b, 3]
self.cost_type = PoseErrorType.BATCH_GOAL
elif d_g == 3:
elif d_g == 3 and b_sze == 1: # [1, goalset, 3]
self.cost_type = PoseErrorType.GOALSET
elif len(ee_goal_pos.shape) == 4 and b_sze == ee_pos_bath.shape[0]:
elif d_g == 3 and b_sze > 1: # [b, goalset,3]
self.cost_type = PoseErrorType.BATCH_GOALSET
def forward_out_distance(
@@ -599,6 +379,8 @@ class PoseCost(CostBase, PoseCostConfig):
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
self.offset_waypoint,
self.offset_tstep_fraction,
goal.batch_pose_idx,
self.out_distance,
self.out_position_distance,
@@ -613,7 +395,9 @@ class PoseCost(CostBase, PoseCostConfig):
self.cost_type.value,
num_goals,
self.use_metric,
self.project_distance,
)
# print(self.out_idx.shape, self.out_idx[:,-1])
# print(goal.batch_pose_idx.shape)
cost = distance # .view(b, h)#.clone()
r_err = r_err # .view(b, h)
@@ -632,65 +416,46 @@ class PoseCost(CostBase, PoseCostConfig):
ee_goal_rot = goal_pose.quaternion
num_goals = goal_pose.n_goalset
self._update_cost_type(ee_goal_pos, ee_pos_batch, num_goals)
# print(self.cost_type)
b, h, _ = ee_pos_batch.shape
self.update_batch_size(b, h)
# return self.out_distance
# print(b,h, ee_goal_pos.shape)
if self.return_loss:
distance = PoseLoss.apply(
ee_pos_batch,
ee_goal_pos,
ee_rot_batch, # .view(-1, 4).contiguous(),
ee_goal_rot,
self.vec_weight,
self.weight,
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
goal.batch_pose_idx,
self.out_distance,
self.out_position_distance,
self.out_rotation_distance,
self.out_p_vec,
self.out_q_vec,
self.out_idx,
self.out_p_grad,
self.out_q_grad,
b,
h,
self.cost_type.value,
num_goals,
self.use_metric,
)
else:
distance = PoseError.apply(
ee_pos_batch,
ee_goal_pos,
ee_rot_batch, # .view(-1, 4).contiguous(),
ee_goal_rot,
self.vec_weight,
self.weight,
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
goal.batch_pose_idx,
self.out_distance,
self.out_position_distance,
self.out_rotation_distance,
self.out_p_vec,
self.out_q_vec,
self.out_idx,
self.out_p_grad,
self.out_q_grad,
b,
h,
self.cost_type.value,
num_goals,
self.use_metric,
)
distance = PoseError.apply(
ee_pos_batch,
ee_goal_pos,
ee_rot_batch, # .view(-1, 4).contiguous(),
ee_goal_rot,
self.vec_weight,
self.weight,
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
self.offset_waypoint,
self.offset_tstep_fraction,
goal.batch_pose_idx,
self.out_distance,
self.out_position_distance,
self.out_rotation_distance,
self.out_p_vec,
self.out_q_vec,
self.out_idx,
self.out_p_grad,
self.out_q_grad,
b,
h,
self.cost_type.value,
num_goals,
self.use_metric,
self.project_distance,
self.return_loss,
)
cost = distance
# if link_name is None and cost.shape[0]==8:
# print(ee_pos_batch[...,-1].squeeze())
# print(cost.shape)
return cost
def forward_pose(
@@ -708,56 +473,34 @@ class PoseCost(CostBase, PoseCostConfig):
b = query_pose.position.shape[0]
h = query_pose.position.shape[1]
num_goals = 1
if self.return_loss:
distance = PoseLoss.apply(
query_pose.position.unsqueeze(1),
ee_goal_pos,
query_pose.quaternion.unsqueeze(1),
ee_goal_quat,
self.vec_weight,
self.weight,
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
batch_pose_idx,
self.out_distance,
self.out_position_distance,
self.out_rotation_distance,
self.out_p_vec,
self.out_q_vec,
self.out_idx,
self.out_p_grad,
self.out_q_grad,
b,
h,
self.cost_type.value,
num_goals,
self.use_metric,
)
else:
distance = PoseError.apply(
query_pose.position.unsqueeze(1),
ee_goal_pos,
query_pose.quaternion.unsqueeze(1),
ee_goal_quat,
self.vec_weight,
self.weight,
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
batch_pose_idx,
self.out_distance,
self.out_position_distance,
self.out_rotation_distance,
self.out_p_vec,
self.out_q_vec,
self.out_idx,
self.out_p_grad,
self.out_q_grad,
b,
h,
self.cost_type.value,
num_goals,
self.use_metric,
)
distance = PoseError.apply(
query_pose.position.unsqueeze(1),
ee_goal_pos,
query_pose.quaternion.unsqueeze(1),
ee_goal_quat,
self.vec_weight,
self.weight,
self._vec_convergence,
self._run_weight_vec,
self.run_vec_weight,
self.offset_waypoint,
self.offset_tstep_fraction,
batch_pose_idx,
self.out_distance,
self.out_position_distance,
self.out_rotation_distance,
self.out_p_vec,
self.out_q_vec,
self.out_idx,
self.out_p_grad,
self.out_q_grad,
b,
h,
self.cost_type.value,
num_goals,
self.use_metric,
self.project_distance,
self.return_loss,
)
return distance

View File

@@ -68,9 +68,14 @@ class StopCost(CostBase, StopCostConfig):
self.max_vel = (sum_matrix @ delta_vel).unsqueeze(-1)
def forward(self, vels):
vel_abs = torch.abs(vels)
vel_abs = torch.nn.functional.relu(vel_abs - self.max_vel)
cost = self.weight * (torch.sum(vel_abs**2, dim=-1))
cost = velocity_cost(vels, self.weight, self.max_vel)
return cost
@torch.jit.script
def velocity_cost(vels, weight, max_vel):
vel_abs = torch.abs(vels)
vel_abs = torch.nn.functional.relu(vel_abs - max_vel[: vels.shape[1]])
cost = weight * (torch.sum(vel_abs**2, dim=-1))
return cost

View File

@@ -23,7 +23,7 @@ import torch
# CuRobo
from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.types.robot import CSpaceConfig, State, JointState
from curobo.types.robot import CSpaceConfig, State
from curobo.types.tensor import (
T_BDOF,
T_DOF,
@@ -33,6 +33,7 @@ from curobo.types.tensor import (
T_BValue_float,
)
from curobo.util.helpers import list_idx_if_not_none
from curobo.util.logger import log_info
from curobo.util.sample_lib import HaltonGenerator
from curobo.util.tensor_util import copy_tensor
@@ -235,6 +236,7 @@ class Goal(Sequence):
batch_retract_state_idx=self.batch_retract_state_idx,
batch_goal_state_idx=self.batch_goal_state_idx,
links_goal_pose=self.links_goal_pose,
n_goalset=self.n_goalset,
)
def _tensor_repeat_seeds(self, tensor, num_seeds):
@@ -353,7 +355,7 @@ class Goal(Sequence):
def _copy_tensor(self, ref_buffer, buffer):
if buffer is not None:
if ref_buffer is not None:
if ref_buffer is not None and buffer.shape == ref_buffer.shape:
if not copy_tensor(buffer, ref_buffer):
ref_buffer = buffer.clone()
else:
@@ -553,6 +555,10 @@ class RolloutBase:
self._rollout_constraint_cuda_graph_init = False
if self.cu_rollout_constraint_graph is not None:
self.cu_rollout_constraint_graph.reset()
self.reset_shape()
def reset_shape(self):
pass
@abstractmethod
def get_action_from_state(self, state: State):

View File

@@ -16,10 +16,15 @@ from typing import List, Optional
# Third Party
import torch
from torch.profiler import record_function
# CuRobo
from curobo.geom.cv import (
get_projection_rays,
project_depth_using_rays,
project_pointcloud_to_depth,
)
from curobo.types.math import Pose
from curobo.util.logger import log_error, log_warn
@dataclass
@@ -30,25 +35,82 @@ class CameraObservation:
depth_image: Optional[torch.Tensor] = None
image_segmentation: Optional[torch.Tensor] = None
projection_matrix: Optional[torch.Tensor] = None
projection_rays: Optional[torch.Tensor] = None
resolution: Optional[List[int]] = None
pose: Optional[Pose] = None
intrinsics: Optional[torch.Tensor] = None
timestamp: float = 0.0
def filter_depth(self, distance: float = 0.01):
self.depth_image = torch.where(self.depth_image < distance, 0, self.depth_image)
@property
def shape(self):
return self.rgb_image.shape
@record_function("camera/copy_")
def copy_(self, new_data: CameraObservation):
self.rgb_image.copy_(new_data.rgb_image)
self.depth_image.copy_(new_data.depth_image)
self.image_segmentation.copy_(new_data.image_segmentation)
self.projection_matrix.copy_(new_data.projection_matrix)
if self.rgb_image is not None:
self.rgb_image.copy_(new_data.rgb_image)
if self.depth_image is not None:
self.depth_image.copy_(new_data.depth_image)
if self.image_segmentation is not None:
self.image_segmentation.copy_(new_data.image_segmentation)
if self.projection_matrix is not None:
self.projection_matrix.copy_(new_data.projection_matrix)
if self.projection_rays is not None:
self.projection_rays.copy_(new_data.projection_rays)
if self.pose is not None:
self.pose.copy_(new_data.pose)
self.resolution = new_data.resolution
@record_function("camera/clone")
def clone(self):
return CameraObservation(
depth_image=self.depth_image.clone() if self.depth_image is not None else None,
rgb_image=self.rgb_image.clone() if self.rgb_image is not None else None,
intrinsics=self.intrinsics.clone() if self.intrinsics is not None else None,
resolution=self.resolution,
pose=self.pose.clone() if self.pose is not None else None,
)
def to(self, device: torch.device):
if self.rgb_image is not None:
self.rgb_image = self.rgb_image.to(device=device)
if self.depth_image is not None:
self.depth_image = self.depth_image.to(device=device)
return self
def get_pointcloud(self):
if self.projection_rays is None:
self.update_projection_rays()
depth_image = self.depth_image
if len(self.depth_image.shape) == 2:
depth_image = self.depth_image.unsqueeze(0)
point_cloud = project_depth_using_rays(depth_image, self.projection_rays)
return point_cloud
def get_image_from_pointcloud(self, pointcloud, output_image: Optional[torch.Tensor] = None):
if output_image is None:
output_image = torch.zeros(
(self.depth_image.shape[0], self.depth_image.shape[1]),
dtype=pointcloud.dtype,
device=pointcloud.device,
)
depth_image = project_pointcloud_to_depth(pointcloud, output_image=output_image)
return depth_image
def update_projection_rays(self):
intrinsics = self.intrinsics
if len(intrinsics.shape) == 2:
intrinsics = intrinsics.unsqueeze(0)
project_rays = get_projection_rays(
self.depth_image.shape[-2], self.depth_image.shape[-1], intrinsics
)
if self.projection_rays is None:
self.projection_rays = project_rays
self.projection_rays.copy_(project_rays)

View File

@@ -19,6 +19,7 @@ import numpy as np
import torch
import torch.autograd.profiler as profiler
from torch.autograd import Function
from torch.profiler import record_function
# CuRobo
from curobo.geom.transform import (
@@ -33,7 +34,7 @@ from curobo.geom.transform import (
from curobo.types.base import TensorDeviceType
from curobo.util.helpers import list_idx_if_not_none
from curobo.util.logger import log_error, log_info, log_warn
from curobo.util.tensor_util import copy_if_not_none, copy_tensor
from curobo.util.tensor_util import clone_if_not_none, copy_tensor
# Local Folder
from .tensor import T_BPosition, T_BQuaternion, T_BRotation
@@ -256,10 +257,10 @@ class Pose(Sequence):
def clone(self):
return Pose(
position=copy_if_not_none(self.position),
quaternion=copy_if_not_none(self.quaternion),
position=clone_if_not_none(self.position),
quaternion=clone_if_not_none(self.quaternion),
normalize_rotation=False,
# rotation=copy_if_not_none(self.rotation),
# rotation=clone_if_not_none(self.rotation),
)
def to(self, tensor_args: TensorDeviceType):
@@ -408,6 +409,7 @@ class Pose(Sequence):
gpt_out,
)
@record_function("math/pose/transform_points")
def batch_transform_points(
self,
points: torch.Tensor,
@@ -432,6 +434,12 @@ class Pose(Sequence):
def shape(self):
return self.position.shape
def compute_offset_pose(self, offset: Pose) -> Pose:
return self.multiply(offset)
def compute_local_pose(self, world_pose: Pose) -> Pose:
return self.inverse().multiply(world_pose)
def quat_multiply(q1, q2, q_res):
a_w = q1[..., 0]

View File

@@ -25,7 +25,7 @@ from curobo.types.tensor import T_BDOF, T_DOF
from curobo.util.logger import log_error, log_info, log_warn
from curobo.util.tensor_util import (
check_tensor_shapes,
copy_if_not_none,
clone_if_not_none,
copy_tensor,
fd_tensor,
tensor_repeat_seeds,
@@ -121,12 +121,14 @@ class JointState(State):
def repeat_seeds(self, num_seeds: int):
return JointState(
position=tensor_repeat_seeds(self.position, num_seeds),
velocity=tensor_repeat_seeds(self.velocity, num_seeds)
if self.velocity is not None
else None,
acceleration=tensor_repeat_seeds(self.acceleration, num_seeds)
if self.acceleration is not None
else None,
velocity=(
tensor_repeat_seeds(self.velocity, num_seeds) if self.velocity is not None else None
),
acceleration=(
tensor_repeat_seeds(self.acceleration, num_seeds)
if self.acceleration is not None
else None
),
joint_names=self.joint_names,
)
@@ -153,10 +155,10 @@ class JointState(State):
if self.joint_names is not None:
j_names = self.joint_names.copy()
return JointState(
position=copy_if_not_none(self.position),
velocity=copy_if_not_none(self.velocity),
acceleration=copy_if_not_none(self.acceleration),
jerk=copy_if_not_none(self.jerk),
position=clone_if_not_none(self.position),
velocity=clone_if_not_none(self.velocity),
acceleration=clone_if_not_none(self.acceleration),
jerk=clone_if_not_none(self.jerk),
joint_names=j_names,
tensor_args=self.tensor_args,
)

View File

@@ -0,0 +1,64 @@
#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Standard Library
from dataclasses import dataclass
from typing import List, Optional
# Third Party
import numpy as np
try:
# Third Party
from robometrics.statistics import (
Statistic,
TrajectoryGroupMetrics,
TrajectoryMetrics,
percent_true,
)
except ImportError:
raise ImportError(
"Benchmarking library not found, pip install "
+ '"robometrics[evaluator] @ git+https://github.com/fishbotics/robometrics.git"'
)
@dataclass
class CuroboMetrics(TrajectoryMetrics):
time: float = np.inf
cspace_path_length: float = 0.0
perception_success: bool = False
perception_interpolated_success: bool = False
jerk: float = np.inf
@dataclass
class CuroboGroupMetrics(TrajectoryGroupMetrics):
time: float = np.inf
cspace_path_length: Optional[Statistic] = None
perception_success: float = 0.0
perception_interpolated_success: float = 0.0
jerk: float = np.inf
@classmethod
def from_list(cls, group: List[CuroboMetrics]):
unskipped = [m for m in group if not m.skip]
successes = [m for m in unskipped if m.success]
data = super().from_list(group)
data.time = Statistic.from_list([m.time for m in successes])
data.cspace_path_length = Statistic.from_list([m.cspace_path_length for m in successes])
data.perception_success = percent_true([m.perception_success for m in group])
data.perception_interpolated_success = percent_true(
[m.perception_interpolated_success for m in group]
)
data.jerk = Statistic.from_list([m.jerk for m in successes])
return data

View File

@@ -111,7 +111,7 @@ class HaltonSampleLib(BaseSampleLib):
super().__init__(sample_config)
# create halton generator:
self.halton_generator = HaltonGenerator(
self.ndims, seed=self.seed, tensor_args=self.tensor_args
self.d_action, seed=self.seed, tensor_args=self.tensor_args
)
def get_samples(self, sample_shape, base_seed=None, filter_smooth=False, **kwargs):
@@ -122,8 +122,10 @@ class HaltonSampleLib(BaseSampleLib):
seed = self.seed if base_seed is None else base_seed
self.sample_shape = sample_shape
self.seed = seed
self.samples = self.halton_generator.get_gaussian_samples(sample_shape[0])
self.samples = self.samples.view(self.samples.shape[0], self.horizon, self.d_action)
self.samples = self.halton_generator.get_gaussian_samples(
sample_shape[0] * self.horizon
)
self.samples = self.samples.view(sample_shape[0], self.horizon, self.d_action)
if filter_smooth:
self.samples = self.filter_smooth(self.samples)
@@ -301,7 +303,7 @@ class StompSampleLib(BaseSampleLib):
self.filter_coeffs = None
self.halton_generator = HaltonGenerator(
self.ndims, seed=self.seed, tensor_args=self.tensor_args
self.d_action, seed=self.seed, tensor_args=self.tensor_args
)
def get_samples(self, sample_shape, base_seed=None, **kwargs):
@@ -314,7 +316,10 @@ class StompSampleLib(BaseSampleLib):
# self.seed = seed
# torch.manual_seed(self.seed)
halton_samples = self.halton_generator.get_gaussian_samples(sample_shape[0])
halton_samples = self.halton_generator.get_gaussian_samples(
sample_shape[0] * self.horizon
).view(sample_shape[0], self.horizon * self.d_action)
halton_samples = (
self.stomp_scale_tril.unsqueeze(0) @ halton_samples.unsqueeze(-1)
).squeeze(-1)
@@ -415,7 +420,7 @@ class HaltonGenerator:
up_bounds=[1],
low_bounds=[0],
seed=123,
store_buffer: Optional[int] = 1000,
store_buffer: Optional[int] = 2000,
):
self._seed = seed
self.tensor_args = tensor_args
@@ -499,7 +504,7 @@ class HaltonGenerator:
@profiler.record_function("halton_generator/gaussian_samples")
def get_gaussian_samples(self, num_samples, variance=1.0):
std_dev = np.sqrt(variance)
uniform_samples = self.get_samples(num_samples)
uniform_samples = self.get_samples(num_samples, False)
gaussian_halton_samples = gaussian_transform(
uniform_samples, self.proj_mat, self.i_mat, std_dev
)
@@ -508,7 +513,7 @@ class HaltonGenerator:
@torch.jit.script
def gaussian_transform(
uniform_samples: torch.Tensor, proj_mat: torch.Tensor, i_mat: torch.Tensor, variance: float
uniform_samples: torch.Tensor, proj_mat: torch.Tensor, i_mat: torch.Tensor, std_dev: float
):
"""Compute a guassian transform of uniform samples.
@@ -525,7 +530,7 @@ def gaussian_transform(
# these values.
changed_samples = 1.99 * uniform_samples - 0.99
gaussian_halton_samples = proj_mat * torch.erfinv(changed_samples)
i_mat = i_mat * variance
i_mat = i_mat * std_dev
gaussian_halton_samples = torch.matmul(gaussian_halton_samples, i_mat)
return gaussian_halton_samples

View File

@@ -31,11 +31,29 @@ def copy_tensor(new_tensor: torch.Tensor, mem_tensor: torch.Tensor):
return False
def copy_if_not_none(x):
def copy_if_not_none(new_tensor, ref_tensor):
"""Clones x if it's not None.
TODO: Rename this to clone_if_not_none
Args:
x (torch.Tensor): _description_
Returns:
_type_: _description_
"""
if ref_tensor is not None and new_tensor is not None:
ref_tensor.copy_(new_tensor)
elif ref_tensor is None and new_tensor is not None:
ref_tensor = new_tensor
return ref_tensor
def clone_if_not_none(x):
"""Clones x if it's not None.
Args:
x (torch.Tensor): _description_

View File

@@ -34,3 +34,10 @@ def is_cuda_graph_available():
log_warn("WARNING: Disabling CUDA Graph as pytorch < 1.10")
return False
return True
def is_torch_compile_available():
if version.parse(torch.__version__) < version.parse("2.0"):
log_warn("WARNING: Disabling compile as pytorch < 2.0")
return False
return True

View File

@@ -16,7 +16,6 @@ from typing import Dict, List, Optional, Union
# Third Party
import numpy as np
import torch
from pxr import Gf, Sdf, Usd, UsdGeom, UsdPhysics, UsdShade
from tqdm import tqdm
# CuRobo
@@ -47,6 +46,15 @@ from curobo.util_file import (
)
from curobo.wrap.reacher.motion_gen import MotionGenResult
try:
# Third Party
from pxr import Gf, Sdf, Usd, UsdGeom, UsdPhysics, UsdShade
except ImportError:
raise ImportError(
"usd-core failed to import, install with pip install usd-core"
+ " NOTE: Do not install this if using with ISAAC SIM."
)
def set_prim_translate(prim, translation):
UsdGeom.Xformable(prim).AddTranslateOp().Set(Gf.Vec3d(translation))

View File

@@ -24,5 +24,5 @@ def init_warp(quiet=True, tensor_args: TensorDeviceType = TensorDeviceType()):
# wp.config.enable_backward = True
wp.init()
wp.force_load(wp.device_from_torch(tensor_args.device))
# wp.force_load(wp.device_from_torch(tensor_args.device))
return True

View File

@@ -0,0 +1,204 @@
#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Standard Library
from typing import Dict, Optional, Tuple, Union
# Third Party
import torch
from torch.profiler import record_function
# CuRobo
from curobo.geom.cv import (
get_projection_rays,
project_depth_using_rays,
)
from curobo.geom.types import PointCloud
from curobo.types.base import TensorDeviceType
from curobo.types.camera import CameraObservation
from curobo.types.math import Pose
from curobo.types.robot import RobotConfig
from curobo.types.state import JointState
from curobo.util.logger import log_error
from curobo.util_file import get_robot_configs_path, join_path, load_yaml
from curobo.wrap.model.robot_world import RobotWorld, RobotWorldConfig
class RobotSegmenter:
def __init__(
self,
robot_world: RobotWorld,
distance_threshold: float = 0.05,
use_cuda_graph: bool = True,
):
self._robot_world = robot_world
self._projection_rays = None
self.ready = False
self._out_points_buffer = None
self._out_gp = None
self._out_gq = None
self._out_gpt = None
self._cu_graph = None
self._use_cuda_graph = use_cuda_graph
self.tensor_args = robot_world.tensor_args
self.distance_threshold = distance_threshold
@staticmethod
def from_robot_file(
robot_file: Union[str, Dict],
collision_sphere_buffer: Optional[float],
distance_threshold: float = 0.05,
use_cuda_graph: bool = True,
tensor_args: TensorDeviceType = TensorDeviceType(),
):
robot_dict = load_yaml(join_path(get_robot_configs_path(), robot_file))["robot_cfg"]
if collision_sphere_buffer is not None:
robot_dict["kinematics"]["collision_sphere_buffer"] = collision_sphere_buffer
robot_cfg = RobotConfig.from_dict(robot_dict, tensor_args=tensor_args)
config = RobotWorldConfig.load_from_config(
robot_cfg,
None,
collision_activation_distance=0.0,
tensor_args=tensor_args,
)
robot_world = RobotWorld(config)
return RobotSegmenter(
robot_world, distance_threshold=distance_threshold, use_cuda_graph=use_cuda_graph
)
def get_pointcloud_from_depth(self, camera_obs: CameraObservation):
if self._projection_rays is None:
self.update_camera_projection(camera_obs)
depth_image = camera_obs.depth_image
if len(depth_image.shape) == 2:
depth_image = depth_image.unsqueeze(0)
points = project_depth_using_rays(depth_image, self._projection_rays)
return points
def update_camera_projection(self, camera_obs: CameraObservation):
intrinsics = camera_obs.intrinsics
if len(intrinsics.shape) == 2:
intrinsics = intrinsics.unsqueeze(0)
project_rays = get_projection_rays(
camera_obs.depth_image.shape[-2], camera_obs.depth_image.shape[-1], intrinsics
)
if self._projection_rays is None:
self._projection_rays = project_rays
self._projection_rays.copy_(project_rays)
self.ready = True
@record_function("robot_segmenter/get_robot_mask")
def get_robot_mask(
self,
camera_obs: CameraObservation,
joint_state: JointState,
):
"""
Assumes 1 robot and batch of depth images, batch of poses
"""
if len(camera_obs.depth_image.shape) != 3:
log_error("Send depth image as (batch, height, width)")
active_js = self._robot_world.get_active_js(joint_state)
mask, filtered_image = self.get_robot_mask_from_active_js(camera_obs, active_js)
return mask, filtered_image
def get_robot_mask_from_active_js(
self, camera_obs: CameraObservation, active_joint_state: JointState
):
q = active_joint_state.position
mask, filtered_image = self._call_op(camera_obs, q)
return mask, filtered_image
def _create_cg_graph(self, cam_obs, q):
self._cu_cam_obs = cam_obs.clone()
self._cu_q = q.clone()
s = torch.cuda.Stream(device=self.tensor_args.device)
s.wait_stream(torch.cuda.current_stream(device=self.tensor_args.device))
with torch.cuda.stream(s):
for _ in range(3):
self._cu_out, self._cu_filtered_out = self._mask_op(self._cu_cam_obs, self._cu_q)
torch.cuda.current_stream(device=self.tensor_args.device).wait_stream(s)
self._cu_graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(self._cu_graph, stream=s):
self._cu_out, self._cu_filtered_out = self._mask_op(
self._cu_cam_obs,
self._cu_q,
)
def _call_op(self, cam_obs, q):
if self._use_cuda_graph:
if self._cu_graph is None:
self._create_cg_graph(cam_obs, q)
self._cu_cam_obs.copy_(cam_obs)
self._cu_q.copy_(q)
self._cu_graph.replay()
return self._cu_out.clone(), self._cu_filtered_out.clone()
return self._mask_op(cam_obs, q)
@record_function("robot_segmenter/_mask_op")
def _mask_op(self, camera_obs, q):
if len(q.shape) == 1:
q = q.unsqueeze(0)
points = self.get_pointcloud_from_depth(camera_obs)
camera_to_robot = camera_obs.pose
if self._out_points_buffer is None:
self._out_points_buffer = points.clone()
if self._out_gpt is None:
self._out_gpt = torch.zeros((points.shape[0], points.shape[1], 3), device=points.device)
if self._out_gp is None:
self._out_gp = torch.zeros((camera_to_robot.position.shape[0], 3), device=points.device)
if self._out_gq is None:
self._out_gq = torch.zeros(
(camera_to_robot.quaternion.shape[0], 4), device=points.device
)
points_in_robot_frame = camera_to_robot.batch_transform_points(
points,
out_buffer=self._out_points_buffer,
gp_out=self._out_gp,
gq_out=self._out_gq,
gpt_out=self._out_gpt,
)
out_points = points_in_robot_frame
dist = self._robot_world.get_point_robot_distance(out_points, q)
mask, filtered_image = mask_image(camera_obs.depth_image, dist, self.distance_threshold)
return mask, filtered_image
@torch.jit.script
def mask_image(
image: torch.Tensor, distance: torch.Tensor, distance_threshold: float
) -> Tuple[torch.Tensor, torch.Tensor]:
distance = distance.view(
image.shape[0],
image.shape[1],
image.shape[2],
)
mask = torch.logical_and((image > 0.0), (distance > -distance_threshold))
filtered_image = torch.where(mask, 0, image)
return mask, filtered_image

View File

@@ -34,6 +34,7 @@ from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
from curobo.types.robot import RobotConfig
from curobo.types.state import JointState
from curobo.util.logger import log_error
from curobo.util.sample_lib import HaltonGenerator
from curobo.util.warp import init_warp
from curobo.util_file import get_robot_configs_path, get_world_configs_path, join_path, load_yaml
@@ -60,8 +61,8 @@ class RobotWorldConfig:
world_model: Union[None, str, Dict, WorldConfig, List[WorldConfig], List[str]] = None,
tensor_args: TensorDeviceType = TensorDeviceType(),
n_envs: int = 1,
n_meshes: int = 10,
n_cuboids: int = 10,
n_meshes: int = 50,
n_cuboids: int = 50,
collision_activation_distance: float = 0.2,
self_collision_activation_distance: float = 0.0,
max_collision_distance: float = 1.0,
@@ -74,6 +75,8 @@ class RobotWorldConfig:
if isinstance(robot_config, str):
robot_config = load_yaml(join_path(get_robot_configs_path(), robot_config))["robot_cfg"]
if isinstance(robot_config, Dict):
if "robot_cfg" in robot_config:
robot_config = robot_config["robot_cfg"]
robot_config = RobotConfig.from_dict(robot_config, tensor_args)
kinematics = CudaRobotModel(robot_config.kinematics)
@@ -178,8 +181,11 @@ class RobotWorld(RobotWorldConfig):
def __init__(self, config: RobotWorldConfig) -> None:
RobotWorldConfig.__init__(self, **vars(config))
self._batch_pose_idx = None
self._camera_projection_rays = None
def get_kinematics(self, q: torch.Tensor) -> CudaRobotModelState:
if len(q.shape) == 1:
log_error("q should be of shape [b, dof]")
state = self.kinematics.get_state(q)
return state
@@ -344,6 +350,37 @@ class RobotWorld(RobotWorldConfig):
distance = self.pose_cost.forward_pose(x_des, x_current, self._batch_pose_idx)
return distance
def get_point_robot_distance(self, points: torch.Tensor, q: torch.Tensor):
"""Compute distance from the robot at q joint configuration to points (e.g., pointcloud)
Args:
points: [b,n,3]
q: [1, dof]
Returns:
distance: [b,1] Positive is in collision with robot
NOTE: This currently does not support batched robot but can be done easily.
"""
if len(q.shape) == 1:
log_error("q should be of shape [b, dof]")
kin_state = self.get_kinematics(q)
b, n = None, None
if len(points.shape) == 3:
b, n, _ = points.shape
points = points.view(b * n, 3)
pt_distance = point_robot_distance(kin_state.link_spheres_tensor, points)
if b is not None:
pt_distance = pt_distance.view(b, n)
return pt_distance
def get_active_js(self, full_js: JointState):
active_jnames = self.kinematics.joint_names
out_js = full_js.get_ordered_joint_state(active_jnames)
return out_js
@torch.jit.script
def sum_mask(d1, d2, d3):
@@ -357,3 +394,15 @@ def mask(d1, d2, d3):
d_total = d1 + d2 + d3
d_mask = d_total == 0.0
return d_mask
@torch.jit.script
def point_robot_distance(link_spheres_tensor, points):
robot_spheres = link_spheres_tensor.view(1, -1, 4).contiguous()
robot_radius = robot_spheres[:, :, 3]
points = points.unsqueeze(1)
sph_distance = (
torch.linalg.norm(points - robot_spheres[:, :, :3], dim=-1) - robot_radius
) # b, n_spheres
pt_distance = torch.max(-1 * sph_distance, dim=-1)[0]
return pt_distance

View File

@@ -52,7 +52,8 @@ def smooth_cost(abs_acc, abs_jerk, opt_dt):
# jerk = torch.max(torch.max(abs_jerk, dim=-1)[0], dim=-1)[0]
jerk = torch.mean(torch.max(abs_jerk, dim=-1)[0], dim=-1)
mean_acc = torch.mean(torch.max(abs_acc, dim=-1)[0], dim=-1) # [0]
a = (jerk * 0.001) + opt_dt + (mean_acc * 0.01)
a = (jerk * 0.001) + 5.0 * opt_dt + (mean_acc * 0.01)
return a

View File

@@ -26,6 +26,7 @@ from curobo.opt.newton.newton_base import NewtonOptBase, NewtonOptConfig
from curobo.opt.particle.parallel_es import ParallelES, ParallelESConfig
from curobo.opt.particle.parallel_mppi import ParallelMPPI, ParallelMPPIConfig
from curobo.rollout.arm_reacher import ArmReacher, ArmReacherConfig
from curobo.rollout.cost.pose_cost import PoseCostMetric
from curobo.rollout.rollout_base import Goal, RolloutBase, RolloutMetrics
from curobo.types.base import TensorDeviceType
from curobo.types.math import Pose
@@ -56,6 +57,7 @@ class IKSolverConfig:
world_coll_checker: Optional[WorldCollision] = None
sample_rejection_ratio: int = 50
tensor_args: TensorDeviceType = TensorDeviceType()
use_cuda_graph: bool = True
@staticmethod
@profiler.record_function("ik_solver/load_from_robot_config")
@@ -72,12 +74,12 @@ class IKSolverConfig:
base_cfg_file: str = "base_cfg.yml",
particle_file: str = "particle_ik.yml",
gradient_file: str = "gradient_ik.yml",
use_cuda_graph: Optional[bool] = None,
use_cuda_graph: bool = True,
self_collision_check: bool = True,
self_collision_opt: bool = True,
grad_iters: Optional[int] = None,
use_particle_opt: bool = True,
collision_checker_type: Optional[CollisionCheckerType] = CollisionCheckerType.PRIMITIVE,
collision_checker_type: Optional[CollisionCheckerType] = CollisionCheckerType.MESH,
sync_cuda_time: Optional[bool] = None,
use_gradient_descent: bool = False,
collision_cache: Optional[Dict[str, int]] = None,
@@ -90,6 +92,7 @@ class IKSolverConfig:
regularization: bool = True,
collision_activation_distance: Optional[float] = None,
high_precision: bool = False,
project_pose_to_goal_frame: bool = True,
):
if position_threshold <= 0.001:
high_precision = True
@@ -116,6 +119,9 @@ class IKSolverConfig:
base_config_data["convergence"]["cspace_cfg"]["weight"] = 0.0
config_data["cost"]["bound_cfg"]["null_space_weight"] = 0.0
grad_config_data["cost"]["bound_cfg"]["null_space_weight"] = 0.0
if isinstance(robot_cfg, str):
robot_cfg = load_yaml(join_path(get_robot_configs_path(), robot_cfg))["robot_cfg"]
if ee_link_name is not None:
if isinstance(robot_cfg, RobotConfig):
raise NotImplementedError("ee link cannot be changed after creating RobotConfig")
@@ -123,8 +129,6 @@ class IKSolverConfig:
robot_cfg.kinematics.ee_link = ee_link_name
else:
robot_cfg["kinematics"]["ee_link"] = ee_link_name
if isinstance(robot_cfg, str):
robot_cfg = load_yaml(join_path(get_robot_configs_path(), robot_cfg))["robot_cfg"]
if isinstance(robot_cfg, dict):
robot_cfg = RobotConfig.from_dict(robot_cfg, tensor_args)
@@ -160,8 +164,8 @@ class IKSolverConfig:
grad_config_data["cost"]["self_collision_cfg"]["weight"] = 0.0
if grad_iters is not None:
grad_config_data["lbfgs"]["n_iters"] = grad_iters
config_data["mppi"]["n_envs"] = 1
grad_config_data["lbfgs"]["n_envs"] = 1
config_data["mppi"]["n_problems"] = 1
grad_config_data["lbfgs"]["n_problems"] = 1
grad_cfg = ArmReacherConfig.from_dict(
robot_cfg,
grad_config_data["model"],
@@ -241,6 +245,7 @@ class IKSolverConfig:
world_coll_checker=world_coll_checker,
rollout_fn=aux_rollout,
tensor_args=tensor_args,
use_cuda_graph=use_cuda_graph,
)
return ik_cfg
@@ -259,6 +264,7 @@ class IKResult(Sequence):
error: T_BValue_float
solve_time: float
debug_info: Optional[Any] = None
goalset_index: Optional[torch.Tensor] = None
def __getitem__(self, idx):
success = self.success[idx]
@@ -272,6 +278,7 @@ class IKResult(Sequence):
position_error=self.position_error[idx],
rotation_error=self.rotation_error[idx],
debug_info=self.debug_info,
goalset_index=None if self.goalset_index is None else self.goalset_index[idx],
)
def __len__(self):
@@ -317,7 +324,7 @@ class IKSolver(IKSolverConfig):
self.solver.rollout_fn.retract_state.unsqueeze(0)
)
self.dof = self.solver.safety_rollout.d_action
self._col = torch.arange(0, 1, device=self.tensor_args.device, dtype=torch.long)
self._col = None # torch.arange(0, 1, device=self.tensor_args.device, dtype=torch.long)
# self.fixed_seeds = self.solver.safety_rollout.sample_random_actions(100 * 200)
# create random seeder:
@@ -355,10 +362,14 @@ class IKSolver(IKSolverConfig):
self._goal_buffer,
self.tensor_args,
)
# print("Goal:", self._goal_buffer.links_goal_pose)
if update_reference:
self.solver.update_nenvs(self._solve_state.get_ik_batch_size())
self.reset_cuda_graph()
self.reset_shape()
if self.use_cuda_graph and self._col is not None:
log_error("changing goal type, breaking previous cuda graph.")
self.reset_cuda_graph()
self.solver.update_nproblems(self._solve_state.get_ik_batch_size())
self._goal_buffer.current_state = self.init_state.repeat_seeds(goal_pose.batch)
self._col = torch.arange(
0,
@@ -676,6 +687,8 @@ class IKSolver(IKSolverConfig):
if newton_iters is not None:
self.solver.newton_optimizer.outer_iters = self.og_newton_iters
ik_result = self.get_result(num_seeds, result, goal_buffer.goal_pose, return_seeds)
if ik_result.goalset_index is not None:
ik_result.goalset_index[ik_result.goalset_index >= goal_pose.n_goalset] = 0
return ik_result
@@ -684,15 +697,18 @@ class IKSolver(IKSolverConfig):
self, num_seeds: int, result: WrapResult, goal_pose: Pose, return_seeds: int
) -> IKResult:
success = self.get_success(result.metrics, num_seeds=num_seeds)
if result.metrics.cost is not None:
result.metrics.pose_error += result.metrics.cost
# if result.metrics.cost is not None:
# result.metrics.pose_error += result.metrics.cost * 0.0001
if result.metrics.null_space_error is not None:
result.metrics.pose_error += result.metrics.null_space_error
if result.metrics.cspace_error is not None:
result.metrics.pose_error += result.metrics.cspace_error
q_sol, success, position_error, rotation_error, total_error = get_result(
q_sol, success, position_error, rotation_error, total_error, goalset_index = get_result(
result.metrics.pose_error,
result.metrics.position_error,
result.metrics.rotation_error,
result.metrics.goalset_index,
success,
result.action.position,
self._col,
@@ -717,6 +733,7 @@ class IKSolver(IKSolverConfig):
solve_time=result.solve_time,
error=total_error,
debug_info={"solver": result.debug},
goalset_index=goalset_index,
)
return ik_result
@@ -959,6 +976,10 @@ class IKSolver(IKSolverConfig):
self.solver.reset_cuda_graph()
self.rollout_fn.reset_cuda_graph()
def reset_shape(self):
self.solver.reset_shape()
self.rollout_fn.reset_shape()
def attach_object_to_robot(
self,
sphere_radius: float,
@@ -977,6 +998,17 @@ class IKSolver(IKSolverConfig):
def get_retract_config(self):
return self.rollout_fn.dynamics_model.retract_config
def update_pose_cost_metric(
self,
metric: PoseCostMetric,
):
rollouts = self.get_all_rollout_instances()
[
rollout.update_pose_cost_metric(metric)
for rollout in rollouts
if isinstance(rollout, ArmReacher)
]
@torch.jit.script
def get_success(
@@ -1001,6 +1033,7 @@ def get_result(
pose_error,
position_error,
rotation_error,
goalset_index: Union[torch.Tensor, None],
success,
sol_position,
col,
@@ -1018,4 +1051,6 @@ def get_result(
position_error = position_error[idx].view(batch_size, return_seeds)
rotation_error = rotation_error[idx].view(batch_size, return_seeds)
total_error = position_error + rotation_error
return q_sol, success, position_error, rotation_error, total_error
if goalset_index is not None:
goalset_index = goalset_index[idx].view(batch_size, return_seeds)
return q_sol, success, position_error, rotation_error, total_error, goalset_index

File diff suppressed because it is too large Load Diff

View File

@@ -80,7 +80,7 @@ class MpcSolverConfig:
task_file = "particle_mpc.yml"
config_data = load_yaml(join_path(get_task_configs_path(), task_file))
config_data["mppi"]["n_envs"] = 1
config_data["mppi"]["n_problems"] = 1
if step_dt is not None:
config_data["model"]["dt_traj_params"]["base_dt"] = step_dt
if particle_opt_iters is not None:
@@ -238,7 +238,7 @@ class MpcSolver(MpcSolverConfig):
self.tensor_args,
)
if update_reference:
self.solver.update_nenvs(self._solve_state.get_batch_size())
self.solver.update_nproblems(self._solve_state.get_batch_size())
self.reset()
self.reset_cuda_graph()
self._col = torch.arange(

View File

@@ -30,6 +30,7 @@ from curobo.opt.newton.newton_base import NewtonOptBase, NewtonOptConfig
from curobo.opt.particle.parallel_es import ParallelES, ParallelESConfig
from curobo.opt.particle.parallel_mppi import ParallelMPPI, ParallelMPPIConfig
from curobo.rollout.arm_reacher import ArmReacher, ArmReacherConfig
from curobo.rollout.cost.pose_cost import PoseCostMetric
from curobo.rollout.dynamics_model.integration_utils import (
action_interpolate_kernel,
interpolate_kernel,
@@ -39,7 +40,7 @@ from curobo.types.base import TensorDeviceType
from curobo.types.robot import JointState, RobotConfig
from curobo.types.tensor import T_BDOF, T_DOF, T_BValue_bool, T_BValue_float
from curobo.util.helpers import list_idx_if_not_none
from curobo.util.logger import log_info, log_warn
from curobo.util.logger import log_error, log_info, log_warn
from curobo.util.trajectory import (
InterpolateType,
calculate_dt_no_clamp,
@@ -78,6 +79,7 @@ class TrajOptSolverConfig:
trim_steps: Optional[List[int]] = None
store_debug_in_result: bool = False
optimize_dt: bool = True
use_cuda_graph: bool = True
@staticmethod
@profiler.record_function("trajopt_config/load_from_robot_config")
@@ -98,14 +100,14 @@ class TrajOptSolverConfig:
interpolation_type: InterpolateType = InterpolateType.LINEAR_CUDA,
interpolation_steps: int = 10000,
interpolation_dt: float = 0.01,
use_cuda_graph: Optional[bool] = None,
use_cuda_graph: bool = True,
self_collision_check: bool = False,
self_collision_opt: bool = True,
grad_trajopt_iters: Optional[int] = None,
num_seeds: int = 2,
seed_ratio: Dict[str, int] = {"linear": 1.0, "bias": 0.0, "start": 0.0, "end": 0.0},
use_particle_opt: bool = True,
collision_checker_type: Optional[CollisionCheckerType] = CollisionCheckerType.PRIMITIVE,
collision_checker_type: Optional[CollisionCheckerType] = CollisionCheckerType.MESH,
traj_evaluator_config: TrajEvaluatorConfig = TrajEvaluatorConfig(),
traj_evaluator: Optional[TrajEvaluator] = None,
minimize_jerk: bool = True,
@@ -128,6 +130,7 @@ class TrajOptSolverConfig:
state_finite_difference_mode: Optional[str] = None,
filter_robot_command: bool = False,
optimize_dt: bool = True,
project_pose_to_goal_frame: bool = True,
):
# NOTE: Don't have default optimize_dt, instead read from a configuration file.
# use default values, disable environment collision checking
@@ -163,6 +166,16 @@ class TrajOptSolverConfig:
if traj_tsteps is None:
traj_tsteps = grad_config_data["model"]["horizon"]
base_config_data["cost"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
base_config_data["convergence"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
config_data["cost"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
grad_config_data["cost"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
base_config_data["cost"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
base_config_data["convergence"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
config_data["cost"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
grad_config_data["cost"]["pose_cfg"]["project_distance"] = project_pose_to_goal_frame
config_data["model"]["horizon"] = traj_tsteps
grad_config_data["model"]["horizon"] = traj_tsteps
if minimize_jerk is not None:
@@ -212,8 +225,8 @@ class TrajOptSolverConfig:
if not self_collision_opt:
config_data["cost"]["self_collision_cfg"]["weight"] = 0.0
grad_config_data["cost"]["self_collision_cfg"]["weight"] = 0.0
config_data["mppi"]["n_envs"] = 1
grad_config_data["lbfgs"]["n_envs"] = 1
config_data["mppi"]["n_problems"] = 1
grad_config_data["lbfgs"]["n_problems"] = 1
if fixed_iters is not None:
grad_config_data["lbfgs"]["fixed_iters"] = fixed_iters
@@ -256,9 +269,10 @@ class TrajOptSolverConfig:
world_coll_checker=world_coll_checker,
tensor_args=tensor_args,
)
arm_rollout_mppi = None
with profiler.record_function("trajopt_config/create_rollouts"):
arm_rollout_mppi = ArmReacher(cfg)
if use_particle_opt:
arm_rollout_mppi = ArmReacher(cfg)
arm_rollout_grad = ArmReacher(grad_cfg)
arm_rollout_safety = ArmReacher(safety_cfg)
@@ -266,20 +280,22 @@ class TrajOptSolverConfig:
aux_rollout = ArmReacher(safety_cfg)
interpolate_rollout = ArmReacher(safety_cfg)
if trajopt_dt is not None:
arm_rollout_mppi.update_traj_dt(dt=trajopt_dt)
if arm_rollout_mppi is not None:
arm_rollout_mppi.update_traj_dt(dt=trajopt_dt)
aux_rollout.update_traj_dt(dt=trajopt_dt)
arm_rollout_grad.update_traj_dt(dt=trajopt_dt)
arm_rollout_safety.update_traj_dt(dt=trajopt_dt)
config_dict = ParallelMPPIConfig.create_data_dict(
config_data["mppi"], arm_rollout_mppi, tensor_args
)
if arm_rollout_mppi is not None:
config_dict = ParallelMPPIConfig.create_data_dict(
config_data["mppi"], arm_rollout_mppi, tensor_args
)
parallel_mppi = None
if use_es is not None and use_es:
mppi_cfg = ParallelESConfig(**config_dict)
if es_learning_rate is not None:
mppi_cfg.learning_rate = es_learning_rate
parallel_mppi = ParallelES(mppi_cfg)
else:
elif use_particle_opt:
mppi_cfg = ParallelMPPIConfig(**config_dict)
parallel_mppi = ParallelMPPI(mppi_cfg)
@@ -307,7 +323,7 @@ class TrajOptSolverConfig:
cfg = WrapConfig(
safety_rollout=arm_rollout_safety,
optimizers=opt_list,
compute_metrics=not evaluate_interpolated_trajectory,
compute_metrics=True, # not evaluate_interpolated_trajectory,
use_cuda_graph_metrics=grad_config_data["lbfgs"]["use_cuda_graph"],
sync_cuda_time=sync_cuda_time,
)
@@ -337,6 +353,7 @@ class TrajOptSolverConfig:
trim_steps=trim_steps,
store_debug_in_result=store_debug_in_result,
optimize_dt=optimize_dt,
use_cuda_graph=use_cuda_graph,
)
return trajopt_cfg
@@ -360,6 +377,7 @@ class TrajResult(Sequence):
optimized_dt: Optional[torch.Tensor] = None
raw_solution: Optional[JointState] = None
raw_action: Optional[torch.Tensor] = None
goalset_index: Optional[torch.Tensor] = None
def __getitem__(self, idx):
# position_error = rotation_error = cspace_error = path_buffer_last_tstep = None
@@ -372,6 +390,7 @@ class TrajResult(Sequence):
self.position_error,
self.rotation_error,
self.cspace_error,
self.goalset_index,
]
idx_vals = list_idx_if_not_none(d_list, idx)
@@ -388,6 +407,7 @@ class TrajResult(Sequence):
position_error=idx_vals[3],
rotation_error=idx_vals[4],
cspace_error=idx_vals[5],
goalset_index=idx_vals[6],
)
def __len__(self):
@@ -405,7 +425,7 @@ class TrajOptSolver(TrajOptSolverConfig):
3, int(self.action_horizon / 2), self.tensor_args
).unsqueeze(0)
assert self.action_horizon / 2 != 0.0
self.solver.update_nenvs(self.num_seeds)
self.solver.update_nproblems(self.num_seeds)
self._max_joint_vel = (
self.solver.safety_rollout.state_bounds.velocity.view(2, self.dof)[1, :].reshape(
1, 1, self.dof
@@ -469,12 +489,18 @@ class TrajOptSolver(TrajOptSolverConfig):
self._goal_buffer,
self.tensor_args,
)
if update_reference:
self.solver.update_nenvs(self._solve_state.get_batch_size())
self.reset_cuda_graph()
if self.use_cuda_graph and self._col is not None:
log_error("changing goal type, breaking previous cuda graph.")
self.reset_cuda_graph()
self.solver.update_nproblems(self._solve_state.get_batch_size())
self._col = torch.arange(
0, goal.batch, device=self.tensor_args.device, dtype=torch.long
)
self.reset_shape()
return self._goal_buffer
def solve_any(
@@ -586,6 +612,8 @@ class TrajOptSolver(TrajOptSolverConfig):
num_seeds,
solve_state.batch_mode,
)
if traj_result.goalset_index is not None:
traj_result.goalset_index[traj_result.goalset_index >= goal.goal_pose.n_goalset] = 0
if newton_iters is not None:
self.solver.newton_optimizer.outer_iters = self._og_newton_iters
self.solver.newton_optimizer.fixed_iters = self._og_newton_fixed_iters
@@ -839,16 +867,18 @@ class TrajOptSolver(TrajOptSolverConfig):
if self.evaluate_interpolated_trajectory:
with profiler.record_function("trajopt/evaluate_interpolated"):
if self.use_cuda_graph_metrics:
result.metrics = self.interpolate_rollout.get_metrics_cuda_graph(
interpolated_trajs
)
metrics = self.interpolate_rollout.get_metrics_cuda_graph(interpolated_trajs)
else:
result.metrics = self.interpolate_rollout.get_metrics(interpolated_trajs)
metrics = self.interpolate_rollout.get_metrics(interpolated_trajs)
result.metrics.feasible = metrics.feasible
result.metrics.position_error = metrics.position_error
result.metrics.rotation_error = metrics.rotation_error
result.metrics.cspace_error = metrics.cspace_error
result.metrics.goalset_index = metrics.goalset_index
st_time = time.time()
feasible = torch.all(result.metrics.feasible, dim=-1)
# if self.num_seeds == 1:
# print(result.metrics)
if result.metrics.position_error is not None:
converge = torch.logical_and(
result.metrics.position_error[..., -1] <= self.position_threshold,
@@ -877,10 +907,10 @@ class TrajOptSolver(TrajOptSolverConfig):
optimized_dt=opt_dt,
raw_solution=result.action,
raw_action=result.raw_action,
goalset_index=result.metrics.goalset_index,
)
else:
# get path length:
# max_vel =
if self.evaluate_interpolated_trajectory:
smooth_label, smooth_cost = self.traj_evaluator.evaluate_interpolated_smootheness(
interpolated_trajs,
@@ -896,7 +926,6 @@ class TrajOptSolver(TrajOptSolverConfig):
self.solver.rollout_fn.dynamics_model.cspace_distance_weight,
self._velocity_bounds,
)
# print(smooth_label, success, self._velocity_bounds.shape, self.solver.rollout_fn.dynamics_model.cspace_distance_weight)
with profiler.record_function("trajopt/best_select"):
success[~smooth_label] = False
@@ -907,7 +936,8 @@ class TrajOptSolver(TrajOptSolverConfig):
convergence_error = result.metrics.cspace_error[..., -1]
else:
raise ValueError("convergence check requires either goal_pose or goal_state")
error = convergence_error + smooth_cost
running_cost = torch.mean(result.metrics.cost, dim=-1) * 0.0001
error = convergence_error + smooth_cost + running_cost
error[~success] += 10000.0
if batch_mode:
idx = torch.argmin(error.view(goal.batch, num_seeds), dim=-1)
@@ -923,13 +953,16 @@ class TrajOptSolver(TrajOptSolverConfig):
best_act_seq = result.action[idx]
best_raw_action = result.raw_action[idx]
interpolated_traj = interpolated_trajs[idx]
position_error = rotation_error = cspace_error = None
goalset_index = position_error = rotation_error = cspace_error = None
if result.metrics.position_error is not None:
position_error = result.metrics.position_error[idx, -1]
if result.metrics.rotation_error is not None:
rotation_error = result.metrics.rotation_error[idx, -1]
if result.metrics.cspace_error is not None:
cspace_error = result.metrics.cspace_error[idx, -1]
if result.metrics.goalset_index is not None:
goalset_index = result.metrics.goalset_index[idx, -1]
opt_dt = opt_dt[idx]
if self.sync_cuda_time:
torch.cuda.synchronize()
@@ -965,6 +998,7 @@ class TrajOptSolver(TrajOptSolverConfig):
optimized_dt=opt_dt,
raw_solution=best_act_seq,
raw_action=best_raw_action,
goalset_index=goalset_index,
)
return traj_result
@@ -999,7 +1033,6 @@ class TrajOptSolver(TrajOptSolverConfig):
return self.solve_batch_goalset(
goal, seed_traj, use_nn_seed, return_all_solutions, num_seeds, seed_success
)
return traj_result
def get_linear_seed(self, start_state, goal_state):
start_q = start_state.position.view(-1, 1, self.dof)
@@ -1173,6 +1206,11 @@ class TrajOptSolver(TrajOptSolverConfig):
self.interpolate_rollout.reset_cuda_graph()
self.rollout_fn.reset_cuda_graph()
def reset_shape(self):
self.solver.reset_shape()
self.interpolate_rollout.reset_shape()
self.rollout_fn.reset_shape()
@property
def kinematics(self) -> CudaRobotModel:
return self.rollout_fn.dynamics_model.robot_model
@@ -1205,3 +1243,14 @@ class TrajOptSolver(TrajOptSolverConfig):
def get_full_js(self, active_js: JointState) -> JointState:
return self.rollout_fn.get_full_dof_from_solution(active_js)
def update_pose_cost_metric(
self,
metric: PoseCostMetric,
):
rollouts = self.get_all_rollout_instances()
[
rollout.update_pose_cost_metric(metric)
for rollout in rollouts
if isinstance(rollout, ArmReacher)
]

View File

@@ -13,7 +13,7 @@ from __future__ import annotations
# Standard Library
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
# CuRobo
from curobo.rollout.rollout_base import Goal
@@ -126,16 +126,26 @@ class ReacherSolveState:
if (
current_solve_state is None
or current_goal_buffer is None
or current_solve_state != solve_state
or (current_goal_buffer.retract_state is None and retract_config is not None)
or (current_goal_buffer.goal_state is None and goal_state is not None)
or (current_goal_buffer.links_goal_pose is None and link_poses is not None)
):
update_reference = True
elif current_solve_state != solve_state:
new_goal_pose = get_padded_goalset(
solve_state, current_solve_state, current_goal_buffer, goal_pose
)
if new_goal_pose is not None:
goal_pose = new_goal_pose
else:
update_reference = True
if update_reference:
current_solve_state = solve_state
current_goal_buffer = solve_state.create_goal_buffer(
goal_pose, goal_state, retract_config, link_poses, tensor_args
)
update_reference = True
else:
current_goal_buffer.goal_pose.copy_(goal_pose)
if retract_config is not None:
@@ -145,6 +155,7 @@ class ReacherSolveState:
if link_poses is not None:
for k in link_poses.keys():
current_goal_buffer.links_goal_pose[k].copy_(link_poses[k])
return current_solve_state, current_goal_buffer, update_reference
def update_goal(
@@ -155,17 +166,26 @@ class ReacherSolveState:
tensor_args: TensorDeviceType = TensorDeviceType(),
):
solve_state = self
update_reference = False
if (
current_solve_state is None
or current_goal_buffer is None
or current_solve_state != solve_state
or (current_goal_buffer.goal_state is None and goal.goal_state is not None)
or (current_goal_buffer.goal_state is not None and goal.goal_state is None)
):
# TODO: Check for change in update idx buffers, currently we assume
# that solve_state captures difference in idx buffers
update_reference = True
elif current_solve_state != solve_state:
new_goal_pose = get_padded_goalset(
solve_state, current_solve_state, current_goal_buffer, goal.goal_pose
)
if new_goal_pose is not None:
goal = goal.clone()
goal.goal_pose = new_goal_pose
else:
update_reference = True
if update_reference:
current_solve_state = solve_state
current_goal_buffer = goal.create_index_buffers(
solve_state.batch_size,
@@ -174,7 +194,6 @@ class ReacherSolveState:
solve_state.num_seeds,
tensor_args,
)
update_reference = True
else:
current_goal_buffer.copy_(goal, update_idx_buffers=False)
return current_solve_state, current_goal_buffer, update_reference
@@ -185,3 +204,92 @@ class MotionGenSolverState:
solve_type: ReacherSolveType
ik_solve_state: ReacherSolveState
trajopt_solve_state: ReacherSolveState
def get_padded_goalset(
solve_state: ReacherSolveState,
current_solve_state: ReacherSolveState,
current_goal_buffer: Goal,
new_goal_pose: Pose,
) -> Union[Pose, None]:
if (
current_solve_state.solve_type == ReacherSolveType.GOALSET
and solve_state.solve_type == ReacherSolveType.SINGLE
):
# convert single goal to goal set
# solve_state.solve_type = ReacherSolveType.GOALSET
# solve_state.n_goalset = current_solve_state.n_goalset
goal_pose = current_goal_buffer.goal_pose.clone()
goal_pose.position[:] = new_goal_pose.position
goal_pose.quaternion[:] = new_goal_pose.quaternion
return goal_pose
elif (
current_solve_state.solve_type == ReacherSolveType.BATCH_GOALSET
and solve_state.solve_type == ReacherSolveType.BATCH
and new_goal_pose.n_goalset <= current_solve_state.n_goalset
and new_goal_pose.batch == current_solve_state.batch_size
):
goal_pose = current_goal_buffer.goal_pose.clone()
if len(new_goal_pose.position.shape) == 2:
new_goal_pose = new_goal_pose.unsqueeze(1)
goal_pose.position[..., :, :] = new_goal_pose.position
goal_pose.quaternion[..., :, :] = new_goal_pose.quaternion
return goal_pose
elif (
current_solve_state.solve_type == ReacherSolveType.BATCH_ENV_GOALSET
and solve_state.solve_type == ReacherSolveType.BATCH_ENV
and new_goal_pose.n_goalset <= current_solve_state.n_goalset
and new_goal_pose.batch == current_solve_state.batch_size
):
goal_pose = current_goal_buffer.goal_pose.clone()
if len(new_goal_pose.position.shape) == 2:
new_goal_pose = new_goal_pose.unsqueeze(1)
goal_pose.position[..., :, :] = new_goal_pose.position
goal_pose.quaternion[..., :, :] = new_goal_pose.quaternion
return goal_pose
elif (
current_solve_state.solve_type == ReacherSolveType.GOALSET
and solve_state.solve_type == ReacherSolveType.GOALSET
and new_goal_pose.n_goalset <= current_solve_state.n_goalset
):
goal_pose = current_goal_buffer.goal_pose.clone()
goal_pose.position[..., : new_goal_pose.n_goalset, :] = new_goal_pose.position
goal_pose.quaternion[..., : new_goal_pose.n_goalset, :] = new_goal_pose.quaternion
goal_pose.position[..., new_goal_pose.n_goalset :, :] = new_goal_pose.position[..., :1, :]
goal_pose.quaternion[..., new_goal_pose.n_goalset :, :] = new_goal_pose.quaternion[
..., :1, :
]
return goal_pose
elif (
current_solve_state.solve_type == ReacherSolveType.BATCH_GOALSET
and solve_state.solve_type == ReacherSolveType.BATCH_GOALSET
and new_goal_pose.n_goalset <= current_solve_state.n_goalset
and new_goal_pose.batch == current_solve_state.batch_size
):
goal_pose = current_goal_buffer.goal_pose.clone()
goal_pose.position[..., : new_goal_pose.n_goalset, :] = new_goal_pose.position
goal_pose.quaternion[..., : new_goal_pose.n_goalset, :] = new_goal_pose.quaternion
goal_pose.position[..., new_goal_pose.n_goalset :, :] = new_goal_pose.position[..., :1, :]
goal_pose.quaternion[..., new_goal_pose.n_goalset :, :] = new_goal_pose.quaternion[
..., :1, :
]
return goal_pose
elif (
current_solve_state.solve_type == ReacherSolveType.BATCH_ENV_GOALSET
and solve_state.solve_type == ReacherSolveType.BATCH_ENV_GOALSET
and new_goal_pose.n_goalset <= current_solve_state.n_goalset
and new_goal_pose.batch == current_solve_state.batch_size
):
goal_pose = current_goal_buffer.goal_pose.clone()
goal_pose.position[..., : new_goal_pose.n_goalset, :] = new_goal_pose.position
goal_pose.quaternion[..., : new_goal_pose.n_goalset, :] = new_goal_pose.quaternion
goal_pose.position[..., new_goal_pose.n_goalset :, :] = new_goal_pose.position[..., :1, :]
goal_pose.quaternion[..., new_goal_pose.n_goalset :, :] = new_goal_pose.quaternion[
..., :1, :
]
return goal_pose
return None

View File

@@ -54,7 +54,7 @@ class WrapBase(WrapConfig):
def __init__(self, config: Optional[WrapConfig] = None):
if config is not None:
WrapConfig.__init__(self, **vars(config))
self.n_envs = 1
self.n_problems = 1
self.opt_dt = 0
self._rollout_list = None
self._opt_rollouts = None
@@ -83,11 +83,11 @@ class WrapBase(WrapConfig):
debug_list.append(opt.debug_cost)
return debug_list
def update_nenvs(self, n_envs):
if n_envs != self.n_envs:
self.n_envs = n_envs
def update_nproblems(self, n_problems):
if n_problems != self.n_problems:
self.n_problems = n_problems
for opt in self.optimizers:
opt.update_nenvs(self.n_envs)
opt.update_nproblems(self.n_problems)
def update_params(self, goal: Goal):
with profiler.record_function("wrap_base/safety/update_params"):
@@ -117,6 +117,12 @@ class WrapBase(WrapConfig):
opt.reset_cuda_graph()
self._init_solver = False
def reset_shape(self):
self.safety_rollout.reset_shape()
for opt in self.optimizers:
opt.reset_shape()
self._init_solver = False
@property
def rollout_fn(self):
return self.safety_rollout