From 14b554bf36e499ea1f723a47cd5fd4b456024f1a Mon Sep 17 00:00:00 2001 From: Balakumar Sundaralingam Date: Fri, 28 Jun 2024 18:25:07 -0700 Subject: [PATCH] Fix warp kernel error in isaac sim --- CHANGELOG.md | 3 +++ src/curobo/rollout/cost/bound_cost.py | 20 ++++++++++++++------ src/curobo/rollout/cost/dist_cost.py | 11 +++++++++-- src/curobo/util/warp.py | 15 +++++++++++++++ src/curobo/util_file.py | 2 ++ src/curobo/wrap/reacher/motion_gen.py | 5 ++++- 6 files changed, 47 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37a211f..8d7b401 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ its affiliates is strictly prohibited. newer warp versions. - Add override option to mpc dataclass. - Fix bug in ``PoseCost.forward_pose()`` which caused ``torch_layers_example.py`` to fail. +- Add warp constants to make module hash depend on robot DOF, for modules that generate runtime +warp kernels. This fixes issues when using cuRobo in Isaac Sim. +- Add ``plan_config.timeout`` check to ``plan_single_js()``.
## Version 0.7.3 diff --git a/src/curobo/rollout/cost/bound_cost.py b/src/curobo/rollout/cost/bound_cost.py index 8fe6509..625f24a 100644 --- a/src/curobo/rollout/cost/bound_cost.py +++ b/src/curobo/rollout/cost/bound_cost.py @@ -23,7 +23,7 @@ from curobo.types.robot import JointState from curobo.types.tensor import T_DOF from curobo.util.logger import log_error from curobo.util.torch_utils import get_cache_fn_decorator, get_torch_jit_decorator -from curobo.util.warp import init_warp +from curobo.util.warp import init_warp, warp_support_kernel_key # Local Folder from .cost_base import CostBase, CostConfig @@ -106,8 +106,15 @@ class BoundCost(CostBase, BoundCostConfig): ) self._out_gv_buffer = self._out_ga_buffer = self._out_gj_buffer = empty_buffer if self.use_l2_kernel: + if not warp_support_kernel_key(): + # define a compile-time constant so that warp hash is different for different dof + # this is required in older warp versions < 1.2.1 as warp hash didn't consider the + # name of kernels. Newer warp versions have fixed this issue. 
+ WARP_CUROBO_BOUNDCOST_DOF_GLOBAL_CONSTANT = wp.constant(self.dof) + if self.cost_type == BoundCostType.POSITION: self._l2_cost = make_bound_pos_kernel(self.dof) + if self.cost_type == BoundCostType.BOUNDS_SMOOTH: self._l2_cost = make_bound_pos_smooth_kernel(self.dof) @@ -1552,8 +1559,8 @@ def make_bound_pos_smooth_kernel(dof_template: int): module = wp.get_module(forward_bound_smooth_loop_warp.__module__) key = "forward_bound_smooth_loop_warp_" + str(dof_template) - - return wp.Kernel(forward_bound_smooth_loop_warp, key=key, module=module) + new_kernel = wp.Kernel(forward_bound_smooth_loop_warp, key=key, module=module) + return new_kernel @get_cache_fn_decorator() @@ -1650,6 +1657,7 @@ def make_bound_pos_kernel(dof_template: int): for i in range(dof_template): out_grad_p[b_addrs + i] = g_p[i] - module = wp.get_module(forward_bound_pos_loop_warp.__module__) - key = "forward_bound_pos_loop_warp_" + str(dof_template) - return wp.Kernel(forward_bound_pos_loop_warp, key=key, module=module) + wp_module = wp.get_module(forward_bound_pos_loop_warp.__module__) + key = "bound_pos_loop_warp_" + str(dof_template) + new_kernel = wp.Kernel(forward_bound_pos_loop_warp, key=key, module=wp_module) + return new_kernel diff --git a/src/curobo/rollout/cost/dist_cost.py b/src/curobo/rollout/cost/dist_cost.py index 50c4543..7434bc0 100644 --- a/src/curobo/rollout/cost/dist_cost.py +++ b/src/curobo/rollout/cost/dist_cost.py @@ -20,7 +20,7 @@ import warp as wp # CuRobo from curobo.util.logger import log_error from curobo.util.torch_utils import get_cache_fn_decorator, get_torch_jit_decorator -from curobo.util.warp import init_warp +from curobo.util.warp import init_warp, warp_support_kernel_key # Local Folder from .cost_base import CostBase, CostConfig @@ -226,7 +226,8 @@ def make_l2_kernel(dof_template: int): module = wp.get_module(forward_l2_loop_warp.__module__) key = "forward_l2_loop" + str(dof_template) - return wp.Kernel(forward_l2_loop_warp, key=key, module=module) + new_kernel = 
wp.Kernel(forward_l2_loop_warp, key=key, module=module) + return new_kernel # create a bound cost tensor: @@ -342,6 +343,12 @@ class DistCost(CostBase, DistCostConfig): self._init_post_config() init_warp() if self.use_l2_kernel: + if not warp_support_kernel_key(): + # define a compile-time constant so that warp hash is different for different dof + # this is required in older warp versions < 1.2.1 as warp hash didn't consider the + # name of kernels. Newer warp versions have fixed this issue. + WARP_CUROBO_DISTCOST_DOF_GLOBAL_CONSTANT = wp.constant(self.dof) + self._l2_dof_kernel = make_l2_kernel(self.dof) def _init_post_config(self): diff --git a/src/curobo/util/warp.py b/src/curobo/util/warp.py index c2fa902..8a39a17 100644 --- a/src/curobo/util/warp.py +++ b/src/curobo/util/warp.py @@ -45,3 +45,18 @@ def warp_support_sdf_struct(wp_module=None): ) return False return True + + +def warp_support_kernel_key(wp_module=None): + if wp_module is None: + wp_module = wp + wp_version = wp_module.config.version + + if version.parse(wp_version) < version.parse("1.2.1"): + log_info( + "Warp version is " + + wp_version + + " < 1.2.1, using, creating global constant to trigger kernel generation." + ) + return False + return True diff --git a/src/curobo/util_file.py b/src/curobo/util_file.py index f6ef5fe..b50889e 100644 --- a/src/curobo/util_file.py +++ b/src/curobo/util_file.py @@ -155,6 +155,8 @@ def get_files_from_dir(dir_path, extension: List[str], contains: str): def file_exists(path): + if path is None: + return False isExist = os.path.exists(path) return isExist diff --git a/src/curobo/wrap/reacher/motion_gen.py b/src/curobo/wrap/reacher/motion_gen.py index 543d1f0..3224294 100644 --- a/src/curobo/wrap/reacher/motion_gen.py +++ b/src/curobo/wrap/reacher/motion_gen.py @@ -123,7 +123,6 @@ class MotionGenConfig: #: instance of trajectory optimization solver for final fine tuning for joint space targets. 
finetune_js_trajopt_solver: TrajOptSolver - #: instance of trajectory optimization solver for final fine tuning. finetune_trajopt_solver: TrajOptSolver @@ -2031,6 +2030,8 @@ class MotionGen(MotionGenConfig): attribute to see if the query was successful. """ + start_time = time.time() + time_dict = { "solve_time": 0, "ik_time": 0, @@ -2083,6 +2084,8 @@ class MotionGen(MotionGenConfig): break if not result.valid_query: break + if time.time() - start_time > plan_config.timeout: + break result.graph_time = time_dict["graph_time"] result.finetune_time = time_dict["finetune_time"]