Isaac Sim 4.0 support, Kinematics API doc, Windows support

2024-07-20 14:51:43 -07:00
parent 2ae381f328
commit 3690d28c54
83 changed files with 2818 additions and 497 deletions
--- a/src/curobo/curobolib/cpp/kinematics_fused_kernel.cu
+++ b/src/curobo/curobolib/cpp/kinematics_fused_kernel.cu
@@ -67,6 +67,20 @@ namespace Curobo
      q[0] = length * q[0];
    }

+
+    __device__ __forceinline__ void normalize_quaternion(float4 &q)
+    {
+      // get length:
+      float inv_length = 1.0 / length(q);
+
+      if (q.w < 0.0)
+      {
+        inv_length = -1.0 * inv_length;
+      }
+      q = inv_length * q;
+
+    }
+
    /**
     * @brief get quaternion from transformation matrix
     *
@@ -129,8 +143,9 @@ namespace Curobo
     * @param t # rotation matrix 3x3
     * @param q quaternion in wxyz format
     */
-    __device__ __forceinline__ void rot_to_quat(float *t, float *q)
+    __device__ __forceinline__ void rot_to_quat(float *t, float4 &q)
    {
+      // q.x = w, q.y = x, q.z = y,  q.w = z,
      float n;
      float n_sqrt;

@@ -140,19 +155,19 @@ namespace Curobo
        {
          n      = 1 + t[0] - t[4] - t[8];
          n_sqrt = 0.5 * rsqrtf(n);
-          q[1]   = n * n_sqrt;
-          q[2]   = (t[1] + t[3]) * n_sqrt;
-          q[3]   = (t[6] + t[2]) * n_sqrt;
-          q[0]   = -1 * (t[5] - t[7]) * n_sqrt; // * -1 ; // this is the wrong one?
+          q.x   = n * n_sqrt;
+          q.y   = (t[1] + t[3]) * n_sqrt;
+          q.z   = (t[6] + t[2]) * n_sqrt;
+          q.w   = -1 * (t[5] - t[7]) * n_sqrt; // * -1 ; // this is the wrong one?
        }
        else
        {
          n      = 1 - t[0] + t[4] - t[8];
          n_sqrt = 0.5 * rsqrtf(n);
-          q[1]   = (t[1] + t[3]) * n_sqrt;
-          q[2]   = n * n_sqrt;
-          q[3]   = (t[5] + t[7]) * n_sqrt;
-          q[0]   = -1 * (t[6] - t[2]) * n_sqrt;
+          q.x   = (t[1] + t[3]) * n_sqrt;
+          q.y   = n * n_sqrt;
+          q.z   = (t[5] + t[7]) * n_sqrt;
+          q.w   = -1 * (t[6] - t[2]) * n_sqrt;
        }
      }
      else
@@ -161,19 +176,19 @@ namespace Curobo
        {
          n      = 1 - t[0] - t[4] + t[8];
          n_sqrt = 0.5 * rsqrtf(n);
-          q[1]   = (t[6] + t[2]) * n_sqrt;
-          q[2]   = (t[5] + t[7]) * n_sqrt;
-          q[3]   = n * n_sqrt;
-          q[0]   = -1 * (t[1] - t[3]) * n_sqrt;
+          q.x   = (t[6] + t[2]) * n_sqrt;
+          q.y   = (t[5] + t[7]) * n_sqrt;
+          q.z   = n * n_sqrt;
+          q.w   = -1 * (t[1] - t[3]) * n_sqrt;
        }
        else
        {
          n      = 1 + t[0] + t[4] + t[8];
          n_sqrt = 0.5 * rsqrtf(n);
-          q[1]   = (t[5] - t[7]) * n_sqrt;
-          q[2]   = (t[6] - t[2]) * n_sqrt;
-          q[3]   = (t[1] - t[3]) * n_sqrt;
-          q[0]   = -1 * n * n_sqrt;
+          q.x   = (t[5] - t[7]) * n_sqrt;
+          q.y   = (t[6] - t[2]) * n_sqrt;
+          q.z   = (t[1] - t[3]) * n_sqrt;
+          q.w   = -1 * n * n_sqrt;
        }
      }
      normalize_quaternion(q);
@@ -244,6 +259,23 @@ namespace Curobo
      C[3] = sphere_pos.w;
    }

+    template<typename scalar_t>
+    __device__ __forceinline__ void
+    transform_sphere_float4(const float *transform_mat, const scalar_t *sphere, float4 &C)
+    {
+      float4 sphere_pos = *(float4 *)&sphere[0];
+      int    st_idx     = 0;
+
+      C.x = transform_mat[0] * sphere_pos.x + transform_mat[1] * sphere_pos.y +
+            transform_mat[2] * sphere_pos.z + transform_mat[3];
+      C.y = transform_mat[4] * sphere_pos.x + transform_mat[5] * sphere_pos.y +
+            transform_mat[6] * sphere_pos.z + transform_mat[7];
+      C.z = transform_mat[8] * sphere_pos.x + transform_mat[9] * sphere_pos.y +
+            transform_mat[10] * sphere_pos.z + transform_mat[11];
+      C.w = sphere_pos.w;
+
+    }
+
    template<typename scalar_t>
    __device__ __forceinline__ void fixed_joint_fn(const scalar_t *fixedTransform,
                                                   float          *JM)
@@ -440,7 +472,7 @@ namespace Curobo
      e_pos.z = cumul_mat[4 + 4 + 3];

      // compute position gradient:
-      j_pos = *(float3 *)&l_pos[0] - e_pos;            // - e_pos;
+      j_pos = make_float3(l_pos[0], l_pos[1], l_pos[2]) - e_pos;            // - e_pos;
      float3 scale_grad = axis_sign * loc_grad;
      scale_cross_sum(vec, j_pos, scale_grad, grad_q); // cross product
    }
@@ -688,7 +720,7 @@ namespace Curobo
        for (int i = 0; i < M; i++)
        {
          cumul_mat[outAddrStart + (i * M) + col_idx] =
-            dot(*(float4 *)&cumul_mat[inAddrStart + (i * M)], *(float4 *)JM);
+            dot(*(float4 *)&cumul_mat[inAddrStart + (i * M)], make_float4(JM[0], JM[1], JM[2], JM[3]));
        }

        if (use_global_cumul)
@@ -723,13 +755,21 @@ namespace Curobo

        // read cumul idx:
        read_cumul_idx = linkSphereMap[sph_idx];
-        float spheres_mem[4];
+        float4 spheres_mem = make_float4(0.0, 0.0, 0.0, 0.0);
        const int16_t sphAddrs = sph_idx * 4;

-        transform_sphere(&cumul_mat[matAddrBase + read_cumul_idx * 16],
-                         &robot_spheres[sphAddrs], &spheres_mem[0]);
-        *(float4 *)&b_robot_spheres[batchAddrs + sphAddrs] =
-          *(float4 *)&spheres_mem[0];
+        transform_sphere_float4(&cumul_mat[matAddrBase + (read_cumul_idx * 16)],
+                         &robot_spheres[sphAddrs], spheres_mem);
+
+        //b_robot_spheres[batchAddrs + sphAddrs] = spheres_mem[0];
+        //b_robot_spheres[batchAddrs + sphAddrs + 1] = spheres_mem[1];
+        //b_robot_spheres[batchAddrs + sphAddrs + 2] = spheres_mem[2];
+        //b_robot_spheres[batchAddrs + sphAddrs + 3] = spheres_mem[3];
+
+        //float4 test_sphere =  *(float4 *)&spheres_mem[0];// make_float4(spheres_mem[0],spheres_mem[1],spheres_mem[2],spheres_mem[3]);
+
+        *(float4 *)&b_robot_spheres[batchAddrs + sphAddrs] = spheres_mem;
+
      }

      // write position and rotation, we convert rotation matrix to a quaternion and
@@ -857,7 +897,7 @@ namespace Curobo
          // fetch one row of cumul_mat, multiply with a column, which is in JM
          cumul_mat[outAddrStart + elem_idx] =
            dot(*(float4 *)&cumul_mat[inAddrStart + ((elem_idx / 4) * M)],
-                *(float4 *)JM);
+                make_float4(JM[0], JM[1], JM[2], JM[3]));
        }
      }

@@ -906,7 +946,7 @@ namespace Curobo

        // read cumul idx:
        read_cumul_idx = linkSphereMap[sph_idx];
-        float spheres_mem[4];
+        float spheres_mem[4] = {0.0,0.0,0.0,0.0};
        transform_sphere(&cumul_mat[matAddrBase + read_cumul_idx * 16],
                         &robotSpheres[sphAddrs], &spheres_mem[0]);

@@ -925,7 +965,7 @@ namespace Curobo

          int j_type = jointMapType[j];

-          
+
          if (j_type == Z_ROT)
          {
            float result = 0.0;
@@ -1137,19 +1177,26 @@ namespace Curobo
      {
        return;
      }
-      float q[4] = { 0.0 }; // initialize array
-
+      //float q[4] = { 0.0 }; // initialize array
+      float4 q = make_float4(0,0,0,0);
      float rot[9];

      // read rot
-      *(float3 *)&rot[0] = *(float3 *)&in_rot_mat[batch_idx * 9];
-      *(float3 *)&rot[3] = *(float3 *)&in_rot_mat[batch_idx * 9 + 3];
-      *(float3 *)&rot[6] = *(float3 *)&in_rot_mat[batch_idx * 9 + 6];
+      #pragma unroll 9
+      for (int k = 0; k<9; k++)
+      {
+        rot[k] = in_rot_mat[batch_idx * 9 + k];
+      }

-      rot_to_quat(&rot[0], &q[0]);
+      // *(float3 *)&rot[0] = *(float3 *)&in_rot_mat[batch_idx * 9];
+      // *(float3 *)&rot[3] = *(float3 *)&in_rot_mat[batch_idx * 9 + 3];
+      // *(float3 *)&rot[6] = *(float3 *)&in_rot_mat[batch_idx * 9 + 6];
+
+      rot_to_quat(&rot[0], q);

      // write quaternion:
-      *(float4 *)&out_quat[batch_idx * 4] = *(float4 *)&q[0];
+
+      *(float4 *)&out_quat[batch_idx * 4] = q;
    }
  } // namespace Kinematics
 }   // namespace Curobo
@@ -1161,8 +1208,8 @@ std::vector<torch::Tensor>kin_fused_forward(
  const torch::Tensor robot_spheres, const torch::Tensor link_map,
  const torch::Tensor joint_map, const torch::Tensor joint_map_type,
  const torch::Tensor store_link_map, const torch::Tensor link_sphere_map,
-  const torch::Tensor joint_offset_map, 
-  const int batch_size, 
+  const torch::Tensor joint_offset_map,
+  const int batch_size,
  const int n_joints,const int n_spheres,
  const bool use_global_cumul = false)
 {
@@ -1179,9 +1226,8 @@ std::vector<torch::Tensor>kin_fused_forward(
  CHECK_INPUT(joint_map_type);
  CHECK_INPUT(store_link_map);
  CHECK_INPUT(link_sphere_map);
-  //CHECK_INPUT(link_chain_map);
  CHECK_INPUT(joint_offset_map);
-  
+
  const int n_links       = link_map.size(0);
  const int store_n_links = link_pos.size(1);
  assert(joint_map.dtype() == torch::kInt16);
@@ -1232,7 +1278,7 @@ std::vector<torch::Tensor>kin_fused_forward(
        joint_map_type.data_ptr<int8_t>(),
        joint_map.data_ptr<int16_t>(), link_map.data_ptr<int16_t>(),
        store_link_map.data_ptr<int16_t>(),
-        link_sphere_map.data_ptr<int16_t>(), 
+        link_sphere_map.data_ptr<int16_t>(),
        joint_offset_map.data_ptr<float>(),
        batch_size, n_spheres,
        n_links, n_joints, store_n_links);
@@ -1253,7 +1299,7 @@ std::vector<torch::Tensor>kin_fused_forward(
        joint_map_type.data_ptr<int8_t>(),
        joint_map.data_ptr<int16_t>(), link_map.data_ptr<int16_t>(),
        store_link_map.data_ptr<int16_t>(),
-        link_sphere_map.data_ptr<int16_t>(), 
+        link_sphere_map.data_ptr<int16_t>(),
        joint_offset_map.data_ptr<float>(),
        batch_size, n_spheres,
        n_links, n_joints, store_n_links);
@@ -1285,7 +1331,7 @@ std::vector<torch::Tensor>kin_fused_backward_16t(
  using namespace Curobo::Kinematics;
  CHECK_INPUT_GUARD(joint_vec);
  CHECK_INPUT(grad_out);
-  CHECK_INPUT(grad_nlinks_pos);  
+  CHECK_INPUT(grad_nlinks_pos);
  CHECK_INPUT(grad_nlinks_quat);
  CHECK_INPUT(global_cumul_mat);
  CHECK_INPUT(fixed_transform);
@@ -1297,10 +1343,10 @@ std::vector<torch::Tensor>kin_fused_backward_16t(
  CHECK_INPUT(link_sphere_map);
  CHECK_INPUT(link_chain_map);
  CHECK_INPUT(joint_offset_map);
-  
+
  const int n_links       = link_map.size(0);
  const int store_n_links = store_link_map.size(0);
-  
+
  // assert(n_links < 128);
  assert(n_joints < 128); // for larger num. of joints, change kernel3's
                          // MAX_JOINTS template value.
@@ -1362,7 +1408,7 @@ std::vector<torch::Tensor>kin_fused_backward_16t(
          joint_map.data_ptr<int16_t>(), link_map.data_ptr<int16_t>(),
          store_link_map.data_ptr<int16_t>(),
          link_sphere_map.data_ptr<int16_t>(),
-          link_chain_map.data_ptr<int16_t>(), 
+          link_chain_map.data_ptr<int16_t>(),
          joint_offset_map.data_ptr<float>(),
          batch_size, n_spheres,
          n_links, n_joints, store_n_links);
@@ -1386,7 +1432,7 @@ std::vector<torch::Tensor>kin_fused_backward_16t(
          joint_map.data_ptr<int16_t>(), link_map.data_ptr<int16_t>(),
          store_link_map.data_ptr<int16_t>(),
          link_sphere_map.data_ptr<int16_t>(),
-          link_chain_map.data_ptr<int16_t>(), 
+          link_chain_map.data_ptr<int16_t>(),
          joint_offset_map.data_ptr<float>(),
          batch_size, n_spheres,
          n_links, n_joints, store_n_links);
@@ -1410,7 +1456,7 @@ std::vector<torch::Tensor>kin_fused_backward_16t(
          joint_map.data_ptr<int16_t>(), link_map.data_ptr<int16_t>(),
          store_link_map.data_ptr<int16_t>(),
          link_sphere_map.data_ptr<int16_t>(),
-          link_chain_map.data_ptr<int16_t>(), 
+          link_chain_map.data_ptr<int16_t>(),
          joint_offset_map.data_ptr<float>(),
          batch_size, n_spheres,
          n_links, n_joints, store_n_links);
@@ -1435,11 +1481,11 @@ std::vector<torch::Tensor>kin_fused_backward_16t(
        fixed_transform.data_ptr<float>(),
        robot_spheres.data_ptr<float>(),
        joint_map_type.data_ptr<int8_t>(),
-        joint_map.data_ptr<int16_t>(), 
+        joint_map.data_ptr<int16_t>(),
        link_map.data_ptr<int16_t>(),
        store_link_map.data_ptr<int16_t>(),
        link_sphere_map.data_ptr<int16_t>(),
-        link_chain_map.data_ptr<int16_t>(), 
+        link_chain_map.data_ptr<int16_t>(),
        joint_offset_map.data_ptr<float>(),
        batch_size, n_spheres,
        n_links, n_joints, store_n_links);
--- a/src/curobo/curobolib/cpp/pose_distance_kernel.cu
+++ b/src/curobo/curobolib/cpp/pose_distance_kernel.cu
@@ -434,15 +434,25 @@ namespace Curobo
                                                                                // is ? horizon, not
                                                                                // in this mode
      float d_vec_weight[6] = { 0.0 };
-      *(float3 *)&d_vec_weight[0] = *(float3 *)&vec_weight[0];
-      *(float3 *)&d_vec_weight[3] = *(float3 *)&vec_weight[3];
+      #pragma unroll 6
+      for (int k = 0; k < 6; k++)
+      {
+        d_vec_weight[k] = vec_weight[k];
+      }
+      //*(float3 *)&d_vec_weight[0] = *(float3 *)&vec_weight[0]; // TODO
+      //*(float3 *)&d_vec_weight[3] = *(float3 *)&vec_weight[3];
      float3 offset_rotation = *(float3 *)&offset_waypoint[0];
      float3 offset_position = *(float3 *)&offset_waypoint[3];

      if ((h_idx < horizon - 1) && (h_idx != horizon - offset_tstep))
      {
-        *(float3 *)&d_vec_weight[0] *= *(float3 *)&run_vec_weight[0];
-        *(float3 *)&d_vec_weight[3] *= *(float3 *)&run_vec_weight[3];
+        #pragma unroll 6
+        for (int k = 0; k < 6; k++)
+        {
+          d_vec_weight[k] *= run_vec_weight[k];
+        }
+        //*(float3 *)&d_vec_weight[0] *= *(float3 *)&run_vec_weight[0];
+        //*(float3 *)&d_vec_weight[3] *= *(float3 *)&run_vec_weight[3];
      }

      if (!write_distance)
@@ -450,8 +460,8 @@ namespace Curobo
        position_weight *= run_weight[h_idx];
        rotation_weight *= run_weight[h_idx];
        float sum_weight = 0;
-    #pragma unroll 6

+        #pragma unroll 6
        for (int i = 0; i < 6; i++)
        {
          sum_weight += d_vec_weight[i];
@@ -480,7 +490,9 @@ namespace Curobo
      float  best_distance_vec[6]   = { 0.0 };
      float  d_vec_convergence[2];

-      *(float2 *)&d_vec_convergence[0] = *(float2 *)&vec_convergence[0];
+      //*(float2 *)&d_vec_convergence[0] = *(float2 *)&vec_convergence[0]; // TODO
+      d_vec_convergence[0] = vec_convergence[0];
+      d_vec_convergence[1] = vec_convergence[1];

      int best_idx = -1;

--- a/src/curobo/curobolib/cpp/self_collision_kernel.cu
+++ b/src/curobo/curobolib/cpp/self_collision_kernel.cu
@@ -131,7 +131,7 @@ namespace Curobo
      int i         = ndpt * (warp_idx / nwpr); // starting row number for this warp
      int j         = (warp_idx % nwpr) * 32;   // starting column number for this warp

-      dist_t max_d = { .d = 0.0, .i = 0, .j = 0 };
+      dist_t max_d = {0.0, 0.0, 0.0 };// .d, .i, .j
      __shared__ dist_t max_darr[32];

      // Optimization: About 1/3 of the warps will have no work.
@@ -354,7 +354,7 @@ namespace Curobo
      // in registers (max_d).
      // Each thread computes upto ndpt distances.
      //////////////////////////////////////////////////////
-      dist_t  max_d[NBPB] = {{ .d = 0.0, .i = 0, .j = 0 } };
+      dist_t  max_d[NBPB] = {{ 0.0, 0.0, 0.0}};
      int16_t indices[ndpt * 2];

      for (uint8_t i = 0; i < ndpt * 2; i++)
@@ -710,9 +710,7 @@ std::vector<torch::Tensor>self_collision_distance(
    threadsPerBlock = warpsPerBlock * 32;
    blocksPerGrid   = batch_size;

-    // printf("Blocks: %d, threads/block:%d, ndpt=%d, nwpr=%d\n", blocksPerGrid,
-    // threadsPerBlock, ndpt, nwpr);
-
+    assert(collision_matrix.size(0) == nspheres * nspheres);
    int smemSize = nspheres * sizeof(float4);


--- a/src/curobo/curobolib/geom.py
+++ b/src/curobo/curobolib/geom.py
@@ -104,7 +104,7 @@ class SelfCollisionDistance(torch.autograd.Function):
            robot_spheres,  # .view(-1, 4),
            sphere_offset,
            weight,
-            coll_matrix,
+            coll_matrix.view(-1),
            thread_locations,
            max_thread,
            b * h,
--- a/src/curobo/curobolib/kinematics.py
+++ b/src/curobo/curobolib/kinematics.py
@@ -74,20 +74,20 @@ class KinematicsFusedFunction(Function):
            link_pos,
            link_quat,
            b_robot_spheres,
-            global_cumul_mat,
+            global_cumul_mat.view(-1),
            joint_seq,
-            fixed_transform,
-            robot_spheres,
+            fixed_transform.view(-1),
+            robot_spheres.view(-1),
            link_map,
            joint_map,
-            joint_map_type,
+            joint_map_type.view(-1),
            store_link_map,
            link_sphere_map,
-            joint_offset_map,  # offset_joint_map
+            joint_offset_map,
            b_size,
            n_joints,
            n_spheres,
-            True,
+            use_global_cumul,
        )
        out_link_pos = r[0]
        out_link_quat = r[1]