update to 0.6.2
@@ -24,9 +24,16 @@ from curobo.util.logger import log_warn

 # kernel for l-bfgs:
-@torch.jit.script
+# @torch.jit.script
 def compute_step_direction(
-    alpha_buffer, rho_buffer, y_buffer, s_buffer, grad_q, m: int, epsilon, stable_mode: bool = True
+    alpha_buffer,
+    rho_buffer,
+    y_buffer,
+    s_buffer,
+    grad_q,
+    m: int,
+    epsilon: float,
+    stable_mode: bool = True,
 ):
     # m = 15 (int)
     # y_buffer, s_buffer: m x b x 175

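The reformatted signature belongs to the L-BFGS step-direction routine. For orientation, a minimal sketch of the textbook two-loop recursion such a routine computes is shown below; the history-major buffer layout (oldest pair first) and the use of epsilon as a divide guard are assumptions for illustration, not curobo's CUDA kernel or JIT implementation.

import torch


def lbfgs_two_loop(grad_q, s_buffer, y_buffer, rho_buffer, epsilon: float = 1e-10):
    # grad_q: b x d, s_buffer / y_buffer: m x b x d (oldest history pair first),
    # rho_buffer: m x b x 1 with rho_i = 1 / (s_i . y_i). Returns the step direction, b x d.
    m = s_buffer.shape[0]
    q = grad_q.clone()
    alpha = torch.zeros_like(rho_buffer)
    # first loop: newest history entry down to the oldest
    for i in range(m - 1, -1, -1):
        alpha[i] = rho_buffer[i] * (s_buffer[i] * q).sum(-1, keepdim=True)
        q = q - alpha[i] * y_buffer[i]
    # initial Hessian scaling from the newest pair: gamma = (s . y) / (y . y)
    gamma = (s_buffer[-1] * y_buffer[-1]).sum(-1, keepdim=True) / (
        (y_buffer[-1] * y_buffer[-1]).sum(-1, keepdim=True) + epsilon
    )
    r = gamma * q
    # second loop: oldest to newest
    for i in range(m):
        beta = rho_buffer[i] * (y_buffer[i] * r).sum(-1, keepdim=True)
        r = r + (alpha[i] - beta) * s_buffer[i]
    return -r  # descent direction
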
@@ -70,12 +77,12 @@ class LBFGSOpt(NewtonOptBase, LBFGSOptConfig):
         if config is not None:
             LBFGSOptConfig.__init__(self, **vars(config))
         NewtonOptBase.__init__(self)
-        if self.d_opt >= 1024 or self.history >= 512:
-            log_warn("LBFGS: Not using LBFGS Cuda Kernel as d_opt>1024 or history>=512")
+        if self.d_opt >= 1024 or self.history > 15:
+            log_warn("LBFGS: Not using LBFGS Cuda Kernel as d_opt>1024 or history>15")
             self.use_cuda_kernel = False
-        if self.history > self.d_opt:
+        if self.history >= self.d_opt:
             log_warn("LBFGS: history >= d_opt, reducing history to d_opt-1")
-            self.history = self.d_opt
+            self.history = self.d_opt - 1

     @profiler.record_function("lbfgs/reset")
     def reset(self):

@@ -72,7 +72,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
     ):
         if config is not None:
             NewtonOptConfig.__init__(self, **vars(config))
-        self.d_opt = self.horizon * self.d_action
+        self.d_opt = self.action_horizon * self.d_action
         self.line_scale = self._create_box_line_search(self.line_search_scale)
         Optimizer.__init__(self)
         self.i = -1

@@ -84,8 +84,8 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
         self.reset()

         # reshape action lows and highs:
-        self.action_lows = self.action_lows.repeat(self.horizon)
-        self.action_highs = self.action_highs.repeat(self.horizon)
+        self.action_lows = self.action_lows.repeat(self.action_horizon)
+        self.action_highs = self.action_highs.repeat(self.action_horizon)
         self.action_range = self.action_highs - self.action_lows
         self.action_step_max = self.step_scale * torch.abs(self.action_range)
         self.c_1 = 1e-5

@@ -99,10 +99,13 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
             self.use_cuda_line_search_kernel = False
         if self.use_temporal_smooth:
             self._temporal_mat = build_fd_matrix(
-                self.horizon, order=2, device=self.tensor_args.device, dtype=self.tensor_args.dtype
+                self.action_horizon,
+                order=2,
+                device=self.tensor_args.device,
+                dtype=self.tensor_args.dtype,
             ).unsqueeze(0)
             eye_mat = torch.eye(
-                self.horizon, device=self.tensor_args.device, dtype=self.tensor_args.dtype
+                self.action_horizon, device=self.tensor_args.device, dtype=self.tensor_args.dtype
             ).unsqueeze(0)
             self._temporal_mat += eye_mat

@@ -130,9 +133,9 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
         self._shift(shift_steps)
         # reshape q:
         if self.store_debug:
-            self.debug.append(q.view(-1, self.horizon, self.d_action).clone())
+            self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
         with profiler.record_function("newton_base/init_opt"):
-            q = q.view(self.n_envs, self.horizon * self.d_action)
+            q = q.view(self.n_envs, self.action_horizon * self.d_action)
             grad_q = q.detach() * 0.0
         # run opt graph
         if not self.cu_opt_init:

@@ -147,7 +150,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
             if check_convergence(self.best_iteration, self.current_iteration, self.last_best):
                 break

-        best_q = best_q.view(self.n_envs, self.horizon, self.d_action)
+        best_q = best_q.view(self.n_envs, self.action_horizon, self.d_action)
         return best_q

     def reset(self):

@@ -166,8 +169,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
             self.i += 1
             cost_n, q, grad_q = self._opt_step(q.detach(), grad_q.detach())
             if self.store_debug:
-                self.debug.append(self.best_q.view(-1, self.horizon, self.d_action).clone())
+                self.debug.append(self.best_q.view(-1, self.action_horizon, self.d_action).clone())
                 self.debug_cost.append(self.best_cost.detach().view(-1, 1).clone())
+                # self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
+                # self.debug_cost.append(cost_n.detach().view(-1, 1).clone())
+                # print(grad_q)

         return self.best_q.detach(), self.best_cost.detach(), q.detach(), grad_q.detach()

@@ -186,9 +192,9 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):

     def scale_step_direction(self, dx):
         if self.use_temporal_smooth:
-            dx_v = dx.view(-1, self.horizon, self.d_action)
+            dx_v = dx.view(-1, self.action_horizon, self.d_action)
             dx_new = self._temporal_mat @ dx_v  # 1,h,h x b, h, dof -> b, h, dof
-            dx = dx_new.view(-1, self.horizon * self.d_action)
+            dx = dx_new.view(-1, self.action_horizon * self.d_action)
         dx_scaled = scale_action(dx, self.action_step_max)

         return dx_scaled

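The two temporal-smoothing hunks above build `_temporal_mat` from `build_fd_matrix(..., order=2)` plus an identity and then multiply it into the step direction. A standalone sketch of that idea follows, with a simplified second-order difference matrix (boundary handling and the real build_fd_matrix options are omitted, and all sizes are made up).

import torch


def fd_matrix_sketch(h: int, dtype=torch.float32) -> torch.Tensor:
    # Interior rows apply the second-order stencil [1, -2, 1]; boundary rows stay zero here.
    mat = torch.zeros(h, h, dtype=dtype)
    for i in range(1, h - 1):
        mat[i, i - 1] = 1.0
        mat[i, i] = -2.0
        mat[i, i + 1] = 1.0
    return mat


action_horizon, d_action, batch = 30, 7, 4
temporal_mat = (fd_matrix_sketch(action_horizon) + torch.eye(action_horizon)).unsqueeze(0)
dx = torch.randn(batch, action_horizon, d_action)
dx_smooth = temporal_mat @ dx  # 1,h,h x b,h,dof -> b,h,dof
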
@@ -216,11 +222,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
     def _compute_cost_gradient(self, x):
         x_n = x.detach().requires_grad_(True)
         x_in = x_n.view(
-            self.n_envs * self.num_particles, self.rollout_fn.horizon, self.rollout_fn.d_action
+            self.n_envs * self.num_particles, self.action_horizon, self.rollout_fn.d_action
         )
         trajectories = self.rollout_fn(x_in)  # x_n = (batch*line_search_scale) x horizon x d_action
         cost = torch.sum(
-            trajectories.costs.view(self.n_envs, self.num_particles, self.rollout_fn.horizon),
+            trajectories.costs.view(self.n_envs, self.num_particles, self.horizon),
             dim=-1,
             keepdim=True,
         )

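This hunk shows the distinction that drives most of the change set: the optimization variables now span `action_horizon` steps while the rollout costs still span the full `horizon`. A shape-only illustration under made-up sizes:

import torch

n_envs, num_particles, d_action = 2, 8, 7
horizon, action_horizon = 32, 30  # assumed values for illustration

# optimizer variables cover only the action horizon ...
x_in = torch.randn(n_envs * num_particles, action_horizon, d_action)
# ... while the rollout produces one cost per rollout step
costs = torch.randn(n_envs * num_particles, horizon)
cost = torch.sum(costs.view(n_envs, num_particles, horizon), dim=-1, keepdim=True)
assert cost.shape == (n_envs, num_particles, 1)
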
@@ -43,6 +43,7 @@ class OptimizerConfig:
     n_envs: int
     sync_cuda_time: bool
     use_coo_sparse: bool
+    action_horizon: int

     def __post_init__(self):
         object.__setattr__(self, "action_highs", self.tensor_args.to_device(self.action_highs))

@@ -68,6 +69,8 @@ class OptimizerConfig:
         child_dict["rollout_fn"] = rollout_fn
         child_dict["tensor_args"] = tensor_args
         child_dict["horizon"] = rollout_fn.horizon
+        child_dict["action_horizon"] = rollout_fn.action_horizon

         if "num_particles" not in child_dict:
             child_dict["num_particles"] = 1
         return child_dict

@@ -89,7 +89,7 @@ class ParallelMPPIConfig(ParticleOptConfig):
         child_dict["squash_fn"] = SquashType[child_dict["squash_fn"]]
         child_dict["cov_type"] = CovType[child_dict["cov_type"]]
         child_dict["sample_params"]["d_action"] = rollout_fn.d_action
-        child_dict["sample_params"]["horizon"] = child_dict["horizon"]
+        child_dict["sample_params"]["horizon"] = rollout_fn.action_horizon
         child_dict["sample_params"]["tensor_args"] = tensor_args
         child_dict["sample_params"] = SampleConfig(**child_dict["sample_params"])

@@ -112,7 +112,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
         # initialize covariance types:
         if self.cov_type == CovType.FULL_HA:
             self.I = torch.eye(
-                self.horizon * self.d_action,
+                self.action_horizon * self.d_action,
                 device=self.tensor_args.device,
                 dtype=self.tensor_args.dtype,
             )

@@ -124,7 +124,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):

         self.Z_seq = torch.zeros(
             1,
-            self.horizon,
+            self.action_horizon,
             self.d_action,
             device=self.tensor_args.device,
             dtype=self.tensor_args.dtype,

@@ -145,7 +145,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):

         self.mean_lib = HaltonSampleLib(
             SampleConfig(
-                self.horizon,
+                self.action_horizon,
                 self.d_action,
                 tensor_args=self.tensor_args,
                 **{"fixed_samples": False, "seed": 2567, "filter_coeffs": None}

@@ -330,7 +330,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
             (cat_list),
             dim=-3,
         )
-        act_seq = act_seq.reshape(self.total_num_particles, self.horizon, self.d_action)
+        act_seq = act_seq.reshape(self.total_num_particles, self.action_horizon, self.d_action)
         act_seq = scale_ctrl(act_seq, self.action_lows, self.action_highs, squash_fn=self.squash_fn)

         # if not copy_tensor(act_seq, self.act_seq):

@@ -399,7 +399,8 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
             act_seq = self.mean_action  # .clone() # [self.mean_idx]#.clone()
         elif mode == SampleMode.SAMPLE:
             delta = self.generate_noise(
-                shape=torch.Size((1, self.horizon)), base_seed=self.seed + 123 * self.num_steps
+                shape=torch.Size((1, self.action_horizon)),
+                base_seed=self.seed + 123 * self.num_steps,
             )
             act_seq = self.mean_action + torch.matmul(delta, self.full_scale_tril)
         elif mode == SampleMode.BEST:

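For context, the SAMPLE branch perturbs the mean plan with noise mapped through the covariance factor. A toy sketch with assumed shapes (a FULL_A-style d_action x d_action factor, not the library's actual tensors):

import torch

action_horizon, d_action = 30, 7  # assumed sizes
mean_action = torch.zeros(1, action_horizon, d_action)
full_scale_tril = 0.1 * torch.eye(d_action)  # stand-in for the covariance Cholesky factor
delta = torch.randn(1, action_horizon, d_action)  # analogue of generate_noise output
act_seq = mean_action + torch.matmul(delta, full_scale_tril)  # (1, action_horizon, d_action)
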
@@ -426,9 +427,11 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
             Tensor: dimension is (d_action, d_action)
         """
         if self.cov_type == CovType.SIGMA_I:
-            return self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.horizon, -1)
+            return (
+                self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)
+            )
         elif self.cov_type == CovType.DIAG_A:
-            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.horizon, -1)  # .cl
+            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)  # .cl
         elif self.cov_type == CovType.FULL_A:
             return self.scale_tril
         elif self.cov_type == CovType.FULL_HA:

@@ -486,10 +489,10 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
     def full_scale_tril(self):
         if self.cov_type == CovType.SIGMA_I:
             return (
-                self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.horizon, -1)
+                self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)
             )  # .cl
         elif self.cov_type == CovType.DIAG_A:
-            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.horizon, -1)  # .cl
+            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)  # .cl
         elif self.cov_type == CovType.FULL_A:
             return self.scale_tril
         elif self.cov_type == CovType.FULL_HA:

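The SIGMA_I and DIAG_A branches above broadcast a small `scale_tril` over the action horizon with `expand`, which returns a view rather than a copy. A standalone shape check under assumed sizes (the real scale_tril dimensions may differ):

import torch

n_envs, n_particles, action_horizon, d_action = 2, 8, 30, 7  # assumed sizes
scale_tril = torch.rand(n_envs, n_particles, d_action)  # DIAG_A-style per-dof scale
full = scale_tril.unsqueeze(-2).expand(-1, -1, action_horizon, -1)
assert full.shape == (n_envs, n_particles, action_horizon, d_action)
# expand() broadcasts without allocating new memory along the horizon dimension.
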
@@ -504,7 +507,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
         self.sample_lib = SampleLib(self.sample_params)
         self.mean_lib = HaltonSampleLib(
             SampleConfig(
-                self.horizon,
+                self.action_horizon,
                 self.d_action,
                 tensor_args=self.tensor_args,
                 **{"fixed_samples": False, "seed": 2567, "filter_coeffs": None}

@@ -530,7 +533,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
                     n_iters,
                     self.n_envs,
                     self.sampled_particles_per_env,
-                    self.horizon,
+                    self.action_horizon,
                     self.d_action,
                 )
                 .clone()

@@ -541,7 +544,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
                 base_seed=self.seed,
             )
             s_set = s_set.view(
-                n_iters, 1, self.sampled_particles_per_env, self.horizon, self.d_action
+                n_iters, 1, self.sampled_particles_per_env, self.action_horizon, self.d_action
             )
             s_set = s_set.repeat(1, self.n_envs, 1, 1, 1).clone()
             s_set[:, :, -1, :, :] = 0.0

@@ -259,7 +259,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
         # generate random simulated trajectories
         trajectory = self.generate_rollouts()
         trajectory.actions = trajectory.actions.view(
-            self.n_envs, self.particles_per_env, self.horizon, self.d_action
+            self.n_envs, self.particles_per_env, self.action_horizon, self.d_action
         )
         trajectory.costs = trajectory.costs.view(
             self.n_envs, self.particles_per_env, self.horizon

@@ -295,7 +295,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
         if self.null_per_env > 0:
             self.null_act_seqs = torch.zeros(
                 self.null_per_env,
-                self.horizon,
+                self.action_horizon,
                 self.d_action,
                 device=self.tensor_args.device,
                 dtype=self.tensor_args.dtype,

@@ -64,7 +64,9 @@ def get_stomp_cov(
     Coefficients from here: https://en.wikipedia.org/wiki/Finite_difference_coefficient
     More info here: https://github.com/ros-industrial/stomp_ros/blob/7fe40fbe6ad446459d8d4889916c64e276dbf882/stomp_core/src/utils.cpp#L36
     """
-    cov, scale_tril, scaled_M = get_stomp_cov_jit(horizon, d_action, cov_mode)
+    cov, scale_tril, scaled_M = get_stomp_cov_jit(
+        horizon, d_action, cov_mode, device=tensor_args.device
+    )
     cov = tensor_args.to_device(cov)
     scale_tril = tensor_args.to_device(scale_tril)
     if RETURN_M:

@@ -77,13 +79,16 @@ def get_stomp_cov_jit(
     horizon: int,
     d_action: int,
     cov_mode: str = "acc",
+    device: torch.device = torch.device("cuda:0"),
 ):
+    # This function can lead to nans. There are checks to raise an error when nan occurs.
     vel_fd_array = [0.0, 0.0, 1.0, -2.0, 1.0, 0.0, 0.0]

     fd_array = vel_fd_array
     A = torch.zeros(
         (d_action * horizon, d_action * horizon),
-        dtype=torch.float64,
+        dtype=torch.float32,
+        device=device,
     )

     if cov_mode == "vel":

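For orientation, this function fills a banded finite-difference operator A from `fd_array` and then derives the STOMP covariance from R = A^T A (the R, M, and Cholesky steps appear in the next hunk). A much-simplified single-joint sketch of that construction, with edge handling and the acc/jerk modes omitted:

import torch


def stomp_cov_sketch(horizon: int, device: str = "cpu") -> torch.Tensor:
    fd = [1.0, -2.0, 1.0]  # the nonzero part of vel_fd_array above
    A = torch.zeros(horizon, horizon, device=device)
    for i in range(horizon):
        for j, c in zip(range(i - 1, i + 2), fd):
            if 0 <= j < horizon:
                A[i, j] = c
    R = A.T @ A           # quadratic smoothness cost
    M = torch.inverse(R)  # covariance that favors smooth perturbations
    return M / torch.max(torch.abs(M))
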
@@ -117,14 +122,17 @@
                         A[k * horizon + i, k * horizon + index] = fd_array[j + 3]

     R = torch.matmul(A.transpose(-2, -1), A)

     M = torch.inverse(R)
     scaled_M = (1 / horizon) * M / (torch.max(torch.abs(M), dim=1)[0].unsqueeze(0))
     cov = M / torch.max(torch.abs(M))

     # also compute the cholesky decomposition:
     # scale_tril = torch.zeros((d_action * horizon, d_action * horizon), **tensor_args)
-    scale_tril = torch.linalg.cholesky(cov)
+    if (cov == cov.T).all() and (torch.linalg.eigvals(cov).real >= 0).all():
+        scale_tril = torch.linalg.cholesky(cov)
+    else:
+        scale_tril = cov

     """
     k = 0
     act_cov_matrix = cov[k * horizon:k * horizon + horizon, k * horizon:k * horizon + horizon]

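The new guard avoids calling `torch.linalg.cholesky` on a non-symmetric or indefinite matrix. A possible lighter-weight variant, not what this commit does, is `torch.linalg.cholesky_ex`, which reports failure through an info code instead of requiring an eigenvalue check:

import torch


def safe_cholesky(cov: torch.Tensor) -> torch.Tensor:
    # For a single (non-batched) matrix: info == 0 means the factorization succeeded,
    # otherwise fall back to returning the covariance itself, mirroring the hunk above.
    L, info = torch.linalg.cholesky_ex(cov)
    return L if int(info) == 0 else cov
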