update to 0.6.2

This commit is contained in:
Balakumar Sundaralingam
2023-12-15 02:01:33 -08:00
parent d85ae41fba
commit 58958bbcce
105 changed files with 2514 additions and 934 deletions

View File

@@ -24,9 +24,16 @@ from curobo.util.logger import log_warn
# kernel for l-bfgs:
@torch.jit.script
# @torch.jit.script
def compute_step_direction(
alpha_buffer, rho_buffer, y_buffer, s_buffer, grad_q, m: int, epsilon, stable_mode: bool = True
alpha_buffer,
rho_buffer,
y_buffer,
s_buffer,
grad_q,
m: int,
epsilon: float,
stable_mode: bool = True,
):
# m = 15 (int)
# y_buffer, s_buffer: m x b x 175
@@ -70,12 +77,12 @@ class LBFGSOpt(NewtonOptBase, LBFGSOptConfig):
if config is not None:
LBFGSOptConfig.__init__(self, **vars(config))
NewtonOptBase.__init__(self)
if self.d_opt >= 1024 or self.history >= 512:
log_warn("LBFGS: Not using LBFGS Cuda Kernel as d_opt>1024 or history>=512")
if self.d_opt >= 1024 or self.history > 15:
log_warn("LBFGS: Not using LBFGS Cuda Kernel as d_opt>1024 or history>15")
self.use_cuda_kernel = False
if self.history > self.d_opt:
if self.history >= self.d_opt:
log_warn("LBFGS: history >= d_opt, reducing history to d_opt-1")
self.history = self.d_opt
self.history = self.d_opt - 1
@profiler.record_function("lbfgs/reset")
def reset(self):

View File

@@ -72,7 +72,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
):
if config is not None:
NewtonOptConfig.__init__(self, **vars(config))
self.d_opt = self.horizon * self.d_action
self.d_opt = self.action_horizon * self.d_action
self.line_scale = self._create_box_line_search(self.line_search_scale)
Optimizer.__init__(self)
self.i = -1
@@ -84,8 +84,8 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
self.reset()
# reshape action lows and highs:
self.action_lows = self.action_lows.repeat(self.horizon)
self.action_highs = self.action_highs.repeat(self.horizon)
self.action_lows = self.action_lows.repeat(self.action_horizon)
self.action_highs = self.action_highs.repeat(self.action_horizon)
self.action_range = self.action_highs - self.action_lows
self.action_step_max = self.step_scale * torch.abs(self.action_range)
self.c_1 = 1e-5
@@ -99,10 +99,13 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
self.use_cuda_line_search_kernel = False
if self.use_temporal_smooth:
self._temporal_mat = build_fd_matrix(
self.horizon, order=2, device=self.tensor_args.device, dtype=self.tensor_args.dtype
self.action_horizon,
order=2,
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
).unsqueeze(0)
eye_mat = torch.eye(
self.horizon, device=self.tensor_args.device, dtype=self.tensor_args.dtype
self.action_horizon, device=self.tensor_args.device, dtype=self.tensor_args.dtype
).unsqueeze(0)
self._temporal_mat += eye_mat
@@ -130,9 +133,9 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
self._shift(shift_steps)
# reshape q:
if self.store_debug:
self.debug.append(q.view(-1, self.horizon, self.d_action).clone())
self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
with profiler.record_function("newton_base/init_opt"):
q = q.view(self.n_envs, self.horizon * self.d_action)
q = q.view(self.n_envs, self.action_horizon * self.d_action)
grad_q = q.detach() * 0.0
# run opt graph
if not self.cu_opt_init:
@@ -147,7 +150,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
if check_convergence(self.best_iteration, self.current_iteration, self.last_best):
break
best_q = best_q.view(self.n_envs, self.horizon, self.d_action)
best_q = best_q.view(self.n_envs, self.action_horizon, self.d_action)
return best_q
def reset(self):
@@ -166,8 +169,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
self.i += 1
cost_n, q, grad_q = self._opt_step(q.detach(), grad_q.detach())
if self.store_debug:
self.debug.append(self.best_q.view(-1, self.horizon, self.d_action).clone())
self.debug.append(self.best_q.view(-1, self.action_horizon, self.d_action).clone())
self.debug_cost.append(self.best_cost.detach().view(-1, 1).clone())
# self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
# self.debug_cost.append(cost_n.detach().view(-1, 1).clone())
# print(grad_q)
return self.best_q.detach(), self.best_cost.detach(), q.detach(), grad_q.detach()
@@ -186,9 +192,9 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
def scale_step_direction(self, dx):
if self.use_temporal_smooth:
dx_v = dx.view(-1, self.horizon, self.d_action)
dx_v = dx.view(-1, self.action_horizon, self.d_action)
dx_new = self._temporal_mat @ dx_v # 1,h,h x b, h, dof -> b, h, dof
dx = dx_new.view(-1, self.horizon * self.d_action)
dx = dx_new.view(-1, self.action_horizon * self.d_action)
dx_scaled = scale_action(dx, self.action_step_max)
return dx_scaled
@@ -216,11 +222,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
def _compute_cost_gradient(self, x):
x_n = x.detach().requires_grad_(True)
x_in = x_n.view(
self.n_envs * self.num_particles, self.rollout_fn.horizon, self.rollout_fn.d_action
self.n_envs * self.num_particles, self.action_horizon, self.rollout_fn.d_action
)
trajectories = self.rollout_fn(x_in) # x_n = (batch*line_search_scale) x horizon x d_action
cost = torch.sum(
trajectories.costs.view(self.n_envs, self.num_particles, self.rollout_fn.horizon),
trajectories.costs.view(self.n_envs, self.num_particles, self.horizon),
dim=-1,
keepdim=True,
)

View File

@@ -43,6 +43,7 @@ class OptimizerConfig:
n_envs: int
sync_cuda_time: bool
use_coo_sparse: bool
action_horizon: int
def __post_init__(self):
object.__setattr__(self, "action_highs", self.tensor_args.to_device(self.action_highs))
@@ -68,6 +69,8 @@ class OptimizerConfig:
child_dict["rollout_fn"] = rollout_fn
child_dict["tensor_args"] = tensor_args
child_dict["horizon"] = rollout_fn.horizon
child_dict["action_horizon"] = rollout_fn.action_horizon
if "num_particles" not in child_dict:
child_dict["num_particles"] = 1
return child_dict

View File

@@ -89,7 +89,7 @@ class ParallelMPPIConfig(ParticleOptConfig):
child_dict["squash_fn"] = SquashType[child_dict["squash_fn"]]
child_dict["cov_type"] = CovType[child_dict["cov_type"]]
child_dict["sample_params"]["d_action"] = rollout_fn.d_action
child_dict["sample_params"]["horizon"] = child_dict["horizon"]
child_dict["sample_params"]["horizon"] = rollout_fn.action_horizon
child_dict["sample_params"]["tensor_args"] = tensor_args
child_dict["sample_params"] = SampleConfig(**child_dict["sample_params"])
@@ -112,7 +112,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
# initialize covariance types:
if self.cov_type == CovType.FULL_HA:
self.I = torch.eye(
self.horizon * self.d_action,
self.action_horizon * self.d_action,
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
)
@@ -124,7 +124,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
self.Z_seq = torch.zeros(
1,
self.horizon,
self.action_horizon,
self.d_action,
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,
@@ -145,7 +145,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
self.mean_lib = HaltonSampleLib(
SampleConfig(
self.horizon,
self.action_horizon,
self.d_action,
tensor_args=self.tensor_args,
**{"fixed_samples": False, "seed": 2567, "filter_coeffs": None}
@@ -330,7 +330,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
(cat_list),
dim=-3,
)
act_seq = act_seq.reshape(self.total_num_particles, self.horizon, self.d_action)
act_seq = act_seq.reshape(self.total_num_particles, self.action_horizon, self.d_action)
act_seq = scale_ctrl(act_seq, self.action_lows, self.action_highs, squash_fn=self.squash_fn)
# if not copy_tensor(act_seq, self.act_seq):
@@ -399,7 +399,8 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
act_seq = self.mean_action # .clone() # [self.mean_idx]#.clone()
elif mode == SampleMode.SAMPLE:
delta = self.generate_noise(
shape=torch.Size((1, self.horizon)), base_seed=self.seed + 123 * self.num_steps
shape=torch.Size((1, self.action_horizon)),
base_seed=self.seed + 123 * self.num_steps,
)
act_seq = self.mean_action + torch.matmul(delta, self.full_scale_tril)
elif mode == SampleMode.BEST:
@@ -426,9 +427,11 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
Tensor: dimension is (d_action, d_action)
"""
if self.cov_type == CovType.SIGMA_I:
return self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.horizon, -1)
return (
self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)
)
elif self.cov_type == CovType.DIAG_A:
return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.horizon, -1) # .cl
return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.action_horizon, -1) # .cl
elif self.cov_type == CovType.FULL_A:
return self.scale_tril
elif self.cov_type == CovType.FULL_HA:
@@ -486,10 +489,10 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
def full_scale_tril(self):
if self.cov_type == CovType.SIGMA_I:
return (
self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.horizon, -1)
self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)
) # .cl
elif self.cov_type == CovType.DIAG_A:
return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.horizon, -1) # .cl
return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.action_horizon, -1) # .cl
elif self.cov_type == CovType.FULL_A:
return self.scale_tril
elif self.cov_type == CovType.FULL_HA:
@@ -504,7 +507,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
self.sample_lib = SampleLib(self.sample_params)
self.mean_lib = HaltonSampleLib(
SampleConfig(
self.horizon,
self.action_horizon,
self.d_action,
tensor_args=self.tensor_args,
**{"fixed_samples": False, "seed": 2567, "filter_coeffs": None}
@@ -530,7 +533,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
n_iters,
self.n_envs,
self.sampled_particles_per_env,
self.horizon,
self.action_horizon,
self.d_action,
)
.clone()
@@ -541,7 +544,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
base_seed=self.seed,
)
s_set = s_set.view(
n_iters, 1, self.sampled_particles_per_env, self.horizon, self.d_action
n_iters, 1, self.sampled_particles_per_env, self.action_horizon, self.d_action
)
s_set = s_set.repeat(1, self.n_envs, 1, 1, 1).clone()
s_set[:, :, -1, :, :] = 0.0

View File

@@ -259,7 +259,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
# generate random simulated trajectories
trajectory = self.generate_rollouts()
trajectory.actions = trajectory.actions.view(
self.n_envs, self.particles_per_env, self.horizon, self.d_action
self.n_envs, self.particles_per_env, self.action_horizon, self.d_action
)
trajectory.costs = trajectory.costs.view(
self.n_envs, self.particles_per_env, self.horizon
@@ -295,7 +295,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
if self.null_per_env > 0:
self.null_act_seqs = torch.zeros(
self.null_per_env,
self.horizon,
self.action_horizon,
self.d_action,
device=self.tensor_args.device,
dtype=self.tensor_args.dtype,

View File

@@ -64,7 +64,9 @@ def get_stomp_cov(
Coefficients from here: https://en.wikipedia.org/wiki/Finite_difference_coefficient
More info here: https://github.com/ros-industrial/stomp_ros/blob/7fe40fbe6ad446459d8d4889916c64e276dbf882/stomp_core/src/utils.cpp#L36
"""
cov, scale_tril, scaled_M = get_stomp_cov_jit(horizon, d_action, cov_mode)
cov, scale_tril, scaled_M = get_stomp_cov_jit(
horizon, d_action, cov_mode, device=tensor_args.device
)
cov = tensor_args.to_device(cov)
scale_tril = tensor_args.to_device(scale_tril)
if RETURN_M:
@@ -77,13 +79,16 @@ def get_stomp_cov_jit(
horizon: int,
d_action: int,
cov_mode: str = "acc",
device: torch.device = torch.device("cuda:0"),
):
# This function can lead to nans. There are checks to raise an error when nan occurs.
vel_fd_array = [0.0, 0.0, 1.0, -2.0, 1.0, 0.0, 0.0]
fd_array = vel_fd_array
A = torch.zeros(
(d_action * horizon, d_action * horizon),
dtype=torch.float64,
dtype=torch.float32,
device=device,
)
if cov_mode == "vel":
@@ -117,14 +122,17 @@ def get_stomp_cov_jit(
A[k * horizon + i, k * horizon + index] = fd_array[j + 3]
R = torch.matmul(A.transpose(-2, -1), A)
M = torch.inverse(R)
scaled_M = (1 / horizon) * M / (torch.max(torch.abs(M), dim=1)[0].unsqueeze(0))
cov = M / torch.max(torch.abs(M))
# also compute the cholesky decomposition:
# scale_tril = torch.zeros((d_action * horizon, d_action * horizon), **tensor_args)
scale_tril = torch.linalg.cholesky(cov)
if (cov == cov.T).all() and (torch.linalg.eigvals(cov).real >= 0).all():
scale_tril = torch.linalg.cholesky(cov)
else:
scale_tril = cov
"""
k = 0
act_cov_matrix = cov[k * horizon:k * horizon + horizon, k * horizon:k * horizon + horizon]