update to 0.6.2
@@ -24,9 +24,16 @@ from curobo.util.logger import log_warn

 # kernel for l-bfgs:
-@torch.jit.script
+# @torch.jit.script
 def compute_step_direction(
-    alpha_buffer, rho_buffer, y_buffer, s_buffer, grad_q, m: int, epsilon, stable_mode: bool = True
+    alpha_buffer,
+    rho_buffer,
+    y_buffer,
+    s_buffer,
+    grad_q,
+    m: int,
+    epsilon: float,
+    stable_mode: bool = True,
 ):
     # m = 15 (int)
     # y_buffer, s_buffer: m x b x 175

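The reformatted signature belongs to the L-BFGS step-direction routine. For orientation, a minimal sketch of the textbook two-loop recursion such a routine computes is shown below; the history-major buffer layout (oldest pair first) and the use of epsilon as a divide guard are assumptions for illustration, not curobo's CUDA kernel or JIT implementation.

import torch


def lbfgs_two_loop(grad_q, s_buffer, y_buffer, rho_buffer, epsilon: float = 1e-10):
    # grad_q: b x d, s_buffer / y_buffer: m x b x d (oldest history pair first),
    # rho_buffer: m x b x 1 with rho_i = 1 / (s_i . y_i). Returns the step direction, b x d.
    m = s_buffer.shape[0]
    q = grad_q.clone()
    alpha = torch.zeros_like(rho_buffer)
    # first loop: newest history entry down to the oldest
    for i in range(m - 1, -1, -1):
        alpha[i] = rho_buffer[i] * (s_buffer[i] * q).sum(-1, keepdim=True)
        q = q - alpha[i] * y_buffer[i]
    # initial Hessian scaling from the newest pair: gamma = (s . y) / (y . y)
    gamma = (s_buffer[-1] * y_buffer[-1]).sum(-1, keepdim=True) / (
        (y_buffer[-1] * y_buffer[-1]).sum(-1, keepdim=True) + epsilon
    )
    r = gamma * q
    # second loop: oldest to newest
    for i in range(m):
        beta = rho_buffer[i] * (y_buffer[i] * r).sum(-1, keepdim=True)
        r = r + (alpha[i] - beta) * s_buffer[i]
    return -r  # descent direction
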
@@ -70,12 +77,12 @@ class LBFGSOpt(NewtonOptBase, LBFGSOptConfig):
         if config is not None:
             LBFGSOptConfig.__init__(self, **vars(config))
         NewtonOptBase.__init__(self)
-        if self.d_opt >= 1024 or self.history >= 512:
-            log_warn("LBFGS: Not using LBFGS Cuda Kernel as d_opt>1024 or history>=512")
+        if self.d_opt >= 1024 or self.history > 15:
+            log_warn("LBFGS: Not using LBFGS Cuda Kernel as d_opt>1024 or history>15")
             self.use_cuda_kernel = False
-        if self.history > self.d_opt:
+        if self.history >= self.d_opt:
             log_warn("LBFGS: history >= d_opt, reducing history to d_opt-1")
-            self.history = self.d_opt
+            self.history = self.d_opt - 1

     @profiler.record_function("lbfgs/reset")
     def reset(self):

@@ -72,7 +72,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
     ):
         if config is not None:
             NewtonOptConfig.__init__(self, **vars(config))
-        self.d_opt = self.horizon * self.d_action
+        self.d_opt = self.action_horizon * self.d_action
         self.line_scale = self._create_box_line_search(self.line_search_scale)
         Optimizer.__init__(self)
         self.i = -1

@@ -84,8 +84,8 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
         self.reset()

         # reshape action lows and highs:
-        self.action_lows = self.action_lows.repeat(self.horizon)
-        self.action_highs = self.action_highs.repeat(self.horizon)
+        self.action_lows = self.action_lows.repeat(self.action_horizon)
+        self.action_highs = self.action_highs.repeat(self.action_horizon)
         self.action_range = self.action_highs - self.action_lows
         self.action_step_max = self.step_scale * torch.abs(self.action_range)
         self.c_1 = 1e-5

@@ -99,10 +99,13 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
             self.use_cuda_line_search_kernel = False
         if self.use_temporal_smooth:
             self._temporal_mat = build_fd_matrix(
-                self.horizon, order=2, device=self.tensor_args.device, dtype=self.tensor_args.dtype
+                self.action_horizon,
+                order=2,
+                device=self.tensor_args.device,
+                dtype=self.tensor_args.dtype,
             ).unsqueeze(0)
             eye_mat = torch.eye(
-                self.horizon, device=self.tensor_args.device, dtype=self.tensor_args.dtype
+                self.action_horizon, device=self.tensor_args.device, dtype=self.tensor_args.dtype
             ).unsqueeze(0)
             self._temporal_mat += eye_mat

@@ -130,9 +133,9 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
         self._shift(shift_steps)
         # reshape q:
         if self.store_debug:
-            self.debug.append(q.view(-1, self.horizon, self.d_action).clone())
+            self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
         with profiler.record_function("newton_base/init_opt"):
-            q = q.view(self.n_envs, self.horizon * self.d_action)
+            q = q.view(self.n_envs, self.action_horizon * self.d_action)
             grad_q = q.detach() * 0.0
         # run opt graph
         if not self.cu_opt_init:

@@ -147,7 +150,7 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
             if check_convergence(self.best_iteration, self.current_iteration, self.last_best):
                 break

-        best_q = best_q.view(self.n_envs, self.horizon, self.d_action)
+        best_q = best_q.view(self.n_envs, self.action_horizon, self.d_action)
         return best_q

     def reset(self):

@@ -166,8 +169,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
             self.i += 1
             cost_n, q, grad_q = self._opt_step(q.detach(), grad_q.detach())
             if self.store_debug:
-                self.debug.append(self.best_q.view(-1, self.horizon, self.d_action).clone())
+                self.debug.append(self.best_q.view(-1, self.action_horizon, self.d_action).clone())
                 self.debug_cost.append(self.best_cost.detach().view(-1, 1).clone())
+                # self.debug.append(q.view(-1, self.action_horizon, self.d_action).clone())
+                # self.debug_cost.append(cost_n.detach().view(-1, 1).clone())
+                # print(grad_q)

         return self.best_q.detach(), self.best_cost.detach(), q.detach(), grad_q.detach()

@@ -186,9 +192,9 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):

     def scale_step_direction(self, dx):
         if self.use_temporal_smooth:
-            dx_v = dx.view(-1, self.horizon, self.d_action)
+            dx_v = dx.view(-1, self.action_horizon, self.d_action)
             dx_new = self._temporal_mat @ dx_v  # 1,h,h x b, h, dof -> b, h, dof
-            dx = dx_new.view(-1, self.horizon * self.d_action)
+            dx = dx_new.view(-1, self.action_horizon * self.d_action)
         dx_scaled = scale_action(dx, self.action_step_max)

         return dx_scaled

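The two temporal-smoothing hunks above build `_temporal_mat` from `build_fd_matrix(..., order=2)` plus an identity and then multiply it into the step direction. A standalone sketch of that idea follows, with a simplified second-order difference matrix (boundary handling and the real build_fd_matrix options are omitted, and all sizes are made up).

import torch


def fd_matrix_sketch(h: int, dtype=torch.float32) -> torch.Tensor:
    # Interior rows apply the second-order stencil [1, -2, 1]; boundary rows stay zero here.
    mat = torch.zeros(h, h, dtype=dtype)
    for i in range(1, h - 1):
        mat[i, i - 1] = 1.0
        mat[i, i] = -2.0
        mat[i, i + 1] = 1.0
    return mat


action_horizon, d_action, batch = 30, 7, 4
temporal_mat = (fd_matrix_sketch(action_horizon) + torch.eye(action_horizon)).unsqueeze(0)
dx = torch.randn(batch, action_horizon, d_action)
dx_smooth = temporal_mat @ dx  # 1,h,h x b,h,dof -> b,h,dof
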
@@ -216,11 +222,11 @@ class NewtonOptBase(Optimizer, NewtonOptConfig):
     def _compute_cost_gradient(self, x):
         x_n = x.detach().requires_grad_(True)
         x_in = x_n.view(
-            self.n_envs * self.num_particles, self.rollout_fn.horizon, self.rollout_fn.d_action
+            self.n_envs * self.num_particles, self.action_horizon, self.rollout_fn.d_action
         )
         trajectories = self.rollout_fn(x_in)  # x_n = (batch*line_search_scale) x horizon x d_action
         cost = torch.sum(
-            trajectories.costs.view(self.n_envs, self.num_particles, self.rollout_fn.horizon),
+            trajectories.costs.view(self.n_envs, self.num_particles, self.horizon),
             dim=-1,
             keepdim=True,
         )

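This hunk shows the distinction that drives most of the change set: the optimization variables now span `action_horizon` steps while the rollout costs still span the full `horizon`. A shape-only illustration under made-up sizes:

import torch

n_envs, num_particles, d_action = 2, 8, 7
horizon, action_horizon = 32, 30  # assumed values for illustration

# optimizer variables cover only the action horizon ...
x_in = torch.randn(n_envs * num_particles, action_horizon, d_action)
# ... while the rollout produces one cost per rollout step
costs = torch.randn(n_envs * num_particles, horizon)
cost = torch.sum(costs.view(n_envs, num_particles, horizon), dim=-1, keepdim=True)
assert cost.shape == (n_envs, num_particles, 1)
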
@@ -43,6 +43,7 @@ class OptimizerConfig:
     n_envs: int
     sync_cuda_time: bool
     use_coo_sparse: bool
+    action_horizon: int

     def __post_init__(self):
         object.__setattr__(self, "action_highs", self.tensor_args.to_device(self.action_highs))

@@ -68,6 +69,8 @@ class OptimizerConfig:
         child_dict["rollout_fn"] = rollout_fn
         child_dict["tensor_args"] = tensor_args
         child_dict["horizon"] = rollout_fn.horizon
+        child_dict["action_horizon"] = rollout_fn.action_horizon

         if "num_particles" not in child_dict:
             child_dict["num_particles"] = 1
         return child_dict

@@ -89,7 +89,7 @@ class ParallelMPPIConfig(ParticleOptConfig):
         child_dict["squash_fn"] = SquashType[child_dict["squash_fn"]]
         child_dict["cov_type"] = CovType[child_dict["cov_type"]]
         child_dict["sample_params"]["d_action"] = rollout_fn.d_action
-        child_dict["sample_params"]["horizon"] = child_dict["horizon"]
+        child_dict["sample_params"]["horizon"] = rollout_fn.action_horizon
         child_dict["sample_params"]["tensor_args"] = tensor_args
         child_dict["sample_params"] = SampleConfig(**child_dict["sample_params"])

@@ -112,7 +112,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
         # initialize covariance types:
         if self.cov_type == CovType.FULL_HA:
             self.I = torch.eye(
-                self.horizon * self.d_action,
+                self.action_horizon * self.d_action,
                 device=self.tensor_args.device,
                 dtype=self.tensor_args.dtype,
             )

@@ -124,7 +124,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):

         self.Z_seq = torch.zeros(
             1,
-            self.horizon,
+            self.action_horizon,
             self.d_action,
             device=self.tensor_args.device,
             dtype=self.tensor_args.dtype,

@@ -145,7 +145,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):

         self.mean_lib = HaltonSampleLib(
             SampleConfig(
-                self.horizon,
+                self.action_horizon,
                 self.d_action,
                 tensor_args=self.tensor_args,
                 **{"fixed_samples": False, "seed": 2567, "filter_coeffs": None}

@@ -330,7 +330,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
             (cat_list),
             dim=-3,
         )
-        act_seq = act_seq.reshape(self.total_num_particles, self.horizon, self.d_action)
+        act_seq = act_seq.reshape(self.total_num_particles, self.action_horizon, self.d_action)
         act_seq = scale_ctrl(act_seq, self.action_lows, self.action_highs, squash_fn=self.squash_fn)

         # if not copy_tensor(act_seq, self.act_seq):

@@ -399,7 +399,8 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
             act_seq = self.mean_action  # .clone() # [self.mean_idx]#.clone()
         elif mode == SampleMode.SAMPLE:
             delta = self.generate_noise(
-                shape=torch.Size((1, self.horizon)), base_seed=self.seed + 123 * self.num_steps
+                shape=torch.Size((1, self.action_horizon)),
+                base_seed=self.seed + 123 * self.num_steps,
             )
             act_seq = self.mean_action + torch.matmul(delta, self.full_scale_tril)
         elif mode == SampleMode.BEST:

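For context, the SAMPLE branch perturbs the mean plan with noise mapped through the covariance factor. A toy sketch with assumed shapes (a FULL_A-style d_action x d_action factor, not the library's actual tensors):

import torch

action_horizon, d_action = 30, 7  # assumed sizes
mean_action = torch.zeros(1, action_horizon, d_action)
full_scale_tril = 0.1 * torch.eye(d_action)  # stand-in for the covariance Cholesky factor
delta = torch.randn(1, action_horizon, d_action)  # analogue of generate_noise output
act_seq = mean_action + torch.matmul(delta, full_scale_tril)  # (1, action_horizon, d_action)
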
@@ -426,9 +427,11 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
             Tensor: dimension is (d_action, d_action)
         """
         if self.cov_type == CovType.SIGMA_I:
-            return self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.horizon, -1)
+            return (
+                self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)
+            )
         elif self.cov_type == CovType.DIAG_A:
-            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.horizon, -1)  # .cl
+            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)  # .cl
         elif self.cov_type == CovType.FULL_A:
             return self.scale_tril
         elif self.cov_type == CovType.FULL_HA:

@@ -486,10 +489,10 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
     def full_scale_tril(self):
         if self.cov_type == CovType.SIGMA_I:
             return (
-                self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.horizon, -1)
+                self.scale_tril.unsqueeze(-2).unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)
             )  # .cl
         elif self.cov_type == CovType.DIAG_A:
-            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.horizon, -1)  # .cl
+            return self.scale_tril.unsqueeze(-2).expand(-1, -1, self.action_horizon, -1)  # .cl
         elif self.cov_type == CovType.FULL_A:
             return self.scale_tril
         elif self.cov_type == CovType.FULL_HA:

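The SIGMA_I and DIAG_A branches above broadcast a small `scale_tril` over the action horizon with `expand`, which returns a view rather than a copy. A standalone shape check under assumed sizes (the real scale_tril dimensions may differ):

import torch

n_envs, n_particles, action_horizon, d_action = 2, 8, 30, 7  # assumed sizes
scale_tril = torch.rand(n_envs, n_particles, d_action)  # DIAG_A-style per-dof scale
full = scale_tril.unsqueeze(-2).expand(-1, -1, action_horizon, -1)
assert full.shape == (n_envs, n_particles, action_horizon, d_action)
# expand() broadcasts without allocating new memory along the horizon dimension.
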
@@ -504,7 +507,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
         self.sample_lib = SampleLib(self.sample_params)
         self.mean_lib = HaltonSampleLib(
             SampleConfig(
-                self.horizon,
+                self.action_horizon,
                 self.d_action,
                 tensor_args=self.tensor_args,
                 **{"fixed_samples": False, "seed": 2567, "filter_coeffs": None}

@@ -530,7 +533,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
                     n_iters,
                     self.n_envs,
                     self.sampled_particles_per_env,
-                    self.horizon,
+                    self.action_horizon,
                     self.d_action,
                 )
                 .clone()

@@ -541,7 +544,7 @@ class ParallelMPPI(ParticleOptBase, ParallelMPPIConfig):
                 base_seed=self.seed,
             )
             s_set = s_set.view(
-                n_iters, 1, self.sampled_particles_per_env, self.horizon, self.d_action
+                n_iters, 1, self.sampled_particles_per_env, self.action_horizon, self.d_action
             )
             s_set = s_set.repeat(1, self.n_envs, 1, 1, 1).clone()
             s_set[:, :, -1, :, :] = 0.0

@@ -259,7 +259,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
         # generate random simulated trajectories
         trajectory = self.generate_rollouts()
         trajectory.actions = trajectory.actions.view(
-            self.n_envs, self.particles_per_env, self.horizon, self.d_action
+            self.n_envs, self.particles_per_env, self.action_horizon, self.d_action
         )
         trajectory.costs = trajectory.costs.view(
             self.n_envs, self.particles_per_env, self.horizon

@@ -295,7 +295,7 @@ class ParticleOptBase(Optimizer, ParticleOptConfig):
         if self.null_per_env > 0:
             self.null_act_seqs = torch.zeros(
                 self.null_per_env,
-                self.horizon,
+                self.action_horizon,
                 self.d_action,
                 device=self.tensor_args.device,
                 dtype=self.tensor_args.dtype,

@@ -64,7 +64,9 @@ def get_stomp_cov(
     Coefficients from here: https://en.wikipedia.org/wiki/Finite_difference_coefficient
     More info here: https://github.com/ros-industrial/stomp_ros/blob/7fe40fbe6ad446459d8d4889916c64e276dbf882/stomp_core/src/utils.cpp#L36
     """
-    cov, scale_tril, scaled_M = get_stomp_cov_jit(horizon, d_action, cov_mode)
+    cov, scale_tril, scaled_M = get_stomp_cov_jit(
+        horizon, d_action, cov_mode, device=tensor_args.device
+    )
     cov = tensor_args.to_device(cov)
     scale_tril = tensor_args.to_device(scale_tril)
     if RETURN_M:

@@ -77,13 +79,16 @@ def get_stomp_cov_jit(
     horizon: int,
     d_action: int,
     cov_mode: str = "acc",
+    device: torch.device = torch.device("cuda:0"),
 ):
+    # This function can lead to nans. There are checks to raise an error when nan occurs.
     vel_fd_array = [0.0, 0.0, 1.0, -2.0, 1.0, 0.0, 0.0]

     fd_array = vel_fd_array
     A = torch.zeros(
         (d_action * horizon, d_action * horizon),
-        dtype=torch.float64,
+        dtype=torch.float32,
+        device=device,
     )

     if cov_mode == "vel":

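For orientation, this function fills a banded finite-difference operator A from `fd_array` and then derives the STOMP covariance from R = A^T A (the R, M, and Cholesky steps appear in the next hunk). A much-simplified single-joint sketch of that construction, with edge handling and the acc/jerk modes omitted:

import torch


def stomp_cov_sketch(horizon: int, device: str = "cpu") -> torch.Tensor:
    fd = [1.0, -2.0, 1.0]  # the nonzero part of vel_fd_array above
    A = torch.zeros(horizon, horizon, device=device)
    for i in range(horizon):
        for j, c in zip(range(i - 1, i + 2), fd):
            if 0 <= j < horizon:
                A[i, j] = c
    R = A.T @ A           # quadratic smoothness cost
    M = torch.inverse(R)  # covariance that favors smooth perturbations
    return M / torch.max(torch.abs(M))
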
@@ -117,14 +122,17 @@
                         A[k * horizon + i, k * horizon + index] = fd_array[j + 3]

     R = torch.matmul(A.transpose(-2, -1), A)

     M = torch.inverse(R)
     scaled_M = (1 / horizon) * M / (torch.max(torch.abs(M), dim=1)[0].unsqueeze(0))
     cov = M / torch.max(torch.abs(M))

     # also compute the cholesky decomposition:
     # scale_tril = torch.zeros((d_action * horizon, d_action * horizon), **tensor_args)
-    scale_tril = torch.linalg.cholesky(cov)
+    if (cov == cov.T).all() and (torch.linalg.eigvals(cov).real >= 0).all():
+        scale_tril = torch.linalg.cholesky(cov)
+    else:
+        scale_tril = cov

     """
     k = 0
     act_cov_matrix = cov[k * horizon:k * horizon + horizon, k * horizon:k * horizon + horizon]

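The new guard avoids calling `torch.linalg.cholesky` on a non-symmetric or indefinite matrix. A possible lighter-weight variant, not what this commit does, is `torch.linalg.cholesky_ex`, which reports failure through an info code instead of requiring an eigenvalue check:

import torch


def safe_cholesky(cov: torch.Tensor) -> torch.Tensor:
    # For a single (non-batched) matrix: info == 0 means the factorization succeeded,
    # otherwise fall back to returning the covariance itself, mirroring the hunk above.
    L, info = torch.linalg.cholesky_ex(cov)
    return L if int(info) == 0 else cov
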