Bug Fixes, Add Isaac Sim 4.5 support for examples

This commit is contained in:
Balakumar Sundaralingam
2025-04-25 11:24:16 -07:00
parent 2fbffc3522
commit 0a50de1ba7
43 changed files with 728 additions and 193 deletions

224
examples/mesh_dataset.py Normal file
View File

@@ -0,0 +1,224 @@
#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
import math
import os
import sys
from typing import Optional
import numpy as np
import pyrender
import torch
import torch.nn.functional as F
import trimesh
from torch.utils.data.dataset import Dataset
def fov_and_size_to_intrinsics(fov, img_size, device="cpu"):
    """Build a 3x3 pinhole intrinsics matrix from a field of view and image size.

    Args:
        fov: Field of view in degrees. The same angle is used for both the
            horizontal and the vertical focal length.
        img_size: ``(height, width)`` of the image in pixels.
        device: Torch device on which the matrix is created.

    Returns:
        A ``(3, 3)`` float32 tensor ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]``.
    """
    img_h, img_w = img_size
    half_fov_tan = math.tan(math.radians(fov) / 2)
    fx = img_w / (2 * half_fov_tan)
    fy = img_h / (2 * half_fov_tan)
    # The principal point is the image centre: cx follows the width (x axis)
    # and cy follows the height (y axis).
    cx = img_w / 2
    cy = img_h / 2
    return torch.tensor(
        [[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
        dtype=torch.float,
        device=device,
    )
def lookat_to_cam_pose(eyes, ats, ups=[[0, 0, 1]], device="cpu", mode="opengl"):
    """Convert batched look-at parameters into 4x4 camera pose matrices.

    Args:
        eyes: Camera positions, one row of three values per camera. Non-tensor
            input is converted to a float32 tensor on ``device``.
        ats: Points the cameras look at, same layout as ``eyes``.
        ups: Up direction(s) used to derive the camera's right vector.
        device: Device used when converting non-tensor inputs.
        mode: ``"opengl"`` stores the columns ``[right, up, -view, eye]``;
            ``"opencv"`` stores ``[right, -up, view, eye]``.

    Returns:
        A float32 tensor with one 4x4 pose per camera. The output buffer is
        allocated with ``torch.zeros`` without a device argument.

    Raises:
        ValueError: If ``mode`` is neither ``"opengl"`` nor ``"opencv"``.
    """

    def _to_tensor(value):
        if isinstance(value, torch.Tensor):
            return value
        return torch.tensor(value, device=device, dtype=torch.float32)

    eyes, ats, ups = _to_tensor(eyes), _to_tensor(ats), _to_tensor(ups)

    # Orthonormal camera frame: viewing direction, then right, then true up.
    forward = F.normalize(ats - eyes, dim=1)
    right = F.normalize(torch.cross(forward, ups, dim=1), dim=1)
    true_up = F.normalize(torch.cross(right, forward, dim=1), dim=1)

    if mode == "opengl":
        columns = [right, true_up, -forward, eyes]
    elif mode == "opencv":
        columns = [right, -true_up, forward, eyes]
    else:
        raise ValueError(f"Unknown mode: {mode}")

    pose = torch.zeros((eyes.shape[0], 4, 4))
    # Stacking along dim=2 places each vector in its own column of the top 3x4.
    pose[:, :3, :] = torch.stack(columns, dim=2)
    pose[:, 3, 3] = 1.0
    return pose.float()
def sample_sphere_points(N, radius, device="cuda"):
    """Draw ``N`` random points on a sphere of the given radius around zero.

    Two independent uniform angles are drawn per point and combined so that
    every returned point has norm ``radius``.

    Args:
        N: Number of points to sample.
        radius: Sphere radius.
        device: Torch device used for sampling.

    Returns:
        An ``(N, 3)`` tensor of xyz coordinates.
    """
    # First angle spans [-pi/2, pi/2), second angle spans [-pi, pi).
    angle_a = (torch.rand(size=(N, 1), device=device) - 0.5) * torch.pi
    angle_b = (torch.rand(size=(N, 1), device=device) - 0.5) * torch.pi * 2

    cos_b = torch.cos(angle_b)
    unit_points = torch.cat(
        [
            torch.cos(angle_a) * cos_b,
            torch.sin(angle_a) * cos_b,
            torch.sin(angle_b),
        ],
        dim=1,
    )
    return unit_points * radius
def sample_sphere_poses(N, origin, radius, device="cuda"):
    """Sample ``N`` camera poses on a sphere around ``origin``, looking at it.

    Args:
        N: Number of poses to sample.
        origin: Centre of the sphere and look-at target (three values).
        radius: Distance of every camera from ``origin``.
        device: Torch device used for sampling and for the converted origin.

    Returns:
        A tuple ``(poses_gl, poses_cv)`` with the same cameras expressed via
        ``lookat_to_cam_pose`` in ``"opengl"`` and ``"opencv"`` mode.
    """
    # Coerce to float32 on the sampling device so tensor, list and numpy
    # origins all combine cleanly with the sampled points.
    origin = torch.as_tensor(origin).to(device=device, dtype=torch.float32)
    ats = origin[None, :].repeat((N, 1))
    # Offset the samples by the origin: the sphere must be centred on the
    # target, not on the world origin, or off-centre meshes are not orbited.
    eyes = ats + sample_sphere_points(N, radius, device)
    poses_gl = lookat_to_cam_pose(eyes, ats, device=device, mode="opengl")
    poses_cv = lookat_to_cam_pose(eyes, ats, device=device, mode="opencv")
    return poses_gl, poses_cv
def compute_origin_and_radius(trimesh_scene):
    """Return the centre and bounding radius of an object's axis-aligned bounds.

    Args:
        trimesh_scene: Object exposing ``bounds`` as a ``(low, high)`` pair of
            corner coordinates.

    Returns:
        A tuple ``(center, radius)``: the midpoint of the bounding box and the
        distance from that midpoint to the farthest box corner.
    """
    low, high = trimesh_scene.bounds
    # Parenthesise the sum: `high + low / 2` would only halve `low`.
    center = (high + low) / 2
    to_low = low - center
    to_high = high - center
    radius = max(np.sqrt((to_high**2).sum()), np.sqrt((to_low**2).sum()))
    return center, radius
def render_batch(trimesh_mesh, camera_poses, fov, image_size):
    """Render colour and depth images of a mesh from a batch of camera poses.

    Args:
        trimesh_mesh: Mesh handed to ``pyrender.Mesh.from_trimesh``.
        camera_poses: Tensor of camera pose matrices; iterated along its first
            dimension after conversion to numpy.
        fov: Value passed to ``pyrender.PerspectiveCamera`` as ``yfov``.
        image_size: Width and height of the square viewport in pixels.

    Returns:
        A tuple ``(colors, depths)`` of numpy arrays stacked over the poses.
    """
    camera_poses = camera_poses.detach().cpu().numpy()

    scene = pyrender.Scene()
    scene.add(pyrender.Mesh.from_trimesh(trimesh_mesh))

    camera = pyrender.Node(
        camera=pyrender.PerspectiveCamera(yfov=fov, aspectRatio=1.0),
        matrix=np.eye(4),
    )
    scene.add_node(camera)

    light = pyrender.Node(
        light=pyrender.SpotLight(
            color=np.ones(3),
            intensity=1.0,
            innerConeAngle=np.pi / 16.0,
            outerConeAngle=np.pi / 2.0,
        ),
        matrix=np.eye(4),
    )
    scene.add_node(light)

    renderer = pyrender.OffscreenRenderer(image_size, image_size)
    colors = []
    depths = []
    try:
        for camera_pose in camera_poses:
            # The light travels with the camera so every view is lit head-on.
            scene.set_pose(camera, camera_pose)
            scene.set_pose(light, camera_pose)
            color, depth = renderer.render(scene)
            colors.append(color)
            depths.append(depth)
    finally:
        # Release the offscreen rendering context even if a render call fails.
        renderer.delete()
    return np.asarray(colors), np.asarray(depths)
"""
MeshDataset takes a path to a mesh as input and uses PyRender to render images of the mesh
from a sphere centered around the scene.
"""
class MeshDataset(Dataset):
    """Dataset of RGBA and depth frames rendered from a mesh with pyrender.

    Camera poses are sampled on a sphere whose radius is twice the mesh's
    bounding radius, and all frames are rendered once in the constructor.

    Args:
        mesh_file: Path of a mesh to load with ``trimesh.load``. Only used
            when ``trimesh_mesh`` is not given.
        n_frames: Number of camera poses / frames to render.
        image_size: Side length in pixels of the square rendered images.
        save_data_dir: If given, the rendered frames are also written there
            via :meth:`save_as_sun3d_dataset`.
        trimesh_mesh: Already-loaded mesh; takes precedence over ``mesh_file``.
        fov_deg: Camera field of view in degrees.
        device: Torch device used for pose sampling and for the intrinsics.

    Raises:
        ValueError: If neither ``mesh_file`` nor ``trimesh_mesh`` is provided.
    """

    def __init__(
        self,
        mesh_file: Optional[str] = None,
        n_frames: int = 10,
        image_size: int = 256,
        save_data_dir: Optional[str] = None,
        trimesh_mesh: Optional["trimesh.Trimesh"] = None,
        fov_deg: float = 60,
        device: str = "cuda",
    ) -> None:
        super().__init__()
        if trimesh_mesh is None and mesh_file is None:
            raise ValueError("Either mesh_file or trimesh_mesh must be provided.")

        self.mesh_file = mesh_file
        self.n_frames = n_frames
        if trimesh_mesh is None:
            self.trimesh_mesh = trimesh.load(self.mesh_file)
        else:
            self.trimesh_mesh = trimesh_mesh
        self.image_size = image_size
        self.fov_deg = fov_deg
        self.device = device

        origin, radius = compute_origin_and_radius(self.trimesh_mesh)
        # Doubling the bounding radius keeps the cameras outside the mesh.
        sphere_radius = radius * 2.0
        self.camera_poses_gl, self.camera_poses_cv = sample_sphere_poses(
            n_frames, origin, sphere_radius, device
        )
        # Rendering uses the OpenGL-convention poses; samples returned by
        # __getitem__ expose the OpenCV-convention ones.
        self.colors, self.depths = render_batch(
            self.trimesh_mesh,
            self.camera_poses_gl,
            fov=math.radians(self.fov_deg),
            image_size=self.image_size,
        )
        self.intrinsics = fov_and_size_to_intrinsics(
            self.fov_deg, (self.image_size, self.image_size), device=device
        )

        if save_data_dir is not None:
            self.save_as_sun3d_dataset(save_data_dir)

    @staticmethod
    def _matrix_to_text(rows):
        """Format nested lists as space-separated values, one row per line."""
        return "\n".join(" ".join(str(value) for value in row) for row in rows)

    def save_as_sun3d_dataset(self, output_dir):
        """Write all frames to ``output_dir`` in a SUN3D-style layout.

        Creates ``camera-intrinsics.txt`` plus, under ``seq-01``, one
        ``frame-XXXXXX.color.png``, ``.depth.png`` and ``.pose.txt`` per frame.
        Depth is stored as 16-bit values of ``depth * 1000``.

        Args:
            output_dir: Destination directory; created if missing.
        """
        import imageio
        from transforms3d.quaternions import quat2mat

        os.makedirs(output_dir, exist_ok=True)
        K = self.intrinsics.detach().cpu().numpy().tolist()
        with open(os.path.join(output_dir, "camera-intrinsics.txt"), "w") as fp:
            fp.write(self._matrix_to_text(K))

        seqdir = os.path.join(output_dir, "seq-01")
        os.makedirs(seqdir, exist_ok=True)

        # Constant frame change applied to every pose; built once, not per
        # frame. The quaternion is (w, x, y, z) as expected by transforms3d.
        sun3d_to_nvblox_T = torch.eye(4)
        sun3d_to_nvblox_T[:3, :3] = torch.tensor(quat2mat([0.707106769, 0.707106769, 0, 0]))
        nvblox_to_sun3d_T = torch.linalg.inv(sun3d_to_nvblox_T)

        max_depth_value = np.iinfo(np.uint16).max
        for i in range(len(self)):
            data = self[i]
            rgb = data["rgba"][:3, :, :].detach().cpu().permute(1, 2, 0).numpy()
            depth_mm = (data["depth"] * 1000).detach().cpu().numpy()
            # Clip before casting so large depths saturate instead of wrapping.
            depth = np.clip(depth_mm, 0, max_depth_value).astype(np.uint16)

            # The transform above lives on the CPU, so bring the pose there too.
            nvblox_pose = data["pose"].detach().cpu()
            sun3d_pose = nvblox_to_sun3d_T @ nvblox_pose
            pose_text = self._matrix_to_text(sun3d_pose.numpy().tolist())

            framename = f"frame-{i:06d}"
            imageio.imwrite(os.path.join(seqdir, f"{framename}.color.png"), rgb)
            imageio.imwrite(os.path.join(seqdir, f"{framename}.depth.png"), depth)
            with open(os.path.join(seqdir, f"{framename}.pose.txt"), "w") as fp:
                fp.write(pose_text)

    def __len__(self):
        """Return the number of rendered frames."""
        return self.n_frames

    def __getitem__(self, index):
        """Return frame ``index`` as a dict of tensors.

        Returns:
            A dict with keys ``"rgba"`` (channels-first image whose alpha is
            255 where depth is positive and 0 elsewhere), ``"depth"`` (float32
            depth image), ``"pose"`` (OpenCV-convention camera pose) and
            ``"intrinsics"`` (the shared camera matrix).
        """
        rgb_np = self.colors[index]
        depth_np = self.depths[index]
        alpha_np = (depth_np > 0).astype(np.uint8) * 255
        rgba_np = np.concatenate([rgb_np, alpha_np[:, :, None]], axis=2)
        return {
            "rgba": torch.from_numpy(rgba_np).permute((2, 0, 1)),
            "depth": torch.from_numpy(depth_np.astype(np.float32)),
            "pose": self.camera_poses_cv[index],
            "intrinsics": self.intrinsics,
        }