225 lines
7.8 KiB
Python
225 lines
7.8 KiB
Python
#
|
|
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
#
|
|
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
|
# property and proprietary rights in and to this material, related
|
|
# documentation and any modifications thereto. Any use, reproduction,
|
|
# disclosure or distribution of this material and related documentation
|
|
# without an express license agreement from NVIDIA CORPORATION or
|
|
# its affiliates is strictly prohibited.
|
|
#
|
|
import math
|
|
import os
|
|
import sys
|
|
from typing import Optional
|
|
|
|
import numpy as np
|
|
import pyrender
|
|
import torch
|
|
import torch.nn.functional as F
|
|
import trimesh
|
|
from torch.utils.data.dataset import Dataset
|
|
|
|
|
|
def fov_and_size_to_intrinsics(fov, img_size, device="cpu"):
    """Build a 3x3 pinhole intrinsics matrix from a field of view and an image size.

    Args:
        fov: Field of view in degrees. The same angle is applied to both image
            axes, so ``fx`` and ``fy`` only coincide for square images.
        img_size: ``(height, width)`` of the image in pixels.
        device: Torch device on which the matrix is created.

    Returns:
        A ``(3, 3)`` float32 tensor ``[[fx, 0, cx], [0, fy, cy], [0, 0, 1]]``.
    """
    img_h, img_w = img_size

    # Both focal lengths share the same denominator.
    two_tan_half_fov = 2 * math.tan(math.radians(fov) / 2)
    fx = img_w / two_tan_half_fov
    fy = img_h / two_tan_half_fov

    # The principal point is the image center: cx runs along the width
    # (x axis) and cy along the height (y axis).
    cx = img_w / 2
    cy = img_h / 2

    return torch.tensor(
        [[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
        dtype=torch.float,
        device=device,
    )
|
|
|
|
|
|
def lookat_to_cam_pose(eyes, ats, ups=((0, 0, 1),), device="cpu", mode="opengl"):
    """Convert batched look-at parameters into 4x4 camera-to-world poses.

    Args:
        eyes: Camera positions, shape ``(N, 3)``; a tensor or nested sequence.
        ats: Points the cameras look at, shape ``(N, 3)`` (or broadcastable).
        ups: World up direction(s), shape ``(1, 3)`` or ``(N, 3)``.
        device: Device used when a non-tensor input is converted to a tensor.
        mode: Camera axis convention of the returned rotation.
            ``"opengl"``: columns are right, up, -view (camera looks along -Z).
            ``"opencv"``: columns are right, -up, view (camera looks along +Z).

    Returns:
        A float32 tensor of shape ``(N, 4, 4)``; the upper-left 3x3 block is the
        rotation and the last column holds the camera position.

    Raises:
        ValueError: If ``mode`` is neither ``"opengl"`` nor ``"opencv"``.

    Note:
        When the view direction is parallel to ``ups`` the cross product is
        zero and the resulting rotation is degenerate.
    """
    # Fail fast, before doing any tensor work.
    if mode not in ("opengl", "opencv"):
        raise ValueError(f"Unknown mode: {mode}")

    if not isinstance(eyes, torch.Tensor):
        eyes = torch.tensor(eyes, device=device, dtype=torch.float32)
    if not isinstance(ats, torch.Tensor):
        ats = torch.tensor(ats, device=device, dtype=torch.float32)
    if not isinstance(ups, torch.Tensor):
        ups = torch.tensor(ups, device=device, dtype=torch.float32)

    # Inputs may arrive as a mix of tensors and sequences; align everything
    # with `eyes` so the arithmetic below never mixes devices or dtypes.
    ats = ats.to(device=eyes.device, dtype=eyes.dtype)
    ups = ups.to(device=eyes.device, dtype=eyes.dtype)

    batch_size = eyes.shape[0]

    camera_view = F.normalize(ats - eyes, dim=1)
    # Expand explicitly so a single up vector is shared by the whole batch.
    ups = ups.expand_as(camera_view)
    camera_right = F.normalize(torch.cross(camera_view, ups, dim=1), dim=1)
    camera_up = F.normalize(torch.cross(camera_right, camera_view, dim=1), dim=1)

    # Columns of the pose: the three camera axes followed by the position.
    if mode == "opengl":
        columns = [camera_right, camera_up, -camera_view, eyes]
    else:
        columns = [camera_right, -camera_up, camera_view, eyes]

    # NOTE: the result is allocated on torch's default device, not on `device`;
    # the assignment below copies across devices. Existing callers rely on
    # this placement, so it is kept.
    T = torch.zeros((batch_size, 4, 4))
    T[:, :3, :] = torch.stack(columns, dim=2)
    T[:, 3, 3] = 1.0
    return T.float()
|
|
|
|
|
|
def sample_sphere_points(N, radius, device="cuda"):
    """Draw ``N`` random points on a sphere of the given radius around the origin.

    Two angles are drawn uniformly at random, one over a half turn and one over
    a full turn, and turned into unit vectors that are then scaled by
    ``radius``. Uniform angles do not give a uniform density over the surface.

    Args:
        N: Number of points to draw.
        radius: Sphere radius.
        device: Torch device on which the samples are generated.

    Returns:
        A tensor of shape ``(N, 3)`` whose rows have norm ``radius``.
    """
    # Draw order matters for reproducibility under a fixed seed:
    # the half-turn angle first, then the full-turn angle.
    half_turn_angle = (torch.rand(size=(N, 1), device=device) - 0.5) * torch.pi
    full_turn_angle = (torch.rand(size=(N, 1), device=device) - 0.5) * torch.pi * 2

    unit_vectors = torch.cat(
        (
            torch.cos(half_turn_angle) * torch.cos(full_turn_angle),
            torch.sin(half_turn_angle) * torch.cos(full_turn_angle),
            torch.sin(full_turn_angle),
        ),
        dim=1,
    )
    return unit_vectors * radius
|
|
|
|
|
|
def sample_sphere_poses(N, origin, radius, device="cuda"):
    """Sample ``N`` camera poses on a sphere around ``origin``, all looking at it.

    Args:
        N: Number of poses to sample.
        origin: Center of the sphere and look-at target, a length-3 tensor or
            array-like.
        radius: Distance of every camera from ``origin``.
        device: Torch device used for sampling.

    Returns:
        A tuple ``(poses_gl, poses_cv)`` holding the same ``N`` poses expressed
        in the OpenGL and the OpenCV camera conventions, each ``(N, 4, 4)``.
    """
    # Bring the center onto the sampling device as float32, whatever it came in as.
    origin = torch.as_tensor(origin).float().to(device)

    # Offset the samples by `origin`: without the offset the cameras would
    # orbit the world origin instead of the requested center.
    eyes = origin + sample_sphere_points(N, radius, device)
    ats = origin[None, :].repeat((N, 1))

    poses_gl = lookat_to_cam_pose(eyes, ats, device=device, mode="opengl")
    poses_cv = lookat_to_cam_pose(eyes, ats, device=device, mode="opencv")
    return poses_gl, poses_cv
|
|
|
|
|
|
def compute_origin_and_radius(trimesh_scene):
    """Return the center and bounding-sphere radius of an object's bounding box.

    Args:
        trimesh_scene: Any object exposing ``bounds`` as a ``(2, 3)`` array-like
            of ``[min_corner, max_corner]``.

    Returns:
        A tuple ``(center, radius)``: ``center`` is the midpoint of the
        axis-aligned bounding box and ``radius`` is the distance from that
        midpoint to a corner of the box.
    """
    low, high = np.asarray(trimesh_scene.bounds)

    # Parenthesised on purpose: `high + low / 2` would halve only `low`.
    center = (low + high) / 2

    # Both corners are equally far from the midpoint, so one norm suffices.
    radius = np.linalg.norm(high - center)
    return center, radius
|
|
|
|
|
|
def render_batch(trimesh_mesh, camera_poses, fov, image_size):
    """Render color and depth images of a mesh from a batch of camera poses.

    Args:
        trimesh_mesh: Mesh to render; it is converted with
            ``pyrender.Mesh.from_trimesh``.
        camera_poses: Tensor of 4x4 camera poses, one per image. They are
            passed to pyrender unchanged, so they are presumably expected in
            pyrender's (OpenGL-style) camera convention.
        fov: Vertical field of view handed to ``pyrender.PerspectiveCamera``
            as ``yfov``.
        image_size: Width and height, in pixels, of the square output images.

    Returns:
        A tuple ``(colors, depths)`` of numpy arrays stacking the per-pose
        color and depth images returned by the renderer.
    """
    camera_poses = camera_poses.detach().cpu().numpy()

    scene = pyrender.Scene()
    scene.add(pyrender.Mesh.from_trimesh(trimesh_mesh))

    # Camera and light start at the identity pose and are moved together for
    # every frame, so the light always shines from the camera.
    camera = pyrender.Node(
        camera=pyrender.PerspectiveCamera(yfov=fov, aspectRatio=1.0),
        matrix=np.eye(4),
    )
    scene.add_node(camera)

    light = pyrender.Node(
        light=pyrender.SpotLight(
            color=np.ones(3),
            intensity=1.0,
            innerConeAngle=np.pi / 16.0,
            outerConeAngle=np.pi / 2.0,
        ),
        matrix=np.eye(4),
    )
    scene.add_node(light)

    renderer = pyrender.OffscreenRenderer(image_size, image_size)
    colors = []
    depths = []
    try:
        for camera_pose in camera_poses:
            scene.set_pose(camera, camera_pose)
            scene.set_pose(light, camera_pose)
            color, depth = renderer.render(scene)
            colors.append(color)
            depths.append(depth)
    finally:
        # Release the offscreen OpenGL resources even if rendering fails.
        renderer.delete()

    return np.asarray(colors), np.asarray(depths)
|
|
|
|
|
|
"""
MeshDataset takes a mesh (a path to a mesh file or an already loaded trimesh object)
and uses PyRender to render color and depth images of it from camera poses sampled
on a sphere around the mesh.
"""
|
|
|
|
|
|
class MeshDataset(Dataset):
    """Dataset of color/depth renderings of a mesh seen from random viewpoints.

    At construction time the mesh is loaded (unless passed in), camera poses
    are sampled on a sphere of twice the mesh's bounding radius, and every
    frame is rendered up front. Items are dictionaries with the keys
    ``"rgba"``, ``"depth"``, ``"pose"`` and ``"intrinsics"``.
    """

    def __init__(
        self,
        mesh_file: Optional[str] = None,
        n_frames: int = 10,
        image_size: int = 256,
        save_data_dir: Optional[str] = None,
        trimesh_mesh: "Optional[trimesh.Trimesh]" = None,
        fov_deg: float = 60,
        device: str = "cuda",
    ) -> None:
        """Load the mesh, sample camera poses and render all frames.

        Args:
            mesh_file: Path of the mesh to load; used only when
                ``trimesh_mesh`` is not given.
            n_frames: Number of viewpoints to sample and render.
            image_size: Width and height, in pixels, of the square renderings.
            save_data_dir: If given, the rendered frames are also written to
                this directory via ``save_as_sun3d_dataset``.
            trimesh_mesh: Already loaded mesh; takes precedence over
                ``mesh_file``.
            fov_deg: Camera field of view in degrees.
            device: Torch device used for pose sampling and the intrinsics.

        Raises:
            ValueError: If neither ``mesh_file`` nor ``trimesh_mesh`` is given.
        """
        super().__init__()
        if trimesh_mesh is None and mesh_file is None:
            raise ValueError("Either mesh_file or trimesh_mesh must be provided.")

        self.mesh_file = mesh_file
        self.n_frames = n_frames
        self.image_size = image_size
        self.fov_deg = fov_deg
        self.device = device
        if trimesh_mesh is None:
            self.trimesh_mesh = trimesh.load(self.mesh_file)
        else:
            self.trimesh_mesh = trimesh_mesh

        origin, radius = compute_origin_and_radius(self.trimesh_mesh)
        # Cameras sit at twice the bounding radius from the center.
        sphere_radius = radius * 2.0
        self.camera_poses_gl, self.camera_poses_cv = sample_sphere_poses(
            n_frames, origin, sphere_radius, device
        )
        # The OpenGL-convention poses are the ones handed to the renderer.
        self.colors, self.depths = render_batch(
            self.trimesh_mesh,
            self.camera_poses_gl,
            fov=math.radians(self.fov_deg),
            image_size=self.image_size,
        )
        self.intrinsics = fov_and_size_to_intrinsics(
            self.fov_deg, (self.image_size, self.image_size), device=device
        )

        if save_data_dir is not None:
            self.save_as_sun3d_dataset(save_data_dir)

    @staticmethod
    def _matrix_to_text(rows):
        """Format a nested list as text: one row per line, values space-separated."""
        return "\n".join(" ".join(str(value) for value in row) for row in rows)

    def save_as_sun3d_dataset(self, output_dir):
        """Write all frames to ``output_dir`` as intrinsics, images and pose files.

        Creates ``camera-intrinsics.txt`` plus a ``seq-01`` directory holding,
        for every frame, ``frame-XXXXXX.color.png``, ``frame-XXXXXX.depth.png``
        (16-bit, depth multiplied by 1000) and ``frame-XXXXXX.pose.txt``.

        Args:
            output_dir: Directory to write into; created if missing.
        """
        # Imported here rather than at module level, so these packages are
        # only required when exporting.
        import imageio
        from transforms3d.quaternions import quat2mat

        os.makedirs(output_dir, exist_ok=True)
        K = self.intrinsics.detach().cpu().numpy().tolist()
        with open(f"{output_dir}/camera-intrinsics.txt", "w") as fp:
            fp.write(self._matrix_to_text(K))

        seqdir = f"{output_dir}/seq-01"
        os.makedirs(seqdir, exist_ok=True)

        # Constant change of frame applied to every pose, built once instead of
        # per frame. The quaternion is (w, x, y, z): a 90 degree rotation
        # about the x axis.
        eigen_quat = [0.707106769, 0.707106769, 0, 0]
        sun3d_to_nvblox_T = torch.eye(4)
        sun3d_to_nvblox_T[:3, :3] = torch.tensor(quat2mat(eigen_quat))
        nvblox_to_sun3d_T = torch.linalg.inv(sun3d_to_nvblox_T)

        max_depth_value = np.iinfo(np.uint16).max
        for i in range(len(self)):
            data = self[i]
            rgb = data["rgba"][:3, :, :].detach().cpu().permute(1, 2, 0).numpy()

            # Scale by 1000 (presumably meters to millimeters) and clamp so
            # far values saturate instead of wrapping around in uint16.
            depth = data["depth"].detach().cpu().numpy() * 1000
            depth = np.clip(depth, 0, max_depth_value).astype(np.uint16)

            # The transform lives on the CPU, so make sure the pose does too.
            nvblox_pose = data["pose"].detach().cpu().float()
            sun3d_pose = nvblox_to_sun3d_T @ nvblox_pose
            pose_text = self._matrix_to_text(sun3d_pose.numpy().tolist())

            framename = f"frame-{str(i).zfill(6)}"
            imageio.imwrite(f"{seqdir}/{framename}.color.png", rgb)
            imageio.imwrite(f"{seqdir}/{framename}.depth.png", depth)
            with open(f"{seqdir}/{framename}.pose.txt", "w") as fp:
                fp.write(pose_text)

    def __len__(self):
        """Return the number of rendered frames."""
        return self.n_frames

    def __getitem__(self, index):
        """Return frame ``index`` as a dictionary of tensors.

        Returns:
            A dict with ``"rgba"`` (channels-first image whose alpha is 255
            where depth is positive and 0 elsewhere), ``"depth"`` (float32),
            ``"pose"`` (the OpenCV-convention camera pose) and
            ``"intrinsics"`` (shared by all frames).
        """
        rgb_np = self.colors[index]
        depth_np = self.depths[index]

        # Pixels with no rendered geometry have zero depth; use that as alpha.
        a_np = (depth_np > 0).astype(np.uint8) * 255
        rgba_np = np.concatenate([rgb_np, a_np[:, :, None]], axis=2)

        # Height-width-channel to channel-height-width.
        rgba = torch.from_numpy(rgba_np).permute((2, 0, 1))
        depth = torch.from_numpy(depth_np.astype(np.float32)).float()

        return {
            "rgba": rgba,
            "depth": depth,
            "pose": self.camera_poses_cv[index],
            "intrinsics": self.intrinsics,
        }
|