Source code for xiuminglib.camera

# pylint: disable=too-many-public-methods

import json
import numpy as np
from scipy.spatial.transform import Rotation

from .geometry.proj import to_homo, from_homo
from .geometry.rot import is_rot_mat, rad2deg
from .linalg import normalize


GLCAM_TO_CVCAM = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
CVCAM_TO_GLCAM = np.linalg.inv(GLCAM_TO_CVCAM)


[docs]class PerspCam: r"""Perspective camera in 35mm format. This is not an OpenGL/Blender camera (where :math:`+x` points right, :math:`+y` up, and :math:`-z` into the viewing direction), but rather a "CV camera" (where :math:`+x` points right, :math:`+y` down, and :math:`+z` into the viewing direction). See more in :attr:`~ext_mat`. Because we mostly consider just the camera and the object, we assume the object coordinate system (the "local system" in Blender) aligns with (and hence, is the same as) the world coordinate system (the "global system" in Blender). Note: - Sensor width of the 35mm format is actually 36mm. - This class assumes unit pixel aspect ratio (i.e., :math:`f_x = f_y`) and no skewing between the sensor plane and optical axis. - The active sensor size may be smaller than ``sensor_w`` and ``sensor_h``, depending on ``im_res``. See :attr:`~sensor_w_active` and :attr:`~sensor_h_active`. - ``aov``, ``sensor_h``, and ``sensor_w`` are hardware properties, having nothing to do with ``im_res``. """
[docs] def __init__( self, name='cam', f_pix=533.33, im_res=(256, 256), loc=(1, 1, 1), lookat=(0, 0, 0), up=(0, 1, 0)): """ Args: name (str, optional): Camera name. f_pix (float, optional): Focal length in pixel. im_res (array_like, optional): Image height and width in pixels. loc (array_like, optional): Camera location in object space. lookat (array_like, optional): Where the camera points to in object space, so default :math:`(0, 0, 0)` is the object center. up (array_like, optional): Vector in object space that, when projected, points upward in image. """ self._name = str(name) self._f_pix = float(f_pix) self._im_h = int(im_res[0]) self._im_w = int(im_res[1]) self._loc = np.array(loc) self._lookat = np.array(lookat) self._up = np.array(up)
@property def name(self): """str: Camera name.""" return self._name @name.setter def name(self, value): self._name = str(value) @property def f_pix(self): """float: Focal length in pixels.""" return self._f_pix @f_pix.setter def f_pix(self, value): self._f_pix = float(value) @property def im_h(self): """int: Image height. """ return self._im_h @im_h.setter def im_h(self, value): self._im_h = safe_cast_to_int(value) @property def im_w(self): """int: Image width. """ return self._im_w @im_w.setter def im_w(self, value): self._im_w = safe_cast_to_int(value) @property def loc(self): """numpy.ndarray: Camera location in object space.""" return self._loc @loc.setter def loc(self, value): self._loc = np.array(value) @property def lookat(self): """numpy.ndarray: Where in object space the camera points to.""" return self._lookat @lookat.setter def lookat(self, value): self._lookat = np.array(value) @property def up(self): """numpy.ndarray: Up vector, the vector in object space that, when projected, points upward on image plane. """ return self._up @up.setter def up(self, value): self._up = np.array(value) @property def aov(self): """numpy.ndarray: Vertical and horizontal angles of view in degrees.""" alpha_v = 2 * np.arctan(self.sensor_h / (2 * self.f_mm)) alpha_h = 2 * np.arctan(self.sensor_w / (2 * self.f_mm)) alpha = np.array([alpha_v, alpha_h]) return rad2deg(alpha) @property def sensor_w(self): """float: Sensor's physical width (fixed at 36mm).""" return 36 # mm @property def sensor_h(self): """float: Sensor's physical height (fixed at 24mm).""" return 24 # mm @property def sensor_fit_horizontal(self): """bool: Whether field of view angle fits along the horizontal or vertical direction. """ if self.sensor_h / self.im_h < self.sensor_w / self.im_w: return False return True @property def mm_per_pix(self): """float: Millimeter per pixel.""" if self.sensor_fit_horizontal: return self.sensor_w / self.im_w return self.sensor_h / self.im_h @property def sensor_w_active(self): """float: Actual sensor width (mm) in use (resolution-dependent).""" return self.im_w * self.mm_per_pix @property def sensor_h_active(self): """float: Actual sensor height (mm) in use (resolution-dependent).""" return self.im_h * self.mm_per_pix @property def f_mm(self): """float: 35mm format-equivalent focal length in mm.""" return self.mm_per_pix * self.f_pix @f_mm.setter def f_mm(self, value): self._f_pix = float(value) / self.mm_per_pix @property def int_mat(self): r"""numpy.ndarray: :math:`3\times 3` intrinsics matrix.""" return np.array([ [self.f_pix, 0, self.im_w / 2], [0, self.f_pix, self.im_h / 2], [0, 0, 1]]) @int_mat.setter def int_mat(self, mat): mat = np.array(mat) # Assert matrix structure assert mat.shape == (3, 3), "Intrinsics matrix is not 3x3" assert mat[1, 0] == 0, "`intrinsics[1, 0]` is not 0" skew = mat[0, 1] assert skew == 0, f"Skew ({skew}) is not 0" assert all(mat[2, :] == [0, 0, 1]), "Last row is not [0, 0, 1]" f_pix = mat[0, 0] assert f_pix == mat[1, 1], "X and Y focal lengths are different" # Set relevant properties self.f_pix = f_pix self.im_w = mat[0, 2] * 2 self.im_h = mat[1, 2] * 2 @property def ext_mat(self): r"""numpy.ndarray: :math:`3\times 4` object-to-camera extrinsics matrix, i.e., rotation and translation that transform a point from object space to camera space. Two coordinate systems involved: object space "obj" and camera space following the computer vision convention "cv", where :math:`+x` horizontally points right (to align with pixel coordinates), :math:`+y` vertically points down, and :math:`+z` is the look-at direction (because right-handed). """ # cv axes expressed in obj space cvz_obj = self.lookat - self.loc assert np.linalg.norm(cvz_obj) > 0, \ "Camera location and look-at coincide" cvx_obj = np.cross(cvz_obj, self.up) cvy_obj = np.cross(cvz_obj, cvx_obj) # Normalize cvz_obj = normalize(cvz_obj) cvx_obj = normalize(cvx_obj) cvy_obj = normalize(cvy_obj) # Compute rotation from obj to cv: R rot_cv2obj = np.vstack((cvx_obj, cvy_obj, cvz_obj)).T # such that # rot_cv2obj transforms (1, 0, 0), i.e., cvx, into cvx_obj rot_obj2cv = np.linalg.inv(rot_cv2obj) # Extrinsics obj2cv = rot_obj2cv.dot( # translate first and then rotate np.array([ [1, 0, 0, -self.loc[0]], [0, 1, 0, -self.loc[1]], [0, 0, 1, -self.loc[2]]])) return obj2cv @ext_mat.setter def ext_mat(self, o2c): o2c = np.array(o2c) # Assert matrix structure assert o2c.shape == (3, 4), \ "This setter accepts only 3x4 extrinsics. Set ext_mat_4x4 instead?" # Call the 4x4 setter o2c_4x4 = np.vstack((o2c, [0, 0, 0, 1])) self.ext_mat_4x4 = o2c_4x4 @property def ext_mat_4x4(self): r"""numpy.ndarray: Padding :math:`[0, 0, 0, 1]` to bottom of the :math:`3\times 4` extrinsics matrix to make it invertible. """ return np.vstack((self.ext_mat, [0, 0, 0, 1])) @ext_mat_4x4.setter def ext_mat_4x4(self, o2c): o2c = np.array(o2c) # Assert matrix structure assert o2c.shape == (4, 4), \ "This setter accepts only 4x4 extrinsics. Set ext_mat instead?" assert all(o2c[3, :] == [0, 0, 0, 1]), \ "Last row of 4x4 extrinsics must be [0, 0, 0, 1]" assert is_rot_mat(o2c[:3, :3]), \ "The R part of object-to-camera is not a valid rotation matrix" # Camera to object space c2o = np.linalg.inv(o2c) # Camera location in object space is the origin of camera space loc = c2o.dot(to_homo([0, 0, 0])) self.loc = from_homo(loc) # Look-at in object space is any point on +z in camera space lookat = c2o.dot(to_homo([0, 0, 1])) self.lookat = from_homo(lookat) # Up vector in object space is -y in camera space up_end = c2o.dot(to_homo([0, -1, 0])) self.up = from_homo(up_end) - self.loc @property def proj_mat(self): r"""numpy.ndarray: :math:`3\times 4` projection matrix, derived from intrinsics and extrinsics. """ return self.int_mat.dot(self.ext_mat) @property def blender_rot_euler(self): """numpy.ndarray: Euler rotations in degrees.""" c2o = self.get_cam2obj(cam_type='blender') rot_mat = c2o[:3, :3] rot = Rotation.from_matrix(rot_mat) euler_angles = rot.as_euler('xyz', degrees=True) return euler_angles
[docs] def to_dict(self, app=None): """Converts this camera to a dictionary of its properties. Args: app (str, optional): For what application are we converting? Accepted are ``None`` and ``'blender'``. Returns: dict: This camera as a dictionary. """ if isinstance(app, str): app = app.lower() if app is None: prop_dict = { 'name': self.name, 'f_mm': self.f_mm, 'f_pix': self.f_pix, 'sensor_fit_horizontal': self.sensor_fit_horizontal, 'sensor_w': self.sensor_w, 'sensor_w_active': self.sensor_w_active, 'sensor_h': self.sensor_h, 'sensor_h_active': self.sensor_h_active, 'mm_per_pix': self.mm_per_pix, 'im_h': self.im_h, 'im_w': self.im_w, 'loc': self.loc, 'lookat': self.lookat, 'up': self.up, 'aov': self.aov, 'int_mat': self.int_mat, 'ext_mat': self.ext_mat, 'proj_mat': self.proj_mat} elif app == 'blender': prop_dict = { 'name': self.name, 'f_mm': self.f_mm, 'im_h': self.im_h, 'im_w': self.im_w, 'sensor_fit_horizontal': self.sensor_fit_horizontal, 'sensor_h': self.sensor_h, 'sensor_w': self.sensor_w, 'loc': self.loc, 'rot_euler_deg': self.blender_rot_euler} else: raise NotImplementedError(app) return prop_dict
def __str__(self): prop_dict = self.to_dict() prop_dict_serializable = {} for k, v in prop_dict.items(): if isinstance(v, np.ndarray): prop_dict_serializable[k] = v.tolist() else: prop_dict_serializable[k] = v prop_str = json.dumps(prop_dict_serializable, indent=4) return prop_str
[docs] def get_obj2cam(self, cam_type='cv', square=False): r"""Gets the object-to-camera transformation matrix. Args: cam_type (str, optional): Accepted are ``'cv'``/``'opencv'`` and ``'opengl'``/``'blender'``. square (bool, optional): If true, the last row of :math:`[0, 0, 0, 1]` is kept, which makes the matrix invertible. Returns: numpy.ndarray: :math:`3\times 4` or :math:`4\times 4` object-to-camera transformation matrix. """ cam_type = cam_type.lower() if cam_type in ('cv', 'opencv'): obj2cam = self.ext_mat elif cam_type in ('gl', 'opengl', 'blender'): # Additional 180-degree rotation around x-axis rot = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]]) obj2cam = rot.dot(self.ext_mat) else: raise NotImplementedError(f"Camera type: {cam_type}") if square: obj2cam_4x4 = np.vstack((obj2cam, [0, 0, 0, 1])) return obj2cam_4x4 return obj2cam
[docs] def get_cam2obj(self, cam_type='cv', square=False): """Inverse of :func:`get_obj2cam`. One example use: calling this with ``cam_type='blender'`` gives Blender's ``cam.matrix_world``. """ obj2cam_4x4 = self.get_obj2cam(cam_type=cam_type, square=True) cam2obj_4x4 = np.linalg.inv(obj2cam_4x4) if square: return cam2obj_4x4 return cam2obj_4x4[:3, :]
[docs] def set_from_mitsuba(self, xml_path): """Sets camera according to a Mitsuba XML file. Args: xml_path (str): Path to the XML file. """ from xml.etree.ElementTree import parse tree = parse(xml_path) # Focal length f_tag = tree.find('./sensor/string[@name="focalLength"]') if f_tag is None: self.f_mm = 50. # Mitsuba default else: f_str = f_tag.attrib['value'] if f_str[-2:] == 'mm': self.f_mm = float(f_str[:-2]) else: raise NotImplementedError(f_str) # Extrinsics cam_transform = tree.find('./sensor/transform/lookAt').attrib self.loc = np.fromstring(cam_transform['origin'], sep=',') self.lookat = np.fromstring(cam_transform['target'], sep=',') self.up = np.fromstring(cam_transform['up'], sep=',') # Resolution self.im_h = int( tree.find('./sensor/film/integer[@name="height"]').attrib['value']) self.im_w = int( tree.find('./sensor/film/integer[@name="width"]').attrib['value'])
[docs] def proj(self, pts, space='object'): r"""Projects 3D points to 2D. Args: pts (array_like): 3D point(s) of shape :math:`N\times 3` or :math:`3\times N`, or of length 3. space (str, optional): In which space these points are specified: ``'object'`` or ``'camera'``. Returns: array_like: Vertical and horizontal coordinates of the projections, following: .. code-block:: none +-----------> dim1 | | | v dim0 """ pts = np.array(pts) if pts.shape == (3,): pts = pts.reshape((3, 1)) elif pts.shape[1] == 3: pts = pts.T assert space in ('object', 'camera'), "Unrecognized space" pts_homo = to_homo(pts) # 3xN to 4xN if space == 'object': proj_mat = self.proj_mat else: ext_mat = np.hstack((np.eye(3), np.zeros((3, 1)))) proj_mat = self.int_mat.dot(ext_mat) # Project hvs_homo = proj_mat.dot(pts_homo) # 3xN: dim0 is horizontal, and dim1 is vertical hvs = from_homo(hvs_homo) # 3xN to 2xN vhs = np.vstack((hvs[1, :], hvs[0, :])).T if vhs.shape[0] == 1: # Single point vhs = vhs[0, :] return vhs
[docs] def backproj( self, depth, fg_mask=None, bg_fill=0., depth_type='plane', space='object'): """Backprojects a depth map to 3D points. Resolution of the depth map may be different from :attr:`im_h` and :attr:`im_w`: :attr:`im_h` and :attr:`im_w` decide the image coordinate bounds, and the depth resolution decides number of steps. Args: depth (numpy.ndarray): Depth map. fg_mask (numpy.ndarray, optional): Backproject only pixels falling inside this foreground mask. Its values should be logical. bg_fill (flaot, optional): Filler value for background region. depth_type (str, optional): Plane or ray depth. space (str, optional): In which space the backprojected points are specified: ``'object'`` or ``'camera'``. Returns: numpy.ndarray: :math:`xyz` map. """ if fg_mask is None: fg_mask = np.ones(depth.shape, dtype=bool) assert depth_type in ('ray', 'plane'), "Unrecognized depth type" assert space in ('object', 'camera'), "Unrecognized space" # Generate 2D coordinates v_is, h_is = np.where(fg_mask) hs = (h_is + 0.5) / fg_mask.shape[1] * self.im_w vs = (v_is + 0.5) / fg_mask.shape[0] * self.im_h h_c, v_c = self.im_w / 2, self.im_h / 2 zs = depth[fg_mask] if depth_type == 'ray': d2 = np.power(vs - v_c, 2) + np.power(hs - h_c, 2) # Similar triangles zs_plane = np.multiply( zs, self.f_pix / np.sqrt(self.f_pix ** 2 + d2)) zs = zs_plane # Backproject to camera space xs = np.multiply(zs, hs - h_c) / self.f_pix ys = np.multiply(zs, vs - v_c) / self.f_pix pts = np.vstack((xs, ys, zs)) if space == 'object': # Need to further transform to object space o2c = self.ext_mat_4x4 c2o = np.linalg.inv(o2c) pts_o = c2o.dot(to_homo(pts)) pts = from_homo(pts_o, axis=0) pts = pts.T # (n_fg_pts, 3) # Put them back into a buffer xyz = bg_fill * np.ones(depth.shape + (3,), dtype=float) xyz[np.dstack([fg_mask] * 3)] = pts.ravel() return xyz
[docs] def gen_rays(self, spp=1): r"""Generates ray directions in object space, with the ray origin being the camera location. Args: spp (int, optional): Samples (or number of rays) per pixel. Must be a perfect square :math:`S^2` due to uniform, deterministic supersampling. Returns: numpy.ndarray: An :math:`H\times W\times S^2\times 3` array of ray directions. """ sps = np.sqrt(spp) if sps.is_integer(): sps = int(sps) else: raise ValueError( f"Samples per pixel ({spp}) is not a perfect square") # Supersample according to samples per side h, w = self.im_h * sps, self.im_w * sps depth = np.ones((h, w), dtype=float) # Backproject a uniform plane depth map to a wall in 3D xyzs = self.backproj(depth) # (HS)x(WS)x3 # Compute ray directions ray_dirs_ss = normalize(xyzs - self.loc, axis=2) # Put samples in each pixel bucket ray_dirs = [] for i in range(sps): for j in range(sps): ray_dirs.append(ray_dirs_ss[i::sps, j::sps, :]) ray_dirs = np.stack(ray_dirs, axis=2) return ray_dirs # HxWx(S^2)x3
[docs] def resize(self, new_h=None, new_w=None): """Updates the camera intrinsics according to the new size. Args: new_h (int, optional): Target height. If ``None``, will be calculated according to the target width, assuming the same aspect ratio. new_w (int, optional): Target width. If ``None``, will be calculated according to the target height, assuming the same aspect ratio. """ if new_h is not None and new_w is not None: assert int(self.im_h / self.im_w * new_w) == new_h, \ "Aspect ratio change violates the `f_x == f_y` assumption" elif new_h is None and new_w is not None: new_h = int(self.im_h / self.im_w * new_w) elif new_h is not None and new_w is None: new_w = int(self.im_w / self.im_h * new_h) else: raise ValueError( "At least one of new height or width must be given") # Update relevant properties self.f_pix = new_h / float(self.im_h) * self.f_pix self.im_h = new_h self.im_w = new_w
[docs]def safe_cast_to_int(x): """Casts a string or float to integer only when safe. Args: x (str or float): Input to be cast to integer. Returns: int: Integer version of the input. """ int_x = int(x) if np.issubdtype(type(x), np.floating): assert int_x == x, \ f"Can't safely cast a non-integer value ({x}) to integer" return int_x