# pylint: disable=too-many-public-methods
import json
import numpy as np
from scipy.spatial.transform import Rotation
from .geometry.proj import to_homo, from_homo
from .geometry.rot import is_rot_mat, rad2deg
from .linalg import normalize
# Change of basis between the OpenGL/Blender camera convention
# (+x right, +y up, -z forward) and the CV camera convention
# (+x right, +y down, +z forward): a 180-degree rotation around x.
GLCAM_TO_CVCAM = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
# This rotation is its own inverse, but invert explicitly for clarity.
CVCAM_TO_GLCAM = np.linalg.inv(GLCAM_TO_CVCAM)
class PerspCam:
    r"""Perspective camera in 35mm format.

    This is not an OpenGL/Blender camera (where :math:`+x` points right,
    :math:`+y` up, and :math:`-z` into the viewing direction), but rather a
    "CV camera" (where :math:`+x` points right, :math:`+y` down, and
    :math:`+z` into the viewing direction). See more in :attr:`~ext_mat`.

    Because we mostly consider just the camera and the object, we assume the
    object coordinate system (the "local system" in Blender) aligns with (and
    hence, is the same as) the world coordinate system (the "global system"
    in Blender).

    Note:
        - Sensor width of the 35mm format is actually 36mm.
        - This class assumes unit pixel aspect ratio (i.e., :math:`f_x = f_y`)
          and no skewing between the sensor plane and optical axis.
        - The active sensor size may be smaller than ``sensor_w`` and
          ``sensor_h``, depending on ``im_res``. See :attr:`~sensor_w_active`
          and :attr:`~sensor_h_active`.
        - ``aov``, ``sensor_h``, and ``sensor_w`` are hardware properties,
          having nothing to do with ``im_res``.
    """
    def __init__(
            self, name='cam', f_pix=533.33, im_res=(256, 256),
            loc=(1, 1, 1), lookat=(0, 0, 0), up=(0, 1, 0)):
        """
        Args:
            name (str, optional): Camera name.
            f_pix (float, optional): Focal length in pixel.
            im_res (array_like, optional): Image height and width in pixels.
            loc (array_like, optional): Camera location in object space.
            lookat (array_like, optional): Where the camera points to in
                object space, so default :math:`(0, 0, 0)` is the object
                center.
            up (array_like, optional): Vector in object space that, when
                projected, points upward in image.
        """
        self._name = str(name)
        self._f_pix = float(f_pix)
        self._im_h = int(im_res[0])
        self._im_w = int(im_res[1])
        self._loc = np.array(loc)
        self._lookat = np.array(lookat)
        self._up = np.array(up)

    @property
    def name(self):
        """str: Camera name."""
        return self._name

    @name.setter
    def name(self, value):
        self._name = str(value)

    @property
    def f_pix(self):
        """float: Focal length in pixels."""
        return self._f_pix

    @f_pix.setter
    def f_pix(self, value):
        self._f_pix = float(value)

    @property
    def im_h(self):
        """int: Image height."""
        return self._im_h

    @im_h.setter
    def im_h(self, value):
        self._im_h = safe_cast_to_int(value)

    @property
    def im_w(self):
        """int: Image width."""
        return self._im_w

    @im_w.setter
    def im_w(self, value):
        self._im_w = safe_cast_to_int(value)

    @property
    def loc(self):
        """numpy.ndarray: Camera location in object space."""
        return self._loc

    @loc.setter
    def loc(self, value):
        self._loc = np.array(value)

    @property
    def lookat(self):
        """numpy.ndarray: Where in object space the camera points to."""
        return self._lookat

    @lookat.setter
    def lookat(self, value):
        self._lookat = np.array(value)

    @property
    def up(self):
        """numpy.ndarray: Up vector, the vector in object space that, when
        projected, points upward on image plane.
        """
        return self._up

    @up.setter
    def up(self, value):
        self._up = np.array(value)

    @property
    def aov(self):
        """numpy.ndarray: Vertical and horizontal angles of view in degrees."""
        # Half the sensor extent over the focal length gives the half-angle
        alpha_v = 2 * np.arctan(self.sensor_h / (2 * self.f_mm))
        alpha_h = 2 * np.arctan(self.sensor_w / (2 * self.f_mm))
        alpha = np.array([alpha_v, alpha_h])
        return rad2deg(alpha)

    @property
    def sensor_w(self):
        """float: Sensor's physical width (fixed at 36mm)."""
        return 36 # mm

    @property
    def sensor_h(self):
        """float: Sensor's physical height (fixed at 24mm)."""
        return 24 # mm

    @property
    def sensor_fit_horizontal(self):
        """bool: Whether field of view angle fits along the horizontal or
        vertical direction.
        """
        # The direction with the larger mm-per-pixel ratio is the limiting one
        if self.sensor_h / self.im_h < self.sensor_w / self.im_w:
            return False
        return True

    @property
    def mm_per_pix(self):
        """float: Millimeter per pixel."""
        if self.sensor_fit_horizontal:
            return self.sensor_w / self.im_w
        return self.sensor_h / self.im_h

    @property
    def sensor_w_active(self):
        """float: Actual sensor width (mm) in use (resolution-dependent)."""
        return self.im_w * self.mm_per_pix

    @property
    def sensor_h_active(self):
        """float: Actual sensor height (mm) in use (resolution-dependent)."""
        return self.im_h * self.mm_per_pix

    @property
    def f_mm(self):
        """float: 35mm format-equivalent focal length in mm."""
        return self.mm_per_pix * self.f_pix

    @f_mm.setter
    def f_mm(self, value):
        self._f_pix = float(value) / self.mm_per_pix

    @property
    def int_mat(self):
        r"""numpy.ndarray: :math:`3\times 3` intrinsics matrix.

        The principal point is assumed to be at the image center.
        """
        return np.array([
            [self.f_pix, 0, self.im_w / 2],
            [0, self.f_pix, self.im_h / 2],
            [0, 0, 1]])

    @int_mat.setter
    def int_mat(self, mat):
        mat = np.array(mat)
        # Assert matrix structure
        assert mat.shape == (3, 3), "Intrinsics matrix is not 3x3"
        assert mat[1, 0] == 0, "`intrinsics[1, 0]` is not 0"
        skew = mat[0, 1]
        assert skew == 0, f"Skew ({skew}) is not 0"
        assert all(mat[2, :] == [0, 0, 1]), "Last row is not [0, 0, 1]"
        f_pix = mat[0, 0]
        assert f_pix == mat[1, 1], "X and Y focal lengths are different"
        # Set relevant properties; resolution is recovered from the
        # principal point under the centered-principal-point assumption
        self.f_pix = f_pix
        self.im_w = mat[0, 2] * 2
        self.im_h = mat[1, 2] * 2

    @property
    def ext_mat(self):
        r"""numpy.ndarray: :math:`3\times 4` object-to-camera extrinsics
        matrix, i.e., rotation and translation that transform a point from
        object space to camera space.

        Two coordinate systems involved: object space "obj" and camera space
        following the computer vision convention "cv", where :math:`+x`
        horizontally points right (to align with pixel coordinates),
        :math:`+y` vertically points down, and :math:`+z` is the look-at
        direction (because right-handed).
        """
        # cv axes expressed in obj space
        cvz_obj = self.lookat - self.loc
        assert np.linalg.norm(cvz_obj) > 0, \
            "Camera location and look-at coincide"
        # Right-handed frame: x = z cross up (points right),
        # y = z cross x (points down)
        cvx_obj = np.cross(cvz_obj, self.up)
        cvy_obj = np.cross(cvz_obj, cvx_obj)
        # Normalize
        cvz_obj = normalize(cvz_obj)
        cvx_obj = normalize(cvx_obj)
        cvy_obj = normalize(cvy_obj)
        # Compute rotation from obj to cv: R
        rot_cv2obj = np.vstack((cvx_obj, cvy_obj, cvz_obj)).T # such that
        # rot_cv2obj transforms (1, 0, 0), i.e., cvx, into cvx_obj
        rot_obj2cv = np.linalg.inv(rot_cv2obj)
        # Extrinsics
        obj2cv = rot_obj2cv.dot( # translate first and then rotate
            np.array([
                [1, 0, 0, -self.loc[0]],
                [0, 1, 0, -self.loc[1]],
                [0, 0, 1, -self.loc[2]]]))
        return obj2cv

    @ext_mat.setter
    def ext_mat(self, o2c):
        o2c = np.array(o2c)
        # Assert matrix structure
        assert o2c.shape == (3, 4), \
            "This setter accepts only 3x4 extrinsics. Set ext_mat_4x4 instead?"
        # Call the 4x4 setter
        o2c_4x4 = np.vstack((o2c, [0, 0, 0, 1]))
        self.ext_mat_4x4 = o2c_4x4

    @property
    def ext_mat_4x4(self):
        r"""numpy.ndarray: Padding :math:`[0, 0, 0, 1]` to bottom of the
        :math:`3\times 4` extrinsics matrix to make it invertible.
        """
        return np.vstack((self.ext_mat, [0, 0, 0, 1]))

    @ext_mat_4x4.setter
    def ext_mat_4x4(self, o2c):
        o2c = np.array(o2c)
        # Assert matrix structure
        assert o2c.shape == (4, 4), \
            "This setter accepts only 4x4 extrinsics. Set ext_mat instead?"
        assert all(o2c[3, :] == [0, 0, 0, 1]), \
            "Last row of 4x4 extrinsics must be [0, 0, 0, 1]"
        assert is_rot_mat(o2c[:3, :3]), \
            "The R part of object-to-camera is not a valid rotation matrix"
        # Camera to object space
        c2o = np.linalg.inv(o2c)
        # Camera location in object space is the origin of camera space
        loc = c2o.dot(to_homo([0, 0, 0]))
        self.loc = from_homo(loc)
        # Look-at in object space is any point on +z in camera space
        lookat = c2o.dot(to_homo([0, 0, 1]))
        self.lookat = from_homo(lookat)
        # Up vector in object space is -y in camera space
        up_end = c2o.dot(to_homo([0, -1, 0]))
        self.up = from_homo(up_end) - self.loc

    @property
    def proj_mat(self):
        r"""numpy.ndarray: :math:`3\times 4` projection matrix, derived from
        intrinsics and extrinsics.
        """
        return self.int_mat.dot(self.ext_mat)

    @property
    def blender_rot_euler(self):
        """numpy.ndarray: Euler rotations in degrees."""
        c2o = self.get_cam2obj(cam_type='blender')
        rot_mat = c2o[:3, :3]
        rot = Rotation.from_matrix(rot_mat)
        euler_angles = rot.as_euler('xyz', degrees=True)
        return euler_angles

    def to_dict(self, app=None):
        """Converts this camera to a dictionary of its properties.

        Args:
            app (str, optional): For what application are we converting?
                Accepted are ``None`` and ``'blender'``.

        Returns:
            dict: This camera as a dictionary.
        """
        if isinstance(app, str):
            app = app.lower()
        if app is None:
            prop_dict = {
                'name': self.name, 'f_mm': self.f_mm, 'f_pix': self.f_pix,
                'sensor_fit_horizontal': self.sensor_fit_horizontal,
                'sensor_w': self.sensor_w,
                'sensor_w_active': self.sensor_w_active,
                'sensor_h': self.sensor_h,
                'sensor_h_active': self.sensor_h_active,
                'mm_per_pix': self.mm_per_pix,
                'im_h': self.im_h, 'im_w': self.im_w,
                'loc': self.loc, 'lookat': self.lookat, 'up': self.up,
                'aov': self.aov, 'int_mat': self.int_mat,
                'ext_mat': self.ext_mat, 'proj_mat': self.proj_mat}
        elif app == 'blender':
            prop_dict = {
                'name': self.name, 'f_mm': self.f_mm,
                'im_h': self.im_h, 'im_w': self.im_w,
                'sensor_fit_horizontal': self.sensor_fit_horizontal,
                'sensor_h': self.sensor_h, 'sensor_w': self.sensor_w,
                'loc': self.loc, 'rot_euler_deg': self.blender_rot_euler}
        else:
            raise NotImplementedError(app)
        return prop_dict

    def __str__(self):
        prop_dict = self.to_dict()
        # Arrays are not JSON-serializable; convert them to nested lists
        prop_dict_serializable = {}
        for k, v in prop_dict.items():
            if isinstance(v, np.ndarray):
                prop_dict_serializable[k] = v.tolist()
            else:
                prop_dict_serializable[k] = v
        prop_str = json.dumps(prop_dict_serializable, indent=4)
        return prop_str

    def get_obj2cam(self, cam_type='cv', square=False):
        r"""Gets the object-to-camera transformation matrix.

        Args:
            cam_type (str, optional): Accepted are ``'cv'``/``'opencv'`` and
                ``'gl'``/``'opengl'``/``'blender'``.
            square (bool, optional): If true, the last row of
                :math:`[0, 0, 0, 1]` is kept, which makes the matrix
                invertible.

        Returns:
            numpy.ndarray: :math:`3\times 4` or :math:`4\times 4`
            object-to-camera transformation matrix.
        """
        cam_type = cam_type.lower()
        if cam_type in ('cv', 'opencv'):
            obj2cam = self.ext_mat
        elif cam_type in ('gl', 'opengl', 'blender'):
            # Additional 180-degree rotation around x-axis; reuse the
            # module-level constant instead of re-hard-coding the matrix
            obj2cam = CVCAM_TO_GLCAM.dot(self.ext_mat)
        else:
            raise NotImplementedError(f"Camera type: {cam_type}")
        if square:
            obj2cam_4x4 = np.vstack((obj2cam, [0, 0, 0, 1]))
            return obj2cam_4x4
        return obj2cam

    def get_cam2obj(self, cam_type='cv', square=False):
        """Inverse of :func:`get_obj2cam`.

        One example use: calling this with ``cam_type='blender'`` gives
        Blender's ``cam.matrix_world``.
        """
        obj2cam_4x4 = self.get_obj2cam(cam_type=cam_type, square=True)
        cam2obj_4x4 = np.linalg.inv(obj2cam_4x4)
        if square:
            return cam2obj_4x4
        return cam2obj_4x4[:3, :]

    def set_from_mitsuba(self, xml_path):
        """Sets camera according to a Mitsuba XML file.

        Args:
            xml_path (str): Path to the XML file.
        """
        from xml.etree.ElementTree import parse
        tree = parse(xml_path)
        # Focal length
        f_tag = tree.find('./sensor/string[@name="focalLength"]')
        if f_tag is None:
            self.f_mm = 50. # Mitsuba default
        else:
            f_str = f_tag.attrib['value']
            if f_str[-2:] == 'mm':
                self.f_mm = float(f_str[:-2])
            else:
                raise NotImplementedError(f_str)
        # Extrinsics
        cam_transform = tree.find('./sensor/transform/lookAt').attrib
        self.loc = np.fromstring(cam_transform['origin'], sep=',')
        self.lookat = np.fromstring(cam_transform['target'], sep=',')
        self.up = np.fromstring(cam_transform['up'], sep=',')
        # Resolution
        self.im_h = int(
            tree.find('./sensor/film/integer[@name="height"]').attrib['value'])
        self.im_w = int(
            tree.find('./sensor/film/integer[@name="width"]').attrib['value'])

    def proj(self, pts, space='object'):
        r"""Projects 3D points to 2D.

        Args:
            pts (array_like): 3D point(s) of shape :math:`N\times 3` or
                :math:`3\times N`, or of length 3. Note a :math:`3\times 3`
                input is ambiguous and treated as :math:`N\times 3`.
            space (str, optional): In which space these points are specified:
                ``'object'`` or ``'camera'``.

        Returns:
            array_like: Vertical and horizontal coordinates of the
            projections, following:

            .. code-block:: none

                +-----------> dim1
                |
                |
                |
                v dim0
        """
        pts = np.array(pts)
        if pts.shape == (3,):
            pts = pts.reshape((3, 1))
        elif pts.shape[1] == 3:
            pts = pts.T
        assert space in ('object', 'camera'), "Unrecognized space"
        pts_homo = to_homo(pts) # 3xN to 4xN
        if space == 'object':
            proj_mat = self.proj_mat
        else:
            # Points already in camera space; extrinsics are identity
            ext_mat = np.hstack((np.eye(3), np.zeros((3, 1))))
            proj_mat = self.int_mat.dot(ext_mat)
        # Project
        hvs_homo = proj_mat.dot(pts_homo)
        # 3xN: dim0 is horizontal, and dim1 is vertical
        hvs = from_homo(hvs_homo) # 3xN to 2xN
        # Swap to (vertical, horizontal) and make points the leading dim
        vhs = np.vstack((hvs[1, :], hvs[0, :])).T
        if vhs.shape[0] == 1:
            # Single point
            vhs = vhs[0, :]
        return vhs

    def backproj(
            self, depth, fg_mask=None, bg_fill=0., depth_type='plane',
            space='object'):
        """Backprojects a depth map to 3D points.

        Resolution of the depth map may be different from :attr:`im_h` and
        :attr:`im_w`: :attr:`im_h` and :attr:`im_w` decide the image
        coordinate bounds, and the depth resolution decides number of steps.

        Args:
            depth (numpy.ndarray): Depth map.
            fg_mask (numpy.ndarray, optional): Backproject only pixels falling
                inside this foreground mask. Its values should be logical.
            bg_fill (float, optional): Filler value for background region.
            depth_type (str, optional): Plane or ray depth.
            space (str, optional): In which space the backprojected points are
                specified: ``'object'`` or ``'camera'``.

        Returns:
            numpy.ndarray: :math:`xyz` map.
        """
        if fg_mask is None:
            fg_mask = np.ones(depth.shape, dtype=bool)
        assert depth_type in ('ray', 'plane'), "Unrecognized depth type"
        assert space in ('object', 'camera'), "Unrecognized space"
        # Generate 2D coordinates at pixel centers, scaled to the
        # (im_h, im_w) bounds regardless of the depth map's own resolution
        v_is, h_is = np.where(fg_mask)
        hs = (h_is + 0.5) / fg_mask.shape[1] * self.im_w
        vs = (v_is + 0.5) / fg_mask.shape[0] * self.im_h
        h_c, v_c = self.im_w / 2, self.im_h / 2
        zs = depth[fg_mask]
        if depth_type == 'ray':
            d2 = np.power(vs - v_c, 2) + np.power(hs - h_c, 2)
            # Similar triangles: convert distance-along-ray to plane depth
            zs_plane = np.multiply(
                zs, self.f_pix / np.sqrt(self.f_pix ** 2 + d2))
            zs = zs_plane
        # Backproject to camera space (pinhole model)
        xs = np.multiply(zs, hs - h_c) / self.f_pix
        ys = np.multiply(zs, vs - v_c) / self.f_pix
        pts = np.vstack((xs, ys, zs))
        if space == 'object':
            # Need to further transform to object space
            o2c = self.ext_mat_4x4
            c2o = np.linalg.inv(o2c)
            pts_o = c2o.dot(to_homo(pts))
            pts = from_homo(pts_o, axis=0)
        pts = pts.T # (n_fg_pts, 3)
        # Put them back into a buffer; masked assignment order matches
        # pts.ravel() because both are row-major over (pixel, channel)
        xyz = bg_fill * np.ones(depth.shape + (3,), dtype=float)
        xyz[np.dstack([fg_mask] * 3)] = pts.ravel()
        return xyz

    def gen_rays(self, spp=1):
        r"""Generates ray directions in object space, with the ray origin
        being the camera location.

        Args:
            spp (int, optional): Samples (or number of rays) per pixel. Must
                be a perfect square :math:`S^2` due to uniform, deterministic
                supersampling.

        Returns:
            numpy.ndarray: An :math:`H\times W\times S^2\times 3` array of
            ray directions.
        """
        sps = np.sqrt(spp)
        if sps.is_integer():
            sps = int(sps)
        else:
            raise ValueError(
                f"Samples per pixel ({spp}) is not a perfect square")
        # Supersample according to samples per side
        h, w = self.im_h * sps, self.im_w * sps
        depth = np.ones((h, w), dtype=float)
        # Backproject a uniform plane depth map to a wall in 3D
        xyzs = self.backproj(depth) # (HS)x(WS)x3
        # Compute ray directions
        ray_dirs_ss = normalize(xyzs - self.loc, axis=2)
        # Put samples in each pixel bucket
        ray_dirs = []
        for i in range(sps):
            for j in range(sps):
                ray_dirs.append(ray_dirs_ss[i::sps, j::sps, :])
        ray_dirs = np.stack(ray_dirs, axis=2)
        return ray_dirs # HxWx(S^2)x3

    def resize(self, new_h=None, new_w=None):
        """Updates the camera intrinsics according to the new size.

        Args:
            new_h (int, optional): Target height. If ``None``, will be
                calculated according to the target width, assuming the same
                aspect ratio.
            new_w (int, optional): Target width. If ``None``, will be
                calculated according to the target height, assuming the same
                aspect ratio.
        """
        if new_h is not None and new_w is not None:
            assert int(self.im_h / self.im_w * new_w) == new_h, \
                "Aspect ratio change violates the `f_x == f_y` assumption"
        elif new_h is None and new_w is not None:
            new_h = int(self.im_h / self.im_w * new_w)
        elif new_h is not None and new_w is None:
            new_w = int(self.im_w / self.im_h * new_h)
        else:
            raise ValueError(
                "At least one of new height or width must be given")
        # Update relevant properties; focal length scales with resolution
        self.f_pix = new_h / float(self.im_h) * self.f_pix
        self.im_h = new_h
        self.im_w = new_w
def safe_cast_to_int(x):
    """Casts a string or float to integer only when safe.

    Args:
        x (str or float): Input to be cast to integer.

    Returns:
        int: Integer version of the input.
    """
    as_int = int(x)
    # Floats (incl. NumPy floats) must be whole numbers to cast losslessly
    is_float = np.issubdtype(type(x), np.floating)
    if is_float:
        assert as_int == x, \
            f"Can't safely cast a non-integer value ({x}) to integer"
    return as_int