Source code for airobot.sensor.camera.rgbdcam

import numpy as np

from airobot.sensor.camera.camera import Camera
from airobot.utils.common import to_rot_mat


[docs]class RGBDCamera(Camera):
    """
    A RGBD camera.

    Args:
        cfgs (YACS CfgNode): configurations for the camera.

    Attributes:
        cfgs (YACS CfgNode): configurations for the end effector.
        img_height (int): height of the image.
        img_width (int): width of the image.
        cam_ext_mat (np.ndarray): extrinsic matrix (shape: :math:`[4, 4]`)
            for the camera (source frame: base frame.
            target frame: camera frame).
        cam_int_mat (np.ndarray): intrinsic matrix (shape: :math:`[3, 3]`)
            for the camera.
        cam_int_mat_inv (np.ndarray): inverse of the intrinsic matrix.
        depth_scale (float): ratio of the depth image value
            to true depth value.
        depth_min (float): minimum depth value considered in 3D reconstruction.
        depth_max (float): maximum depth value considered in 3D reconstruction.
    """

    def __init__(self, cfgs):
        super(RGBDCamera, self).__init__(cfgs=cfgs)
        self.img_height = None
        self.img_width = None
        self.cam_ext_mat = None
        self.cam_int_mat = None
        self.cam_int_mat_inv = None
        self.depth_scale = None
        self.depth_min = None
        self.depth_max = None

    def _init_pers_mat(self):
        """
        Initialize related matrices for projecting
        pixels to points in camera frame.
        """
        self.cam_int_mat_inv = np.linalg.inv(self.cam_int_mat)

        img_pixs = np.mgrid[0: self.img_height,
                            0: self.img_width].reshape(2, -1)
        img_pixs[[0, 1], :] = img_pixs[[1, 0], :]
        self._uv_one = np.concatenate((img_pixs,
                                       np.ones((1, img_pixs.shape[1]))))
        self._uv_one_in_cam = np.dot(self.cam_int_mat_inv, self._uv_one)

[docs]    def get_cam_ext(self):
        """
        Return the camera's extrinsic matrix.

        Returns:
            np.ndarray: extrinsic matrix (shape: :math:`[4, 4]`)
            for the camera (source frame: base frame.
            target frame: camera frame).
        """
        return self.cam_ext_mat

[docs]    def get_cam_int(self):
        """
        Return the camera's intrinsic matrix.

        Returns:
            np.ndarray: intrinsic matrix (shape: :math:`[3, 3]`)
            for the camera.
        """
        return self.cam_int_mat

[docs]    def set_cam_ext(self, pos=None, ori=None, cam_ext=None):
        """
        Set the camera extrinsic matrix.

        Args:
            pos (np.ndarray): position of the camera (shape: :math:`[3,]`).
            ori (np.ndarray): orientation.
                It can be rotation matrix (shape: :math:`[3, 3]`)
                quaternion ([x, y, z, w], shape: :math:`[4]`), or
                euler angles ([roll, pitch, yaw], shape: :math:`[3]`).
            cam_ext (np.ndarray): extrinsic matrix (shape: :math:`[4, 4]`).
                If this is provided, pos and ori will be ignored.
        """
        if cam_ext is not None:
            self.cam_ext_mat = cam_ext
        else:
            if pos is None or ori is None:
                raise ValueError('If cam_ext is not provided, '
                                 'both pos and ori need '
                                 'to be provided.')
            ori = to_rot_mat(ori)
            cam_mat = np.eye(4)
            cam_mat[:3, :3] = ori
            cam_mat[:3, 3] = pos.flatten()
            self.cam_ext_mat = cam_mat

[docs]    def get_pix_3dpt(self, rs, cs, in_world=True, filter_depth=False,
                     k=1, ktype='median', depth_min=None, depth_max=None,
                     cam_ext_mat=None):
        """
        Calculate the 3D position of pixels in the RGB image.

        Args:
            rs (int or list or np.ndarray): rows of interest.
                It can be a list or 1D numpy array
                which contains the row indices. The default value is None,
                which means all rows.
            cs (int or list or np.ndarray): columns of interest.
                It can be a list or 1D numpy array
                which contains the column indices. The default value is None,
                which means all columns.
            in_world (bool): if True, return the 3D position in
                the world frame,
                Otherwise, return the 3D position in the camera frame.
            filter_depth (bool): if True, only pixels with depth values
                between [depth_min, depth_max]
                will remain.
            k (int): kernel size. A kernel (slicing window) will be used
               to get the neighboring depth values of the pixels specified
               by rs and cs. And depending on the ktype, a corresponding
               method will be applied to use some statistical value
               (such as minimum, maximum, median, mean) of all the depth
               values in the slicing window as a more robust estimate of
               the depth value of the specified pixels.
            ktype (str): what kind of statistical value of all the depth
               values in the sliced kernel
               to use as a proxy of the depth value at specified pixels.
               It can be `median`, `min`, `max`, `mean`.
            depth_min (float): minimum depth value. If None, it will use the
                default minimum depth value defined in the config file.
            depth_max (float): maximum depth value. If None, it will use the
                default maximum depth value defined in the config file.
            cam_ext_mat (np.ndarray): camera extrinsic matrix (shape: :math:`[4,4]`).
                If provided, it will be used to compute the points in the world frame.

        Returns:
            np.ndarray: 3D point coordinates of the pixels in
            camera frame (shape: :math:`[N, 3]`).
        """
        if not isinstance(rs, int) and not isinstance(rs, list) and \
                not isinstance(rs, np.ndarray):
            raise TypeError('rs should be an int, a list or a numpy array')
        if not isinstance(cs, int) and not isinstance(cs, list) and \
                not isinstance(cs, np.ndarray):
            raise TypeError('cs should be an int, a list or a numpy array')
        if isinstance(rs, int):
            rs = [rs]
        if isinstance(cs, int):
            cs = [cs]
        if isinstance(rs, np.ndarray):
            rs = rs.flatten()
        if isinstance(cs, np.ndarray):
            cs = cs.flatten()
        if not (isinstance(k, int) and (k % 2) == 1):
            raise TypeError('k should be a positive odd integer.')
        _, depth_im = self.get_images(get_rgb=False, get_depth=True)
        if k == 1:
            depth_im = depth_im[rs, cs]
        else:
            depth_im_list = []
            if ktype == 'min':
                ktype_func = np.min
            elif ktype == 'max':
                ktype_func = np.max
            elif ktype == 'median':
                ktype_func = np.median
            elif ktype == 'mean':
                ktype_func = np.mean
            else:
                raise TypeError('Unsupported ktype:[%s]' % ktype)
            for r, c in zip(rs, cs):
                s = k // 2
                rmin = max(0, r - s)
                rmax = min(self.img_height, r + s + 1)
                cmin = max(0, c - s)
                cmax = min(self.img_width, c + s + 1)
                depth_im_list.append(ktype_func(depth_im[rmin:rmax,
                                                cmin:cmax]))
            depth_im = np.array(depth_im_list)

        depth = depth_im.reshape(-1) * self.depth_scale
        img_pixs = np.stack((rs, cs)).reshape(2, -1)
        img_pixs[[0, 1], :] = img_pixs[[1, 0], :]
        depth_min = depth_min if depth_min else self.depth_min
        depth_max = depth_max if depth_max else self.depth_max
        if filter_depth:
            valid = depth > depth_min
            valid = np.logical_and(valid,
                                   depth < depth_max)
            depth = depth[:, valid]
            img_pixs = img_pixs[:, valid]
        uv_one = np.concatenate((img_pixs,
                                 np.ones((1, img_pixs.shape[1]))))
        uv_one_in_cam = np.dot(self.cam_int_mat_inv, uv_one)
        pts_in_cam = np.multiply(uv_one_in_cam, depth)
        if in_world:
            if self.cam_ext_mat is None and cam_ext_mat is None:
                raise ValueError('Please call set_cam_ext() first to set up'
                                 ' the camera extrinsic matrix')
            cam_ext_mat = self.cam_ext_mat if cam_ext_mat is None else cam_ext_mat
            pts_in_cam = np.concatenate((pts_in_cam,
                                         np.ones((1, pts_in_cam.shape[1]))),
                                        axis=0)
            pts_in_world = np.dot(cam_ext_mat, pts_in_cam)
            pts_in_world = pts_in_world[:3, :].T
            return pts_in_world
        else:
            return pts_in_cam.T

[docs]    def get_pcd(self, in_world=True, filter_depth=True,
                depth_min=None, depth_max=None, cam_ext_mat=None,
                rgb_image=None, depth_image=None):
        """
        Get the point cloud from the entire depth image
        in the camera frame or in the world frame.

        Args:
            in_world (bool): return point cloud in the world frame, otherwise,
                return point cloud in the camera frame.
            filter_depth (bool): only return the point cloud with depth values
                lying in [depth_min, depth_max].
            depth_min (float): minimum depth value. If None, it will use the
                default minimum depth value defined in the config file.
            depth_max (float): maximum depth value. If None, it will use the
                default maximum depth value defined in the config file.
            cam_ext_mat (np.ndarray): camera extrinsic matrix (shape: :math:`[4,4]`).
                If provided, it will be used to compute the points in the world frame.
            rgb_image (np.ndarray): externally captured RGB image, if we want to
                convert a depth image captured outside this function to a point cloud.
                (shape :math:`[H, W, 3]`)
            depth_image (np.ndarray): externally captured depth image, if we want to
                convert a depth image captured outside this function to a point cloud.
                (shape :math:`[H, W]`)

        Returns:
            2-element tuple containing

            - np.ndarray: point coordinates (shape: :math:`[N, 3]`).
            - np.ndarray: rgb values (shape: :math:`[N, 3]`).
        """
        if depth_image is None or rgb_image is None:
            rgb_im, depth_im = self.get_images(get_rgb=True, get_depth=True)
        else:
            rgb_im = rgb_image
            depth_im = depth_image
        # pcd in camera from depth
        depth = depth_im.reshape(-1) * self.depth_scale
        rgb = None
        if rgb_im is not None:
            rgb = rgb_im.reshape(-1, 3)
        depth_min = depth_min if depth_min else self.depth_min
        depth_max = depth_max if depth_max else self.depth_max
        if filter_depth:
            valid = depth > depth_min
            valid = np.logical_and(valid,
                                   depth < depth_max)
            depth = depth[valid]
            if rgb is not None:
                rgb = rgb[valid]
            uv_one_in_cam = self._uv_one_in_cam[:, valid]
        else:
            uv_one_in_cam = self._uv_one_in_cam
        pts_in_cam = np.multiply(uv_one_in_cam, depth)
        if not in_world:
            pcd_pts = pts_in_cam.T
            pcd_rgb = rgb
            return pcd_pts, pcd_rgb
        else:
            if self.cam_ext_mat is None and cam_ext_mat is None:
                raise ValueError('Please call set_cam_ext() first to set up'
                                 ' the camera extrinsic matrix')
            cam_ext_mat = self.cam_ext_mat if cam_ext_mat is None else cam_ext_mat
            pts_in_cam = np.concatenate((pts_in_cam,
                                         np.ones((1, pts_in_cam.shape[1]))),
                                        axis=0)
            pts_in_world = np.dot(cam_ext_mat, pts_in_cam)
            pcd_pts = pts_in_world[:3, :].T
            pcd_rgb = rgb
            return pcd_pts, pcd_rgb