Source code for airobot.sensor.camera.rgbdcam_pybullet

import numpy as np

from airobot.sensor.camera.rgbdcam import RGBDCamera
from airobot.utils.common import create_se3
from airobot.utils.common import rotvec2rot


[docs]class RGBDCameraPybullet(RGBDCamera): """ RGBD Camera in Pybullet. Args: cfgs (YACS CfgNode): configurations for the camera. Attributes: view_matrix (np.ndarray): view matrix of opengl camera (shape: :math:`[4, 4]`). proj_matrix (np.ndarray): projection matrix of opengl camera (shape: :math:`[4, 4]`). """ def __init__(self, cfgs, pb_client): self._pb = pb_client super(RGBDCameraPybullet, self).__init__(cfgs=cfgs) self.view_matrix = None self.proj_matrix = None self.depth_scale = 1 self.depth_min = self.cfgs.CAM.SIM.ZNEAR self.depth_max = self.cfgs.CAM.SIM.ZFAR self._set_cam_int() def _set_cam_int(self, img_height=None, img_width=None): self.img_height = img_height if img_height else self.cfgs.CAM.SIM.HEIGHT self.img_width = img_width if img_width else self.cfgs.CAM.SIM.WIDTH aspect = self.img_width / float(self.img_height) znear = self.cfgs.CAM.SIM.ZNEAR zfar = self.cfgs.CAM.SIM.ZFAR fov = self.cfgs.CAM.SIM.FOV pm = self._pb.computeProjectionMatrixFOV(fov, aspect, znear, zfar) self.proj_matrix = np.array(pm).reshape(4, 4) vfov = np.deg2rad(fov) tan_half_vfov = np.tan(vfov / 2.0) tan_half_hfov = tan_half_vfov * self.img_width / float(self.img_height) # focal length in pixel space fx = self.img_width / 2.0 / tan_half_hfov fy = self.img_height / 2.0 / tan_half_vfov self.cam_int_mat = np.array([[fx, 0, self.img_width / 2.0], [0, fy, self.img_height / 2.0], [0, 0, 1]]) self._init_pers_mat()
[docs] def setup_camera(self, focus_pt=None, dist=3, yaw=0, pitch=0, roll=0, height=None, width=None): """ Setup the camera view matrix and projection matrix. Must be called first before images are renderred. Args: focus_pt (list): position of the target (focus) point, in Cartesian world coordinates. dist (float): distance from eye (camera) to the focus point. yaw (float): yaw angle in degrees, left/right around up-axis (z-axis). pitch (float): pitch in degrees, up/down. roll (float): roll in degrees around forward vector. height (float): height of image. If None, it will use the default height from the config file. width (float): width of image. If None, it will use the default width from the config file. """ if focus_pt is None: focus_pt = [0, 0, 0] if len(focus_pt) != 3: raise ValueError('Length of focus_pt should be 3 ([x, y, z]).') vm = self._pb.computeViewMatrixFromYawPitchRoll(focus_pt, dist, yaw, pitch, roll, upAxisIndex=2) self.view_matrix = np.array(vm).reshape(4, 4) rot = create_se3(rotvec2rot(np.pi * np.array([1, 0, 0]))) view_matrix_T = self.view_matrix.T self.cam_ext_mat = np.dot(np.linalg.inv(view_matrix_T), rot) if height is not None or width is not None: self._set_cam_int(img_height=height, img_width=width)
[docs] def set_cam_ext(self, pos=None, ori=None, cam_ext=None): """ Set the camera extrinsic matrix. Args: pos (np.ndarray): position of the camera (shape: :math:`[3,]`). ori (np.ndarray): orientation. It can be rotation matrix (shape: :math:`[3, 3]`) quaternion ([x, y, z, w], shape: :math:`[4]`), or euler angles ([roll, pitch, yaw], shape: :math:`[3]`). cam_ext (np.ndarray): extrinsic matrix (shape: :math:`[4, 4]`). If this is provided, pos and ori will be ignored. """ super().set_cam_ext(pos=pos, ori=ori, cam_ext=cam_ext) transform = np.eye(4) transform[:3, :3] = rotvec2rot(np.pi * np.array([1, 0, 0])) view_matrix = np.linalg.inv(np.dot(self.cam_ext_mat, transform)).T self.view_matrix = view_matrix
[docs] def get_images(self, get_rgb=True, get_depth=True, get_seg=False, **kwargs): """ Return rgb, depth, and segmentation images. Args: get_rgb (bool): return rgb image if True, None otherwise. get_depth (bool): return depth image if True, None otherwise. get_seg (bool): return the segmentation mask if True, None otherwise. Returns: 2-element tuple (if `get_seg` is False) containing - np.ndarray: rgb image (shape: [H, W, 3]). - np.ndarray: depth image (shape: [H, W]). 3-element tuple (if `get_seg` is True) containing - np.ndarray: rgb image (shape: [H, W, 3]). - np.ndarray: depth image (shape: [H, W]). - np.ndarray: segmentation mask image (shape: [H, W]), with pixel values corresponding to object id and link id. From the PyBullet documentation, the pixel value "combines the object unique id and link index as follows: value = objectUniqueId + (linkIndex+1)<<24 ... for a free floating body without joints/links, the segmentation mask is equal to its body unique id, since its link index is -1.". """ if self.view_matrix is None: raise ValueError('Please call setup_camera() first!') if self._pb.opengl_render: renderer = self._pb.ER_BULLET_HARDWARE_OPENGL else: renderer = self._pb.ER_TINY_RENDERER cam_img_kwargs = { 'width': self.img_width, 'height': self.img_height, 'viewMatrix': self.view_matrix.flatten(), 'projectionMatrix': self.proj_matrix.flatten(), 'flags': self._pb.ER_NO_SEGMENTATION_MASK, 'renderer': renderer } if get_seg: pb_seg_flag = self._pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX cam_img_kwargs['flags'] = pb_seg_flag cam_img_kwargs.update(kwargs) images = self._pb.getCameraImage(**cam_img_kwargs) rgb = None depth = None if get_rgb: rgb = np.reshape(images[2], (self.img_height, self.img_width, 4))[:, :, :3] # 0 to 255 if get_depth: depth_buffer = np.reshape(images[3], [self.img_height, self.img_width]) znear = self.cfgs.CAM.SIM.ZNEAR zfar = self.cfgs.CAM.SIM.ZFAR depth = zfar * znear / (zfar - (zfar - znear) * depth_buffer) if get_seg: seg = np.reshape(images[4], [self.img_height, self.img_width]) return rgb, depth, seg else: return rgb, depth