diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1d45437..ace4a5f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -81,17 +81,3 @@ repos:
     rev: 3.0.0
     hooks:
       - id: forbid-binary
-        # Note: to exclude files, uncomment the next line and provide the regex to the associated file or files
-        exclude: >
-          (?x)^(
-            docker/ros/web/.*|
-            docker/ros/rootfs/.*
-          )$
-  - repo: local
-    hooks:
-      - id: check-bdai-deps
-        name: check bdai dependency rules
-        entry: ./scripts/check_dependency_rules.sh
-        language: script
-        files: \.py$
-        require_serial: true
\ No newline at end of file
diff --git a/config/experiments/reality.yaml b/config/experiments/reality.yaml
index 632851f..4c213e0 100644
--- a/config/experiments/reality.yaml
+++ b/config/experiments/reality.yaml
@@ -10,7 +10,7 @@ policy:
   name: "RealityITMPolicyV2"
   pointnav_policy_path: "data/pointnav_weights.pth"
   depth_image_shape: [212, 240] # height, width
-  pointnav_stop_radius: 1.2
+  pointnav_stop_radius: 0.9
   use_max_confidence: False
   object_map_erosion_size: 5
   exploration_thresh: 0.0
@@ -21,7 +21,7 @@ policy:
   agent_radius: 0.2

 env:
-  max_body_cam_depth: 3.5
+  max_body_cam_depth: 2.5
   max_gripper_cam_depth: 5.0
   max_lin_dist: 0.2
   max_ang_dist: 0.523599
diff --git a/vlfm/mapping/obstacle_map.py b/vlfm/mapping/obstacle_map.py
index 29840b2..c840347 100644
--- a/vlfm/mapping/obstacle_map.py
+++ b/vlfm/mapping/obstacle_map.py
@@ -1,8 +1,9 @@
 # Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.

+from typing import Any, Union
+
 import cv2
 import numpy as np
-from typing import Union, Any

 from frontier_exploration.frontier_detection import detect_frontier_waypoints
 from frontier_exploration.utils.fog_of_war import reveal_fog_of_war
diff --git a/vlfm/mapping/traj_visualizer.py b/vlfm/mapping/traj_visualizer.py
index 11a4a14..661c0cb 100644
--- a/vlfm/mapping/traj_visualizer.py
+++ b/vlfm/mapping/traj_visualizer.py
@@ -36,7 +36,9 @@ def draw_trajectory(
         img = self._draw_agent(img, camera_positions[-1], camera_yaw)
         return img

-    def _draw_path(self, img: np.ndarray, camera_positions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
+    def _draw_path(
+        self, img: np.ndarray, camera_positions: Union[np.ndarray, List[np.ndarray]]
+    ) -> np.ndarray:
         """Draws the path on the image and returns it"""
         if len(camera_positions) < 2:
             return img
diff --git a/vlfm/policy/base_objectnav_policy.py b/vlfm/policy/base_objectnav_policy.py
index 85a9c67..e6953f0 100644
--- a/vlfm/policy/base_objectnav_policy.py
+++ b/vlfm/policy/base_objectnav_policy.py
@@ -118,7 +118,7 @@ def act(
         prev_actions: Any,
         masks: Tensor,
         deterministic: bool = False,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> Any:
        """
        Starts the episode by 'initializing' and allowing robot to get its bearings
        (e.g., spinning in place to get a good view of the scene).
diff --git a/vlfm/policy/itm_policy.py b/vlfm/policy/itm_policy.py
index c5bd559..38c6621 100644
--- a/vlfm/policy/itm_policy.py
+++ b/vlfm/policy/itm_policy.py
@@ -265,7 +265,7 @@ def act(
         prev_actions: Any,
         masks: Tensor,
         deterministic: bool = False,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> Any:
         self._pre_step(observations, masks)
         self._update_value_map()
         return super().act(
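The `reality.yaml` hunks earlier in this patch tighten two thresholds: the point-goal stop radius (1.2 m to 0.9 m) and the body-camera depth clip (3.5 m to 2.5 m). A minimal sketch of reading the new values back, assuming PyYAML and the top-level `policy:`/`env:` layout the hunks suggest:

```python
import yaml  # assumes PyYAML is installed

with open("config/experiments/reality.yaml") as f:
    cfg = yaml.safe_load(f)

# Values changed in this diff
assert cfg["policy"]["pointnav_stop_radius"] == 0.9  # meters
assert cfg["env"]["max_body_cam_depth"] == 2.5  # meters
```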
diff --git a/vlfm/policy/reality_policies.py b/vlfm/policy/reality_policies.py
index c863fd5..2d46422 100644
--- a/vlfm/policy/reality_policies.py
+++ b/vlfm/policy/reality_policies.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.

 from dataclasses import dataclass
-from typing import Any, Dict, List, Tuple, Union, Optional
+from typing import Any, Dict, List, Union

 import numpy as np
 import torch
@@ -60,7 +60,7 @@ def act(
         prev_actions: Any,
         masks: Tensor,
         deterministic: bool = False,
-    ) -> Tuple[Tensor, Tensor]:
+    ) -> Dict[str, Any]:
         if observations["objectgoal"] not in self._non_coco_caption:
             self._non_coco_caption = (
                 observations["objectgoal"] + " . " + self._non_coco_caption
@@ -75,36 +75,31 @@ def act(
         # convert this numpy array to a dictionary with keys "angular" and "linear" so
         # that it can be passed to the Spot robot.
         if self._done_initializing:
-            angular = action[0][0].item()
-            linear = action[0][1].item()
-            arm_yaw = -1.0
+            action_dict = {
+                "angular": action[0][0].item(),
+                "linear": action[0][1].item(),
+                "arm_yaw": -1,
+                "info": self._policy_info,
+            }
         else:
-            angular = 0.0
-            linear = 0.0
-            arm_yaw = action[0][0].item()
+            action_dict = {
+                "angular": 0,
+                "linear": 0,
+                "arm_yaw": action[0][0].item(),
+                "info": self._policy_info,
+            }

-        self._done_initializing = len(self._initial_yaws) == 0
+        if "rho_theta" in self._policy_info:
+            action_dict["rho_theta"] = self._policy_info["rho_theta"]

-        action = torch.tensor([[angular, linear, arm_yaw]], dtype=torch.float32)
+        self._done_initializing = len(self._initial_yaws) == 0

-        return action, rnn_hidden_states
+        return action_dict

     def get_action(
         self, observations: Dict[str, Any], masks: Tensor, deterministic: bool = True
     ) -> Dict[str, Any]:
-        actions, _ = self.act(
-            observations, None, None, masks, deterministic=deterministic
-        )
-        action_dict = {
-            "angular": actions[0],
-            "linear": actions[1],
-            "arm_yaw": actions[2],
-            "info": self._policy_info,
-        }
-        if "rho_theta" in self._policy_info:
-            action_dict["rho_theta"] = self._policy_info["rho_theta"]
-
-        return action_dict
+        return self.act(observations, None, None, masks, deterministic=deterministic)

     def _reset(self: Union["RealityMixin", ITMPolicyV2]) -> None:
         parent_cls: ITMPolicyV2 = super()  # type: ignore
diff --git a/vlfm/policy/utils/non_habitat_policy/resnet.py b/vlfm/policy/utils/non_habitat_policy/resnet.py
index 73e03be..e1a18c2 100755
--- a/vlfm/policy/utils/non_habitat_policy/resnet.py
+++ b/vlfm/policy/utils/non_habitat_policy/resnet.py
@@ -6,7 +6,7 @@
 # https://github.com/facebookresearch/habitat-lab/blob/main/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet.py
 # This is a filtered down version that only support ResNet-18

-from typing import List, Optional, Type, cast
+from typing import List, Optional, Type

 from torch import Tensor
 from torch import nn as nn
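With `reality_policies.py` now building the action dictionary inside `act()` itself (and `get_action()` reduced to a pass-through), the contract with the robot loop is a dict with keys `angular`, `linear`, `arm_yaw`, `info`, and optionally `rho_theta`. A rough sketch of a consumer under that contract; `send_base_command` and `send_arm_command` are hypothetical stand-ins, not functions from this repo:

```python
from typing import Any, Dict


def send_base_command(angular: float, linear: float) -> None:
    # hypothetical stand-in for the Spot base-velocity interface
    print(f"base: angular={angular:.2f} rad/s, linear={linear:.2f} m/s")


def send_arm_command(arm_yaw: float) -> None:
    # hypothetical stand-in for the Spot arm interface
    print(f"arm: yaw={arm_yaw:.2f} rad")


def dispatch(action: Dict[str, Any]) -> None:
    """Route one action dict of the shape act() now returns."""
    if action["arm_yaw"] == -1:  # initialization done: drive the base
        send_base_command(action["angular"], action["linear"])
    else:  # still initializing: only the arm yaw moves
        send_arm_command(action["arm_yaw"])


dispatch({"angular": 0.1, "linear": 0.2, "arm_yaw": -1, "info": {}})
```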
""" + def __init__( self, input_size: int, @@ -131,16 +132,16 @@ def __init__( ): super().__init__(input_size, hidden_size, num_layers) - # Note: Type handling mypy errors in pytorch libraries prevent + # Note: Type handling mypy errors in pytorch libraries prevent # directly setting hidden_states type def pack_hidden( - self, hidden_states: Any # type is Tuple[torch.Tensor, torch.Tensor] + self, hidden_states: Any # type is Tuple[torch.Tensor, torch.Tensor] ) -> torch.Tensor: return torch.cat(hidden_states, 0) def unpack_hidden( self, hidden_states: torch.Tensor - ) -> Any: # type is Tuple[torch.Tensor, torch.Tensor] + ) -> Any: # type is Tuple[torch.Tensor, torch.Tensor] lstm_states = torch.chunk(hidden_states.contiguous(), 2, 0) return (lstm_states[0], lstm_states[1]) @@ -150,7 +151,10 @@ def build_rnn_inputs( rnn_states: torch.Tensor, not_dones: torch.Tensor, rnn_build_seq_info: Dict[str, torch.Tensor], -) -> Tuple[PackedSequence, torch.Tensor,]: +) -> Tuple[ + PackedSequence, + torch.Tensor, +]: r"""Create a PackedSequence input for an RNN such that each set of steps that are part of the same episode are all part of a batch in the PackedSequence. diff --git a/vlfm/policy/utils/pointnav_policy.py b/vlfm/policy/utils/pointnav_policy.py index 002de45..a030ae5 100644 --- a/vlfm/policy/utils/pointnav_policy.py +++ b/vlfm/policy/utils/pointnav_policy.py @@ -191,6 +191,16 @@ def load_pointnav_policy(file_path: str) -> PointNavResNetTensorOutputPolicy: ckpt_dict = torch.load(file_path, map_location="cpu") pointnav_policy = PointNavResNetTensorOutputPolicy() current_state_dict = pointnav_policy.state_dict() + # Let old checkpoints work with new code + if "net.prev_action_embedding_cont.bias" not in ckpt_dict.keys(): + ckpt_dict["net.prev_action_embedding_cont.bias"] = ckpt_dict[ + "net.prev_action_embedding.bias" + ] + if "net.prev_action_embedding_cont.weights" not in ckpt_dict.keys(): + ckpt_dict["net.prev_action_embedding_cont.weight"] = ckpt_dict[ + "net.prev_action_embedding.weight" + ] + pointnav_policy.load_state_dict( {k: v for k, v in ckpt_dict.items() if k in current_state_dict} ) diff --git a/vlfm/reality/pointnav_env.py b/vlfm/reality/pointnav_env.py index 7d1bc45..ee5dfbf 100644 --- a/vlfm/reality/pointnav_env.py +++ b/vlfm/reality/pointnav_env.py @@ -106,9 +106,7 @@ def _compute_velocities(self, action: Dict[str, Any]) -> Tuple[float, float]: lin_vel = lin_dist / self._time_step return ang_vel, lin_vel - def _compute_displacements( - self, action: Dict[str, Any] - ) -> Tuple[float, float]: + def _compute_displacements(self, action: Dict[str, Any]) -> Tuple[float, float]: displacements = [] for action_key, max_dist in ( ("angular", self._max_ang_dist), diff --git a/vlfm/reality/robots/frame_ids.py b/vlfm/reality/robots/frame_ids.py new file mode 100644 index 0000000..7da70ec --- /dev/null +++ b/vlfm/reality/robots/frame_ids.py @@ -0,0 +1,8 @@ +class SpotFrameIds: + BODY: str = "body" + FLAT_BODY: str = "flat_body" + GPE: str = "gpe" + HAND: str = "hand" + LINK_WR1: str = "link_wr1" + ODOM: str = "odom" + VISION: str = "vision" diff --git a/vlfm/utils/geometry_utils.py b/vlfm/utils/geometry_utils.py index e600d2b..abd793b 100644 --- a/vlfm/utils/geometry_utils.py +++ b/vlfm/utils/geometry_utils.py @@ -1,7 +1,7 @@ # Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved. 
diff --git a/vlfm/utils/geometry_utils.py b/vlfm/utils/geometry_utils.py
index e600d2b..abd793b 100644
--- a/vlfm/utils/geometry_utils.py
+++ b/vlfm/utils/geometry_utils.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.

 import math
-from typing import Tuple, Union
+from typing import Tuple

 import numpy as np

@@ -83,8 +83,7 @@ def calculate_vfov(hfov: float, width: int, height: int) -> float:
     """
     # Calculate the diagonal field of view (DFOV)
     dfov = 2 * math.atan(
-        math.tan(hfov / 2)
-        * math.sqrt((width**2 + height**2) / (width**2 + height**2))
+        math.tan(hfov / 2) * math.sqrt(width**2 + height**2) / width
     )

     # Calculate the vertical field of view (VFOV)
diff --git a/vlfm/vlm/detections.py b/vlfm/vlm/detections.py
index 3ec1917..7ad1ebc 100644
--- a/vlfm/vlm/detections.py
+++ b/vlfm/vlm/detections.py
@@ -220,15 +220,15 @@ def draw_bounding_box(
         # reshape to a single dimensional array
         rand_color = single_pixel.reshape(3)

-        color = [int(c) for c in rand_color]  # type: ignore
+        bgr_color = [int(c) for c in rand_color]  # type: ignore
     else:
         # Convert RGB to BGR
-        bgr_color = color[::-1]
-        color = [int(c) for c in bgr_color]  # type: ignore
+        color = color[::-1]
+        bgr_color = [int(c) for c in color]

     # Draw bounding box on image
     box_thickness = 2
-    cv2.rectangle(img, point1, point2, color, thickness=box_thickness)
+    cv2.rectangle(img, point1, point2, bgr_color, thickness=box_thickness)

     # Draw class name and score on image
     text_label = f"{class_name}: {int(score * 100)}%"
@@ -242,7 +242,7 @@ def draw_bounding_box(
         img,
         (text_x, text_y - 2 * text_size[1]),
         (text_x + text_size[0], text_y - text_size[1]),
-        color,
+        bgr_color,
         -1,
     )
     cv2.putText(
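The `detections.py` change funnels every branch into a single `bgr_color`, which is what the OpenCV drawing calls expect (BGR integer triples, not RGB). A self-contained check of the flip used above:

```python
import cv2
import numpy as np

rgb = (255, 128, 0)  # orange, RGB order
bgr_color = [int(c) for c in rgb[::-1]]  # -> [0, 128, 255], BGR order

img = np.zeros((100, 100, 3), dtype=np.uint8)
cv2.rectangle(img, (10, 10), (90, 90), bgr_color, thickness=2)
```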