diff --git a/.gitignore b/.gitignore index a6880fd68..318e4375d 100644 --- a/.gitignore +++ b/.gitignore @@ -149,3 +149,10 @@ dmypy.json # Pyre type checker .pyre/ + +competition/logs/ +competition/results/ +*.png +competition/pretrain_models/ +*.npy +competition/models/ diff --git a/action_test.py b/action_test.py new file mode 100644 index 000000000..9eac64de8 --- /dev/null +++ b/action_test.py @@ -0,0 +1,221 @@ + +# action test: +""" test +#----------------test action--------------------- +# if self.iteration == 0: +# z = 1 +# yaw = 0. +# # duration 会影响时间 +# duration = 2 +# command_type = Command(5) # goTo. +# args = [[0, 0, z], yaw, duration, True] + +# elif self.iteration > 0.5 *self.CTRL_FREQ and self.iteration < 1.5 *self.CTRL_FREQ : +# command_type = Command(4) # Stop. +# args = [] +# else: +# command_type = Command(0) # None. +# args = [] +#----------------test action--------------------- +""" + +""" test go to action +#------------------------------------- +# go to not very exactly(exist error). if not random , always this (getting_started.yaml) +# x y z +# even if send command "only change y " from 60steps (2*CTRL_FREQ) z will continue increase ,meybe the problem about the Asynchronous +# init: [-0.96609923 0. -2.90654614 0. 0.04960551 0. 0.02840174 -0.05744736 0.07373744 0. 0. 0. ] +# 0-- [-0.96661885 -0.0279078 -2.90698836 -0.00873109 0.04799388 -0.05964756 0.02340614 -0.04682872 0.07295734 -0.60471722 1.66696169 -0.06040739] +# 15- [-0.98957009 0.0177486 -2.91855744 0.01612694 0.08783306 0.40195587 -0.01363278 0.039587 0.07313598 0.1224719 -0.18560217 -0.0133112 ] +# 30- [-9.74126116e-01 -6.12250160e-03 -2.90933867e+00 2.79855108e-02 5.77164080e-01 1.57968746e+00 1.13573342e-03 2.79637762e-04 7.34927365e-02 -3.14627467e-02 1.04491797e-01 -3.82468499e-05] +# 45- [-0.96690019 0.02592517 -2.9244567 -0.04027908 1.58570938 2.21925943 -0.04590852 0.00406897 0.07356948 -0.20060068 -0.07108402 -0.00401296] +# 60- [-0.97490167 -0.00690929 -2.90244781 0.03042487 2.5512753 1.51622867 0.03011076 0.02357646 0.07466138 -0.6881512 -0.51120803 0.0308384 ] +# 75- [-0.95789148 0.04922068 -2.84188733 0.34668242 3.25083697 1.17400743 -0.23051172 -0.00670337 0.07419264 -1.46396087 0.74862298 0.10745695] +# 90- [-0.96644482 0.03573701 -2.35380695 1.60398413 3.55250003 -0.0314102 -0.30132277 0.04541282 0.06064814 -0.13881515 1.02686106 -0.1751778 ] +# 105-[-0.96541722 0.00741155 -1.35492773 2.16873779 3.27599927 -0.96907974 -0.02264677 0.00403805 0.07491015 0.86021552 1.42216285 0.04174302] +# 120-[-9.69886183e-01 3.45681240e-02 -3.70208598e-01 1.58408557e+00 2.75920080e+00 -8.91757966e-01 1.95222977e-01 6.89915684e-04 7.17125806e-02 -2.54350558e-01 9.00435832e-01 2.10910125e-01] +# 135-[-0.93853971 0.14409335 0.299702 1.2196867 2.35446502 -0.66440774 0.08730616 0.0404132 0.0763123 1.39507016 -0.06357986 -0.14670249] +# if self.iteration == 0: +# z = 3 +# yaw = 0. +# # duration 会影响时间 +# duration = 3 +# command_type = Command(5) # goTo. +# args = [[0, 0, z], yaw, duration, True] +# elif self.iteration == 2 *self.CTRL_FREQ: +# y = 3 +# yaw = 0. +# # duration 会影响时间 +# duration = 3 +# command_type = Command(5) # goTo. +# args = [[0, y, 0], yaw, duration, True] +# elif self.iteration == 4 *self.CTRL_FREQ + 1: +# x = 1 +# yaw = 0. +# # duration 会影响时间 +# duration = 3 +# command_type = Command(5) # goTo. +# args = [[x, 0, 0], yaw, duration, True] +# else: +# command_type = Command(0) # None. 
+# args = [] +#----------------test action end --------------------- +""" + +"""test go to and stop action +#----------------test go to and stop action--------------------- +# result : stop can break the commmand but have to observe the rule about the intertia. +# if self.iteration == 0: +# z = 1 +# yaw = 0. +# # duration 会影响时间 +# duration = 0.2 +# command_type = Command(5) # goTo. +# args = [[0, 0, z], yaw, duration, True] +# elif self.iteration == 2 *self.CTRL_FREQ : +# command_type = Command(4) # Stop. +# args = [] +# elif self.iteration == 3 *self.CTRL_FREQ: +# y = -3 +# yaw = 0. +# # duration 会影响时间 +# duration = 5 +# command_type = Command(5) # goTo. +# args = [[0, y, 0], yaw, duration, True] +# elif self.iteration == 5 *self.CTRL_FREQ : +# command_type = Command(4) # Stop. +# args = [] +# elif self.iteration == 6 *self.CTRL_FREQ : +# x = -1 +# yaw = 0. +# # duration 会影响时间 +# duration = 5 +# command_type = Command(5) # goTo. +# args = [[x, 0, 0], yaw, duration, True] +# else: +# command_type = Command(0) # None. +# args = [] +#----------------test action end --------------------- +""" + +"""test the min time and the max distance about the go to action +#----------------test the min time and the max distance about the go to action--------------------- +# result : +if self.iteration == 0: + z = 0.15 + # duration 会影响时间 + duration = 0.33 + command_type = Command(5) # goTo. + args = [[0, 0, z], 0, duration, True] +else: + command_type = Command(0) # None. + args = [] +#----------------test action end --------------------- +""" + +""" tst the cmdFullState command the next command will break the last command +x=obs[0] +y=obs[2] +z=obs[4] +if self.iteration == 0: + height = 1 + duration = 2 + command_type = Command(2) # Take-off. + args = [height, duration] +# 90-[-9.75560513e-01 3.98563938e-02 -2.90579683e+00 -2.46277672e-03 9.46152214e-01 -4.69072173e-02 5.22578314e-03 3.41655474e-02 5.11630895e-02 -2.64937344e-01 1.81993538e+00 -7.07496076e-01] +elif self.iteration == 3 *self.CTRL_FREQ: + target_pos = np.array([x+0.1,y,z]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + +# 105-[-9.09596873e-01 2.00324028e-01 -2.90993115e+00 -3.19762174e-02 9.08722645e-01 -6.57070694e-02 -4.07341352e-02 -3.57578141e-02 2.78315544e-03 -4.06931208e-01 1.00299233e+00 2.78734319e-01] +elif self.iteration == 3.5 *self.CTRL_FREQ: + target_pos = np.array([x, y+0.1, z]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + +# 120-[-9.09082306e-01 -8.87231485e-02 -2.84472628e+00 2.40919527e-01 8.64559996e-01 1.77237485e-02 3.32461760e-02 2.48394834e-02 2.12280677e-03 9.54833499e-01 1.31716466e+00 1.11926138e-01] +elif self.iteration == 4 *self.CTRL_FREQ: + target_pos = np.array([x, y, z+0.1]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] +else : + command_type = Command(0) # None. 
+ args = [] +# 135-[-0.90734541 0.07334976 -2.83234816 -0.10398782 0.90615406 0.03580154 -0.0083358 -0.00963472 0.00332174 0.67757902 0.5731973 0.09230706] +""" + +""" +# Example action : Handwritten solution for GitHub's getting_stated scenario. +# # default action +# if self.iteration == 0: +# height = 1 +# duration = 2 +# command_type = Command(2) # Take-off. +# args = [height, duration] + +# elif self.iteration >= 3*self.CTRL_FREQ and self.iteration < 20*self.CTRL_FREQ: +# step = min(self.iteration-3*self.CTRL_FREQ, len(self.ref_x) -1) +# target_pos = np.array([self.ref_x[step], self.ref_y[step], self.ref_z[step]]) +# target_vel = np.zeros(3) +# target_acc = np.zeros(3) +# target_yaw = 0. +# target_rpy_rates = np.zeros(3) + +# command_type = Command(1) # cmdFullState. +# args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + +# elif self.iteration == 20*self.CTRL_FREQ: +# command_type = Command(6) # notify setpoint stop. +# args = [] + +# elif self.iteration == 20*self.CTRL_FREQ+1: +# x = self.ref_x[-1] +# y = self.ref_y[-1] +# z = 1.5 +# yaw = 0. +# duration = 2.5 + +# command_type = Command(5) # goTo. +# args = [[x, y, z], yaw, duration, False] + +# elif self.iteration == 23*self.CTRL_FREQ: +# x = self.initial_obs[0] +# y = self.initial_obs[2] +# z = 1.5 +# yaw = 0. +# duration = 6 + +# command_type = Command(5) # goTo. +# args = [[x, y, z], yaw, duration, False] + +# elif self.iteration == 30*self.CTRL_FREQ: +# height = 0. +# duration = 3 + +# command_type = Command(3) # Land. +# args = [height, duration] + +# elif self.iteration == 33*self.CTRL_FREQ-1: +# command_type = Command(-1) # Terminate command to be sent once trajectory is completed. +# args = [] + +# else: +# command_type = Command(0) # None. +# args = [] +""" diff --git a/competition/competition_utils.py b/competition/competition_utils.py index d00fe85ab..059c20405 100644 --- a/competition/competition_utils.py +++ b/competition/competition_utils.py @@ -259,7 +259,8 @@ def wrap(*args, **keyword_args): args[0].interstep_learning_time += elapsed args[0].interstep_learning_occurrences += 1 if elapsed >= args[0].CTRL_TIMESTEP: - print('\n[WARNING] Function "{}" took: {} sec (too slow)'.format(function.__name__, elapsed)) + # print('\n[WARNING] Function "{}" took: {} sec (too slow)'.format(function.__name__, elapsed)) + pass if args[0].VERBOSE and args[0].interstep_counter%int(args[0].CTRL_FREQ/2) == 0: print('\n{}-th call to function "{}" took: {} sec'.format(args[0].interstep_counter, function.__name__, elapsed)) return result diff --git a/competition/create_buffer.py b/competition/create_buffer.py new file mode 100644 index 000000000..05a8ec2d5 --- /dev/null +++ b/competition/create_buffer.py @@ -0,0 +1,499 @@ +"""Write your control strategy. + +Then run: + + $ python3 getting_started.py --overrides ./getting_started.yaml + +Tips: + Search for strings `INSTRUCTIONS:` and `REPLACE THIS (START)` in this file. + + Change the code between the 4 blocks starting with + ######################### + # REPLACE THIS (START) ## + ######################### + and ending with + ######################### + # REPLACE THIS (END) #### + ######################### + with your own code. 
+
+    They are in methods:
+        1) __init__
+        2) cmdFirmware
+        3) interStepLearn (optional)
+        4) interEpisodeLearn (optional)
+
+"""
+import numpy as np
+import time
+from collections import deque
+
+try:
+    from competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory
+except ImportError:
+    # Test import.
+    from .competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory
+from gym.spaces import Box
+from slam import SLAM
+
+from safetyplusplus_folder.plus_logger import SafeLogger
+import random
+file_name='test'
+sim_only=False
+
+class Controller():
+    """Template controller class.
+
+    """
+
+    def __init__(self,
+                 initial_obs,
+                 initial_info,
+                 use_firmware: bool = False,
+                 buffer_size: int = 100,
+                 verbose: bool = False
+                 ):
+        """Initialization of the controller.
+
+        INSTRUCTIONS:
+            The controller's constructor has access to the initial state `initial_obs` and the a priori information
+            contained in dictionary `initial_info`. Use this method to initialize constants, counters, pre-plan
+            trajectories, etc.
+
+        Args:
+            initial_obs (ndarray): The initial observation of the quadrotor's state
+                [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r].
+            initial_info (dict): The a priori information as a dictionary with keys
+                'symbolic_model', 'nominal_physical_parameters', 'nominal_gates_pos_and_type', etc.
+            use_firmware (bool, optional): Choice between the on-board control in `pycffirmware`
+                or the simplified software-only alternative.
+            buffer_size (int, optional): Size of the data buffers used in method `learn()`.
+            verbose (bool, optional): Turn on and off additional printouts and plots.
+
+        """
+        # Save environment and control parameters.
+        self.CTRL_TIMESTEP = initial_info["ctrl_timestep"]
+        self.CTRL_FREQ = initial_info["ctrl_freq"]
+
+        # At level 1 and above this initial observation changes, e.g.:
+        # [-0.9660992341982342, 0, -2.906546142854587, 0, 0.04960550660033104, 0, 0.02840173653255687, -0.05744735611733429, 0.07373743557600966, 0, 0, 0]
+        # [-0.96609923  0. -2.90654614  0.  0.04960551  0.  0.02840174 -0.05744736  0.07373744  0.  0.  0. ]
+        self.initial_obs = initial_obs
+
+        self.VERBOSE = verbose
+        self.BUFFER_SIZE = buffer_size
+
+        # Store a priori scenario information.
+
+        # At level 2 and above these nominal poses change.
+        # Columns: x y z r p y type
+        # Nominal values: [[0.5, -2.5, 0, 0, 0, -1.57, 0], [2, -1.5, 0, 0, 0, 0, 1], [0, 0.2, 0, 0, 0, 1.57, 1], [-0.5, 1.5, 0, 0, 0, 0, 0]]
+        # When the target gate comes into range it is reported like
+        # "Current target gate position: [0.6143489615604684, -2.634075935292277, 1.0, 0.0, 0.0, -1.6783756661588305]"
+        self.NOMINAL_GATES = initial_info["nominal_gates_pos_and_type"]
+
+        # At level 2 and above these nominal poses change.
+        # Columns: x y z r p y
+        # Nominal values: [[1.5, -2.5, 0, 0, 0, 0], [0.5, -1, 0, 0, 0, 0], [1.5, 0, 0, 0, 0, 0], [-1, 0, 0, 0, 0, 0]]
+        self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"]
+
+        # Check for pycffirmware.
+        if use_firmware:
+            self.ctrl = None
+        else:
+            # Initialize a simple PID controller for debugging and testing.
+            # Do NOT use for the IROS 2022 competition.
+            self.ctrl = PIDController()
+            # Save additional environment parameters.
+            self.KF = initial_info["quadrotor_kf"]
+
+        #########################
+        # REPLACE THIS (START) ##
+        #########################
+        import torch
+        torch.manual_seed(101)
+        np.random.seed(101)
+        # Time gap between network commands: 1 -> one command per second, 0.5 -> one per 0.5 s (~0.2 m per step, ~400 episodes).
+        self.net_work_freq=0.5
+
+        # state representation
+        self.mass=initial_info['nominal_physical_parameters']['quadrotor_mass']
+        self.global_state_dim = 9
+        self.local_state_dim=[5,23,23]
+        self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)]
+        self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)]
+
+        self.m_slam = SLAM()
+        self.m_slam.reset_occ_map()
+
+        # action space
+        action_dim = 3
+        max_action=2
+        min_action=max_action * (-1)
+        self.action_space=Box(np.array([min_action,min_action,min_action],dtype=np.float64),np.array([max_action,max_action,max_action],dtype=np.float64))
+        self.action_space.seed(1)
+        self.current_action=np.zeros(action_dim)
+        self.last_action=np.zeros(action_dim)
+        # network hyperparameters
+        kwargs = {
+            "state_dim": self.global_state_dim,
+            "action_dim": action_dim,
+            "max_action": max_action,
+            "rew_discount": 0.99,
+            "tau":0.005,
+            "policy_noise": 0.2 * max_action,
+            "noise_clip": 0.5 * max_action,
+            "policy_freq": 2,
+        }
+        from safetyplusplus_folder import safetyplusplus,replay_buffer,unconstrained
+        # TD3 policy
+        self.policy = unconstrained.TD3(**kwargs)
+        self.replay_buffer = replay_buffer.IrosReplayBuffer(self.global_state_dim,self.local_state_dim,action_dim,load=False)
+
+        # episode/environment bookkeeping
+        self.episode_reward = 0
+        self.episode_cost = 0
+        self.collisions_count=self.violations_count=0
+        self.episode_iteration=-1
+        self.target_gate_id=0
+        self.goal_pos=[initial_info['x_reference'][0],initial_info['x_reference'][2],initial_info['x_reference'][4]]
+        self.target_offset=np.array([0,0,0])
+        self.get_offset(info=None)
+
+        # Reset counters and buffers.
+        self.reset()
+        self.interEpisodeReset()
+
+        # logger
+        self.logger_plus = SafeLogger(exp_name=file_name, env_name="competition", seed=0,
+                                      fieldnames=['Eptime','EpRet', 'EpCost', 'collision_num','violation_num','target_gate'])
+        #########################
+        # REPLACE THIS (END) ####
+        #########################
+
+    def get_offset(self,info):
+        # Initial offset, before the first gate.
+        if info is None:
+            self.target_offset=np.array([0.2,0,0])
+        # Offset for the next target gate after crossing a gate.
+        elif info['current_target_gate_id'] == 1 or info['current_target_gate_id'] == 3:
+            self.target_offset=np.array([0,0.2,0])
+        elif info['current_target_gate_id'] == 2:
+            self.target_offset=np.array([-0.2,0,0])
+        else:
+            self.target_offset=np.array([0,0,0])
+
+    def get_state(self,obs,info):
+        # Global state layout: position (3) + vector to goal (3) + gate-in-range flag (1) + target gate id (1) + mass (1),
+        # plus the local occupancy-map image as a second entry.
+        # Current position x, y, z.
+        current_pos=[obs[0],obs[2],obs[4]]
+
+        if info !={}:
+            # goal [x,y,z]
+            if info['current_target_gate_id'] == -1 :
+                current_goal_pos = self.goal_pos
+                current_target_gate_in_range= 0
+            else :
+                # 1 if the target gate is within range, else 0.
+                current_target_gate_in_range= 1 if info['current_target_gate_in_range'] == True else 0
+                # Exact [x, y, z, yaw] when the gate is in range, otherwise a noisy estimate (roll and pitch are zero and ignored).
+                current_target_gate_pos = info['current_target_gate_pos']
+                if current_target_gate_pos[2]==0: # z == 0 means the gate is out of range; fill in the nominal gate height.
+                    current_target_gate_pos[2]=1 if info['current_target_gate_type'] == 0 else 0.525
+                current_target_gate_pos=np.array(current_target_gate_pos)[[0,1,2,5]]
+                current_goal_pos=current_target_gate_pos[:3]
+                current_goal_pos += self.target_offset
+        else :
+            current_target_gate_in_range= 0
+            current_goal_pos=np.zeros(3)
+        target_vector=[current_goal_pos[0]- current_pos[0],current_goal_pos[1]- current_pos[1],current_goal_pos[2]- current_pos[2]]
+        global_state=np.array([current_pos[0], current_pos[1], current_pos[2],target_vector[0],target_vector[1],target_vector[2],
+                               current_target_gate_in_range,info['current_target_gate_id'],self.mass])
+        local_state = self.m_slam.generate_3obs_img(obs,target_vector,name=self.episode_iteration,save=False)
+        return [global_state,local_state]
+
+    def cmdFirmware(self,ctime,obs,reward=None,done=None,info=None,exploration=True):
+        """Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface.
+
+        INSTRUCTIONS:
+            Re-implement this method to return the target position, velocity, acceleration, attitude, and attitude rates to be sent
+            from Crazyswarm to the Crazyflie using, e.g., a `cmdFullState` call.
+
+        Args:
+            ctime (float): Episode's elapsed time, in seconds.
+            obs (ndarray): The quadrotor's Vicon data [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0].
+            reward (float, optional): The reward signal.
+            done (bool, optional): Whether the episode has terminated.
+            info (dict, optional): Current step information as a dictionary with keys
+                'constraint_violation', 'current_target_gate_pos', etc.
+            exploration (bool, optional): Whether the policy should act with exploration noise.
+
+        Returns:
+            Command: selected type of command (takeOff, cmdFullState, etc., see Enum-like class `Command`).
+            List: arguments for the type of command (see comments in class `Command`)
+
+        """
+        self.episode_iteration+=1
+
+        if self.ctrl is not None:
+            raise RuntimeError("[ERROR] Using method 'cmdFirmware' but Controller was created with 'use_firmware' = False.")
+
+        #########################
+        # REPLACE THIS (START) ##
+        #########################
+        self.m_slam.update_occ_map(info)
+
+        # Begin with a take-off.
+        if self.episode_iteration == 0:
+            height = 1
+            duration = 1.5
+            command_type = Command(2)  # Take-off.
+            args = [height, duration]
+
+        # Choose the next setpoint (here a scripted action toward the goal; the network call is commented out while collecting data).
+        elif self.episode_iteration >= 3 * self.CTRL_FREQ :
+
+            if self.episode_iteration % (30*self.net_work_freq) ==0:
+                # cmdFullState
+                command_type = Command(1)  # cmdFullState.
+                all_state=self.get_state(obs,info)
+                global_state=all_state[0]
+                current_pos=np.array([global_state[0],global_state[1],global_state[2]])
+                target_pos=np.array([global_state[3]+current_pos[0],global_state[4]+current_pos[1],global_state[5]+current_pos[2]])
+                action_raw=(target_pos-current_pos)
+                action_raw= np.array([_+1 if _>=0 else _-1 for _ in action_raw])
+                # Clip the raw action to the action space [-2, 2].
+                action= np.where(action_raw<=2,action_raw,2)
+                action= np.where(action>=-2,action,-2)
+                # action = self.policy.select_action(all_state, exploration=exploration)     # array: delta_x, delta_y, delta_z
+                action /= 10
+                self.current_all_state=all_state
+                self.current_action=action
+                # import pdb;pdb.set_trace()
+                target_pos = global_state[[0,1,2]] + action
+                target_vel = np.zeros(3)
+                target_acc = np.zeros(3)
+                target_yaw = 0.
+                target_rpy_rates = np.zeros(3)
+                args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates]
+            # Do nothing on the remaining control steps.
+            else :
+                command_type = Command(0)  # None.
+ args = [] + + ######################### + # REPLACE THIS (END) #### + ######################### + + return command_type, args + + # NOT need to re-implement + def cmdSimOnly(self,time,obs,reward=None,done=None,info=None): + """PID per-propeller thrusts with a simplified, software-only PID quadrotor controller. + + INSTRUCTIONS: + You do NOT need to re-implement this method for the IROS 2022 Safe Robot Learning competition. + Only re-implement this method when `use_firmware` == False to return the target position and velocity. + + Args: + time (float): Episode's elapsed time, in seconds. + obs (ndarray): The quadrotor's state [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. + reward (float, optional): The reward signal. + done (bool, optional): Wether the episode has terminated. + info (dict, optional): Current step information as a dictionary with keys + 'constraint_violation', 'current_target_gate_pos', etc. + + Returns: + List: target position (len == 3). + List: target velocity (len == 3). + + """ + self.episode_iteration+=1 + if self.ctrl is None: + raise RuntimeError("[ERROR] Attempting to use method 'cmdSimOnly' but Controller was created with 'use_firmware' = True.") + + self.iteration = int(time*self.CTRL_FREQ) + self.m_slam.update_occ_map(info) + # import pdb;pdb.set_trace() + ######################### + self.interstep_counter += 1 + if self.episode_iteration <= 40: + target_p = np.array([-0.9,-2.9,1]) + else : + all_state=self.get_state(obs,info) + global_state=all_state[0] + if self.interepisode_counter < 30: + action= self.action_space.sample() + else: + action = self.policy.select_action(all_state, exploration=True) # array delta_x , delta_y, delta_z + action /= 10 + self.current_all_state=all_state + self.current_action=action + target_p = global_state[[0,1,2]] + action + + target_v = np.zeros(3) + ######################### + + return target_p, target_v + + @timing_step + def interStepLearn(self,args,obs,reward,done,info): + """Learning and controller updates called between control steps. + + INSTRUCTIONS: + Use the historically collected information in the five data buffers of actions, observations, + rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan. + + Args: + action (List): Most recent applied action. + obs (List): Most recent observation of the quadrotor state. + reward (float): Most recent reward. + done (bool): Most recent done flag. + info (dict): Most recent information dictionary. + + """ + self.interstep_counter += 1 + reward=0 + ######################### + # REPLACE THIS (START) ## + ######################### + if sim_only==False: + # add experience when use network to decide + if self.episode_iteration == 3 * self.CTRL_FREQ: + self.last_all_state=self.current_all_state + self.last_action = self.current_action + # import pdb;pdb.set_trace() + + if self.episode_iteration> 3 * self.CTRL_FREQ : + if self.episode_iteration % (30*self.net_work_freq) ==0: + last_pos= self.last_all_state[0][[0,1,2]] + current_pos=self.current_all_state[0][[0,1,2]] + + last2goal_vector= self.last_all_state[0][[3,4,5]] + last2cur_vector=current_pos-last_pos + cur2goal_vector=self.current_all_state[0][[3,4,5]] + + # std_last2goal_vector=last2goal_vector/(min(abs(last2goal_vector))) + # std_last2goal_vector=np.array([max(min(_,1.),-1.) for _ in std_last2goal_vector]) + + # std_last2cur_vector=last2cur_vector/(min(abs(last2cur_vector))) + # std_last2cur_vector=np.array([max(min(_,1.),-1.) 
for _ in std_last2cur_vector])
+
+                cur2goal_dis=sum(cur2goal_vector * cur2goal_vector)
+                last2goal_dis=sum(last2goal_vector * last2goal_vector)
+                # import pdb;pdb.set_trace()
+
+                # Reward progress toward the gate; give a flat bonus on the step that crosses a gate.
+                if self.last_all_state[0][-2] == self.current_all_state[0][-2]:
+                    reward =( last2goal_dis - cur2goal_dis ) * 20
+                else:
+                    reward = 10
+                # Crossed the gate.
+                if info['current_target_gate_id']!=self.target_gate_id :
+                    reward += 100
+                    print(f"STEP{self.episode_iteration} , step gate{self.target_gate_id}")
+                    self.get_offset(info)
+                if info['at_goal_position']:
+                    reward += 100
+                if info['constraint_violation'] :
+                    reward -= 10
+                if info["collision"][1] :
+                    reward -= 10
+                if info["collision"][1]:
+                    self.collisions_count += 1
+                    self.episode_cost+=1
+                if 'constraint_values' in info and info['constraint_violation'] == True:
+                    self.violations_count += 1
+                    self.episode_cost+=1
+                # cmdFullState
+                self.replay_buffer.add(self.last_all_state[0],self.last_all_state[1],self.last_action * 10 ,self.current_all_state[0],self.current_all_state[1],reward,done)
+                if self.episode_iteration % 900 ==0 :
+                    print(f"Step{self.episode_iteration} add buffer:\nlast_pos:{last_pos} aim vector: {last2goal_vector} ")
+                    print(f"action_infer: {self.last_action * 10}\t lastDis-CurDis:{( last2goal_dis - cur2goal_dis )}")
+                    print(f"last2cur_pos_vector: {last2cur_vector } \t reward: {reward}")
+                    print(f"target_gate_id:{info['current_target_gate_id']} ; pos: {info['current_target_gate_pos']}")
+                    print("*************************************************************************************")
+                # import pdb;pdb.set_trace()
+                self.episode_reward+=reward
+
+                # Prepare for the next update.
+                self.last_all_state=self.current_all_state
+                self.last_action=self.current_action
+                self.target_gate_id= info['current_target_gate_id']
+        else :
+            pass
+
+        # Changing the number of training updates works better (kept for reference).
+        # if self.interepisode_counter > 5:
+        #     self.policy.train(self.replay_buffer,batch_size=128,train_nums=int(5))
+
+        # Once the buffer holds 1e4 transitions, dump it to disk and stop.
+        if self.replay_buffer.size==1e4:
+            self.replay_buffer.write()
+            print("write done")
+            import sys
+            sys.exit(0)
+        #########################
+        # REPLACE THIS (END) ####
+        #########################
+
+    @timing_ep
+    def interEpisodeLearn(self,info):
+        """Learning and controller updates called between episodes.
+
+        INSTRUCTIONS:
+            Use the historically collected information in the five data buffers of actions, observations,
+            rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan.
+
+        """
+        self.interepisode_counter += 1
+
+        #########################
+        # REPLACE THIS (START) ##
+        #########################
+
+        # if self.interepisode_counter >= 20 :
+        #     self.policy.train(self.replay_buffer,batch_size=256,train_nums=(30 /self.net_work_freq ))
+
+        if self.interepisode_counter % 100 == 0 :
+            self.policy.save(filename=f"{self.logger_plus.log_dir}/{self.interepisode_counter}")
+
+        print(f"Episode Num: {self.interepisode_counter} Reward: {self.episode_reward:.3f} Cost: {self.episode_cost:.3f} violation: {self.violations_count:.3f} collision: {self.collisions_count:.3f}")
+        print(f"gates_passed: {info['current_target_gate_id']}, at_goal_position: {info['at_goal_position']}, task_completed: {info['task_completed']}")
+        self.logger_plus.update([self.episode_reward, self.episode_cost,self.collisions_count,self.violations_count,info['current_target_gate_id']], total_steps=self.interepisode_counter)
+
+        #########################
+        # REPLACE THIS (END) ####
+        #########################
+
+    def reset(self):
+        # Counters.
+ self.interstep_counter = 0 + self.interepisode_counter = 0 + + def interEpisodeReset(self): + """Initialize/reset learning timing variables. + + Called between episodes in `getting_started.py`. + + """ + # Timing stats variables. + self.interstep_learning_time = 0 + self.interstep_learning_occurrences = 0 + self.interepisode_learning_time = 0 + self.episode_reward = 0 + self.episode_cost = 0 + self.collisions_count=self.violations_count=0 + self.episode_iteration=-1 + # env-based variable + self.target_gate_id=0 + self.m_slam.reset_occ_map() + self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.target_offset=np.array([0,0,0]) + diff --git a/competition/edit_this.py b/competition/edit_this.py index e61f25d55..2a2c4457c 100644 --- a/competition/edit_this.py +++ b/competition/edit_this.py @@ -24,8 +24,10 @@ 4) interEpisodeLearn (optional) """ +from bdb import set_trace +from tkinter import Y import numpy as np - +import time from collections import deque try: @@ -33,7 +35,19 @@ except ImportError: # PyTest import. from .competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory +from gym.spaces import Box + +from safetyplusplus_folder.slam import SLAM +from safetyplusplus_folder.plus_logger import SafeLogger +import random + + +file_name='Submit' +Pretrain=False +test=True +sim_only=False +model_name='submit_models/1016_01_1800' ######################### # REPLACE THIS (START) ## ######################### @@ -55,6 +69,7 @@ class Controller(): """ + def __init__(self, initial_obs, initial_info, @@ -86,11 +101,9 @@ def __init__(self, self.initial_obs = initial_obs self.VERBOSE = verbose self.BUFFER_SIZE = buffer_size - - # Store a priori scenario information. self.NOMINAL_GATES = initial_info["nominal_gates_pos_and_type"] - self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"] - + self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"] + # Check for pycffirmware. if use_firmware: self.ctrl = None @@ -101,71 +114,131 @@ def __init__(self, # Save additonal environment parameters. self.KF = initial_info["quadrotor_kf"] - # Reset counters and buffers. 
- self.reset() - self.interEpisodeReset() + ######################### # REPLACE THIS (START) ## ######################### + import torch + torch.manual_seed(101) + torch.cuda.manual_seed(101) + torch.cuda.manual_seed_all(101) + np.random.seed(101) + random.seed(101) + self.begin_train_seconds=1.5 + self.begin_net_infer_epo=80 + self.begin_train_epo=100 + self.net_work_freq=0.5 # time gap 1 1s/次 0.5s/次 0.2m 400episode + max_action=2 + self.global_state_dim = 9 + self.set_offset=True + self.batch_size=128 + + # state-based + self.mass=initial_info['nominal_physical_parameters']['quadrotor_mass'] + self.local_state_dim=[5,23,23] + self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + + self.m_slam = SLAM() + self.m_slam.reset_occ_map() + + # action + action_dim = 3 + min_action=max_action * (-1) + self.action_space=Box(np.array([min_action,min_action,min_action],dtype=np.float64),np.array([max_action,max_action,max_action],dtype=np.float64)) + self.action_space.seed(1) + self.current_action=np.zeros(action_dim) + self.last_action=np.zeros(action_dim) + # network + kwargs = { + "state_dim": self.global_state_dim, + "action_dim": action_dim, + "max_action": max_action, + "rew_discount": 0.99, + "tau":0.005, + "policy_noise": 0.2 * max_action, + "noise_clip": 0.5 * max_action, + "policy_freq": 2, + } + from safetyplusplus_folder import safetyplusplus,replay_buffer,unconstrained + # td3 policy + self.policy = unconstrained.TD3(**kwargs) + self.replay_buffer = replay_buffer.IrosReplayBuffer(self.global_state_dim,self.local_state_dim,action_dim,load=False) + if test or Pretrain: + self.policy.load(model_name) + print("load ok ") + + # env-based variable + self.info=None + self.cur2goal_dis=0 + self.episode_reward = 0 + self.episode_cost = 0 + self.collisions_count=self.violations_count=0 + self.episode_iteration=-1 + self.target_gate_id=0 + self.goal_pos=[initial_info['x_reference'][0],initial_info['x_reference'][2],initial_info['x_reference'][4]] + self.target_offset=np.array([0,0,0]) + self.trigger=False + self.get_offset(info=None) + self.rule_control_time=0 + # Reset counters and buffers. + self.reset() + self.interEpisodeReset() - # Call a function in module `example_custom_utils`. - ecu.exampleFunction() - - # Example: hardcode waypoints through the gates. - if use_firmware: - waypoints = [(self.initial_obs[0], self.initial_obs[2], initial_info["gate_dimensions"]["tall"]["height"])] # Height is hardcoded scenario knowledge. - else: - waypoints = [(self.initial_obs[0], self.initial_obs[2], self.initial_obs[4])] - for idx, g in enumerate(self.NOMINAL_GATES): - height = initial_info["gate_dimensions"]["tall"]["height"] if g[6] == 0 else initial_info["gate_dimensions"]["low"]["height"] - if g[5] > 0.75 or g[5] < 0: - if idx == 2: # Hardcoded scenario knowledge (direction in which to take gate 2). - waypoints.append((g[0]+0.3, g[1]-0.3, height)) - waypoints.append((g[0]-0.3, g[1]-0.3, height)) - else: - waypoints.append((g[0]-0.3, g[1], height)) - waypoints.append((g[0]+0.3, g[1], height)) - else: - if idx == 3: # Hardcoded scenario knowledge (correct how to take gate 3). 
- waypoints.append((g[0]+0.1, g[1]-0.3, height)) - waypoints.append((g[0]+0.1, g[1]+0.3, height)) - else: - waypoints.append((g[0], g[1]-0.3, height)) - waypoints.append((g[0], g[1]+0.3, height)) - waypoints.append([initial_info["x_reference"][0], initial_info["x_reference"][2], initial_info["x_reference"][4]]) - - # Polynomial fit. - self.waypoints = np.array(waypoints) - deg = 6 - t = np.arange(self.waypoints.shape[0]) - fx = np.poly1d(np.polyfit(t, self.waypoints[:,0], deg)) - fy = np.poly1d(np.polyfit(t, self.waypoints[:,1], deg)) - fz = np.poly1d(np.polyfit(t, self.waypoints[:,2], deg)) - duration = 15 - t_scaled = np.linspace(t[0], t[-1], int(duration*self.CTRL_FREQ)) - self.ref_x = fx(t_scaled) - self.ref_y = fy(t_scaled) - self.ref_z = fz(t_scaled) - - if self.VERBOSE: - # Plot trajectory in each dimension and 3D. - plot_trajectory(t_scaled, self.waypoints, self.ref_x, self.ref_y, self.ref_z) - - # Draw the trajectory on PyBullet's GUI. - draw_trajectory(initial_info, self.waypoints, self.ref_x, self.ref_y, self.ref_z) - + # logger + self.logger_plus = SafeLogger(exp_name=file_name, env_name="compitition", seed=0, + fieldnames=['Eptime','EpRet', 'EpCost', 'collision_num','vilation_num','target_gate','dis']) ######################### # REPLACE THIS (END) #### ######################### - - def cmdFirmware(self, - time, - obs, - reward=None, - done=None, - info=None - ): + + + def get_offset(self,info): + # init + if info is None: + self.target_offset=np.array([0.05,0,0]) + # step cross gate + elif info['current_target_gate_id'] == 1 or info['current_target_gate_id'] == 3: + self.target_offset=np.array([0,0.05,0]) + elif info['current_target_gate_id'] == 2: + self.target_offset=np.array([-0.05,0,0]) + else: + self.target_offset=np.array([0,0,0]) + + def get_state(self,obs,info): + # state info :+ obs_info(3) + goal_info(3) + gate_in_range(1)+target_gate_id(1) + mass(1) + pic_info(16) + # x,y,z 3 + current_pos=[obs[0],obs[2],obs[4]] + + if info !={}: + # goal [x,y,z] + if info['current_target_gate_id'] == -1 : + current_goal_pos = self.goal_pos + current_target_gate_in_range= 0 + else : + # 1 or 0 1 + current_target_gate_in_range= 1 if info['current_target_gate_in_range'] == True else 0 + # [x,y,z,r] exactly if current_target_gate_in_range==1 else [x,y,z,y] noisy (r,p is zero ,ignored ) + current_target_gate_pos = info['current_target_gate_pos'] + if current_target_gate_pos[2]==0: # means that current_target_gate_in_range is False, add default height. 
+ current_target_gate_pos[2]=1 if info['current_target_gate_type'] == 0 else 0.525 + current_target_gate_pos=np.array(current_target_gate_pos)[[0,1,2,5]] + current_goal_pos=current_target_gate_pos[:3] + if self.set_offset: + current_goal_pos += self.target_offset + else : + current_target_gate_in_range= 0 + current_goal_pos=np.zeros(3) + target_vector=[current_goal_pos[0]- current_pos[0],current_goal_pos[1]- current_pos[1],current_goal_pos[2]- current_pos[2]] + # 10.09 V0.1 + global_state=np.array([current_pos[0], current_pos[1], current_pos[2],target_vector[0],target_vector[1],target_vector[2], + current_target_gate_in_range,info['current_target_gate_id'],self.mass]) + # global_state=np.array([current_pos[0], current_pos[1], current_pos[2],target_vector[0],target_vector[1],target_vector[2],self.mass]) + local_state = self.m_slam.generate_3obs_img(obs,target_vector,name=self.episode_iteration,save=False if self.episode_iteration % 20!=0 else True) + return [global_state,local_state] + + def cmdFirmware(self,ctime,obs,reward=None,done=None,info=None,exploration=True): """Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface. INSTRUCTIONS: @@ -185,87 +258,102 @@ def cmdFirmware(self, List: arguments for the type of command (see comments in class `Command`) """ + self.episode_iteration+=1 + if self.ctrl is not None: raise RuntimeError("[ERROR] Using method 'cmdFirmware' but Controller was created with 'use_firmware' = False.") - iteration = int(time*self.CTRL_FREQ) - ######################### # REPLACE THIS (START) ## ######################### - - # Handwritten solution for GitHub's getting_stated scenario. - - if iteration == 0: + self.m_slam.update_occ_map(info) + + + # begin with take off + if self.episode_iteration == 0: height = 1 - duration = 2 - + duration = 1.5 command_type = Command(2) # Take-off. args = [height, duration] - - elif iteration >= 3*self.CTRL_FREQ and iteration < 20*self.CTRL_FREQ: - step = min(iteration-3*self.CTRL_FREQ, len(self.ref_x) -1) - target_pos = np.array([self.ref_x[step], self.ref_y[step], self.ref_z[step]]) + elif info!={} and info['current_target_gate_id'] == -1 and self.episode_iteration % 5 ==0: + self.rule_control_time+=1 + # navigate to the goal_pos.and stop + command_type = Command(1) + if self.rule_control_time <=5: + target_pos = np.array(self.goal_pos) + target_pos[1] -=0.4 + elif self.rule_control_time <=20: + target_pos = np.array(self.goal_pos) + target_pos[1] -=0.2 + else: + target_pos = np.array(self.goal_pos) target_vel = np.zeros(3) target_acc = np.zeros(3) target_yaw = 0. target_rpy_rates = np.zeros(3) - - command_type = Command(1) # cmdFullState. - args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] - - elif iteration == 20*self.CTRL_FREQ: - command_type = Command(6) # Notify setpoint stop. - args = [] - - elif iteration == 20*self.CTRL_FREQ+1: - x = self.ref_x[-1] - y = self.ref_y[-1] - z = 1.5 - yaw = 0. - duration = 2.5 - - command_type = Command(5) # goTo. - args = [[x, y, z], yaw, duration, False] - - elif iteration == 23*self.CTRL_FREQ: - x = self.initial_obs[0] - y = self.initial_obs[2] - z = 1.5 - yaw = 0. - duration = 6 - - command_type = Command(5) # goTo. - args = [[x, y, z], yaw, duration, False] - - elif iteration == 30*self.CTRL_FREQ: - height = 0. - duration = 3 - - command_type = Command(3) # Land. - args = [height, duration] - - elif iteration == 33*self.CTRL_FREQ-1: - command_type = Command(-1) # Terminate command to be sent once the trajectory is completed. 
- args = [] - - else: + args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + + if self.episode_iteration % (30*self.net_work_freq) ==0: + all_state=self.get_state(obs,info) + global_state=all_state[0] + action=target_pos-global_state[[0,1,2]] + action /= 10 + + self.current_all_state=all_state + self.current_action=action + # using network to choose action + elif self.episode_iteration >= self.begin_train_seconds * self.CTRL_FREQ and self.episode_iteration % (30*self.net_work_freq) ==0 : + # cmdFullState + command_type = Command(1) + all_state=self.get_state(obs,info) + global_state=all_state[0] + if not (test or Pretrain) and self.interepisode_counter < self.begin_net_infer_epo: + action= self.action_space.sample() + else: + action = self.policy.select_action(all_state, exploration=True if not test else False) # array delta_x , delta_y, delta_z + action /= 10 + target_pos = global_state[[0,1,2]] + action + self.current_all_state=all_state + self.current_action=action + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + # other time do nothing + else : command_type = Command(0) # None. args = [] + # if test and info !={}: + # all_state=self.get_state(obs,info) + # global_state=all_state[0] + # current_local_space=all_state[1][2-1:2+2,11-1:11+2,11-1:11+2] + # if (current_local_space<0).any(): + # command_type = Command(1) + # if info['current_target_gate_id'] ==-1: + # last_gate_id=3 + # elif info['current_target_gate_id'] ==0: + # last_gate_id=0 + # else: + # last_gate_id=info['current_target_gate_id']-1 + # last_gate_pos=self.NOMINAL_GATES[last_gate_id] + # last_gate_pos[2]=1 if last_gate_pos[-1]==0 else 0.525 + # last_gate_pos=np.array(last_gate_pos[0:3]) + # target_pos=last_gate_pos + # target_vel = np.zeros(3) + # target_acc = np.zeros(3) + # target_yaw = 0. + # target_rpy_rates = np.zeros(3) + # args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] ######################### # REPLACE THIS (END) #### ######################### - + return command_type, args - def cmdSimOnly(self, - time, - obs, - reward=None, - done=None, - info=None - ): + # NOT need to re-implement + def cmdSimOnly(self,time,obs,reward=None,done=None,info=None): """PID per-propeller thrusts with a simplified, software-only PID quadrotor controller. INSTRUCTIONS: @@ -285,28 +373,36 @@ def cmdSimOnly(self, List: target velocity (len == 3). 
""" + self.episode_iteration+=1 if self.ctrl is None: raise RuntimeError("[ERROR] Attempting to use method 'cmdSimOnly' but Controller was created with 'use_firmware' = True.") - iteration = int(time*self.CTRL_FREQ) + self.iteration = int(time*self.CTRL_FREQ) + self.m_slam.update_occ_map(info) ######################### - if iteration < len(self.ref_x): - target_p = np.array([self.ref_x[iteration], self.ref_y[iteration], self.ref_z[iteration]]) - else: - target_p = np.array([self.ref_x[-1], self.ref_y[-1], self.ref_z[-1]]) + self.interstep_counter += 1 + if self.episode_iteration <= 40: + target_p = np.array([-0.9,-2.9,1]) + else : + all_state=self.get_state(obs,info) + global_state=all_state[0] + if self.interepisode_counter < 30: + action= self.action_space.sample() + else: + action = self.policy.select_action(all_state, exploration=True) # array delta_x , delta_y, delta_z + action /= 10 + self.current_all_state=all_state + self.current_action=action + target_p = global_state[[0,1,2]] + action + target_v = np.zeros(3) ######################### return target_p, target_v @timing_step - def interStepLearn(self, - action, - obs, - reward, - done, - info): + def interStepLearn(self,args,obs,reward,done,info): """Learning and controller updates called between control steps. INSTRUCTIONS: @@ -322,20 +418,82 @@ def interStepLearn(self, """ self.interstep_counter += 1 - - # Store the last step's events. - self.action_buffer.append(action) - self.obs_buffer.append(obs) - self.reward_buffer.append(reward) - self.done_buffer.append(done) - self.info_buffer.append(info) - + reward=0 ######################### # REPLACE THIS (START) ## ######################### - - pass - + if not sim_only: + # add experience when use network to decide + if self.episode_iteration == self.begin_train_seconds * self.CTRL_FREQ: + self.last_all_state=self.current_all_state + self.last_action = self.current_action + + if self.episode_iteration> self.begin_train_seconds * self.CTRL_FREQ : + if info['constraint_violation']: + self.reward -= 50 + if info["collision"][1]: + self.reward -= 100 + self.collisions_count += 1 + self.episode_cost+=1 + if 'constraint_values' in info and info['constraint_violation'] == True: + self.violations_count += 1 + self.episode_cost+=1 + # if info['constraint_violation'] and ( test or self.interepisode_counter> 100 ) : + # print(f"step{self.episode_iteration} , constraint_violation : current pos : {current_pos}") + # if info["collision"][1] and ( test or self.interepisode_counter> 100 ): + # print(info["collision"]) + + if self.episode_iteration % (30*self.net_work_freq) ==0: + last_pos= self.last_all_state[0][[0,1,2]] + current_pos=self.current_all_state[0][[0,1,2]] + current_local_space=self.current_all_state[1][2-1:2+2,11-1:11+2,11-1:11+2] + + last2goal_vector= self.last_all_state[0][[3,4,5]] + last2cur_vector=current_pos-last_pos + cur2goal_vector=self.current_all_state[0][[3,4,5]] + cur2goal_dis=sum(cur2goal_vector * cur2goal_vector) + last2goal_dis=sum(last2goal_vector * last2goal_vector) + + if self.target_gate_id == info['current_target_gate_id']: + self.reward +=( last2goal_dis - cur2goal_dis ) * 20 + # cross the gate ,get the big reward + else: + self.reward = self.reward + 100 * ( (info['current_target_gate_id'] if info['current_target_gate_id']!=-1 else 4) +1) + print(f"STEP{self.episode_iteration} , step gate{self.target_gate_id}") + if info['current_target_gate_id']==-1: + print("step all gates") + self.trigger=True + self.get_offset(info) + if info['at_goal_position']: + 
self.reward += 100 + # if (current_local_space<0).any(): + # reward -= 5 + + # cmdFullState + self.replay_buffer.add(self.last_all_state[0],self.last_all_state[1],self.last_action * 10 ,self.current_all_state[0],self.current_all_state[1],self.reward,done) + if self.episode_iteration % 900 ==0 and not test: + print(f"Step{self.episode_iteration} add buffer:\nlast_pos:{last_pos} aim vector: {last2goal_vector} ") + print(f"action_infer: {self.last_action * 10}\t lastDis-CurDis:{( last2goal_dis - cur2goal_dis )}") + print(f"last2cur_pos_vector: {last2cur_vector } \t reward: {self.reward}") + print(f"target_gate_id:{info['current_target_gate_id']} ; pos: {info['current_target_gate_pos']} ; distance : {cur2goal_dis}") + print("*************************************************************************************") + self.episode_reward+=self.reward + + # ready for next update + self.last_all_state=self.current_all_state + self.last_action=self.current_action + self.target_gate_id= info['current_target_gate_id'] + self.cur2goal_dis=cur2goal_dis + self.info=info + self.reward=0 + + + else : + pass + # change_Train_Num Better + if (self.interepisode_counter > self.begin_train_epo and not test) or ( Pretrain and self.interepisode_counter > 20) : + # self.policy.train(self.replay_buffer,batch_size=min(128,64*int(self.interepisode_counter/100+1)),train_nums=int(1)) + self.policy.train(self.replay_buffer,batch_size=self.batch_size,train_nums=int(1)) ######################### # REPLACE THIS (END) #### ######################### @@ -350,34 +508,23 @@ def interEpisodeLearn(self): """ self.interepisode_counter += 1 - + ######################### # REPLACE THIS (START) ## ######################### - _ = self.action_buffer - _ = self.obs_buffer - _ = self.reward_buffer - _ = self.done_buffer - _ = self.info_buffer + if self.interepisode_counter % 300 == 0 or self.interepisode_counter==1000: + self.policy.save(filename=f"{self.logger_plus.log_dir}/{self.interepisode_counter}") + + print(f"Episode Num: {self.interepisode_counter} step Num : {self.episode_iteration} ,Reward: {self.episode_reward:.3f} Cost: {self.episode_cost:.3f} violation: {self.violations_count:.3f} collision:{self.collisions_count:.3f} ,") + print(f"gates_passed:{self.info['current_target_gate_id']},at_goal_position : {self.info['at_goal_position']} task_completed: {self.info['task_completed']}") + self.logger_plus.update([self.episode_reward, self.episode_cost,self.collisions_count,self.violations_count,self.info['current_target_gate_id'],self.cur2goal_dis], total_steps=self.interepisode_counter) ######################### # REPLACE THIS (END) #### ######################### def reset(self): - """Initialize/reset data buffers and counters. - - Called once in __init__(). - - """ - # Data buffers. - self.action_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.obs_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.reward_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.done_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.info_buffer = deque([], maxlen=self.BUFFER_SIZE) - # Counters. 
self.interstep_counter = 0 self.interepisode_counter = 0 @@ -392,3 +539,19 @@ def interEpisodeReset(self): self.interstep_learning_time = 0 self.interstep_learning_occurrences = 0 self.interepisode_learning_time = 0 + self.episode_reward = 0 + self.episode_cost = 0 + self.collisions_count=self.violations_count=0 + self.episode_iteration=-1 + # env-based variable + self.target_gate_id=0 + self.m_slam.reset_occ_map() + self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.target_offset=np.array([0,0,0]) + self.cur2goal_dis=0 + self.info=None + self.trigger=False + self.rule_control_time=0 + self.reward=0 + diff --git a/competition/edit_this_base.py b/competition/edit_this_base.py new file mode 100644 index 000000000..d1c2af38a --- /dev/null +++ b/competition/edit_this_base.py @@ -0,0 +1,376 @@ +"""Write your control strategy. + +Then run: + + $ python3 getting_started.py --overrides ./getting_started.yaml + +Tips: + Search for strings `INSTRUCTIONS:` and `REPLACE THIS (START)` in this file. + + Change the code between the 4 blocks starting with + ######################### + # REPLACE THIS (START) ## + ######################### + and ending with + ######################### + # REPLACE THIS (END) #### + ######################### + with your own code. + + They are in methods: + 1) __init__ + 2) cmdFirmware + 3) interStepLearn (optional) + 4) interEpisodeLearn (optional) + +""" +import numpy as np + +from collections import deque + +try: + from competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory +except ImportError: + # Test import. + from .competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory + + +class Controller(): + """Template controller class. + + """ + + def __init__(self, + initial_obs, + initial_info, + use_firmware: bool = False, + buffer_size: int = 100, + verbose: bool = False + ): + """Initialization of the controller. + + INSTRUCTIONS: + The controller's constructor has access the initial state `initial_obs` and the a priori infromation + contained in dictionary `initial_info`. Use this method to initialize constants, counters, pre-plan + trajectories, etc. + + Args: + initial_obs (ndarray): The initial observation of the quadrotor's state + [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. + initial_info (dict): The a priori information as a dictionary with keys + 'symbolic_model', 'nominal_physical_parameters', 'nominal_gates_pos_and_type', etc. + use_firmware (bool, optional): Choice between the on-board controll in `pycffirmware` + or simplified software-only alternative. + buffer_size (int, optional): Size of the data buffers used in method `learn()`. + verbose (bool, optional): Turn on and off additional printouts and plots. + + """ + # Save environment and conrol parameters. + self.CTRL_TIMESTEP = initial_info["ctrl_timestep"] + self.CTRL_FREQ = initial_info["ctrl_freq"] + self.initial_obs = initial_obs + self.VERBOSE = verbose + self.BUFFER_SIZE = buffer_size + + # Store a priori scenario information. + self.NOMINAL_GATES = initial_info["nominal_gates_pos_and_type"] + self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"] + + # Check for pycffirmware. + if use_firmware: + self.ctrl = None + else: + # Initialize a simple PID Controller ror debugging and test + # Do NOT use for the IROS 2022 competition. 
+ self.ctrl = PIDController() + # Save additonal environment parameters. + self.KF = initial_info["quadrotor_kf"] + + # Reset counters and buffers. + self.reset() + self.interEpisodeReset() + + ######################### + # REPLACE THIS (START) ## + ######################### + + # Example: harcode waypoints through the gates. + if use_firmware: + waypoints = [(self.initial_obs[0], self.initial_obs[2], initial_info["gate_dimensions"]["tall"]["height"])] # Height is hardcoded scenario knowledge. + else: + waypoints = [(self.initial_obs[0], self.initial_obs[2], self.initial_obs[4])] + for idx, g in enumerate(self.NOMINAL_GATES): + height = initial_info["gate_dimensions"]["tall"]["height"] if g[6] == 0 else initial_info["gate_dimensions"]["low"]["height"] + if g[5] > 0.75 or g[5] < 0: + if idx == 2: # Hardcoded scenario knowledge (direction in which to take gate 2). + waypoints.append((g[0]+0.3, g[1]-0.3, height)) + waypoints.append((g[0]-0.3, g[1]-0.3, height)) + else: + waypoints.append((g[0]-0.3, g[1], height)) + waypoints.append((g[0]+0.3, g[1], height)) + else: + if idx == 3: # Hardcoded scenario knowledge (correct how to take gate 3). + waypoints.append((g[0]+0.1, g[1]-0.3, height)) + waypoints.append((g[0]+0.1, g[1]+0.3, height)) + else: + waypoints.append((g[0], g[1]-0.3, height)) + waypoints.append((g[0], g[1]+0.3, height)) + waypoints.append([initial_info["x_reference"][0], initial_info["x_reference"][2], initial_info["x_reference"][4]]) + + # Polynomial fit + self.waypoints = np.array(waypoints) + deg = 6 + t = np.arange(self.waypoints.shape[0]) + fx = np.poly1d(np.polyfit(t, self.waypoints[:,0], deg)) + fy = np.poly1d(np.polyfit(t, self.waypoints[:,1], deg)) + fz = np.poly1d(np.polyfit(t, self.waypoints[:,2], deg)) + duration = 15 + t_scaled = np.linspace(t[0], t[-1], int(duration*self.CTRL_FREQ)) + self.ref_x = fx(t_scaled) + self.ref_y = fy(t_scaled) + self.ref_z = fz(t_scaled) + + if self.VERBOSE: + # Plot trajectory in each dimension and 3D. + plot_trajectory(t_scaled, self.waypoints, self.ref_x, self.ref_y, self.ref_z) + + # Draw the trajectory on PyBullet's GUI + draw_trajectory(initial_info, self.waypoints, self.ref_x, self.ref_y, self.ref_z) + + ######################### + # REPLACE THIS (END) #### + ######################### + + def cmdFirmware(self, + time, + obs, + reward=None, + done=None, + info=None + ): + """Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface. + + INSTRUCTIONS: + Re-implement this method to return the target position, velocity, acceleration, attitude, and attitude rates to be sent + from Crazyswarm to the Crazyflie using, e.g., a `cmdFullState` call. + + Args: + time (float): Episode's elapsed time, in seconds. + obs (ndarray): The quadrotor's Vicon data [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0]. + reward (float, optional): The reward signal. + done (bool, optional): Wether the episode has terminated. + info (dict, optional): Current step information as a dictionary with keys + 'constraint_violation', 'current_target_gate_pos', etc. + + Returns: + Command: selected type of command (takeOff, cmdFullState, etc., see Enum-like class `Command`). 
+ List: arguments for the type of command (see comments in class `Command`) + + """ + if self.ctrl is not None: + raise RuntimeError("[ERROR] Using method 'cmdFirmware' but Controller was created with 'use_firmware' = False.") + + iteration = int(time*self.CTRL_FREQ) + # print(iteration) + ######################### + # REPLACE THIS (START) ## + ######################### + + # Handwritten solution for GitHub's getting_stated scenario. + + if iteration == 0: + height = 1 + duration = 2 + + command_type = Command(2) # Take-off. + args = [height, duration] + + elif iteration >= 3*self.CTRL_FREQ and iteration < 20*self.CTRL_FREQ: + step = min(iteration-3*self.CTRL_FREQ, len(self.ref_x) -1) + target_pos = np.array([self.ref_x[step], self.ref_y[step], self.ref_z[step]]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + + elif iteration == 20*self.CTRL_FREQ: + command_type = Command(6) # notify setpoint stop. + args = [] + + elif iteration == 20*self.CTRL_FREQ+1: + x = self.ref_x[-1] + y = self.ref_y[-1] + z = 1.5 + yaw = 0. + duration = 2.5 + + command_type = Command(5) # goTo. + args = [[x, y, z], yaw, duration, False] + + elif iteration == 23*self.CTRL_FREQ: + x = self.initial_obs[0] + y = self.initial_obs[2] + z = 1.5 + yaw = 0. + duration = 6 + + command_type = Command(5) # goTo. + args = [[x, y, z], yaw, duration, False] + + elif iteration == 30*self.CTRL_FREQ: + height = 0. + duration = 3 + + command_type = Command(3) # Land. + args = [height, duration] + + elif iteration == 33*self.CTRL_FREQ-1: + command_type = Command(-1) # Terminate command to be sent once trajectory is completed. + args = [] + + else: + command_type = Command(0) # None. + args = [] + + ######################### + # REPLACE THIS (END) #### + ######################### + + return command_type, args + + def cmdSimOnly(self, + time, + obs, + reward=None, + done=None, + info=None + ): + """PID per-propeller thrusts with a simplified, software-only PID quadrotor controller. + + INSTRUCTIONS: + You do NOT need to re-implement this method for the IROS 2022 Safe Robot Learning competition. + Only re-implement this method when `use_firmware` == False to return the target position and velocity. + + Args: + time (float): Episode's elapsed time, in seconds. + obs (ndarray): The quadrotor's state [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. + reward (float, optional): The reward signal. + done (bool, optional): Wether the episode has terminated. + info (dict, optional): Current step information as a dictionary with keys + 'constraint_violation', 'current_target_gate_pos', etc. + + Returns: + List: target position (len == 3). + List: target velocity (len == 3). + + """ + if self.ctrl is None: + raise RuntimeError("[ERROR] Attempting to use method 'cmdSimOnly' but Controller was created with 'use_firmware' = True.") + + iteration = int(time*self.CTRL_FREQ) + + ######################### + if iteration < len(self.ref_x): + target_p = np.array([self.ref_x[iteration], self.ref_y[iteration], self.ref_z[iteration]]) + else: + target_p = np.array([self.ref_x[-1], self.ref_y[-1], self.ref_z[-1]]) + target_v = np.zeros(3) + ######################### + + return target_p, target_v + + @timing_step + def interStepLearn(self, + action, + obs, + reward, + done, + info): + """Learning and controller updates called between control steps. 
+ + INSTRUCTIONS: + Use the historically collected information in the five data buffers of actions, observations, + rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan. + + Args: + action (List): Most recent applied action. + obs (List): Most recent observation of the quadrotor state. + reward (float): Most recent reward. + done (bool): Most recent done flag. + info (dict): Most recent information dictionary. + + """ + self.interstep_counter += 1 + + # Store the last step's events. + self.action_buffer.append(action) + self.obs_buffer.append(obs) + self.reward_buffer.append(reward) + self.done_buffer.append(done) + self.info_buffer.append(info) + + ######################### + # REPLACE THIS (START) ## + ######################### + + pass + + ######################### + # REPLACE THIS (END) #### + ######################### + + @timing_ep + def interEpisodeLearn(self,file_name): + """Learning and controller updates called between episodes. + + INSTRUCTIONS: + Use the historically collected information in the five data buffers of actions, observations, + rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan. + + """ + self.interepisode_counter += 1 + + ######################### + # REPLACE THIS (START) ## + ######################### + + _ = self.action_buffer + _ = self.obs_buffer + _ = self.reward_buffer + _ = self.done_buffer + _ = self.info_buffer + + ######################### + # REPLACE THIS (END) #### + ######################### + + def reset(self): + """Initialize/reset data buffers and counters. + + Called once in __init__(). + + """ + # Data buffers. + self.action_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.obs_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.reward_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.done_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.info_buffer = deque([], maxlen=self.BUFFER_SIZE) + + # Counters. + self.interstep_counter = 0 + self.interepisode_counter = 0 + + def interEpisodeReset(self): + """Initialize/reset learning timing variables. + + Called between episodes in `getting_started.py`. + + """ + # Timing stats variables. + self.interstep_learning_time = 0 + self.interstep_learning_occurrences = 0 + self.interepisode_learning_time = 0 \ No newline at end of file diff --git a/competition/getting_started.py b/competition/getting_started.py index ae2ef1951..97e417b92 100644 --- a/competition/getting_started.py +++ b/competition/getting_started.py @@ -15,7 +15,8 @@ from functools import partial from rich.tree import Tree from rich import print - +import sys +sys.path.append("..") from safe_control_gym.utils.configuration import ConfigFactory from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import sync @@ -249,11 +250,11 @@ def run(test=False): # If an episode is complete, reset the environment. if done: # Plot logging (comment as desired). - if not test: - logger.plot(comment="get_start-episode-"+str(episodes_count), autoclose=True) + # if not test: + # logger.plot(comment="get_start-episode-"+str(episodes_count), autoclose=True) # CSV save. - logger.save_as_csv(comment="get_start-episode-"+str(episodes_count)) + # logger.save_as_csv(comment="get_start-episode-"+str(episodes_count)) # Update the controller internal state and models. ctrl.interEpisodeLearn() @@ -329,6 +330,7 @@ def run(test=False): )) # Print episodes summary. 
+ tree = Tree("Summary") for idx, ep in enumerate(stats): ep_tree = tree.add('Episode ' + str(idx+1)) @@ -339,4 +341,4 @@ def run(test=False): print('\n\n') if __name__ == "__main__": - run() + run() \ No newline at end of file diff --git a/competition/getting_started_my.py b/competition/getting_started_my.py new file mode 100644 index 000000000..88c1d35ae --- /dev/null +++ b/competition/getting_started_my.py @@ -0,0 +1,345 @@ +"""Base script. + +Run as: + + $ python3 getting_started.py --overrides ./getting_started.yaml + +Look for instructions in `README.md` and `edit_this.py`. + +""" +import time +import inspect +import numpy as np +import pybullet as p + +from functools import partial +from rich.tree import Tree +from rich import print +import sys +sys.path.append("..") +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.registration import make +from safe_control_gym.utils.utils import sync +from safe_control_gym.envs.gym_pybullet_drones.Logger import Logger + +try: + from competition_utils import Command, thrusts + from edit_this import Controller +except ImportError: + # Test import. + from .competition_utils import Command, thrusts + from .edit_this import Controller + +try: + import pycffirmware +except ImportError: + FIRMWARE_INSTALLED = False +else: + FIRMWARE_INSTALLED = True +finally: + print("Module 'cffirmware' available:", FIRMWARE_INSTALLED) + + +def run(test=False): + """The main function creating, running, and closing an environment over N episodes. + + """ + + # Start a timer. + START = time.time() + + # Load configuration. + CONFIG_FACTORY = ConfigFactory() + config = CONFIG_FACTORY.merge() + + # Testing (without pycffirmware). + if test: + config['use_firmware'] = False + config['verbose'] = False + config.quadrotor_config['ctrl_freq'] = 60 + config.quadrotor_config['pyb_freq'] = 240 + config.quadrotor_config['gui'] = False + + # Check firmware configuration. + if config.use_firmware and not FIRMWARE_INSTALLED: + raise RuntimeError("[ERROR] Module 'cffirmware' not installed.") + CTRL_FREQ = config.quadrotor_config['ctrl_freq'] + CTRL_DT = 1/CTRL_FREQ + + # Create environment. + if config.use_firmware: + FIRMWARE_FREQ = 500 + assert(config.quadrotor_config['pyb_freq'] % FIRMWARE_FREQ == 0), "pyb_freq must be a multiple of firmware freq" + # The env.step is called at a firmware_freq rate, but this is not as intuitive to the end user, and so + # we abstract the difference. This allows ctrl_freq to be the rate at which the user sends ctrl signals, + # not the firmware. + config.quadrotor_config['ctrl_freq'] = FIRMWARE_FREQ + env_func = partial(make, 'quadrotor', **config.quadrotor_config) + firmware_wrapper = make('firmware', + env_func, FIRMWARE_FREQ, CTRL_FREQ + ) + obs, info = firmware_wrapper.reset() + info['ctrl_timestep'] = CTRL_DT + info['ctrl_freq'] = CTRL_FREQ + env = firmware_wrapper.env + else: + env = make('quadrotor', **config.quadrotor_config) + # Reset the environment, obtain the initial observations and info dictionary. + obs, info = env.reset() + + # Create controller. + vicon_obs = [obs[0], 0, obs[2], 0, obs[4], 0, obs[6], obs[7], obs[8], 0, 0, 0] + # obs = {x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r}. + # vicon_obs = {x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0}. 
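+    # Editor's note (hedged sketch, not part of the original patch): the remapping above keeps
+    # only the pose terms of the 12-dimensional state and zeroes the velocity/body-rate entries,
+    # mimicking what a motion-capture (Vicon) system would report. The same remapping written as
+    # a small helper, purely for illustration; `to_vicon_obs` is a hypothetical name and is not
+    # used elsewhere in this patch:
+    #
+    #   def to_vicon_obs(state):
+    #       return [state[0], 0, state[2], 0, state[4], 0, state[6], state[7], state[8], 0, 0, 0]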
+ ctrl = Controller(vicon_obs, info, config.use_firmware, verbose=config.verbose) + + # Create a logger and counters + logger = Logger(logging_freq_hz=CTRL_FREQ) + episodes_count = 1 + cumulative_reward = 0 + collisions_count = 0 + collided_objects = set() + violations_count = 0 + episode_start_iter = 0 + time_label_id = p.addUserDebugText("", textPosition=[0, 0, 1],physicsClientId=env.PYB_CLIENT) + num_of_gates = len(config.quadrotor_config.gates) + stats = [] + + # Wait for keyboard input to start. + # input("Press any key to start") + + # Initial printouts. + if config.verbose: + print('\tInitial observation [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0]: ' + str(obs)) + print('\tControl timestep: ' + str(info['ctrl_timestep'])) + print('\tControl frequency: ' + str(info['ctrl_freq'])) + print('\tMaximum episode duration: ' + str(info['episode_len_sec'])) + print('\tNominal quadrotor mass and inertia: ' + str(info['nominal_physical_parameters'])) + print('\tGates properties: ' + str(info['gate_dimensions'])) + print('\tObstacles properties: ' + str(info['obstacle_dimensions'])) + print('\tNominal gates positions [x, y, z, r, p, y, type]: ' + str(info['nominal_gates_pos_and_type'])) + print('\tNominal obstacles positions [x, y, z, r, p, y]: ' + str(info['nominal_obstacles_pos'])) + print('\tFinal target hover position [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]: ' + str(info['x_reference'])) + print('\tDistribution of the error on the initial state: ' + str(info['initial_state_randomization'])) + print('\tDistribution of the error on the inertial properties: ' + str(info['inertial_prop_randomization'])) + print('\tDistribution of the error on positions of gates and obstacles: ' + str(info['gates_and_obs_randomization'])) + print('\tDistribution of the disturbances: ' + str(info['disturbances'])) + print('\tA priori symbolic model:') + print('\t\tState: ' + str(info['symbolic_model'].x_sym).strip('vertcat')) + print('\t\tInput: ' + str(info['symbolic_model'].u_sym).strip('vertcat')) + print('\t\tDynamics: ' + str(info['symbolic_model'].x_dot).strip('vertcat')) + print('Input constraints lower bounds: ' + str(env.constraints.input_constraints[0].lower_bounds)) + print('Input constraints upper bounds: ' + str(env.constraints.input_constraints[0].upper_bounds)) + print('State constraints active dimensions: ' + str(config.quadrotor_config.constraints[1].active_dims)) + print('State constraints lower bounds: ' + str(env.constraints.state_constraints[0].lower_bounds)) + print('State constraints upper bounds: ' + str(env.constraints.state_constraints[0].upper_bounds)) + print('\tSymbolic constraints: ') + for fun in info['symbolic_constraints']: + print('\t' + str(inspect.getsource(fun)).strip('\n')) + + # Run an experiment. + ep_start = time.time() + first_ep_iteration = True + for i in range(config.num_episodes*CTRL_FREQ*env.EPISODE_LEN_SEC): + + # Step by keyboard input. + # _ = input("Press any key to continue") + + # Elapsed sim time. + curr_time = (i-episode_start_iter)*CTRL_DT + + # Print episode time in seconds on the GUI. + time_label_id = p.addUserDebugText("Ep. time: {:.2f}s".format(curr_time), + textPosition=[0, 0, 1.5], + textColorRGB=[1, 0, 0], + lifeTime=3*CTRL_DT, + textSize=1.5, + parentObjectUniqueId=0, + parentLinkIndex=-1, + replaceItemUniqueId=time_label_id, + physicsClientId=env.PYB_CLIENT) + + # Compute control input. 
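+        # Note (added commentary): in the firmware branch below, `ctrl.cmdFirmware()` returns a
+        # (Command, args) pair that is dispatched to the matching Crazyswarm-style call on the
+        # firmware wrapper (sendFullStateCmd, sendTakeoffCmd, sendLandCmd, sendGotoCmd, ...),
+        # while in the software-only branch `ctrl.cmdSimOnly()` returns a (target_pos, target_vel)
+        # pair that is converted to per-motor thrusts before stepping the environment.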
+ if config.use_firmware: + vicon_obs = [obs[0], 0, obs[2], 0, obs[4], 0, obs[6], obs[7], obs[8], 0, 0, 0] + # obs = {x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r}. + # vicon_obs = {x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0}. + if first_ep_iteration: + action = np.zeros(4) + reward = 0 + done = False + info = {} + first_ep_iteration = False + command_type, args = ctrl.cmdFirmware(curr_time, vicon_obs, reward, done, info) + + # Select interface. + if command_type == Command.FULLSTATE: + firmware_wrapper.sendFullStateCmd(*args, curr_time) + elif command_type == Command.TAKEOFF: + firmware_wrapper.sendTakeoffCmd(*args) + elif command_type == Command.LAND: + firmware_wrapper.sendLandCmd(*args) + elif command_type == Command.STOP: + firmware_wrapper.sendStopCmd() + elif command_type == Command.GOTO: + firmware_wrapper.sendGotoCmd(*args) + elif command_type == Command.NOTIFYSETPOINTSTOP: + firmware_wrapper.notifySetpointStop() + elif command_type == Command.NONE: + pass + else: + raise ValueError("[ERROR] Invalid command_type.") + + # Step the environment. + obs, reward, done, info, action = firmware_wrapper.step(curr_time, action) + else: + if first_ep_iteration: + reward = 0 + done = False + info = {} + first_ep_iteration = False + target_pos, target_vel = ctrl.cmdSimOnly(curr_time, obs, reward, done, info) + action = thrusts(ctrl.ctrl, ctrl.CTRL_TIMESTEP, ctrl.KF, obs, target_pos, target_vel) + obs, reward, done, info = env.step(action) + + # Update the controller internal state and models. + ctrl.interStepLearn(action, obs, reward, done, info) + + # Add up reward, collisions, violations. + cumulative_reward += reward + if info["collision"][1]: + collisions_count += 1 + collided_objects.add(info["collision"][0]) + if 'constraint_values' in info and info['constraint_violation'] == True: + violations_count += 1 + + # Printouts. + if config.verbose and i%int(CTRL_FREQ/2) == 0: + print('\n'+str(i)+'-th step.') + print('\tApplied action: ' + str(action)) + print('\tObservation: ' + str(obs)) + print('\tReward: ' + str(reward) + ' (Cumulative: ' + str(cumulative_reward) +')') + print('\tDone: ' + str(done)) + print('\tCurrent target gate ID: ' + str(info['current_target_gate_id'])) + print('\tCurrent target gate type: ' + str(info['current_target_gate_type'])) + print('\tCurrent target gate in range: ' + str(info['current_target_gate_in_range'])) + print('\tCurrent target gate position: ' + str(info['current_target_gate_pos'])) + print('\tAt goal position: ' + str(info['at_goal_position'])) + print('\tTask completed: ' + str(info['task_completed'])) + if 'constraint_values' in info: + print('\tConstraints evaluations: ' + str(info['constraint_values'])) + print('\tConstraints violation: ' + str(bool(info['constraint_violation']))) + print('\tCollision: ' + str(info["collision"])) + print('\tTotal collisions: ' + str(collisions_count)) + print('\tCollided objects (history): ' + str(collided_objects)) + + # Log data. + pos = [obs[0],obs[2],obs[4]] + rpy = [obs[6],obs[7],obs[8]] + vel = [obs[1],obs[3],obs[5]] + bf_rates = [obs[9],obs[10],obs[11]] + # logger.log(drone=0, + # timestamp=i/CTRL_FREQ, + # state=np.hstack([pos, np.zeros(4), rpy, vel, bf_rates, np.sqrt(action/env.KF)]) + # ) + + # Synchronize the GUI. + if config.quadrotor_config.gui: + sync(i-episode_start_iter, ep_start, CTRL_DT) + + # If an episode is complete, reset the environment. + if done: + + # Plot logging (comment as desired). 
+ # if not test: + # logger.plot(comment="get_start-episode-"+str(episodes_count), autoclose=True) + + # # CSV save. + # logger.save_as_csv(comment="get_start-episode-"+str(episodes_count)) + + # Update the controller internal state and models. + ctrl.interEpisodeLearn() + + # Append episode stats. + # if info['current_target_gate_id'] == -1: + # gates_passed = num_of_gates + # else: + # gates_passed = info['current_target_gate_id'] + # if config.quadrotor_config.done_on_collision and info["collision"][1]: + # termination = 'COLLISION' + # elif config.quadrotor_config.done_on_completion and info['task_completed']: + # termination = 'TASK COMPLETION' + # elif config.quadrotor_config.done_on_violation and info['constraint_violation']: + # termination = 'CONSTRAINT VIOLATION' + # else: + # termination = 'MAX EPISODE DURATION' + # if ctrl.interstep_learning_occurrences != 0: + # interstep_learning_avg = ctrl.interstep_learning_time/ctrl.interstep_learning_occurrences + # else: + # interstep_learning_avg = ctrl.interstep_learning_time + # episode_stats = [ + # '[yellow]Flight time (s): '+str(curr_time), + # '[yellow]Reason for termination: '+termination, + # '[green]Gates passed: '+str(gates_passed), + # '[green]Total reward: '+str(cumulative_reward), + # '[red]Number of collisions: '+str(collisions_count), + # '[red]Number of constraint violations: '+str(violations_count), + # '[white]Total and average interstep learning time (s): '+str(ctrl.interstep_learning_time)+', '+str(interstep_learning_avg), + # '[white]Interepisode learning time (s): '+str(ctrl.interepisode_learning_time), + # ] + # stats.append(episode_stats) + + # Create a new logger. + # logger = Logger(logging_freq_hz=CTRL_FREQ) + + + # Reset/update counters. + episodes_count += 1 + if episodes_count > config.num_episodes: + break + cumulative_reward = 0 + collisions_count = 0 + collided_objects = set() + violations_count = 0 + ctrl.interEpisodeReset() + + # Reset the environment. + if config.use_firmware: + # Re-initialize firmware. + new_initial_obs, _ = firmware_wrapper.reset() + else: + new_initial_obs, _ = env.reset() + first_ep_iteration = True + + if config.verbose: + print(str(episodes_count)+'-th reset.') + print('Reset obs' + str(new_initial_obs)) + + episode_start_iter = i+1 + ep_start = time.time() + + # Close the environment and print timing statistics. + env.close() + elapsed_sec = time.time() - START + print(str("\n{:d} iterations (@{:d}Hz) and {:d} episodes in {:.2f} sec, i.e. {:.2f} steps/sec for a {:.2f}x speedup.\n" + .format(i, + env.CTRL_FREQ, + config.num_episodes, + elapsed_sec, + i/elapsed_sec, + (i*CTRL_DT)/elapsed_sec + ) + )) + + # Print episodes summary. 
+ tree = Tree("Summary") + for idx, ep in enumerate(stats): + ep_tree = tree.add('Episode ' + str(idx+1)) + for val in ep: + ep_tree.add(val) + print('\n\n') + print(tree) + print('\n\n') + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/competition/level0.yaml b/competition/level0.yaml index d2e910690..d536a2154 100644 --- a/competition/level0.yaml +++ b/competition/level0.yaml @@ -5,9 +5,9 @@ # | `level0.yaml` | **Yes** | *No* | *No* | *No* | Perfect knowledge | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 1000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -50,14 +50,14 @@ quadrotor_config: init_r: 0 # Nominal (pre-randomization) positions of gates and obstacles - gates: + gates: # high = 1 , low=0.525 yaw : 0 means 0°,1.57 means90°. 
edge is 0.45 [ # x, y, z, r, p, y, type (0: `tall` obstacle, 1: `low` obstacle) [0.5, -2.5, 0, 0, 0, -1.57, 0], [2, -1.5, 0, 0, 0, 0, 1], [0, 0.2, 0, 0, 0, 1.57, 1], [-0.5, 1.5, 0, 0, 0, 0, 0] ] - obstacles: + obstacles: # z default = 1.15 [ # x, y, z, r, p, y [1.5, -2.5, 0, 0, 0, 0], [0.5, -1, 0, 0, 0, 0], diff --git a/competition/level1.yaml b/competition/level1.yaml index 30fb937c8..ad44d6ba3 100644 --- a/competition/level1.yaml +++ b/competition/level1.yaml @@ -5,9 +5,9 @@ # | `level1.yaml` | **Yes** | **Yes** | *No* | *No** | Adaptive | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 1000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py diff --git a/competition/level2.yaml b/competition/level2.yaml index c03d78447..8ed7375f6 100644 --- a/competition/level2.yaml +++ b/competition/level2.yaml @@ -5,9 +5,9 @@ # | `level2.yaml` | **Yes** | **Yes** | **Yes** | *No* | Learning, re-planning | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 2000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a 
constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py diff --git a/competition/level3.yaml b/competition/level3.yaml index f897113eb..6c4579e26 100644 --- a/competition/level3.yaml +++ b/competition/level3.yaml @@ -5,9 +5,9 @@ # | `level3.yaml` | **Yes** | **Yes** | **Yes** | **Yes** | Robustness | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 1000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py diff --git a/competition/safetyplusplus_folder/log_utils.py b/competition/safetyplusplus_folder/log_utils.py new file mode 100755 index 000000000..c62be32fa --- /dev/null +++ b/competition/safetyplusplus_folder/log_utils.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +__author__ = 'MICROYU' + +import json + +color2num = dict( + gray=30, + red=31, + green=32, + yellow=33, + blue=34, + magenta=35, + cyan=36, + white=37, + crimson=38 +) + +def colorize(string, color, bold=False, highlight=False): + """ + Colorize a string. + + This function was originally written by John Schulman. + """ + attr = [] + num = color2num[color] + if highlight: num += 10 + attr.append(str(num)) + if bold: attr.append('1') + return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) + +def is_json_serializable(v): + try: + json.dumps(v) + return True + except: + return False + +def convert_json(obj): + """ Convert obj to a version which can be serialized with JSON. 
""" + if is_json_serializable(obj): + return obj + else: + if isinstance(obj, dict): + return {convert_json(k): convert_json(v) + for k,v in obj.items()} + + if isinstance(obj, tuple): + return (convert_json(x) for x in obj) + + elif isinstance(obj, list): + return [convert_json(x) for x in obj] + + elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): + return convert_json(obj.__name__) + + elif hasattr(obj,'__dict__') and obj.__dict__: + obj_dict = {convert_json(k): convert_json(v) + for k,v in obj.__dict__.items() if not str(k).startswith('_')} + return {'__dict__': obj_dict} + + return str(obj) + +def save_config(exp_name, config, log_dir): + config_json = convert_json(config) + if exp_name is not None: + config_json['exp_name'] = exp_name + output = json.dumps(config_json, separators=(',',':\t'), indent=4, sort_keys=True) + print(colorize('Saving config:', color='cyan', bold=True)) + print(colorize(output, color='cyan', bold=True)) + with open(log_dir + "/config.json", 'w') as out: + out.write(output) \ No newline at end of file diff --git a/competition/safetyplusplus_folder/networks.py b/competition/safetyplusplus_folder/networks.py new file mode 100755 index 000000000..bdd05d290 --- /dev/null +++ b/competition/safetyplusplus_folder/networks.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +num=256 +class Encoder(nn.Module): + def __init__(self, in_channels): + super(Encoder, self).__init__() + # (5,23,23) + self.conv1=nn.Conv2d(in_channels,16,3) # + self.conv2=nn.Conv2d(16,32,3) # + def forward(self,x): + # import pdb;pdb.set_trace() + in_size = x.size(0) + out = self.conv1(x) + out = F.relu(out) + out = F.max_pool2d(out, 2, 2) # (16,10,10) + out = self.conv2(out) + out = F.relu(out) + out = F.max_pool2d(out,2,2) + out = torch.mean(out,dim=1,keepdim=True) + out = out.view(in_size,-1) # 扁平化flat然后传入全连接层 + return out + + +class Actor(nn.Module): + def __init__(self, state_dim,in_channels,out_channels, action_dim, max_action): + super(Actor, self).__init__() + + self.conv1=nn.Conv2d(in_channels,16,3) # + self.conv2=nn.Conv2d(16,32,3) # + + self.l1 = nn.Linear(state_dim+out_channels, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, action_dim) + + self.max_action = max_action + + + def forward(self, all_state): + global_state=all_state[0] + local_state=all_state[1] + + + in_size = local_state.size(0) + out_local = self.conv1(local_state) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local, 2, 2) + out_local = self.conv2(out_local) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local,2,2) + out_local = torch.mean(out_local,dim=1,keepdim=True) + out_local = out_local.view(in_size,-1) # 扁平化flat然后传入全连接层 + + all_feature=torch.concat([global_state,out_local],1) + + out = F.relu(self.l1(all_feature)) + out = F.relu(self.l2(out)) + return self.max_action * torch.tanh(self.l3(out)) + + +class C_Critic(nn.Module): + def __init__(self, state_dim, action_dim): + super(C_Critic, self).__init__() + + self.l1 = nn.Linear(state_dim + action_dim, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, 1) + + + def forward(self, state, action): + q = F.relu(self.l1(torch.cat([state, action], 1))) + q = F.relu(self.l2(q)) + return self.l3(q) + + +class Critic(nn.Module): + def __init__(self, state_dim,in_channels,out_channels, action_dim): + super(Critic, self).__init__() + + # CNN + self.conv1=nn.Conv2d(in_channels,16,3) # + self.conv2=nn.Conv2d(16,32,3) # + + # Q1 architecture + self.l1 = 
nn.Linear(state_dim + action_dim + out_channels, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, 1) + + # Q2 architecture + self.l4 = nn.Linear(state_dim + action_dim + out_channels , num) + self.l5 = nn.Linear(num, num) + self.l6 = nn.Linear(num, 1) + + + def forward(self, all_state, action): + global_state=all_state[0] + local_state=all_state[1] + in_size = local_state.size(0) + out_local = self.conv1(local_state) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local, 2, 2) # (16,10,10) + out_local = self.conv2(out_local) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local,2,2) + out_local = torch.mean(out_local,dim=1,keepdim=True) + out_local = out_local.view(in_size,-1) # 扁平化flat然后传入全连接层 + # import pdb;pdb.set_trace() + all_feature=torch.concat([global_state,out_local],1) + + sa = torch.cat([all_feature, action], 1) + + q1 = F.relu(self.l1(sa)) + q1 = F.relu(self.l2(q1)) + q1 = self.l3(q1) + + q2 = F.relu(self.l4(sa)) + q2 = F.relu(self.l5(q2)) + q2 = self.l6(q2) + return q1, q2 + + + def Q1(self, all_state, action): + global_state=all_state[0] + local_state=all_state[1] + in_size = local_state.size(0) + out_local = self.conv1(local_state) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local, 2, 2) # (16,10,10) + out_local = self.conv2(out_local) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local,2,2) + out_local = torch.mean(out_local,dim=1,keepdim=True) + out_local = out_local.view(in_size,-1) # 扁平化flat然后传入全连接层 + # import pdb;pdb.set_trace() + all_feature=torch.concat([global_state,out_local],1) + + sa = torch.cat([all_feature, action], 1) + + q1 = F.relu(self.l1(sa)) + q1 = F.relu(self.l2(q1)) + q1 = self.l3(q1) + return q1 + + +# predict state-wise \lamda(s) +class MultiplerNet(nn.Module): + def __init__(self, state_dim): + super(MultiplerNet, self).__init__() + + self.l1 = nn.Linear(state_dim, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, 1) + + + def forward(self, state): + a = F.relu(self.l1(state)) + a = F.relu(self.l2(a)) + #return F.relu(self.l3(a)) + return F.softplus(self.l3(a)) # lagrangian multipliers can not be negative + +# import numpy as np +# encoder=Encoder(5) +# a=np.zeros([1,5,23,23]) +# encoder=encoder.float() +# out=encoder(torch.tensor(a).float()) diff --git a/competition/safetyplusplus_folder/plus_logger.py b/competition/safetyplusplus_folder/plus_logger.py new file mode 100755 index 000000000..cd8c99444 --- /dev/null +++ b/competition/safetyplusplus_folder/plus_logger.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +__author__ = 'Linrui Zhang' + +import csv +import os +import json, time +import numpy as np +from safetyplusplus_folder import log_utils as lu + +class Logger(): + def __init__(self, log_dir="./logs", exp_name=None, env_name=None, seed=0, config=None, filename='evaluator.csv', debug=False): + self.exp_name = exp_name + self.env_name = env_name + self.seed = seed + self.previous_log_time = time.time() + self.start_log_time = self.previous_log_time + self.debug = debug + + if debug: + print(lu.colorize(f"\nDebug mode is activate !!!\nLog will NOT be saved !!!\n", 'red', bold=True)) + else: + num_exps = 0 + self.log_dir = f"./{log_dir}/{exp_name.replace('-', '_')}_{env_name.replace('-', '_')}-seed{seed}" + while True: + if os.path.exists(f"{self.log_dir}-{str(num_exps)}/"): + num_exps += 1 + else: + self.log_dir += f"-{str(num_exps)}/" + os.makedirs(self.log_dir) + break + self.csv_file = open(self.log_dir + '/' + filename, 'w', 
encoding='utf8') + # header={"t_start": time.time(), 'env_id' : env_name, 'exp_name': exp_name, 'seed': seed} + # header = '# {} \n'.format(json.dumps(header)) + # self.csv_file.write(header) + self.logger = csv.DictWriter(self.csv_file, fieldnames=('mean_score', 'total_steps', 'std_score', 'max_score', 'min_score')) + self.logger.writeheader() + self.csv_file.flush() + + if config != None: + lu.save_config(exp_name, config, self.log_dir) + + + def update(self, score, total_steps): + ''' + Score is a list + ''' + current_log_time = time.time() + avg_score = np.mean(score) + std_score = np.std(score) + max_score = np.max(score) + min_score = np.min(score) + + print(lu.colorize(f"\nTime: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, Time spent from previous logger: {(current_log_time - self.previous_log_time):.3f} s", 'yellow', bold=True)) + print(lu.colorize(f"Evaluation over {len(score)} episodes after {total_steps}:", 'yellow', bold=True)) + print(lu.colorize(f"Avg: {avg_score:.3f} Std: {std_score:.3f} Max: {max_score:.3f} Min: {min_score:.3f}\n", 'yellow', bold=True)) + self.previous_log_time = current_log_time + + if not self.debug: + epinfo = {"mean_score": avg_score, "total_steps": total_steps, "std_score": std_score, "max_score": max_score, "min_score": min_score} + self.logger.writerow(epinfo) + self.csv_file.flush() + + +class SafeLogger(): + def __init__(self, log_dir="./logs", exp_name=None, env_name=None, seed=0,config=None, filename="logger.csv", fieldnames=[], debug=False): + self.exp_name = exp_name + self.env_name = env_name + self.seed = seed + self.previous_log_time = time.time() + self.start_log_time = self.previous_log_time + self.debug = debug + self.fieldnames = ["total_steps"] + fieldnames + + if debug: + print(lu.colorize(f"\nDebug mode is activate !!!\nLog will NOT be saved !!!\n", 'red', bold=True)) + else: + num_exps = 0 + self.log_dir = f"./{log_dir}/{exp_name.replace('-', '_')}_{env_name.replace('-', '_')}-seed{seed}" + while True: + if os.path.exists(f"{self.log_dir}-{str(num_exps)}/"): + num_exps += 1 + else: + self.log_dir += f"-{str(num_exps)}/" + os.makedirs(self.log_dir) + break + self.csv_file = open(self.log_dir + '/' + filename, 'w', encoding='utf8') + # header={"t_start": time.time(), 'env_id' : env_name, 'exp_name': exp_name, 'seed': seed} + # header = '# {} \n'.format(json.dumps(header)) + # self.csv_file.write(header) + self.logger = csv.DictWriter(self.csv_file, fieldnames=(self.fieldnames)) + self.logger.writeheader() + self.csv_file.flush() + + if config != None: + lu.save_config(exp_name, config, self.log_dir) + + + def update(self, fieldvalues, total_steps): + epinfo = {} + current_log_time = time.time() + fieldvalues = [total_steps] + [int(current_log_time - self.previous_log_time)]+ fieldvalues + print(lu.colorize(f"\nTime: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, Time spent from previous logger: {(current_log_time - self.previous_log_time):.3f} s", 'blue', bold=True)) + print(lu.colorize(f"CustomLogger with fileds: {self.fieldnames}", 'blue', bold=True)) + print(lu.colorize(f"fieldvalues: {fieldvalues}\n", 'blue', bold=True)) + self.previous_log_time = current_log_time + + if not self.debug: + for filedname, filedvalue in zip(self.fieldnames, fieldvalues): + epinfo.update({filedname: filedvalue}) + self.logger.writerow(epinfo) + self.csv_file.flush() + \ No newline at end of file diff --git a/competition/safetyplusplus_folder/replay_buffer.py b/competition/safetyplusplus_folder/replay_buffer.py new file mode 100755 
index 000000000..59be13c59 --- /dev/null +++ b/competition/safetyplusplus_folder/replay_buffer.py @@ -0,0 +1,311 @@ +import numpy as np +import torch + + +class SimpleReplayBufferIros(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.phrase=4 + self.one_phrase_max_size= int(max_size/self.phrase) + self.ptr = [0,0,0,0] + + self.size = [0,0,0,0] + + self.state = np.zeros(( self.phrase,self.one_phrase_max_size, state_dim)) + self.action = np.zeros(( self.phrase,self.one_phrase_max_size, action_dim)) + self.next_state = np.zeros(( self.phrase,self.one_phrase_max_size, state_dim)) + self.reward = np.zeros(( self.phrase,self.one_phrase_max_size, 1)) + self.not_done = np.zeros(( self.phrase,self.one_phrase_max_size, 1)) + + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + + def add(self,phrase, state, action, next_state, reward, done): + ptr=self.ptr[phrase] + self.state[phrase][ptr] = state + self.action[phrase][ptr] = action + self.next_state[phrase][ptr] = next_state + self.reward[phrase][ptr] = reward + self.not_done[phrase][ptr] = 1. - done + + self.ptr[phrase] = (self.ptr[phrase] + 1) % self.one_phrase_max_size + self.size[phrase] = min(self.size[phrase] + 1, self.one_phrase_max_size) + + + def sample(self, batch_size): + # inverse sample , give the end phrase the top priotity + size_3= 64 if self.size[3] > batch_size/4 else self.size[3] + size_2= 64 if self.size[2] > batch_size/4 else self.size[2] + size_1= 64 if self.size[1] > batch_size/4 else self.size[1] + size_0= batch_size -size_1 - size_2-size_3 + + ind0 = np.random.randint(0, self.size[0], size=size_0) + ind1 = np.random.randint(0, self.size[1], size=size_1) + ind2 = np.random.randint(0, self.size[2], size=size_2) + ind3 = np.random.randint(0, self.size[3], size=size_3) + + if size_0 !=0: + state=self.state[0][ind0] + action=self.action[0][ind0] + next_state=self.next_state[0][ind0] + reward=self.reward[0][ind0] + not_done=self.not_done[0][ind0] + if size_1!=0: + state=np.vstack((state,self.state[1][ind1])) + action=np.vstack((action,self.action[1][ind1])) + next_state=np.vstack((next_state,self.next_state[1][ind1])) + reward=np.vstack((reward,self.reward[1][ind1])) + not_done=np.vstack((not_done,self.not_done[1][ind1])) + if size_2!=0: + state=np.vstack((state,self.state[2][ind2])) + action=np.vstack((action,self.action[2][ind2])) + next_state=np.vstack((next_state,self.next_state[2][ind2])) + reward=np.vstack((reward,self.reward[2][ind2])) + not_done=np.vstack((not_done,self.not_done[2][ind2])) + if size_3!=0: + state=np.vstack((state,self.state[3][ind3])) + action=np.vstack((action,self.action[3][ind3])) + next_state=np.vstack((next_state,self.next_state[3][ind3])) + reward=np.vstack((reward,self.reward[3][ind3])) + not_done=np.vstack((not_done,self.not_done[3][ind3])) + return ( + torch.FloatTensor(state).to(self.device), + torch.FloatTensor(action).to(self.device), + torch.FloatTensor(next_state).to(self.device), + torch.FloatTensor(reward).to(self.device), + torch.FloatTensor(not_done).to(self.device), + ) + +class SimpleReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda:0" if 
torch.cuda.is_available() else "cpu") + + + def add(self, state, action, next_state, reward, done): + self.state[self.ptr] = state + self.action[self.ptr] = action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.not_done[self.ptr] = 1. - done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) + +class IrosReplayBuffer(object): + def __init__(self, global_state_dim,local_state_shape ,action_dim, max_size=int(1e5),load=True): + self.max_size = max_size + + z=local_state_shape[0] + x=local_state_shape[1] + y=local_state_shape[2] + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.global_state = torch.zeros((max_size, global_state_dim)).to(self.device) + self.local_state = torch.zeros((max_size, z,x,y)).to(self.device) + self.action = torch.zeros((max_size, action_dim)).to(self.device) + self.next_global_state = torch.zeros((max_size, global_state_dim)).to(self.device) + self.next_local_state = torch.zeros((max_size, z,x,y)).to(self.device) + self.reward = torch.zeros((max_size, 1)).to(self.device) + self.not_done = torch.zeros((max_size, 1)).to(self.device) + self.load=load + self.pretrain_size=int(1e4) + if self.load : + self.load_buffer() + self.ptr = self.pretrain_size + self.size = self.pretrain_size + else: + self.ptr = 0 + self.size = 0 + + def add(self, global_state,local_state, action, next_global_state,next_local_state, reward, done): + + self.global_state[self.ptr] = torch.FloatTensor(global_state) + self.local_state[self.ptr] = torch.FloatTensor(local_state) + self.action[self.ptr] = torch.FloatTensor(action) + self.next_global_state[self.ptr] = torch.FloatTensor(next_global_state) + self.next_local_state[self.ptr] = torch.FloatTensor(next_local_state) + self.reward[self.ptr] = torch.FloatTensor(np.array([reward])) + self.not_done[self.ptr] = torch.FloatTensor(np.array([1. 
- done])) + if self.load: + self.ptr = (self.ptr + 1) if self.ptr + 1 !=self.max_size else self.pretrain_size + else: + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + # import pdb;pdb.set_trace() + # print(ind) + return (self.global_state[ind],self.local_state[ind],self.action[ind],self.next_global_state[ind],self.next_local_state[ind],self.reward[ind],self.not_done[ind]) + + def write(self): + np.save("buffer_npy/global_state.npy",np.array(self.global_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/local_state.npy",np.array(self.local_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/action.npy",np.array(self.action[0:self.pretrain_size].cpu())) + np.save("buffer_npy/next_global_state.npy",np.array(self.next_global_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/next_local_state.npy",np.array(self.next_local_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/reward.npy",np.array(self.reward[0:self.pretrain_size].cpu())) + np.save("buffer_npy/not_done.npy",np.array(self.not_done[0:self.pretrain_size].cpu())) + + def load_buffer(self): + expert_global_state=torch.FloatTensor(np.load("buffer_npy/global_state.npy")) + expert_local_state=torch.FloatTensor(np.load("buffer_npy/local_state.npy")) + expert_action=torch.FloatTensor(np.load("buffer_npy/action.npy")) + expert_next_global_state=torch.FloatTensor(np.load("buffer_npy/next_global_state.npy")) + expert_next_local_state=torch.FloatTensor(np.load("buffer_npy/next_local_state.npy")) + expert_reward=torch.FloatTensor(np.load("buffer_npy/reward.npy")) + expert_not_done=torch.FloatTensor(np.load("buffer_npy/not_done.npy")) + self.global_state[0:self.pretrain_size] = expert_global_state + self.local_state[0:self.pretrain_size] = expert_local_state + self.action[0:self.pretrain_size] = expert_action + self.next_global_state[0:self.pretrain_size] = expert_next_global_state + self.next_local_state[0:self.pretrain_size] = expert_next_local_state + self.reward[0:self.pretrain_size] = expert_reward + self.not_done[0:self.pretrain_size] = expert_not_done + print("load ok") + + +class CostReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.cost = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + def add(self, state, action, next_state, reward, cost, done): + self.state[self.ptr] = state + self.action[self.ptr] = action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.cost[self.ptr] = cost + self.not_done[self.ptr] = 1. 
- done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.cost[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) + +class RecReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.raw_action = np.zeros((max_size, action_dim)) + self.rec_action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.cost = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + def add(self, state, raw_action, rec_action, next_state, reward, cost, done): + self.state[self.ptr] = state + self.raw_action[self.ptr] = raw_action + self.rec_action[self.ptr] = rec_action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.cost[self.ptr] = cost + self.not_done[self.ptr] = 1. - done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.raw_action[ind]).to(self.device), + torch.FloatTensor(self.rec_action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.cost[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) + +class SafetyLayerReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.cost = np.zeros((max_size, 1)) + self.prev_cost = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + def add(self, state, action, next_state, reward, cost, prev_cost,done): + self.state[self.ptr] = state + self.action[self.ptr] = action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.cost[self.ptr] = cost + self.prev_cost[self.ptr] = prev_cost + self.not_done[self.ptr] = 1. 
- done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.cost[ind]).to(self.device), + torch.FloatTensor(self.prev_cost[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) \ No newline at end of file diff --git a/competition/safetyplusplus_folder/safetyplusplus.py b/competition/safetyplusplus_folder/safetyplusplus.py new file mode 100755 index 000000000..18d5e3094 --- /dev/null +++ b/competition/safetyplusplus_folder/safetyplusplus.py @@ -0,0 +1,225 @@ +import copy +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from safetyplusplus_folder.networks import C_Critic,Critic,Actor + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class TD3Usl(object): + def __init__( + self, + state_dim, + action_dim, + max_action, + kappa, + delta, + eta=0.05, + K = 20, + rew_discount=0.99, + cost_discount=0.99, + tau=0.005, + policy_noise=0.2, + noise_clip=0.5, + policy_freq=2, + expl_noise = 0.1, + ): + + self.actor = Actor(state_dim, action_dim, max_action).to(device) + self.actor_target = copy.deepcopy(self.actor) + self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=3e-4) + + self.critic = Critic(state_dim, action_dim).to(device) + self.critic_target = copy.deepcopy(self.critic) + self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=3e-4) + + self.action_dim = action_dim + self.max_action = max_action + self.rew_discount = rew_discount + self.cost_discount = cost_discount + self.tau = tau + self.policy_noise = policy_noise + self.noise_clip = noise_clip + self.policy_freq = policy_freq + + self.expl_noise = expl_noise + + self.total_it = 0 + + self.C_critic = C_Critic(state_dim, action_dim).to(device) + self.C_critic_target = copy.deepcopy(self.C_critic) + self.C_critic_optimizer = torch.optim.Adam(self.C_critic.parameters(), lr=3e-4) + + + self.eta = eta + self.K = K + self.kappa = kappa + self.delta = delta + + + def select_action(self, state,use_usl=False, usl_iter=20,exploration=False): + state = torch.FloatTensor(state.reshape(1, -1)).to(device) + action = self.actor(state).cpu().data.numpy().flatten() + if exploration: + noise = np.random.normal(0, self.max_action * self.expl_noise, size=self.action_dim) + action = (action + noise).clip(-self.max_action, self.max_action) + if use_usl: + action = self.safety_correction(state,action,eta=self.eta,Niter=usl_iter) + return action + + def pred_cost(self, state, action): + state = torch.FloatTensor(state.reshape(1, -1)).to(device) + action = torch.FloatTensor(action.reshape(1, -1)).to(device) + return self.C_critic(state,action).item() + + def safety_correction(self,state,action,eta,Niter,verbose=False): + if not torch.is_tensor(state): + state = torch.tensor(state.reshape(1, -1),requires_grad=False).float().to(device) + if not torch.is_tensor(action): + action = torch.tensor(action.reshape(1, -1),requires_grad=True).float().to(device) + pred = self.C_critic_target(state,action) + if pred.item() <= self.delta: + return torch.clamp(action,-self.max_action,self.max_action).cpu().data.numpy().flatten() + else: + for i in range(Niter): 
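+                # Added commentary: this is the unrolled safety-correction loop. Each iteration
+                # backpropagates the target cost critic C(s, a) with respect to the action,
+                # normalizes the gradient by its largest component Z, and moves the action by a
+                # step of size `eta` away from predicted constraint violation; the loop exits
+                # early once the predicted cost falls below `delta` or the action leaves the
+                # admissible range.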
+ if max(np.abs(action.cpu().data.numpy().flatten())) > self.max_action: + break + action.retain_grad() + self.C_critic_target.zero_grad() + pred = self.C_critic_target(state,action) + pred.backward(retain_graph=True) + if verbose and i % 5 == 0: + print(f'a{i} = {action.cpu().data.numpy().flatten()}, C = {pred.item()}') + if pred.item() <= self.delta: + break + Z = np.max(np.abs(action.grad.cpu().data.numpy().flatten())) + action = action - eta * action.grad / (Z + 1e-8) + #print(i,pred.item()) + return torch.clamp(action,-self.max_action,self.max_action).cpu().data.numpy().flatten() + + def train(self, replay_buffer, batch_size=256): + # Sample replay buffer + state, action, next_state, reward, cost, not_done = replay_buffer.sample(batch_size) + + # Compute the target C value + target_C = self.C_critic_target(next_state, self.actor_target(next_state)) + target_C = cost + (not_done * self.cost_discount * target_C).detach() + + # Get current C estimate + current_C = self.C_critic(state, action) + + # Compute critic loss + C_critic_loss = F.mse_loss(current_C, target_C) + + # Optimize the critic + self.C_critic_optimizer.zero_grad() + C_critic_loss.backward() + self.C_critic_optimizer.step() + + with torch.no_grad(): + # Select action according to policy and add clipped noise + noise = ( + torch.randn_like(action) * self.policy_noise + ).clamp(-self.noise_clip, self.noise_clip) + + next_action = ( + self.actor_target(next_state) + noise + ).clamp(-self.max_action, self.max_action) + + # Compute the target Q value + target_Q1, target_Q2 = self.critic_target(next_state, next_action) + target_Q = torch.min(target_Q1, target_Q2) + target_Q = reward + not_done * self.rew_discount * target_Q + + # Get current Q estimates + current_Q1, current_Q2 = self.critic(state, action) + + # Compute critic loss + critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) + + # Optimize the critic + self.critic_optimizer.zero_grad() + critic_loss.backward() + self.critic_optimizer.step() + + + # Delayed policy updates + if self.total_it % self.policy_freq == 0: + + # Compute actor loss + action = self.actor(state) + actor_loss = ( + - self.critic.Q1(state, action) \ + + self.kappa * F.relu(self.C_critic(state, action) - self.delta) \ + ).mean() + + # Optimize the actor + self.actor_optimizer.zero_grad() + actor_loss.backward() + self.actor_optimizer.step() + + # Update the frozen target models + for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + for param, target_param in zip(self.C_critic.parameters(), self.C_critic_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + + + def save(self, filename): + torch.save(self.critic.state_dict(), filename + "_critic") + torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer") + + torch.save(self.C_critic.state_dict(), filename + "_C_critic") + torch.save(self.C_critic_optimizer.state_dict(), filename + "_C_critic_optimizer") + + torch.save(self.actor.state_dict(), filename + "_actor") + torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer") + + + def load(self, filename): + self.critic.load_state_dict(torch.load(filename + "_critic")) + 
self.critic_optimizer.load_state_dict(torch.load(filename + "_critic_optimizer")) + self.critic_target = copy.deepcopy(self.critic) + + self.C_critic.load_state_dict(torch.load(filename + "_C_critic")) + self.C_critic_optimizer.load_state_dict(torch.load(filename + "_C_critic_optimizer")) + self.C_critic_target = copy.deepcopy(self.C_critic) + + self.actor.load_state_dict(torch.load(filename + "_actor")) + self.actor_optimizer.load_state_dict(torch.load(filename + "_actor_optimizer")) + self.actor_target = copy.deepcopy(self.actor) + + +# Runs policy for X episodes and returns average reward +# A fixed seed is used for the eval environment +def eval_policy(policy, eval_env, seed, flag, eval_episodes=5,use_usl=False,usl_iter=20): + avg_reward = 0. + avg_cost = 0. + for _ in range(eval_episodes): + if flag == 'constraint_violation': + reset_info, done = eval_env.reset(), False + state = reset_info[0] + else: + state, done = eval_env.reset(), False + while not done: + action = policy.select_action(np.array(state),use_usl=use_usl, usl_iter=usl_iter) + state, reward, done, info = eval_env.step(action) + avg_reward += reward + if info[flag]!=0: + avg_cost += 1 + + avg_reward /= eval_episodes + avg_cost /= eval_episodes + + print("---------------------------------------") + print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f} Cost {avg_cost:.3f}.\ + Use Unrolling Safety Layer : {use_usl}") + print("---------------------------------------") + return avg_reward,avg_cost \ No newline at end of file diff --git a/competition/safetyplusplus_folder/slam.py b/competition/safetyplusplus_folder/slam.py new file mode 100644 index 000000000..e80a0cd74 --- /dev/null +++ b/competition/safetyplusplus_folder/slam.py @@ -0,0 +1,260 @@ +import numpy as np +from matplotlib import pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import cv2 +import time + +class SLAM(): + def __init__(self,gs=0.05) -> None: + # slam建图范围 + self.X_MIN = -3.5 + self.X_MAX = 3.5 + self.Y_MIN = -3.5 + self.Y_MAX = 3.5 + self.Z_MIN = 0.0 + self.Z_MAX = 2.0 + # 栅格/元胞大小 grid shape = 5cm * 5cm * 5cm + self.gs = gs + # 障碍物初始位置 + self.obstacles = [ + [1.5, -2.5, 0, 0, 0, 0], + [0.5, -1, 0, 0, 0, 0], + [1.5, 0, 0, 0, 0, 0], + [-1, 0, 0, 0, 0, 0] + ] + # 障碍物高度 + self.obstacle_height = 1.05 + # 门的初始位置,用不上,info里有更精确的 + self.gates = [ + [0.5, -2.5, 0, 0, 0, -1.57, 0], + [2, -1.5, 0, 0, 0, 0, 1], + [0, 0.2, 0, 0, 0, 1.57, 1], + [-0.5, 1.5, 0, 0, 0, 0, 0] + ] + # 门的尺寸 + self.gate_tall = 1.0 + self.gate_low = 0.525 + self.edge = 0.45/2 + + def reset_occ_map(self): + # reset the occupation map + self.occ_map = np.zeros(( + round((self.Z_MAX -self.Z_MIN)/self.gs), + round((self.X_MAX -self.X_MIN)/self.gs), + round((self.Y_MAX -self.Y_MIN)/self.gs), + ),dtype=int) + + # # 不用初始化门 + # for gate in self.gates: + # self._set_gate_occupied(gate[:-1],gate[-1]) + + # 初始化障碍物 + for obstacle in self.obstacles: + self._set_obstacle_occupied(obstacle) + + self.current_id = -1 + + def update_occ_map(self,info): + # env reset 时,info的信息里没有这项 + # 另外,视野里没有时,无需更新 + if not info.get('current_target_gate_in_range'): + return + else: + if info.get('current_target_gate_id') > self.current_id: + self.current_id = info.get('current_target_gate_id') + self._set_gate_occupied(info.get('current_target_gate_pos'),info.get('current_target_gate_type')) + self.plot_occ_map(str(self.current_id+1)) + print(self.current_id) + + def _set_pos_occupied(self,x,y,z): + # 设置绝对坐标(x,y,z)处为障碍占据 + assert self.X_MIN <= x <= self.X_MAX \ + and self.Y_MIN <= y <= self.Y_MAX \ + and 
self.Z_MIN <= z <= self.Z_MAX + x_idx = round((x-self.X_MIN)/self.gs) + y_idx = round((y-self.Y_MIN)/self.gs) + z_idx = round((z-self.Z_MIN)/self.gs) + self.occ_map[z_idx][x_idx][y_idx] = -1 + + def _set_obstacle_occupied(self,obstacle): + # 设置(x,y)处的柱子为障碍占据 + x,y,_,_,_,_, = obstacle + for x_ in np.arange(x-0.05,x+0.1,0.05): + for y_ in np.arange(y-0.05,y+0.1,0.05): + z = 0 + while z <= self.obstacle_height: + self._set_pos_occupied(x_,y_,z) + z += self.gs + + # 这个地方不对,并不是只有0度和90度,门的角度yaw也会随机 + # 目前只是简单的按照绝对值做了处理, + def _set_gate_occupied(self,gate,gate_type=0): + # 设置(x,y)处类型为type的门为障碍占据 + x,y,_,_,_,direct = gate + height = self.gate_tall if gate_type == 0 else self.gate_low + z = 0 + # 基座 + while z <= height - self.edge-0.05: + self._set_pos_occupied(x,y,z) + z += self.gs + # 下门框 + while z <= height - self.edge+0.05: + if abs(direct) <= 0.15: + for tmp_x in np.arange(x-self.edge,x+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(tmp_x,y,z) + else: + for tmp_y in np.arange(y-self.edge,y+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(x,tmp_y,z) + z += self.gs + # 左右门框 + while z <= height + self.edge-0.05: + if abs(direct) <= 0.15: + self._set_pos_occupied(x-self.edge,y,z) + self._set_pos_occupied(x+self.edge,y,z) + else: + self._set_pos_occupied(x,y-self.edge,z) + self._set_pos_occupied(x,y+self.edge,z) + z += self.gs + # 上门框 + while z <= height + self.edge+0.05: + if abs(direct) <= 0.15: + for tmp_x in np.arange(x-self.edge,x+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(tmp_x,y,z) + else: + for tmp_y in np.arange(y-self.edge,y+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(x,tmp_y,z) + z += self.gs + + # 绘制 + def plot_occ_map(self,name='test'): + xs = [] + ys = [] + zs = [] + C,W,H= self.occ_map.shape + for z in range(C): + for x in range(W): + for y in range(H): + if self.occ_map[z][x][y] == -1 : + xs.append(x*self.gs+self.X_MIN) + ys.append(y*self.gs+self.Y_MIN) + zs.append(z*self.gs+self.Z_MIN) + + ax = plt.subplot(111, projection='3d') # 创建一个三维的绘图工程 + # 将数据点分成三部分画,在颜色上有区分度 + ax.scatter(xs, ys, zs, c='black') # 绘制数据点 + # plt.xlim((self.X_MIN, self.X_MAX)) + # plt.ylim((self.Y_MIN, self.Y_MAX)) + # plt.savefig(name+'.png') + + # 根据所在高度,生成2d 障碍物图像 + # TODO 出现问题,在刚开始训练的时候会超过约定的边界, + def generate_obs_img(self,obs,name='test',save=False): + x,y,z=obs[0],obs[2],obs[4] + x_idx = round((x-self.X_MIN)/self.gs) + y_idx = round((y-self.Y_MIN)/self.gs) + z_idx = round((z-self.Z_MIN)/self.gs) + if save: + occ_map_2d = self.occ_map[z_idx] * 255 + try: + occ_map_2d[x_idx][y_idx] = 125 + except: + print('out of region') + cv2.imwrite('obs/'+name+'.png',occ_map_2d) + z_max=len(self.occ_map) - 1 + return self.occ_map[z_idx] if z_idx<=z_max else self.occ_map[z_max] + + def generate_3obs_img(self,obs,target_vector,name='test',save=False): + + z_center=2 + center=11 + tall,height,width=[(2*z_center+1),(2*center+1),(2*center+1)] + # tall, height,width. 
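# A minimal, self-contained sketch (not part of the patch above) of the
# world-to-grid conversion that _set_pos_occupied relies on: with bounds of
# +/-3.5 m in x/y, 0..2 m in z and 0.05 m cells, a metric position maps to
# round((coord - min) / gs), and occ_map is indexed in (z, x, y) order.
import numpy as np

def world_to_index(x, y, z, gs=0.05, x_min=-3.5, y_min=-3.5, z_min=0.0):
    """Convert a metric position to (z, x, y) cell indices of the occupancy map."""
    return (int(round((z - z_min) / gs)),
            int(round((x - x_min) / gs)),
            int(round((y - y_min) / gs)))

# The note above _set_gate_occupied points out that gate yaw is not limited to
# 0 or 90 degrees; one possible fix (a hypothetical helper, not the code used
# here) samples the crossbar along the gate's local axis using cos/sin of the
# yaw, with the same 0.225 m half-width (self.edge):
def crossbar_cells(x, y, z, yaw, half_width=0.225, gs=0.05):
    """Cells covered by a horizontal gate bar centred at (x, y, z) with heading yaw."""
    return [world_to_index(x + s * np.cos(yaw), y + s * np.sin(yaw), z)
            for s in np.arange(-half_width, half_width + gs, gs / 2)]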
智能体是在最中间的位置 + obs_img=np.zeros([(2*z_center+1),(2*center+1),(2*center+1)]) + + x,y,z=obs[0],obs[2],obs[4] + x_idx = round((x-self.X_MIN)/self.gs) + y_idx = round((y-self.Y_MIN)/self.gs) + z_idx = round((z-self.Z_MIN)/self.gs) + + z_low=z_idx-z_center + z_top=z_idx+z_center + z_max=len(self.occ_map) - 1 + # fill_z_idx=0 + if z_low<0: # z_low<0 z_top>0 + fill_z_idx=0 + for z_index in range(z_low,0,1): + obs_img[fill_z_idx]=- np.ones([(2*center+1),(2*center+1)]) + fill_z_idx+=1 + for z_index in range(0,z_top+1,1): + if x_idx>=center and x_idx + center <=len(self.occ_map[1])-1 and y_idx>=center and y_idx+center <=len(self.occ_map[1][1])-1: + for x in range(0,(2*center+1)): + obs_img[fill_z_idx][x]=self.occ_map[z_index][x_idx-center+x][y_idx-center:y_idx+(center+1)] + fill_z_idx+=1 + elif z_top<=z_max: # z_low>0,z_top <= z_max + fill_z_idx=0 + if x_idx>=center and x_idx + center <=len(self.occ_map[1])-1 and y_idx>=center and y_idx+center <=len(self.occ_map[1][1])-1: + for z_index in range(z_low,z_top+1,1): + for x in range(0,(2*center+1)): + obs_img[fill_z_idx][x]=self.occ_map[z_index][x_idx-center+x][y_idx-center:y_idx+(center+1)] + fill_z_idx+=1 + + else : # z_low>0,z_top > z_max + fill_z_idx=0 + for z_index in range(z_low,z_max+1,1): + if x_idx>=center and x_idx + center <=len(self.occ_map[1])-1 and y_idx>=center and y_idx+center <=len(self.occ_map[1][1])-1: + for x in range(0,(2*center+1)): + obs_img[fill_z_idx][x]=self.occ_map[z_index][x_idx-center+x][y_idx-center:y_idx+(center+1)] + fill_z_idx+=1 + while fill_z_idx <2*z_center+1: + obs_img[fill_z_idx]=- np.ones([(2*center+1),(2*center+1)]) + fill_z_idx+=1 + + # fill target_vector way 2 + if target_vector[2]>=0: + for _ in range(z_center,2*z_center+1): + if target_vector[0]>=0 : + if target_vector[1]>=0: + obs_img[_][center:center*2+1,center:center*2+1]=np.where(obs_img[_][center:center*2+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][center:center*2+1,0:center+1]=np.where(obs_img[_][center:center*2+1,0:center+1]==-1,-1,0.5) + else: + if target_vector[1]>=0: + obs_img[_][0:center+1,center:center*2+1]=np.where(obs_img[_][0:center+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][0:center+1,0:center+1]=np.where(obs_img[_][0:center+1,0:center+1]==-1,-1,0.5) + else: + for _ in range(0,z_center+1): + if target_vector[0]>=0 : + if target_vector[1]>=0: + obs_img[_][center:center*2+1,center:center*2+1]=np.where(obs_img[_][center:center*2+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][center:center*2+1,0:center+1]=np.where(obs_img[_][center:center*2+1,0:center+1]==-1,-1,0.5) + else: + if target_vector[1]>=0: + obs_img[_][0:center+1,center:center*2+1]=np.where(obs_img[_][0:center+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][0:center+1,0:center+1]=np.where(obs_img[_][0:center+1,0:center+1]==-1,-1,0.5) + obs_img[z_center-1:z_center+2,center-1:center+2,center-1:center+2]+=0.2 + obs_img[z_center][center][center]+=0.2 + # if save: + # i=0 + # obs_img = (obs_img * 255 + 255)/2 + # for z_index in range(len(obs_img)) : + # occ_map_2d = self.occ_map[z_idx-z_center + z_index if z_idx-z_center + z_index>=0 else 0] * 255 + 255 + # occ_map_2d [x_idx][y_idx] = 125 + # obs_img[z_index][center][center]=125 + # cv2.imwrite('obs2/'+str(i)+'.png',obs_img[z_index]) + # cv2.imwrite('obs2/'+str(i)+'_occ.png',occ_map_2d) + # i+=1 + # import pdb;pdb.set_trace() + + return obs_img + +# if __name__ == '__main__': +# m_slam = SLAM() +# m_slam.reset_occ_map() +# m_slam.plot_occ_map('test') + # for i in range(len(m_slam.occ_map)): + # occ_map_2d = 
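# generate_3obs_img above builds a (2*z_center+1, 2*center+1, 2*center+1) =
# (5, 23, 23) egocentric crop of occ_map centred on the drone, filling z-layers
# that fall outside the map with -1 and biasing the quadrant containing the
# target direction with 0.5. A padded-array sketch of the cropping step
# (illustrative only; it also treats out-of-map x/y cells as -1, whereas the
# code above leaves an out-of-range x/y window at zero):
import numpy as np

def local_crop(occ_map, z_idx, x_idx, y_idx, z_half=2, xy_half=11):
    """Return a (2*z_half+1, 2*xy_half+1, 2*xy_half+1) window centred on the agent."""
    padded = np.pad(occ_map,
                    ((z_half, z_half), (xy_half, xy_half), (xy_half, xy_half)),
                    mode="constant", constant_values=-1)
    return padded[z_idx:z_idx + 2 * z_half + 1,
                  x_idx:x_idx + 2 * xy_half + 1,
                  y_idx:y_idx + 2 * xy_half + 1]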
m_slam.occ_map[i] * 255 + # time.sleep(0.01) + # cv2.imwrite('test/'+str(int(round(time.time() * 1000)))+'.png',occ_map_2d) \ No newline at end of file diff --git a/competition/safetyplusplus_folder/unconstrained.py b/competition/safetyplusplus_folder/unconstrained.py new file mode 100755 index 000000000..56a2e0d2e --- /dev/null +++ b/competition/safetyplusplus_folder/unconstrained.py @@ -0,0 +1,159 @@ +import copy +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from safetyplusplus_folder.networks import Critic,Actor ,Encoder + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class TD3(object): + def __init__( + self, + state_dim, + action_dim, + max_action, + rew_discount=0.99, + tau=0.005, + policy_noise=0.2, + noise_clip=0.5, + policy_freq=2, + expl_noise = 0.1, + ): + local_feature_map_in_channels=5 + example_encoder=Encoder(local_feature_map_in_channels) + a=np.zeros([1,local_feature_map_in_channels,23,23]) + example_encoder=example_encoder.float() + self.local_feature_map_out_channels=example_encoder(torch.tensor(a).float()).shape[1] + + + self.actor = Actor(state_dim,local_feature_map_in_channels,self.local_feature_map_out_channels, action_dim, max_action).to(device) + self.actor_target = copy.deepcopy(self.actor) + self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=3e-4) + + self.critic = Critic(state_dim,local_feature_map_in_channels,self.local_feature_map_out_channels, action_dim).to(device) + self.critic_target = copy.deepcopy(self.critic) + self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=3e-4) + torch.set_num_threads(10) + self.action_dim = action_dim + self.max_action = max_action + self.rew_discount = rew_discount + self.tau = tau + self.policy_noise = policy_noise + self.noise_clip = noise_clip + self.policy_freq = policy_freq + + self.expl_noise = expl_noise + self.total_it = 0 + + + + def select_action(self,all_state,exploration=False): + global_state=torch.FloatTensor(all_state[0]).unsqueeze(0).to(device) + local_state=torch.FloatTensor(all_state[1]).unsqueeze(0).to(device) + action = self.actor([global_state,local_state]).cpu().data.numpy().flatten() + if exploration: + noise = np.random.normal(0, self.max_action * self.expl_noise, size=self.action_dim) + action = (action + noise).clip(-self.max_action, self.max_action) + return action + + + def train(self, replay_buffer, batch_size=256,train_nums=200): + for _ in range(train_nums): + self.total_it+=1 + # Sample replay buffer + global_state,local_state, action, next_global_state,next_local_state, reward, not_done = replay_buffer.sample(batch_size) + # import pdb;pdb.set_trace() + with torch.no_grad(): + # Select action according to policy and add clipped noise + noise = ( + torch.randn_like(action) * self.policy_noise + ).clamp(-self.noise_clip, self.noise_clip) + + next_action = ( + self.actor_target([next_global_state,next_local_state]) + noise + ).clamp(-self.max_action, self.max_action) + + # Compute the target Q value + target_Q1, target_Q2 = self.critic_target([next_global_state,next_local_state], next_action) + target_Q = torch.min(target_Q1, target_Q2) + target_Q = reward + not_done * self.rew_discount * target_Q + + # Get current Q estimates + current_Q1, current_Q2 = self.critic([global_state,local_state], action) + + # Compute critic loss + critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) + + # Optimize the critic + self.critic_optimizer.zero_grad() + 
critic_loss.backward() + self.critic_optimizer.step() + + + # Delayed policy updates + if self.total_it % self.policy_freq == 0: + + # Compute actor loss + action = self.actor([global_state,local_state]) + actor_loss = - self.critic.Q1([global_state,local_state], action).mean() + + # Optimize the actor + self.actor_optimizer.zero_grad() + actor_loss.backward() + self.actor_optimizer.step() + + # Update the frozen target models + for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + # import pdb;pdb.set_trace() + + + + def save(self, filename): + torch.save(self.critic.state_dict(), f"{filename}_critic") + torch.save(self.critic_optimizer.state_dict(), f"{filename}_critic_optimizer") + + torch.save(self.actor.state_dict(), f"{filename}_actor") + torch.save(self.actor_optimizer.state_dict(), f"{filename}_actor_optimizer") + + + def load(self, filename): + self.critic.load_state_dict(torch.load(f"{filename}_critic",map_location=lambda storage, loc: storage.cuda(0))) + self.critic_optimizer.load_state_dict(torch.load(f"{filename}_critic_optimizer",map_location=lambda storage, loc: storage.cuda(0))) + self.critic_target = copy.deepcopy(self.critic) + + self.actor.load_state_dict(torch.load(f"{filename}_actor",map_location=lambda storage, loc: storage.cuda(0))) + self.actor_optimizer.load_state_dict(torch.load(f"{filename}_actor_optimizer",map_location=lambda storage, loc: storage.cuda(0))) + self.actor_target = copy.deepcopy(self.actor) + + +# Runs policy for X episodes and returns average reward +# A fixed seed is used for the eval environment +# def eval_policy(policy, eval_env, seed, flag, eval_episodes=5): +# avg_reward = 0. +# avg_cost = 0. 
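# unconstrained.py above is a TD3 agent whose actor and critic consume both the
# global state vector and the (5, 23, 23) local occupancy crop. Its target
# networks are updated with the usual Polyak average,
# target <- tau * online + (1 - tau) * target, with tau = 0.005 by default.
# A standalone sketch of that update (illustrative, not the class method itself):
import torch

@torch.no_grad()
def soft_update(online: torch.nn.Module, target: torch.nn.Module, tau: float = 0.005):
    """Move each target parameter a small step toward the online network."""
    for p, tp in zip(online.parameters(), target.parameters()):
        tp.mul_(1.0 - tau).add_(tau * p)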
+# for _ in range(eval_episodes): +# if flag == 'constraint_violation': +# reset_info, done = eval_env.reset(), False +# state = reset_info[0] +# else: +# state, done = eval_env.reset(), False +# while not done: +# action = policy.select_action(np.array(state)) +# state, reward, done, info = eval_env.step(action) +# avg_reward += reward +# if info[flag]!=0: +# avg_cost += 1 + +# avg_reward /= eval_episodes +# avg_cost /= eval_episodes + +# print("---------------------------------------") +# print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f} Cost {avg_cost:.3f}.") +# print("---------------------------------------") +# return avg_reward,avg_cost \ No newline at end of file diff --git a/competition/submit_models/1016_01_1800_actor b/competition/submit_models/1016_01_1800_actor new file mode 100644 index 000000000..b23a81f57 Binary files /dev/null and b/competition/submit_models/1016_01_1800_actor differ diff --git a/competition/submit_models/1016_01_1800_actor_optimizer b/competition/submit_models/1016_01_1800_actor_optimizer new file mode 100644 index 000000000..694cdace3 Binary files /dev/null and b/competition/submit_models/1016_01_1800_actor_optimizer differ diff --git a/competition/submit_models/1016_01_1800_critic b/competition/submit_models/1016_01_1800_critic new file mode 100644 index 000000000..c126acb3a Binary files /dev/null and b/competition/submit_models/1016_01_1800_critic differ diff --git a/competition/submit_models/1016_01_1800_critic_optimizer b/competition/submit_models/1016_01_1800_critic_optimizer new file mode 100644 index 000000000..e909cce38 Binary files /dev/null and b/competition/submit_models/1016_01_1800_critic_optimizer differ diff --git a/competition/utils.py b/competition/utils.py new file mode 100644 index 000000000..6affe40bb --- /dev/null +++ b/competition/utils.py @@ -0,0 +1,95 @@ +import numpy as np + + + +def get_state(obs,info,NOMINAL_OBSTACLES,NOMINAL_GATES): + # state info : obs_info(3) + goal_info(3) + all_gate_info(1 + 16) + all_obstacle_info(12) = 35 + # x,y,z 3 + current_x=obs[0] + current_y=obs[2] + current_z=obs[4] + + # obstacle info + # all_obstacles_pos=np.array(NOMINAL_OBSTACLES)[:,0:3] + # for one_obstacle_info in all_obstacles_pos: + # one_obstacle_info[2] = 1.05 # quadrotor.py reset() + + # agent_pos = np.array([current_x, current_y, current_z]) + # dis = [sum((i - agent_pos) ** 2) for i in all_obstacles_pos] + # visual_index = [i for i in range(len(dis)) if dis[i] < 0.75] + # visual_obstacles = obstacles[visual_index] + + # obstacles_state = np.zeros(8) + # x_agent = agent_pos[0] + # y_agent = agent_pos[1] + # for obstacle in visual_obstacles: + # x = obstacle[0] + # y = obstacle[1] + # delta_x = x - x_agent + # delta_y = y - y_agent + # if delta_y * delta_x == 0: + # if delta_y == 0: + # if delta_x >= 0: + # obstacles_state[2] = 1 + # else: + # obstacles_state[6] = 1 + # elif delta_x == 0: + # if delta_y >= 0: + # obstacles_state[0] = 1 + # else: + # obstacles_state[4] = 1 + # else: + # if abs(delta_x) / abs(delta_y) >= 1.71: # gen hao 3 + # if delta_x > 0: + # obstacles_state[2] = 1 + # else: + # obstacles_state[6] = 1 + # elif abs(delta_x) / abs(delta_y) <= 1 / 1.71: # gen hao 3 + # if delta_y > 0: + # obstacles_state[0] = 1 + # else: + # obstacles_state[4] = 1 + # else: + # if delta_y * delta_x > 0: + # if delta_y > 0 and delta_x > 0: + # obstacles_state[1] = 1 + # elif delta_y < 0 and delta_x < 0: + # obstacles_state[5] = 1 + # else: + # if delta_y > 0 and delta_x < 0: + # obstacles_state[7] = 1 + # elif delta_y < 0 
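# The commented-out block above bins nearby obstacles into 8 bearing sectors
# around the drone; "gen hao 3" means sqrt(3) (~1.73, approximated as 1.71), so
# the cardinal sectors span 60 degrees and the diagonal ones 30 degrees. An
# equal-width (45 degree) variant via atan2 is sketched below; the sector
# numbering (0 = +y, increasing toward +x) mirrors the block above but is
# otherwise an assumption:
import numpy as np

def bearing_sector(delta_x, delta_y, n_sectors=8):
    """Index of the sector containing the direction (delta_x, delta_y)."""
    angle = np.arctan2(delta_x, delta_y) % (2 * np.pi)   # 0 at +y, growing toward +x
    width = 2 * np.pi / n_sectors
    return int(((angle + width / 2) // width) % n_sectors)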
and delta_x > 0: + # obstacles_state[3] = 1 + + + # [0.5, -2.5, 0, 0, 0, -1.57, 0],[2, -1.5, 0, 0, 0, 0, 1],[0, 0.2, 0, 0, 0, 1.57, 1],[-0.5, 1.5, 0, 0, 0, 0, 0] + #-> [0.5,-2.5,1,-1.57 , 2,-1.5,0.525,0 , 0,0.2,0.525,1.57, -0.5,1.5,1,0 ] + # all_gates_pos=np.array(NOMINAL_GATES) + # for one_gate_info in all_gates_pos: + # one_gate_info[2]=1 if one_gate_info[6] == 0 else 0.525 + # all_gates_pos=all_gates_pos[:,[0,1,2,5]].flatten() + + if info !={}: + # goal [x,y,z] + if info['current_target_gate_id'] == -1 : + current_goal_pos = goal_pos + current_target_gate_in_range= 0 + else : + # 1 or 0 1 + current_target_gate_in_range= 1 if info['current_target_gate_in_range'] == True else 0 + # [x,y,z,r] exactly if current_target_gate_in_range==1 else [x,y,z,y] noisy (r,p is zero ,ignored ) + current_target_gate_pos = info['current_target_gate_pos'] + if current_target_gate_pos[2]==0: # means that current_target_gate_in_range is False, add default height. + current_target_gate_pos[2]=1 if info['current_target_gate_type'] == 0 else 0.525 + current_target_gate_pos=np.array(current_target_gate_pos)[[0,1,2,5]] + current_goal_pos=current_target_gate_pos[:3] + + else : + current_target_gate_in_range= 0 + current_target_gate_pos = np.zeros(4) + current_goal_pos=np.zeros(3) + state=np.array([current_x,current_y,current_z,current_goal_pos[0]-current_x,current_goal_pos[1]-current_y,current_goal_pos[2]-current_z,current_target_gate_in_range,info['current_target_gate_id']]) + # state=np.append(state,all_obstacles_pos) + # state=np.append(state,all_gates_pos) + # state=np.append(state,current_target_gate_pos) + return state diff --git a/figures/terminal0.png b/figures/terminal0.png deleted file mode 100644 index 5068ab3f5..000000000 Binary files a/figures/terminal0.png and /dev/null differ diff --git a/figures/terminal1.png b/figures/terminal1.png deleted file mode 100644 index 224da45de..000000000 Binary files a/figures/terminal1.png and /dev/null differ diff --git a/poetry-core b/poetry-core new file mode 160000 index 000000000..50f5fd3a7 --- /dev/null +++ b/poetry-core @@ -0,0 +1 @@ +Subproject commit 50f5fd3a79e40b9ecd6be8a19a283c5fa3f3eb10 diff --git a/pyproject.toml b/pyproject.toml index c1a91f2b8..6f7b0c950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,5 +31,5 @@ rich = "^12.5.1 " [tool.poetry.dev-dependencies] [build-system] -requires = ["poetry-core @ git+https://github.com/python-poetry/poetry-core.git@master"] +requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/safe_control_gym/controllers/firmware/firmware_wrapper.py b/safe_control_gym/controllers/firmware/firmware_wrapper.py index 8de5ff7be..22102f33b 100644 --- a/safe_control_gym/controllers/firmware/firmware_wrapper.py +++ b/safe_control_gym/controllers/firmware/firmware_wrapper.py @@ -219,7 +219,8 @@ def step(self, sim_time, action): ''' self._process_command_queue(sim_time) - + total_reward=0 + break_violation_nums=0 # Draws setpoint for debugging purposes if self.verbose: if self.last_visualized_setpoint is not None: @@ -230,11 +231,14 @@ def step(self, sim_time, action): [self.setpoint.position.x, self.setpoint.position.y, self.setpoint.position.z], p.getQuaternionFromEuler([0,0,0]), physicsClientId=self.pyb_client) - + i=1 + # 500Hz 17-18 while self.tick / self.firmware_freq < sim_time + self.ctrl_dt: + # Step the environment and print all returned information. 
obs, reward, done, info = self.env.step(action) - + total_reward+=reward + break_violation_nums += info['constraint_violation'] # Get state values from pybullet cur_pos=np.array([obs[0], obs[2], obs[4]]) # global coord, m cur_vel=np.array([obs[1], obs[3], obs[5]]) # global coord, m/s @@ -292,7 +296,9 @@ def step(self, sim_time, action): done = True self.action = action - return obs, reward, done, info, action + i+=1 + # info['constraint_violation']=break_violation_nums + return obs, total_reward, done, info, action def _update_initial_state(self, obs): @@ -494,6 +500,7 @@ def sendFullStateCmd(self, pos, vel, acc, yaw, rpy_rate, timestep): rpy_rate (list): roll, pitch, yaw rates (rad/s) timestep (float): simulation time when command is sent (s) """ + self.command_queue += [['_sendFullStateCmd', [pos, vel, acc, yaw, rpy_rate, timestep]]] @@ -631,7 +638,7 @@ def sendStopCmd(self): """ self.command_queue += [['_sendStopCmd', []]] def _sendStopCmd(self): - print(f"INFO_{self.tick}: Stop command sent.") + # print(f"INFO_{self.tick}: Stop command sent.") firm.crtpCommanderHighLevelStop() self.full_state_cmd_override = False @@ -647,7 +654,7 @@ def sendGotoCmd(self, pos, yaw, duration_s, relative): """ self.command_queue += [['_sendGotoCmd', [pos, yaw, duration_s, relative]]] def _sendGotoCmd(self, pos, yaw, duration_s, relative): - print(f"INFO_{self.tick}: Go to command sent.") + # print(f"INFO_{self.tick}: Go to command sent.") firm.crtpCommanderHighLevelGoTo(*pos, yaw, duration_s, relative) self.full_state_cmd_override = False diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 613472ef4..4962a8204 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -353,6 +353,10 @@ def __init__(self, else: self.DONE_ON_COMPLETION = False + + ## + self.last_state=None + def reset(self): """(Re-)initializes the environment to start an episode. diff --git a/safe_control_gym/utils/utils.py b/safe_control_gym/utils/utils.py index 2dad183cb..9f45f7f90 100644 --- a/safe_control_gym/utils/utils.py +++ b/safe_control_gym/utils/utils.py @@ -163,7 +163,7 @@ def set_device_from_config(config): """ use_cuda = config.use_gpu and torch.cuda.is_available() - config.device = "cuda" if use_cuda else "cpu" + config.device = "cuda:0" if use_cuda else "cpu" def save_video(name, frames, fps=20):
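# The firmware_wrapper.step changes above accumulate the reward over every
# inner firmware-rate env step (the "500Hz 17-18" comment suggests roughly
# 17-18 firmware ticks per control step) and count constraint violations, then
# return total_reward instead of only the last sub-step's reward; the line that
# would write break_violation_nums back into info is still commented out.
# A stripped-down sketch of that accumulation, assuming a hypothetical `env`
# with the same step() signature and n_substeps >= 1:
def accumulated_step(env, action, n_substeps):
    """Run n_substeps env steps with one action, summing reward and violations."""
    assert n_substeps >= 1
    total_reward, violations = 0.0, 0
    for _ in range(n_substeps):
        obs, reward, done, info = env.step(action)
        total_reward += reward
        violations += int(info.get("constraint_violation", 0))
        if done:
            break
    return obs, total_reward, done, info, violations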