diff --git a/.gitignore b/.gitignore index a6880fd68..318e4375d 100644 --- a/.gitignore +++ b/.gitignore @@ -149,3 +149,10 @@ dmypy.json # Pyre type checker .pyre/ + +competition/logs/ +competition/results/ +*.png +competition/pretrain_models/ +*.npy +competition/models/ diff --git a/action_test.py b/action_test.py new file mode 100644 index 000000000..9eac64de8 --- /dev/null +++ b/action_test.py @@ -0,0 +1,221 @@ + +# action test: +""" test +#----------------test action--------------------- +# if self.iteration == 0: +# z = 1 +# yaw = 0. +# # duration 会影响时间 +# duration = 2 +# command_type = Command(5) # goTo. +# args = [[0, 0, z], yaw, duration, True] + +# elif self.iteration > 0.5 *self.CTRL_FREQ and self.iteration < 1.5 *self.CTRL_FREQ : +# command_type = Command(4) # Stop. +# args = [] +# else: +# command_type = Command(0) # None. +# args = [] +#----------------test action--------------------- +""" + +""" test go to action +#------------------------------------- +# go to not very exactly(exist error). if not random , always this (getting_started.yaml) +# x y z +# even if send command "only change y " from 60steps (2*CTRL_FREQ) z will continue increase ,meybe the problem about the Asynchronous +# init: [-0.96609923 0. -2.90654614 0. 0.04960551 0. 0.02840174 -0.05744736 0.07373744 0. 0. 0. ] +# 0-- [-0.96661885 -0.0279078 -2.90698836 -0.00873109 0.04799388 -0.05964756 0.02340614 -0.04682872 0.07295734 -0.60471722 1.66696169 -0.06040739] +# 15- [-0.98957009 0.0177486 -2.91855744 0.01612694 0.08783306 0.40195587 -0.01363278 0.039587 0.07313598 0.1224719 -0.18560217 -0.0133112 ] +# 30- [-9.74126116e-01 -6.12250160e-03 -2.90933867e+00 2.79855108e-02 5.77164080e-01 1.57968746e+00 1.13573342e-03 2.79637762e-04 7.34927365e-02 -3.14627467e-02 1.04491797e-01 -3.82468499e-05] +# 45- [-0.96690019 0.02592517 -2.9244567 -0.04027908 1.58570938 2.21925943 -0.04590852 0.00406897 0.07356948 -0.20060068 -0.07108402 -0.00401296] +# 60- [-0.97490167 -0.00690929 -2.90244781 0.03042487 2.5512753 1.51622867 0.03011076 0.02357646 0.07466138 -0.6881512 -0.51120803 0.0308384 ] +# 75- [-0.95789148 0.04922068 -2.84188733 0.34668242 3.25083697 1.17400743 -0.23051172 -0.00670337 0.07419264 -1.46396087 0.74862298 0.10745695] +# 90- [-0.96644482 0.03573701 -2.35380695 1.60398413 3.55250003 -0.0314102 -0.30132277 0.04541282 0.06064814 -0.13881515 1.02686106 -0.1751778 ] +# 105-[-0.96541722 0.00741155 -1.35492773 2.16873779 3.27599927 -0.96907974 -0.02264677 0.00403805 0.07491015 0.86021552 1.42216285 0.04174302] +# 120-[-9.69886183e-01 3.45681240e-02 -3.70208598e-01 1.58408557e+00 2.75920080e+00 -8.91757966e-01 1.95222977e-01 6.89915684e-04 7.17125806e-02 -2.54350558e-01 9.00435832e-01 2.10910125e-01] +# 135-[-0.93853971 0.14409335 0.299702 1.2196867 2.35446502 -0.66440774 0.08730616 0.0404132 0.0763123 1.39507016 -0.06357986 -0.14670249] +# if self.iteration == 0: +# z = 3 +# yaw = 0. +# # duration 会影响时间 +# duration = 3 +# command_type = Command(5) # goTo. +# args = [[0, 0, z], yaw, duration, True] +# elif self.iteration == 2 *self.CTRL_FREQ: +# y = 3 +# yaw = 0. +# # duration 会影响时间 +# duration = 3 +# command_type = Command(5) # goTo. +# args = [[0, y, 0], yaw, duration, True] +# elif self.iteration == 4 *self.CTRL_FREQ + 1: +# x = 1 +# yaw = 0. +# # duration 会影响时间 +# duration = 3 +# command_type = Command(5) # goTo. +# args = [[x, 0, 0], yaw, duration, True] +# else: +# command_type = Command(0) # None. 
+# args = [] +#----------------test action end --------------------- +""" + +"""test go to and stop action +#----------------test go to and stop action--------------------- +# result : stop can break the commmand but have to observe the rule about the intertia. +# if self.iteration == 0: +# z = 1 +# yaw = 0. +# # duration 会影响时间 +# duration = 0.2 +# command_type = Command(5) # goTo. +# args = [[0, 0, z], yaw, duration, True] +# elif self.iteration == 2 *self.CTRL_FREQ : +# command_type = Command(4) # Stop. +# args = [] +# elif self.iteration == 3 *self.CTRL_FREQ: +# y = -3 +# yaw = 0. +# # duration 会影响时间 +# duration = 5 +# command_type = Command(5) # goTo. +# args = [[0, y, 0], yaw, duration, True] +# elif self.iteration == 5 *self.CTRL_FREQ : +# command_type = Command(4) # Stop. +# args = [] +# elif self.iteration == 6 *self.CTRL_FREQ : +# x = -1 +# yaw = 0. +# # duration 会影响时间 +# duration = 5 +# command_type = Command(5) # goTo. +# args = [[x, 0, 0], yaw, duration, True] +# else: +# command_type = Command(0) # None. +# args = [] +#----------------test action end --------------------- +""" + +"""test the min time and the max distance about the go to action +#----------------test the min time and the max distance about the go to action--------------------- +# result : +if self.iteration == 0: + z = 0.15 + # duration 会影响时间 + duration = 0.33 + command_type = Command(5) # goTo. + args = [[0, 0, z], 0, duration, True] +else: + command_type = Command(0) # None. + args = [] +#----------------test action end --------------------- +""" + +""" tst the cmdFullState command the next command will break the last command +x=obs[0] +y=obs[2] +z=obs[4] +if self.iteration == 0: + height = 1 + duration = 2 + command_type = Command(2) # Take-off. + args = [height, duration] +# 90-[-9.75560513e-01 3.98563938e-02 -2.90579683e+00 -2.46277672e-03 9.46152214e-01 -4.69072173e-02 5.22578314e-03 3.41655474e-02 5.11630895e-02 -2.64937344e-01 1.81993538e+00 -7.07496076e-01] +elif self.iteration == 3 *self.CTRL_FREQ: + target_pos = np.array([x+0.1,y,z]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + +# 105-[-9.09596873e-01 2.00324028e-01 -2.90993115e+00 -3.19762174e-02 9.08722645e-01 -6.57070694e-02 -4.07341352e-02 -3.57578141e-02 2.78315544e-03 -4.06931208e-01 1.00299233e+00 2.78734319e-01] +elif self.iteration == 3.5 *self.CTRL_FREQ: + target_pos = np.array([x, y+0.1, z]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + +# 120-[-9.09082306e-01 -8.87231485e-02 -2.84472628e+00 2.40919527e-01 8.64559996e-01 1.77237485e-02 3.32461760e-02 2.48394834e-02 2.12280677e-03 9.54833499e-01 1.31716466e+00 1.11926138e-01] +elif self.iteration == 4 *self.CTRL_FREQ: + target_pos = np.array([x, y, z+0.1]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] +else : + command_type = Command(0) # None. 
+ args = [] +# 135-[-0.90734541 0.07334976 -2.83234816 -0.10398782 0.90615406 0.03580154 -0.0083358 -0.00963472 0.00332174 0.67757902 0.5731973 0.09230706] +""" + +""" +# Example action : Handwritten solution for GitHub's getting_stated scenario. +# # default action +# if self.iteration == 0: +# height = 1 +# duration = 2 +# command_type = Command(2) # Take-off. +# args = [height, duration] + +# elif self.iteration >= 3*self.CTRL_FREQ and self.iteration < 20*self.CTRL_FREQ: +# step = min(self.iteration-3*self.CTRL_FREQ, len(self.ref_x) -1) +# target_pos = np.array([self.ref_x[step], self.ref_y[step], self.ref_z[step]]) +# target_vel = np.zeros(3) +# target_acc = np.zeros(3) +# target_yaw = 0. +# target_rpy_rates = np.zeros(3) + +# command_type = Command(1) # cmdFullState. +# args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + +# elif self.iteration == 20*self.CTRL_FREQ: +# command_type = Command(6) # notify setpoint stop. +# args = [] + +# elif self.iteration == 20*self.CTRL_FREQ+1: +# x = self.ref_x[-1] +# y = self.ref_y[-1] +# z = 1.5 +# yaw = 0. +# duration = 2.5 + +# command_type = Command(5) # goTo. +# args = [[x, y, z], yaw, duration, False] + +# elif self.iteration == 23*self.CTRL_FREQ: +# x = self.initial_obs[0] +# y = self.initial_obs[2] +# z = 1.5 +# yaw = 0. +# duration = 6 + +# command_type = Command(5) # goTo. +# args = [[x, y, z], yaw, duration, False] + +# elif self.iteration == 30*self.CTRL_FREQ: +# height = 0. +# duration = 3 + +# command_type = Command(3) # Land. +# args = [height, duration] + +# elif self.iteration == 33*self.CTRL_FREQ-1: +# command_type = Command(-1) # Terminate command to be sent once trajectory is completed. +# args = [] + +# else: +# command_type = Command(0) # None. +# args = [] +""" diff --git a/competition/competition_utils.py b/competition/competition_utils.py index d00fe85ab..059c20405 100644 --- a/competition/competition_utils.py +++ b/competition/competition_utils.py @@ -259,7 +259,8 @@ def wrap(*args, **keyword_args): args[0].interstep_learning_time += elapsed args[0].interstep_learning_occurrences += 1 if elapsed >= args[0].CTRL_TIMESTEP: - print('\n[WARNING] Function "{}" took: {} sec (too slow)'.format(function.__name__, elapsed)) + # print('\n[WARNING] Function "{}" took: {} sec (too slow)'.format(function.__name__, elapsed)) + pass if args[0].VERBOSE and args[0].interstep_counter%int(args[0].CTRL_FREQ/2) == 0: print('\n{}-th call to function "{}" took: {} sec'.format(args[0].interstep_counter, function.__name__, elapsed)) return result diff --git a/competition/create_buffer.py b/competition/create_buffer.py new file mode 100644 index 000000000..05a8ec2d5 --- /dev/null +++ b/competition/create_buffer.py @@ -0,0 +1,499 @@ +"""Write your control strategy. + +Then run: + + $ python3 getting_started.py --overrides ./getting_started.yaml + +Tips: + Search for strings `INSTRUCTIONS:` and `REPLACE THIS (START)` in this file. + + Change the code between the 4 blocks starting with + ######################### + # REPLACE THIS (START) ## + ######################### + and ending with + ######################### + # REPLACE THIS (END) #### + ######################### + with your own code. 
+
+    They are in methods:
+        1) __init__
+        2) cmdFirmware
+        3) interStepLearn (optional)
+        4) interEpisodeLearn (optional)
+
+"""
+import numpy as np
+import time
+from collections import deque
+
+try:
+    from competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory
+except ImportError:
+    # Test import.
+    from .competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory
+from gym.spaces import Box
+from slam import SLAM
+
+from safetyplusplus_folder.plus_logger import SafeLogger
+import random
+file_name='test'
+sim_only=False
+
+class Controller():
+    """Template controller class.
+
+    """
+
+    def __init__(self,
+                 initial_obs,
+                 initial_info,
+                 use_firmware: bool = False,
+                 buffer_size: int = 100,
+                 verbose: bool = False
+                 ):
+        """Initialization of the controller.
+
+        INSTRUCTIONS:
+            The controller's constructor has access to the initial state `initial_obs` and the a priori information
+            contained in dictionary `initial_info`. Use this method to initialize constants, counters, pre-plan
+            trajectories, etc.
+
+        Args:
+            initial_obs (ndarray): The initial observation of the quadrotor's state
+                [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r].
+            initial_info (dict): The a priori information as a dictionary with keys
+                'symbolic_model', 'nominal_physical_parameters', 'nominal_gates_pos_and_type', etc.
+            use_firmware (bool, optional): Choice between the on-board control in `pycffirmware`
+                or the simplified software-only alternative.
+            buffer_size (int, optional): Size of the data buffers used in method `learn()`.
+            verbose (bool, optional): Turn on and off additional printouts and plots.
+
+        """
+        # Save environment and control parameters.
+        self.CTRL_TIMESTEP = initial_info["ctrl_timestep"]
+        self.CTRL_FREQ = initial_info["ctrl_freq"]
+
+        # At level 1 and above this initial observation changes, e.g.:
+        # [-0.9660992341982342, 0, -2.906546142854587, 0, 0.04960550660033104, 0, 0.02840173653255687, -0.05744735611733429, 0.07373743557600966, 0, 0, 0]
+        # [-0.96609923  0. -2.90654614  0.  0.04960551  0.  0.02840174 -0.05744736  0.07373744  0.  0.  0. ]
+        self.initial_obs = initial_obs
+
+        self.VERBOSE = verbose
+        self.BUFFER_SIZE = buffer_size
+
+        # Store a priori scenario information.
+
+        # At level 2 and above these nominal poses change.
+        # Columns: x y z r p y type
+        # Nominal values: [[0.5, -2.5, 0, 0, 0, -1.57, 0], [2, -1.5, 0, 0, 0, 0, 1], [0, 0.2, 0, 0, 0, 1.57, 1], [-0.5, 1.5, 0, 0, 0, 0, 0]]
+        # When the target gate comes into range it is reported like
+        # "Current target gate position: [0.6143489615604684, -2.634075935292277, 1.0, 0.0, 0.0, -1.6783756661588305]"
+        self.NOMINAL_GATES = initial_info["nominal_gates_pos_and_type"]
+
+        # At level 2 and above these nominal poses change.
+        # Columns: x y z r p y
+        # Nominal values: [[1.5, -2.5, 0, 0, 0, 0], [0.5, -1, 0, 0, 0, 0], [1.5, 0, 0, 0, 0, 0], [-1, 0, 0, 0, 0, 0]]
+        self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"]
+
+        # Check for pycffirmware.
+        if use_firmware:
+            self.ctrl = None
+        else:
+            # Initialize a simple PID controller for debugging and testing.
+            # Do NOT use for the IROS 2022 competition.
+            self.ctrl = PIDController()
+            # Save additional environment parameters.
+            self.KF = initial_info["quadrotor_kf"]
+
+        #########################
+        # REPLACE THIS (START) ##
+        #########################
+        import torch
+        torch.manual_seed(101)
+        np.random.seed(101)
+        # Time gap between network commands: 1 -> one command per second, 0.5 -> one per 0.5 s (~0.2 m per step, ~400 episodes).
+        self.net_work_freq=0.5
+
+        # state representation
+        self.mass=initial_info['nominal_physical_parameters']['quadrotor_mass']
+        self.global_state_dim = 9
+        self.local_state_dim=[5,23,23]
+        self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)]
+        self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)]
+
+        self.m_slam = SLAM()
+        self.m_slam.reset_occ_map()
+
+        # action space
+        action_dim = 3
+        max_action=2
+        min_action=max_action * (-1)
+        self.action_space=Box(np.array([min_action,min_action,min_action],dtype=np.float64),np.array([max_action,max_action,max_action],dtype=np.float64))
+        self.action_space.seed(1)
+        self.current_action=np.zeros(action_dim)
+        self.last_action=np.zeros(action_dim)
+        # network hyperparameters
+        kwargs = {
+            "state_dim": self.global_state_dim,
+            "action_dim": action_dim,
+            "max_action": max_action,
+            "rew_discount": 0.99,
+            "tau":0.005,
+            "policy_noise": 0.2 * max_action,
+            "noise_clip": 0.5 * max_action,
+            "policy_freq": 2,
+        }
+        from safetyplusplus_folder import safetyplusplus,replay_buffer,unconstrained
+        # TD3 policy
+        self.policy = unconstrained.TD3(**kwargs)
+        self.replay_buffer = replay_buffer.IrosReplayBuffer(self.global_state_dim,self.local_state_dim,action_dim,load=False)
+
+        # episode/environment bookkeeping
+        self.episode_reward = 0
+        self.episode_cost = 0
+        self.collisions_count=self.violations_count=0
+        self.episode_iteration=-1
+        self.target_gate_id=0
+        self.goal_pos=[initial_info['x_reference'][0],initial_info['x_reference'][2],initial_info['x_reference'][4]]
+        self.target_offset=np.array([0,0,0])
+        self.get_offset(info=None)
+
+        # Reset counters and buffers.
+        self.reset()
+        self.interEpisodeReset()
+
+        # logger
+        self.logger_plus = SafeLogger(exp_name=file_name, env_name="competition", seed=0,
+                                      fieldnames=['Eptime','EpRet', 'EpCost', 'collision_num','violation_num','target_gate'])
+        #########################
+        # REPLACE THIS (END) ####
+        #########################
+
+    def get_offset(self,info):
+        # Initial offset, before the first gate.
+        if info is None:
+            self.target_offset=np.array([0.2,0,0])
+        # Offset for the next target gate after crossing a gate.
+        elif info['current_target_gate_id'] == 1 or info['current_target_gate_id'] == 3:
+            self.target_offset=np.array([0,0.2,0])
+        elif info['current_target_gate_id'] == 2:
+            self.target_offset=np.array([-0.2,0,0])
+        else:
+            self.target_offset=np.array([0,0,0])
+
+    def get_state(self,obs,info):
+        # Global state layout: position (3) + vector to goal (3) + gate-in-range flag (1) + target gate id (1) + mass (1),
+        # plus the local occupancy-map image as a second entry.
+        # Current position x, y, z.
+        current_pos=[obs[0],obs[2],obs[4]]
+
+        if info !={}:
+            # goal [x,y,z]
+            if info['current_target_gate_id'] == -1 :
+                current_goal_pos = self.goal_pos
+                current_target_gate_in_range= 0
+            else :
+                # 1 if the target gate is within range, else 0.
+                current_target_gate_in_range= 1 if info['current_target_gate_in_range'] == True else 0
+                # Exact [x, y, z, yaw] when the gate is in range, otherwise a noisy estimate (roll and pitch are zero and ignored).
+                current_target_gate_pos = info['current_target_gate_pos']
+                if current_target_gate_pos[2]==0: # z == 0 means the gate is out of range; fill in the nominal gate height.
+                    current_target_gate_pos[2]=1 if info['current_target_gate_type'] == 0 else 0.525
+                current_target_gate_pos=np.array(current_target_gate_pos)[[0,1,2,5]]
+                current_goal_pos=current_target_gate_pos[:3]
+                current_goal_pos += self.target_offset
+        else :
+            current_target_gate_in_range= 0
+            current_goal_pos=np.zeros(3)
+        target_vector=[current_goal_pos[0]- current_pos[0],current_goal_pos[1]- current_pos[1],current_goal_pos[2]- current_pos[2]]
+        global_state=np.array([current_pos[0], current_pos[1], current_pos[2],target_vector[0],target_vector[1],target_vector[2],
+                               current_target_gate_in_range,info['current_target_gate_id'],self.mass])
+        local_state = self.m_slam.generate_3obs_img(obs,target_vector,name=self.episode_iteration,save=False)
+        return [global_state,local_state]
+
+    def cmdFirmware(self,ctime,obs,reward=None,done=None,info=None,exploration=True):
+        """Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface.
+
+        INSTRUCTIONS:
+            Re-implement this method to return the target position, velocity, acceleration, attitude, and attitude rates to be sent
+            from Crazyswarm to the Crazyflie using, e.g., a `cmdFullState` call.
+
+        Args:
+            ctime (float): Episode's elapsed time, in seconds.
+            obs (ndarray): The quadrotor's Vicon data [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0].
+            reward (float, optional): The reward signal.
+            done (bool, optional): Whether the episode has terminated.
+            info (dict, optional): Current step information as a dictionary with keys
+                'constraint_violation', 'current_target_gate_pos', etc.
+            exploration (bool, optional): Whether the policy should act with exploration noise.
+
+        Returns:
+            Command: selected type of command (takeOff, cmdFullState, etc., see Enum-like class `Command`).
+            List: arguments for the type of command (see comments in class `Command`)
+
+        """
+        self.episode_iteration+=1
+
+        if self.ctrl is not None:
+            raise RuntimeError("[ERROR] Using method 'cmdFirmware' but Controller was created with 'use_firmware' = False.")
+
+        #########################
+        # REPLACE THIS (START) ##
+        #########################
+        self.m_slam.update_occ_map(info)
+
+        # Begin with a take-off.
+        if self.episode_iteration == 0:
+            height = 1
+            duration = 1.5
+            command_type = Command(2)  # Take-off.
+            args = [height, duration]
+
+        # Choose the next setpoint (here a scripted action toward the goal; the network call is commented out while collecting data).
+        elif self.episode_iteration >= 3 * self.CTRL_FREQ :
+
+            if self.episode_iteration % (30*self.net_work_freq) ==0:
+                # cmdFullState
+                command_type = Command(1)  # cmdFullState.
+                all_state=self.get_state(obs,info)
+                global_state=all_state[0]
+                current_pos=np.array([global_state[0],global_state[1],global_state[2]])
+                target_pos=np.array([global_state[3]+current_pos[0],global_state[4]+current_pos[1],global_state[5]+current_pos[2]])
+                action_raw=(target_pos-current_pos)
+                action_raw= np.array([_+1 if _>=0 else _-1 for _ in action_raw])
+                # Clip the raw action to the action space [-2, 2].
+                action= np.where(action_raw<=2,action_raw,2)
+                action= np.where(action>=-2,action,-2)
+                # action = self.policy.select_action(all_state, exploration=exploration)     # array: delta_x, delta_y, delta_z
+                action /= 10
+                self.current_all_state=all_state
+                self.current_action=action
+                # import pdb;pdb.set_trace()
+                target_pos = global_state[[0,1,2]] + action
+                target_vel = np.zeros(3)
+                target_acc = np.zeros(3)
+                target_yaw = 0.
+                target_rpy_rates = np.zeros(3)
+                args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates]
+            # Do nothing on the remaining control steps.
+            else :
+                command_type = Command(0)  # None.
+ args = [] + + ######################### + # REPLACE THIS (END) #### + ######################### + + return command_type, args + + # NOT need to re-implement + def cmdSimOnly(self,time,obs,reward=None,done=None,info=None): + """PID per-propeller thrusts with a simplified, software-only PID quadrotor controller. + + INSTRUCTIONS: + You do NOT need to re-implement this method for the IROS 2022 Safe Robot Learning competition. + Only re-implement this method when `use_firmware` == False to return the target position and velocity. + + Args: + time (float): Episode's elapsed time, in seconds. + obs (ndarray): The quadrotor's state [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. + reward (float, optional): The reward signal. + done (bool, optional): Wether the episode has terminated. + info (dict, optional): Current step information as a dictionary with keys + 'constraint_violation', 'current_target_gate_pos', etc. + + Returns: + List: target position (len == 3). + List: target velocity (len == 3). + + """ + self.episode_iteration+=1 + if self.ctrl is None: + raise RuntimeError("[ERROR] Attempting to use method 'cmdSimOnly' but Controller was created with 'use_firmware' = True.") + + self.iteration = int(time*self.CTRL_FREQ) + self.m_slam.update_occ_map(info) + # import pdb;pdb.set_trace() + ######################### + self.interstep_counter += 1 + if self.episode_iteration <= 40: + target_p = np.array([-0.9,-2.9,1]) + else : + all_state=self.get_state(obs,info) + global_state=all_state[0] + if self.interepisode_counter < 30: + action= self.action_space.sample() + else: + action = self.policy.select_action(all_state, exploration=True) # array delta_x , delta_y, delta_z + action /= 10 + self.current_all_state=all_state + self.current_action=action + target_p = global_state[[0,1,2]] + action + + target_v = np.zeros(3) + ######################### + + return target_p, target_v + + @timing_step + def interStepLearn(self,args,obs,reward,done,info): + """Learning and controller updates called between control steps. + + INSTRUCTIONS: + Use the historically collected information in the five data buffers of actions, observations, + rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan. + + Args: + action (List): Most recent applied action. + obs (List): Most recent observation of the quadrotor state. + reward (float): Most recent reward. + done (bool): Most recent done flag. + info (dict): Most recent information dictionary. + + """ + self.interstep_counter += 1 + reward=0 + ######################### + # REPLACE THIS (START) ## + ######################### + if sim_only==False: + # add experience when use network to decide + if self.episode_iteration == 3 * self.CTRL_FREQ: + self.last_all_state=self.current_all_state + self.last_action = self.current_action + # import pdb;pdb.set_trace() + + if self.episode_iteration> 3 * self.CTRL_FREQ : + if self.episode_iteration % (30*self.net_work_freq) ==0: + last_pos= self.last_all_state[0][[0,1,2]] + current_pos=self.current_all_state[0][[0,1,2]] + + last2goal_vector= self.last_all_state[0][[3,4,5]] + last2cur_vector=current_pos-last_pos + cur2goal_vector=self.current_all_state[0][[3,4,5]] + + # std_last2goal_vector=last2goal_vector/(min(abs(last2goal_vector))) + # std_last2goal_vector=np.array([max(min(_,1.),-1.) for _ in std_last2goal_vector]) + + # std_last2cur_vector=last2cur_vector/(min(abs(last2cur_vector))) + # std_last2cur_vector=np.array([max(min(_,1.),-1.) 
for _ in std_last2cur_vector])
+
+                cur2goal_dis=sum(cur2goal_vector * cur2goal_vector)
+                last2goal_dis=sum(last2goal_vector * last2goal_vector)
+                # import pdb;pdb.set_trace()
+
+                # Reward progress toward the gate; give a flat bonus on the step that crosses a gate.
+                if self.last_all_state[0][-2] == self.current_all_state[0][-2]:
+                    reward =( last2goal_dis - cur2goal_dis ) * 20
+                else:
+                    reward = 10
+                # Crossed the gate.
+                if info['current_target_gate_id']!=self.target_gate_id :
+                    reward += 100
+                    print(f"STEP{self.episode_iteration} , step gate{self.target_gate_id}")
+                    self.get_offset(info)
+                if info['at_goal_position']:
+                    reward += 100
+                if info['constraint_violation'] :
+                    reward -= 10
+                if info["collision"][1] :
+                    reward -= 10
+                if info["collision"][1]:
+                    self.collisions_count += 1
+                    self.episode_cost+=1
+                if 'constraint_values' in info and info['constraint_violation'] == True:
+                    self.violations_count += 1
+                    self.episode_cost+=1
+                # cmdFullState
+                self.replay_buffer.add(self.last_all_state[0],self.last_all_state[1],self.last_action * 10 ,self.current_all_state[0],self.current_all_state[1],reward,done)
+                if self.episode_iteration % 900 ==0 :
+                    print(f"Step{self.episode_iteration} add buffer:\nlast_pos:{last_pos} aim vector: {last2goal_vector} ")
+                    print(f"action_infer: {self.last_action * 10}\t lastDis-CurDis:{( last2goal_dis - cur2goal_dis )}")
+                    print(f"last2cur_pos_vector: {last2cur_vector } \t reward: {reward}")
+                    print(f"target_gate_id:{info['current_target_gate_id']} ; pos: {info['current_target_gate_pos']}")
+                    print("*************************************************************************************")
+                # import pdb;pdb.set_trace()
+                self.episode_reward+=reward
+
+                # Prepare for the next update.
+                self.last_all_state=self.current_all_state
+                self.last_action=self.current_action
+                self.target_gate_id= info['current_target_gate_id']
+        else :
+            pass
+
+        # Changing the number of training updates works better (kept for reference).
+        # if self.interepisode_counter > 5:
+        #     self.policy.train(self.replay_buffer,batch_size=128,train_nums=int(5))
+
+        # Once the buffer holds 1e4 transitions, dump it to disk and stop.
+        if self.replay_buffer.size==1e4:
+            self.replay_buffer.write()
+            print("write done")
+            import sys
+            sys.exit(0)
+        #########################
+        # REPLACE THIS (END) ####
+        #########################
+
+    @timing_ep
+    def interEpisodeLearn(self,info):
+        """Learning and controller updates called between episodes.
+
+        INSTRUCTIONS:
+            Use the historically collected information in the five data buffers of actions, observations,
+            rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan.
+
+        """
+        self.interepisode_counter += 1
+
+        #########################
+        # REPLACE THIS (START) ##
+        #########################
+
+        # if self.interepisode_counter >= 20 :
+        #     self.policy.train(self.replay_buffer,batch_size=256,train_nums=(30 /self.net_work_freq ))
+
+        if self.interepisode_counter % 100 == 0 :
+            self.policy.save(filename=f"{self.logger_plus.log_dir}/{self.interepisode_counter}")
+
+        print(f"Episode Num: {self.interepisode_counter} Reward: {self.episode_reward:.3f} Cost: {self.episode_cost:.3f} violation: {self.violations_count:.3f} collision: {self.collisions_count:.3f}")
+        print(f"gates_passed: {info['current_target_gate_id']}, at_goal_position: {info['at_goal_position']}, task_completed: {info['task_completed']}")
+        self.logger_plus.update([self.episode_reward, self.episode_cost,self.collisions_count,self.violations_count,info['current_target_gate_id']], total_steps=self.interepisode_counter)
+
+        #########################
+        # REPLACE THIS (END) ####
+        #########################
+
+    def reset(self):
+        # Counters.
+ self.interstep_counter = 0 + self.interepisode_counter = 0 + + def interEpisodeReset(self): + """Initialize/reset learning timing variables. + + Called between episodes in `getting_started.py`. + + """ + # Timing stats variables. + self.interstep_learning_time = 0 + self.interstep_learning_occurrences = 0 + self.interepisode_learning_time = 0 + self.episode_reward = 0 + self.episode_cost = 0 + self.collisions_count=self.violations_count=0 + self.episode_iteration=-1 + # env-based variable + self.target_gate_id=0 + self.m_slam.reset_occ_map() + self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.target_offset=np.array([0,0,0]) + diff --git a/competition/edit_this.py b/competition/edit_this.py index e61f25d55..2a2c4457c 100644 --- a/competition/edit_this.py +++ b/competition/edit_this.py @@ -24,8 +24,10 @@ 4) interEpisodeLearn (optional) """ +from bdb import set_trace +from tkinter import Y import numpy as np - +import time from collections import deque try: @@ -33,7 +35,19 @@ except ImportError: # PyTest import. from .competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory +from gym.spaces import Box + +from safetyplusplus_folder.slam import SLAM +from safetyplusplus_folder.plus_logger import SafeLogger +import random + + +file_name='Submit' +Pretrain=False +test=True +sim_only=False +model_name='submit_models/1016_01_1800' ######################### # REPLACE THIS (START) ## ######################### @@ -55,6 +69,7 @@ class Controller(): """ + def __init__(self, initial_obs, initial_info, @@ -86,11 +101,9 @@ def __init__(self, self.initial_obs = initial_obs self.VERBOSE = verbose self.BUFFER_SIZE = buffer_size - - # Store a priori scenario information. self.NOMINAL_GATES = initial_info["nominal_gates_pos_and_type"] - self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"] - + self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"] + # Check for pycffirmware. if use_firmware: self.ctrl = None @@ -101,71 +114,131 @@ def __init__(self, # Save additonal environment parameters. self.KF = initial_info["quadrotor_kf"] - # Reset counters and buffers. 
- self.reset() - self.interEpisodeReset() + ######################### # REPLACE THIS (START) ## ######################### + import torch + torch.manual_seed(101) + torch.cuda.manual_seed(101) + torch.cuda.manual_seed_all(101) + np.random.seed(101) + random.seed(101) + self.begin_train_seconds=1.5 + self.begin_net_infer_epo=80 + self.begin_train_epo=100 + self.net_work_freq=0.5 # time gap 1 1s/次 0.5s/次 0.2m 400episode + max_action=2 + self.global_state_dim = 9 + self.set_offset=True + self.batch_size=128 + + # state-based + self.mass=initial_info['nominal_physical_parameters']['quadrotor_mass'] + self.local_state_dim=[5,23,23] + self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + + self.m_slam = SLAM() + self.m_slam.reset_occ_map() + + # action + action_dim = 3 + min_action=max_action * (-1) + self.action_space=Box(np.array([min_action,min_action,min_action],dtype=np.float64),np.array([max_action,max_action,max_action],dtype=np.float64)) + self.action_space.seed(1) + self.current_action=np.zeros(action_dim) + self.last_action=np.zeros(action_dim) + # network + kwargs = { + "state_dim": self.global_state_dim, + "action_dim": action_dim, + "max_action": max_action, + "rew_discount": 0.99, + "tau":0.005, + "policy_noise": 0.2 * max_action, + "noise_clip": 0.5 * max_action, + "policy_freq": 2, + } + from safetyplusplus_folder import safetyplusplus,replay_buffer,unconstrained + # td3 policy + self.policy = unconstrained.TD3(**kwargs) + self.replay_buffer = replay_buffer.IrosReplayBuffer(self.global_state_dim,self.local_state_dim,action_dim,load=False) + if test or Pretrain: + self.policy.load(model_name) + print("load ok ") + + # env-based variable + self.info=None + self.cur2goal_dis=0 + self.episode_reward = 0 + self.episode_cost = 0 + self.collisions_count=self.violations_count=0 + self.episode_iteration=-1 + self.target_gate_id=0 + self.goal_pos=[initial_info['x_reference'][0],initial_info['x_reference'][2],initial_info['x_reference'][4]] + self.target_offset=np.array([0,0,0]) + self.trigger=False + self.get_offset(info=None) + self.rule_control_time=0 + # Reset counters and buffers. + self.reset() + self.interEpisodeReset() - # Call a function in module `example_custom_utils`. - ecu.exampleFunction() - - # Example: hardcode waypoints through the gates. - if use_firmware: - waypoints = [(self.initial_obs[0], self.initial_obs[2], initial_info["gate_dimensions"]["tall"]["height"])] # Height is hardcoded scenario knowledge. - else: - waypoints = [(self.initial_obs[0], self.initial_obs[2], self.initial_obs[4])] - for idx, g in enumerate(self.NOMINAL_GATES): - height = initial_info["gate_dimensions"]["tall"]["height"] if g[6] == 0 else initial_info["gate_dimensions"]["low"]["height"] - if g[5] > 0.75 or g[5] < 0: - if idx == 2: # Hardcoded scenario knowledge (direction in which to take gate 2). - waypoints.append((g[0]+0.3, g[1]-0.3, height)) - waypoints.append((g[0]-0.3, g[1]-0.3, height)) - else: - waypoints.append((g[0]-0.3, g[1], height)) - waypoints.append((g[0]+0.3, g[1], height)) - else: - if idx == 3: # Hardcoded scenario knowledge (correct how to take gate 3). 
- waypoints.append((g[0]+0.1, g[1]-0.3, height)) - waypoints.append((g[0]+0.1, g[1]+0.3, height)) - else: - waypoints.append((g[0], g[1]-0.3, height)) - waypoints.append((g[0], g[1]+0.3, height)) - waypoints.append([initial_info["x_reference"][0], initial_info["x_reference"][2], initial_info["x_reference"][4]]) - - # Polynomial fit. - self.waypoints = np.array(waypoints) - deg = 6 - t = np.arange(self.waypoints.shape[0]) - fx = np.poly1d(np.polyfit(t, self.waypoints[:,0], deg)) - fy = np.poly1d(np.polyfit(t, self.waypoints[:,1], deg)) - fz = np.poly1d(np.polyfit(t, self.waypoints[:,2], deg)) - duration = 15 - t_scaled = np.linspace(t[0], t[-1], int(duration*self.CTRL_FREQ)) - self.ref_x = fx(t_scaled) - self.ref_y = fy(t_scaled) - self.ref_z = fz(t_scaled) - - if self.VERBOSE: - # Plot trajectory in each dimension and 3D. - plot_trajectory(t_scaled, self.waypoints, self.ref_x, self.ref_y, self.ref_z) - - # Draw the trajectory on PyBullet's GUI. - draw_trajectory(initial_info, self.waypoints, self.ref_x, self.ref_y, self.ref_z) - + # logger + self.logger_plus = SafeLogger(exp_name=file_name, env_name="compitition", seed=0, + fieldnames=['Eptime','EpRet', 'EpCost', 'collision_num','vilation_num','target_gate','dis']) ######################### # REPLACE THIS (END) #### ######################### - - def cmdFirmware(self, - time, - obs, - reward=None, - done=None, - info=None - ): + + + def get_offset(self,info): + # init + if info is None: + self.target_offset=np.array([0.05,0,0]) + # step cross gate + elif info['current_target_gate_id'] == 1 or info['current_target_gate_id'] == 3: + self.target_offset=np.array([0,0.05,0]) + elif info['current_target_gate_id'] == 2: + self.target_offset=np.array([-0.05,0,0]) + else: + self.target_offset=np.array([0,0,0]) + + def get_state(self,obs,info): + # state info :+ obs_info(3) + goal_info(3) + gate_in_range(1)+target_gate_id(1) + mass(1) + pic_info(16) + # x,y,z 3 + current_pos=[obs[0],obs[2],obs[4]] + + if info !={}: + # goal [x,y,z] + if info['current_target_gate_id'] == -1 : + current_goal_pos = self.goal_pos + current_target_gate_in_range= 0 + else : + # 1 or 0 1 + current_target_gate_in_range= 1 if info['current_target_gate_in_range'] == True else 0 + # [x,y,z,r] exactly if current_target_gate_in_range==1 else [x,y,z,y] noisy (r,p is zero ,ignored ) + current_target_gate_pos = info['current_target_gate_pos'] + if current_target_gate_pos[2]==0: # means that current_target_gate_in_range is False, add default height. 
+ current_target_gate_pos[2]=1 if info['current_target_gate_type'] == 0 else 0.525 + current_target_gate_pos=np.array(current_target_gate_pos)[[0,1,2,5]] + current_goal_pos=current_target_gate_pos[:3] + if self.set_offset: + current_goal_pos += self.target_offset + else : + current_target_gate_in_range= 0 + current_goal_pos=np.zeros(3) + target_vector=[current_goal_pos[0]- current_pos[0],current_goal_pos[1]- current_pos[1],current_goal_pos[2]- current_pos[2]] + # 10.09 V0.1 + global_state=np.array([current_pos[0], current_pos[1], current_pos[2],target_vector[0],target_vector[1],target_vector[2], + current_target_gate_in_range,info['current_target_gate_id'],self.mass]) + # global_state=np.array([current_pos[0], current_pos[1], current_pos[2],target_vector[0],target_vector[1],target_vector[2],self.mass]) + local_state = self.m_slam.generate_3obs_img(obs,target_vector,name=self.episode_iteration,save=False if self.episode_iteration % 20!=0 else True) + return [global_state,local_state] + + def cmdFirmware(self,ctime,obs,reward=None,done=None,info=None,exploration=True): """Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface. INSTRUCTIONS: @@ -185,87 +258,102 @@ def cmdFirmware(self, List: arguments for the type of command (see comments in class `Command`) """ + self.episode_iteration+=1 + if self.ctrl is not None: raise RuntimeError("[ERROR] Using method 'cmdFirmware' but Controller was created with 'use_firmware' = False.") - iteration = int(time*self.CTRL_FREQ) - ######################### # REPLACE THIS (START) ## ######################### - - # Handwritten solution for GitHub's getting_stated scenario. - - if iteration == 0: + self.m_slam.update_occ_map(info) + + + # begin with take off + if self.episode_iteration == 0: height = 1 - duration = 2 - + duration = 1.5 command_type = Command(2) # Take-off. args = [height, duration] - - elif iteration >= 3*self.CTRL_FREQ and iteration < 20*self.CTRL_FREQ: - step = min(iteration-3*self.CTRL_FREQ, len(self.ref_x) -1) - target_pos = np.array([self.ref_x[step], self.ref_y[step], self.ref_z[step]]) + elif info!={} and info['current_target_gate_id'] == -1 and self.episode_iteration % 5 ==0: + self.rule_control_time+=1 + # navigate to the goal_pos.and stop + command_type = Command(1) + if self.rule_control_time <=5: + target_pos = np.array(self.goal_pos) + target_pos[1] -=0.4 + elif self.rule_control_time <=20: + target_pos = np.array(self.goal_pos) + target_pos[1] -=0.2 + else: + target_pos = np.array(self.goal_pos) target_vel = np.zeros(3) target_acc = np.zeros(3) target_yaw = 0. target_rpy_rates = np.zeros(3) - - command_type = Command(1) # cmdFullState. - args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] - - elif iteration == 20*self.CTRL_FREQ: - command_type = Command(6) # Notify setpoint stop. - args = [] - - elif iteration == 20*self.CTRL_FREQ+1: - x = self.ref_x[-1] - y = self.ref_y[-1] - z = 1.5 - yaw = 0. - duration = 2.5 - - command_type = Command(5) # goTo. - args = [[x, y, z], yaw, duration, False] - - elif iteration == 23*self.CTRL_FREQ: - x = self.initial_obs[0] - y = self.initial_obs[2] - z = 1.5 - yaw = 0. - duration = 6 - - command_type = Command(5) # goTo. - args = [[x, y, z], yaw, duration, False] - - elif iteration == 30*self.CTRL_FREQ: - height = 0. - duration = 3 - - command_type = Command(3) # Land. - args = [height, duration] - - elif iteration == 33*self.CTRL_FREQ-1: - command_type = Command(-1) # Terminate command to be sent once the trajectory is completed. 
- args = [] - - else: + args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + + if self.episode_iteration % (30*self.net_work_freq) ==0: + all_state=self.get_state(obs,info) + global_state=all_state[0] + action=target_pos-global_state[[0,1,2]] + action /= 10 + + self.current_all_state=all_state + self.current_action=action + # using network to choose action + elif self.episode_iteration >= self.begin_train_seconds * self.CTRL_FREQ and self.episode_iteration % (30*self.net_work_freq) ==0 : + # cmdFullState + command_type = Command(1) + all_state=self.get_state(obs,info) + global_state=all_state[0] + if not (test or Pretrain) and self.interepisode_counter < self.begin_net_infer_epo: + action= self.action_space.sample() + else: + action = self.policy.select_action(all_state, exploration=True if not test else False) # array delta_x , delta_y, delta_z + action /= 10 + target_pos = global_state[[0,1,2]] + action + self.current_all_state=all_state + self.current_action=action + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + # other time do nothing + else : command_type = Command(0) # None. args = [] + # if test and info !={}: + # all_state=self.get_state(obs,info) + # global_state=all_state[0] + # current_local_space=all_state[1][2-1:2+2,11-1:11+2,11-1:11+2] + # if (current_local_space<0).any(): + # command_type = Command(1) + # if info['current_target_gate_id'] ==-1: + # last_gate_id=3 + # elif info['current_target_gate_id'] ==0: + # last_gate_id=0 + # else: + # last_gate_id=info['current_target_gate_id']-1 + # last_gate_pos=self.NOMINAL_GATES[last_gate_id] + # last_gate_pos[2]=1 if last_gate_pos[-1]==0 else 0.525 + # last_gate_pos=np.array(last_gate_pos[0:3]) + # target_pos=last_gate_pos + # target_vel = np.zeros(3) + # target_acc = np.zeros(3) + # target_yaw = 0. + # target_rpy_rates = np.zeros(3) + # args=[target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] ######################### # REPLACE THIS (END) #### ######################### - + return command_type, args - def cmdSimOnly(self, - time, - obs, - reward=None, - done=None, - info=None - ): + # NOT need to re-implement + def cmdSimOnly(self,time,obs,reward=None,done=None,info=None): """PID per-propeller thrusts with a simplified, software-only PID quadrotor controller. INSTRUCTIONS: @@ -285,28 +373,36 @@ def cmdSimOnly(self, List: target velocity (len == 3). 
""" + self.episode_iteration+=1 if self.ctrl is None: raise RuntimeError("[ERROR] Attempting to use method 'cmdSimOnly' but Controller was created with 'use_firmware' = True.") - iteration = int(time*self.CTRL_FREQ) + self.iteration = int(time*self.CTRL_FREQ) + self.m_slam.update_occ_map(info) ######################### - if iteration < len(self.ref_x): - target_p = np.array([self.ref_x[iteration], self.ref_y[iteration], self.ref_z[iteration]]) - else: - target_p = np.array([self.ref_x[-1], self.ref_y[-1], self.ref_z[-1]]) + self.interstep_counter += 1 + if self.episode_iteration <= 40: + target_p = np.array([-0.9,-2.9,1]) + else : + all_state=self.get_state(obs,info) + global_state=all_state[0] + if self.interepisode_counter < 30: + action= self.action_space.sample() + else: + action = self.policy.select_action(all_state, exploration=True) # array delta_x , delta_y, delta_z + action /= 10 + self.current_all_state=all_state + self.current_action=action + target_p = global_state[[0,1,2]] + action + target_v = np.zeros(3) ######################### return target_p, target_v @timing_step - def interStepLearn(self, - action, - obs, - reward, - done, - info): + def interStepLearn(self,args,obs,reward,done,info): """Learning and controller updates called between control steps. INSTRUCTIONS: @@ -322,20 +418,82 @@ def interStepLearn(self, """ self.interstep_counter += 1 - - # Store the last step's events. - self.action_buffer.append(action) - self.obs_buffer.append(obs) - self.reward_buffer.append(reward) - self.done_buffer.append(done) - self.info_buffer.append(info) - + reward=0 ######################### # REPLACE THIS (START) ## ######################### - - pass - + if not sim_only: + # add experience when use network to decide + if self.episode_iteration == self.begin_train_seconds * self.CTRL_FREQ: + self.last_all_state=self.current_all_state + self.last_action = self.current_action + + if self.episode_iteration> self.begin_train_seconds * self.CTRL_FREQ : + if info['constraint_violation']: + self.reward -= 50 + if info["collision"][1]: + self.reward -= 100 + self.collisions_count += 1 + self.episode_cost+=1 + if 'constraint_values' in info and info['constraint_violation'] == True: + self.violations_count += 1 + self.episode_cost+=1 + # if info['constraint_violation'] and ( test or self.interepisode_counter> 100 ) : + # print(f"step{self.episode_iteration} , constraint_violation : current pos : {current_pos}") + # if info["collision"][1] and ( test or self.interepisode_counter> 100 ): + # print(info["collision"]) + + if self.episode_iteration % (30*self.net_work_freq) ==0: + last_pos= self.last_all_state[0][[0,1,2]] + current_pos=self.current_all_state[0][[0,1,2]] + current_local_space=self.current_all_state[1][2-1:2+2,11-1:11+2,11-1:11+2] + + last2goal_vector= self.last_all_state[0][[3,4,5]] + last2cur_vector=current_pos-last_pos + cur2goal_vector=self.current_all_state[0][[3,4,5]] + cur2goal_dis=sum(cur2goal_vector * cur2goal_vector) + last2goal_dis=sum(last2goal_vector * last2goal_vector) + + if self.target_gate_id == info['current_target_gate_id']: + self.reward +=( last2goal_dis - cur2goal_dis ) * 20 + # cross the gate ,get the big reward + else: + self.reward = self.reward + 100 * ( (info['current_target_gate_id'] if info['current_target_gate_id']!=-1 else 4) +1) + print(f"STEP{self.episode_iteration} , step gate{self.target_gate_id}") + if info['current_target_gate_id']==-1: + print("step all gates") + self.trigger=True + self.get_offset(info) + if info['at_goal_position']: + 
self.reward += 100 + # if (current_local_space<0).any(): + # reward -= 5 + + # cmdFullState + self.replay_buffer.add(self.last_all_state[0],self.last_all_state[1],self.last_action * 10 ,self.current_all_state[0],self.current_all_state[1],self.reward,done) + if self.episode_iteration % 900 ==0 and not test: + print(f"Step{self.episode_iteration} add buffer:\nlast_pos:{last_pos} aim vector: {last2goal_vector} ") + print(f"action_infer: {self.last_action * 10}\t lastDis-CurDis:{( last2goal_dis - cur2goal_dis )}") + print(f"last2cur_pos_vector: {last2cur_vector } \t reward: {self.reward}") + print(f"target_gate_id:{info['current_target_gate_id']} ; pos: {info['current_target_gate_pos']} ; distance : {cur2goal_dis}") + print("*************************************************************************************") + self.episode_reward+=self.reward + + # ready for next update + self.last_all_state=self.current_all_state + self.last_action=self.current_action + self.target_gate_id= info['current_target_gate_id'] + self.cur2goal_dis=cur2goal_dis + self.info=info + self.reward=0 + + + else : + pass + # change_Train_Num Better + if (self.interepisode_counter > self.begin_train_epo and not test) or ( Pretrain and self.interepisode_counter > 20) : + # self.policy.train(self.replay_buffer,batch_size=min(128,64*int(self.interepisode_counter/100+1)),train_nums=int(1)) + self.policy.train(self.replay_buffer,batch_size=self.batch_size,train_nums=int(1)) ######################### # REPLACE THIS (END) #### ######################### @@ -350,34 +508,23 @@ def interEpisodeLearn(self): """ self.interepisode_counter += 1 - + ######################### # REPLACE THIS (START) ## ######################### - _ = self.action_buffer - _ = self.obs_buffer - _ = self.reward_buffer - _ = self.done_buffer - _ = self.info_buffer + if self.interepisode_counter % 300 == 0 or self.interepisode_counter==1000: + self.policy.save(filename=f"{self.logger_plus.log_dir}/{self.interepisode_counter}") + + print(f"Episode Num: {self.interepisode_counter} step Num : {self.episode_iteration} ,Reward: {self.episode_reward:.3f} Cost: {self.episode_cost:.3f} violation: {self.violations_count:.3f} collision:{self.collisions_count:.3f} ,") + print(f"gates_passed:{self.info['current_target_gate_id']},at_goal_position : {self.info['at_goal_position']} task_completed: {self.info['task_completed']}") + self.logger_plus.update([self.episode_reward, self.episode_cost,self.collisions_count,self.violations_count,self.info['current_target_gate_id'],self.cur2goal_dis], total_steps=self.interepisode_counter) ######################### # REPLACE THIS (END) #### ######################### def reset(self): - """Initialize/reset data buffers and counters. - - Called once in __init__(). - - """ - # Data buffers. - self.action_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.obs_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.reward_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.done_buffer = deque([], maxlen=self.BUFFER_SIZE) - self.info_buffer = deque([], maxlen=self.BUFFER_SIZE) - # Counters. 
self.interstep_counter = 0 self.interepisode_counter = 0 @@ -392,3 +539,19 @@ def interEpisodeReset(self): self.interstep_learning_time = 0 self.interstep_learning_occurrences = 0 self.interepisode_learning_time = 0 + self.episode_reward = 0 + self.episode_cost = 0 + self.collisions_count=self.violations_count=0 + self.episode_iteration=-1 + # env-based variable + self.target_gate_id=0 + self.m_slam.reset_occ_map() + self.current_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.last_all_state=[np.zeros(self.global_state_dim),np.zeros(self.local_state_dim)] + self.target_offset=np.array([0,0,0]) + self.cur2goal_dis=0 + self.info=None + self.trigger=False + self.rule_control_time=0 + self.reward=0 + diff --git a/competition/edit_this_base.py b/competition/edit_this_base.py new file mode 100644 index 000000000..d1c2af38a --- /dev/null +++ b/competition/edit_this_base.py @@ -0,0 +1,376 @@ +"""Write your control strategy. + +Then run: + + $ python3 getting_started.py --overrides ./getting_started.yaml + +Tips: + Search for strings `INSTRUCTIONS:` and `REPLACE THIS (START)` in this file. + + Change the code between the 4 blocks starting with + ######################### + # REPLACE THIS (START) ## + ######################### + and ending with + ######################### + # REPLACE THIS (END) #### + ######################### + with your own code. + + They are in methods: + 1) __init__ + 2) cmdFirmware + 3) interStepLearn (optional) + 4) interEpisodeLearn (optional) + +""" +import numpy as np + +from collections import deque + +try: + from competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory +except ImportError: + # Test import. + from .competition_utils import Command, PIDController, timing_step, timing_ep, plot_trajectory, draw_trajectory + + +class Controller(): + """Template controller class. + + """ + + def __init__(self, + initial_obs, + initial_info, + use_firmware: bool = False, + buffer_size: int = 100, + verbose: bool = False + ): + """Initialization of the controller. + + INSTRUCTIONS: + The controller's constructor has access the initial state `initial_obs` and the a priori infromation + contained in dictionary `initial_info`. Use this method to initialize constants, counters, pre-plan + trajectories, etc. + + Args: + initial_obs (ndarray): The initial observation of the quadrotor's state + [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. + initial_info (dict): The a priori information as a dictionary with keys + 'symbolic_model', 'nominal_physical_parameters', 'nominal_gates_pos_and_type', etc. + use_firmware (bool, optional): Choice between the on-board controll in `pycffirmware` + or simplified software-only alternative. + buffer_size (int, optional): Size of the data buffers used in method `learn()`. + verbose (bool, optional): Turn on and off additional printouts and plots. + + """ + # Save environment and conrol parameters. + self.CTRL_TIMESTEP = initial_info["ctrl_timestep"] + self.CTRL_FREQ = initial_info["ctrl_freq"] + self.initial_obs = initial_obs + self.VERBOSE = verbose + self.BUFFER_SIZE = buffer_size + + # Store a priori scenario information. + self.NOMINAL_GATES = initial_info["nominal_gates_pos_and_type"] + self.NOMINAL_OBSTACLES = initial_info["nominal_obstacles_pos"] + + # Check for pycffirmware. + if use_firmware: + self.ctrl = None + else: + # Initialize a simple PID Controller ror debugging and test + # Do NOT use for the IROS 2022 competition. 
+ self.ctrl = PIDController() + # Save additonal environment parameters. + self.KF = initial_info["quadrotor_kf"] + + # Reset counters and buffers. + self.reset() + self.interEpisodeReset() + + ######################### + # REPLACE THIS (START) ## + ######################### + + # Example: harcode waypoints through the gates. + if use_firmware: + waypoints = [(self.initial_obs[0], self.initial_obs[2], initial_info["gate_dimensions"]["tall"]["height"])] # Height is hardcoded scenario knowledge. + else: + waypoints = [(self.initial_obs[0], self.initial_obs[2], self.initial_obs[4])] + for idx, g in enumerate(self.NOMINAL_GATES): + height = initial_info["gate_dimensions"]["tall"]["height"] if g[6] == 0 else initial_info["gate_dimensions"]["low"]["height"] + if g[5] > 0.75 or g[5] < 0: + if idx == 2: # Hardcoded scenario knowledge (direction in which to take gate 2). + waypoints.append((g[0]+0.3, g[1]-0.3, height)) + waypoints.append((g[0]-0.3, g[1]-0.3, height)) + else: + waypoints.append((g[0]-0.3, g[1], height)) + waypoints.append((g[0]+0.3, g[1], height)) + else: + if idx == 3: # Hardcoded scenario knowledge (correct how to take gate 3). + waypoints.append((g[0]+0.1, g[1]-0.3, height)) + waypoints.append((g[0]+0.1, g[1]+0.3, height)) + else: + waypoints.append((g[0], g[1]-0.3, height)) + waypoints.append((g[0], g[1]+0.3, height)) + waypoints.append([initial_info["x_reference"][0], initial_info["x_reference"][2], initial_info["x_reference"][4]]) + + # Polynomial fit + self.waypoints = np.array(waypoints) + deg = 6 + t = np.arange(self.waypoints.shape[0]) + fx = np.poly1d(np.polyfit(t, self.waypoints[:,0], deg)) + fy = np.poly1d(np.polyfit(t, self.waypoints[:,1], deg)) + fz = np.poly1d(np.polyfit(t, self.waypoints[:,2], deg)) + duration = 15 + t_scaled = np.linspace(t[0], t[-1], int(duration*self.CTRL_FREQ)) + self.ref_x = fx(t_scaled) + self.ref_y = fy(t_scaled) + self.ref_z = fz(t_scaled) + + if self.VERBOSE: + # Plot trajectory in each dimension and 3D. + plot_trajectory(t_scaled, self.waypoints, self.ref_x, self.ref_y, self.ref_z) + + # Draw the trajectory on PyBullet's GUI + draw_trajectory(initial_info, self.waypoints, self.ref_x, self.ref_y, self.ref_z) + + ######################### + # REPLACE THIS (END) #### + ######################### + + def cmdFirmware(self, + time, + obs, + reward=None, + done=None, + info=None + ): + """Pick command sent to the quadrotor through a Crazyswarm/Crazyradio-like interface. + + INSTRUCTIONS: + Re-implement this method to return the target position, velocity, acceleration, attitude, and attitude rates to be sent + from Crazyswarm to the Crazyflie using, e.g., a `cmdFullState` call. + + Args: + time (float): Episode's elapsed time, in seconds. + obs (ndarray): The quadrotor's Vicon data [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0]. + reward (float, optional): The reward signal. + done (bool, optional): Wether the episode has terminated. + info (dict, optional): Current step information as a dictionary with keys + 'constraint_violation', 'current_target_gate_pos', etc. + + Returns: + Command: selected type of command (takeOff, cmdFullState, etc., see Enum-like class `Command`). 
+ List: arguments for the type of command (see comments in class `Command`) + + """ + if self.ctrl is not None: + raise RuntimeError("[ERROR] Using method 'cmdFirmware' but Controller was created with 'use_firmware' = False.") + + iteration = int(time*self.CTRL_FREQ) + # print(iteration) + ######################### + # REPLACE THIS (START) ## + ######################### + + # Handwritten solution for GitHub's getting_stated scenario. + + if iteration == 0: + height = 1 + duration = 2 + + command_type = Command(2) # Take-off. + args = [height, duration] + + elif iteration >= 3*self.CTRL_FREQ and iteration < 20*self.CTRL_FREQ: + step = min(iteration-3*self.CTRL_FREQ, len(self.ref_x) -1) + target_pos = np.array([self.ref_x[step], self.ref_y[step], self.ref_z[step]]) + target_vel = np.zeros(3) + target_acc = np.zeros(3) + target_yaw = 0. + target_rpy_rates = np.zeros(3) + + command_type = Command(1) # cmdFullState. + args = [target_pos, target_vel, target_acc, target_yaw, target_rpy_rates] + + elif iteration == 20*self.CTRL_FREQ: + command_type = Command(6) # notify setpoint stop. + args = [] + + elif iteration == 20*self.CTRL_FREQ+1: + x = self.ref_x[-1] + y = self.ref_y[-1] + z = 1.5 + yaw = 0. + duration = 2.5 + + command_type = Command(5) # goTo. + args = [[x, y, z], yaw, duration, False] + + elif iteration == 23*self.CTRL_FREQ: + x = self.initial_obs[0] + y = self.initial_obs[2] + z = 1.5 + yaw = 0. + duration = 6 + + command_type = Command(5) # goTo. + args = [[x, y, z], yaw, duration, False] + + elif iteration == 30*self.CTRL_FREQ: + height = 0. + duration = 3 + + command_type = Command(3) # Land. + args = [height, duration] + + elif iteration == 33*self.CTRL_FREQ-1: + command_type = Command(-1) # Terminate command to be sent once trajectory is completed. + args = [] + + else: + command_type = Command(0) # None. + args = [] + + ######################### + # REPLACE THIS (END) #### + ######################### + + return command_type, args + + def cmdSimOnly(self, + time, + obs, + reward=None, + done=None, + info=None + ): + """PID per-propeller thrusts with a simplified, software-only PID quadrotor controller. + + INSTRUCTIONS: + You do NOT need to re-implement this method for the IROS 2022 Safe Robot Learning competition. + Only re-implement this method when `use_firmware` == False to return the target position and velocity. + + Args: + time (float): Episode's elapsed time, in seconds. + obs (ndarray): The quadrotor's state [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]. + reward (float, optional): The reward signal. + done (bool, optional): Wether the episode has terminated. + info (dict, optional): Current step information as a dictionary with keys + 'constraint_violation', 'current_target_gate_pos', etc. + + Returns: + List: target position (len == 3). + List: target velocity (len == 3). + + """ + if self.ctrl is None: + raise RuntimeError("[ERROR] Attempting to use method 'cmdSimOnly' but Controller was created with 'use_firmware' = True.") + + iteration = int(time*self.CTRL_FREQ) + + ######################### + if iteration < len(self.ref_x): + target_p = np.array([self.ref_x[iteration], self.ref_y[iteration], self.ref_z[iteration]]) + else: + target_p = np.array([self.ref_x[-1], self.ref_y[-1], self.ref_z[-1]]) + target_v = np.zeros(3) + ######################### + + return target_p, target_v + + @timing_step + def interStepLearn(self, + action, + obs, + reward, + done, + info): + """Learning and controller updates called between control steps. 
+ + INSTRUCTIONS: + Use the historically collected information in the five data buffers of actions, observations, + rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan. + + Args: + action (List): Most recent applied action. + obs (List): Most recent observation of the quadrotor state. + reward (float): Most recent reward. + done (bool): Most recent done flag. + info (dict): Most recent information dictionary. + + """ + self.interstep_counter += 1 + + # Store the last step's events. + self.action_buffer.append(action) + self.obs_buffer.append(obs) + self.reward_buffer.append(reward) + self.done_buffer.append(done) + self.info_buffer.append(info) + + ######################### + # REPLACE THIS (START) ## + ######################### + + pass + + ######################### + # REPLACE THIS (END) #### + ######################### + + @timing_ep + def interEpisodeLearn(self,file_name): + """Learning and controller updates called between episodes. + + INSTRUCTIONS: + Use the historically collected information in the five data buffers of actions, observations, + rewards, done flags, and information dictionaries to learn, adapt, and/or re-plan. + + """ + self.interepisode_counter += 1 + + ######################### + # REPLACE THIS (START) ## + ######################### + + _ = self.action_buffer + _ = self.obs_buffer + _ = self.reward_buffer + _ = self.done_buffer + _ = self.info_buffer + + ######################### + # REPLACE THIS (END) #### + ######################### + + def reset(self): + """Initialize/reset data buffers and counters. + + Called once in __init__(). + + """ + # Data buffers. + self.action_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.obs_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.reward_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.done_buffer = deque([], maxlen=self.BUFFER_SIZE) + self.info_buffer = deque([], maxlen=self.BUFFER_SIZE) + + # Counters. + self.interstep_counter = 0 + self.interepisode_counter = 0 + + def interEpisodeReset(self): + """Initialize/reset learning timing variables. + + Called between episodes in `getting_started.py`. + + """ + # Timing stats variables. + self.interstep_learning_time = 0 + self.interstep_learning_occurrences = 0 + self.interepisode_learning_time = 0 \ No newline at end of file diff --git a/competition/getting_started.py b/competition/getting_started.py index ae2ef1951..97e417b92 100644 --- a/competition/getting_started.py +++ b/competition/getting_started.py @@ -15,7 +15,8 @@ from functools import partial from rich.tree import Tree from rich import print - +import sys +sys.path.append("..") from safe_control_gym.utils.configuration import ConfigFactory from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import sync @@ -249,11 +250,11 @@ def run(test=False): # If an episode is complete, reset the environment. if done: # Plot logging (comment as desired). - if not test: - logger.plot(comment="get_start-episode-"+str(episodes_count), autoclose=True) + # if not test: + # logger.plot(comment="get_start-episode-"+str(episodes_count), autoclose=True) # CSV save. - logger.save_as_csv(comment="get_start-episode-"+str(episodes_count)) + # logger.save_as_csv(comment="get_start-episode-"+str(episodes_count)) # Update the controller internal state and models. ctrl.interEpisodeLearn() @@ -329,6 +330,7 @@ def run(test=False): )) # Print episodes summary. 
+ tree = Tree("Summary") for idx, ep in enumerate(stats): ep_tree = tree.add('Episode ' + str(idx+1)) @@ -339,4 +341,4 @@ def run(test=False): print('\n\n') if __name__ == "__main__": - run() + run() \ No newline at end of file diff --git a/competition/getting_started_my.py b/competition/getting_started_my.py new file mode 100644 index 000000000..88c1d35ae --- /dev/null +++ b/competition/getting_started_my.py @@ -0,0 +1,345 @@ +"""Base script. + +Run as: + + $ python3 getting_started.py --overrides ./getting_started.yaml + +Look for instructions in `README.md` and `edit_this.py`. + +""" +import time +import inspect +import numpy as np +import pybullet as p + +from functools import partial +from rich.tree import Tree +from rich import print +import sys +sys.path.append("..") +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.registration import make +from safe_control_gym.utils.utils import sync +from safe_control_gym.envs.gym_pybullet_drones.Logger import Logger + +try: + from competition_utils import Command, thrusts + from edit_this import Controller +except ImportError: + # Test import. + from .competition_utils import Command, thrusts + from .edit_this import Controller + +try: + import pycffirmware +except ImportError: + FIRMWARE_INSTALLED = False +else: + FIRMWARE_INSTALLED = True +finally: + print("Module 'cffirmware' available:", FIRMWARE_INSTALLED) + + +def run(test=False): + """The main function creating, running, and closing an environment over N episodes. + + """ + + # Start a timer. + START = time.time() + + # Load configuration. + CONFIG_FACTORY = ConfigFactory() + config = CONFIG_FACTORY.merge() + + # Testing (without pycffirmware). + if test: + config['use_firmware'] = False + config['verbose'] = False + config.quadrotor_config['ctrl_freq'] = 60 + config.quadrotor_config['pyb_freq'] = 240 + config.quadrotor_config['gui'] = False + + # Check firmware configuration. + if config.use_firmware and not FIRMWARE_INSTALLED: + raise RuntimeError("[ERROR] Module 'cffirmware' not installed.") + CTRL_FREQ = config.quadrotor_config['ctrl_freq'] + CTRL_DT = 1/CTRL_FREQ + + # Create environment. + if config.use_firmware: + FIRMWARE_FREQ = 500 + assert(config.quadrotor_config['pyb_freq'] % FIRMWARE_FREQ == 0), "pyb_freq must be a multiple of firmware freq" + # The env.step is called at a firmware_freq rate, but this is not as intuitive to the end user, and so + # we abstract the difference. This allows ctrl_freq to be the rate at which the user sends ctrl signals, + # not the firmware. + config.quadrotor_config['ctrl_freq'] = FIRMWARE_FREQ + env_func = partial(make, 'quadrotor', **config.quadrotor_config) + firmware_wrapper = make('firmware', + env_func, FIRMWARE_FREQ, CTRL_FREQ + ) + obs, info = firmware_wrapper.reset() + info['ctrl_timestep'] = CTRL_DT + info['ctrl_freq'] = CTRL_FREQ + env = firmware_wrapper.env + else: + env = make('quadrotor', **config.quadrotor_config) + # Reset the environment, obtain the initial observations and info dictionary. + obs, info = env.reset() + + # Create controller. + vicon_obs = [obs[0], 0, obs[2], 0, obs[4], 0, obs[6], obs[7], obs[8], 0, 0, 0] + # obs = {x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r}. + # vicon_obs = {x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0}. 
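+    # Editor's note (hedged sketch, not part of the original patch): the remapping above keeps
+    # only the pose terms of the 12-dimensional state and zeroes the velocity/body-rate entries,
+    # mimicking what a motion-capture (Vicon) system would report. The same remapping written as
+    # a small helper, purely for illustration; `to_vicon_obs` is a hypothetical name and is not
+    # used elsewhere in this patch:
+    #
+    #   def to_vicon_obs(state):
+    #       return [state[0], 0, state[2], 0, state[4], 0, state[6], state[7], state[8], 0, 0, 0]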
+ ctrl = Controller(vicon_obs, info, config.use_firmware, verbose=config.verbose) + + # Create a logger and counters + logger = Logger(logging_freq_hz=CTRL_FREQ) + episodes_count = 1 + cumulative_reward = 0 + collisions_count = 0 + collided_objects = set() + violations_count = 0 + episode_start_iter = 0 + time_label_id = p.addUserDebugText("", textPosition=[0, 0, 1],physicsClientId=env.PYB_CLIENT) + num_of_gates = len(config.quadrotor_config.gates) + stats = [] + + # Wait for keyboard input to start. + # input("Press any key to start") + + # Initial printouts. + if config.verbose: + print('\tInitial observation [x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0]: ' + str(obs)) + print('\tControl timestep: ' + str(info['ctrl_timestep'])) + print('\tControl frequency: ' + str(info['ctrl_freq'])) + print('\tMaximum episode duration: ' + str(info['episode_len_sec'])) + print('\tNominal quadrotor mass and inertia: ' + str(info['nominal_physical_parameters'])) + print('\tGates properties: ' + str(info['gate_dimensions'])) + print('\tObstacles properties: ' + str(info['obstacle_dimensions'])) + print('\tNominal gates positions [x, y, z, r, p, y, type]: ' + str(info['nominal_gates_pos_and_type'])) + print('\tNominal obstacles positions [x, y, z, r, p, y]: ' + str(info['nominal_obstacles_pos'])) + print('\tFinal target hover position [x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r]: ' + str(info['x_reference'])) + print('\tDistribution of the error on the initial state: ' + str(info['initial_state_randomization'])) + print('\tDistribution of the error on the inertial properties: ' + str(info['inertial_prop_randomization'])) + print('\tDistribution of the error on positions of gates and obstacles: ' + str(info['gates_and_obs_randomization'])) + print('\tDistribution of the disturbances: ' + str(info['disturbances'])) + print('\tA priori symbolic model:') + print('\t\tState: ' + str(info['symbolic_model'].x_sym).strip('vertcat')) + print('\t\tInput: ' + str(info['symbolic_model'].u_sym).strip('vertcat')) + print('\t\tDynamics: ' + str(info['symbolic_model'].x_dot).strip('vertcat')) + print('Input constraints lower bounds: ' + str(env.constraints.input_constraints[0].lower_bounds)) + print('Input constraints upper bounds: ' + str(env.constraints.input_constraints[0].upper_bounds)) + print('State constraints active dimensions: ' + str(config.quadrotor_config.constraints[1].active_dims)) + print('State constraints lower bounds: ' + str(env.constraints.state_constraints[0].lower_bounds)) + print('State constraints upper bounds: ' + str(env.constraints.state_constraints[0].upper_bounds)) + print('\tSymbolic constraints: ') + for fun in info['symbolic_constraints']: + print('\t' + str(inspect.getsource(fun)).strip('\n')) + + # Run an experiment. + ep_start = time.time() + first_ep_iteration = True + for i in range(config.num_episodes*CTRL_FREQ*env.EPISODE_LEN_SEC): + + # Step by keyboard input. + # _ = input("Press any key to continue") + + # Elapsed sim time. + curr_time = (i-episode_start_iter)*CTRL_DT + + # Print episode time in seconds on the GUI. + time_label_id = p.addUserDebugText("Ep. time: {:.2f}s".format(curr_time), + textPosition=[0, 0, 1.5], + textColorRGB=[1, 0, 0], + lifeTime=3*CTRL_DT, + textSize=1.5, + parentObjectUniqueId=0, + parentLinkIndex=-1, + replaceItemUniqueId=time_label_id, + physicsClientId=env.PYB_CLIENT) + + # Compute control input. 
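+        # Note (added commentary): in the firmware branch below, `ctrl.cmdFirmware()` returns a
+        # (Command, args) pair that is dispatched to the matching Crazyswarm-style call on the
+        # firmware wrapper (sendFullStateCmd, sendTakeoffCmd, sendLandCmd, sendGotoCmd, ...),
+        # while in the software-only branch `ctrl.cmdSimOnly()` returns a (target_pos, target_vel)
+        # pair that is converted to per-motor thrusts before stepping the environment.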
+ if config.use_firmware: + vicon_obs = [obs[0], 0, obs[2], 0, obs[4], 0, obs[6], obs[7], obs[8], 0, 0, 0] + # obs = {x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p, q, r}. + # vicon_obs = {x, 0, y, 0, z, 0, phi, theta, psi, 0, 0, 0}. + if first_ep_iteration: + action = np.zeros(4) + reward = 0 + done = False + info = {} + first_ep_iteration = False + command_type, args = ctrl.cmdFirmware(curr_time, vicon_obs, reward, done, info) + + # Select interface. + if command_type == Command.FULLSTATE: + firmware_wrapper.sendFullStateCmd(*args, curr_time) + elif command_type == Command.TAKEOFF: + firmware_wrapper.sendTakeoffCmd(*args) + elif command_type == Command.LAND: + firmware_wrapper.sendLandCmd(*args) + elif command_type == Command.STOP: + firmware_wrapper.sendStopCmd() + elif command_type == Command.GOTO: + firmware_wrapper.sendGotoCmd(*args) + elif command_type == Command.NOTIFYSETPOINTSTOP: + firmware_wrapper.notifySetpointStop() + elif command_type == Command.NONE: + pass + else: + raise ValueError("[ERROR] Invalid command_type.") + + # Step the environment. + obs, reward, done, info, action = firmware_wrapper.step(curr_time, action) + else: + if first_ep_iteration: + reward = 0 + done = False + info = {} + first_ep_iteration = False + target_pos, target_vel = ctrl.cmdSimOnly(curr_time, obs, reward, done, info) + action = thrusts(ctrl.ctrl, ctrl.CTRL_TIMESTEP, ctrl.KF, obs, target_pos, target_vel) + obs, reward, done, info = env.step(action) + + # Update the controller internal state and models. + ctrl.interStepLearn(action, obs, reward, done, info) + + # Add up reward, collisions, violations. + cumulative_reward += reward + if info["collision"][1]: + collisions_count += 1 + collided_objects.add(info["collision"][0]) + if 'constraint_values' in info and info['constraint_violation'] == True: + violations_count += 1 + + # Printouts. + if config.verbose and i%int(CTRL_FREQ/2) == 0: + print('\n'+str(i)+'-th step.') + print('\tApplied action: ' + str(action)) + print('\tObservation: ' + str(obs)) + print('\tReward: ' + str(reward) + ' (Cumulative: ' + str(cumulative_reward) +')') + print('\tDone: ' + str(done)) + print('\tCurrent target gate ID: ' + str(info['current_target_gate_id'])) + print('\tCurrent target gate type: ' + str(info['current_target_gate_type'])) + print('\tCurrent target gate in range: ' + str(info['current_target_gate_in_range'])) + print('\tCurrent target gate position: ' + str(info['current_target_gate_pos'])) + print('\tAt goal position: ' + str(info['at_goal_position'])) + print('\tTask completed: ' + str(info['task_completed'])) + if 'constraint_values' in info: + print('\tConstraints evaluations: ' + str(info['constraint_values'])) + print('\tConstraints violation: ' + str(bool(info['constraint_violation']))) + print('\tCollision: ' + str(info["collision"])) + print('\tTotal collisions: ' + str(collisions_count)) + print('\tCollided objects (history): ' + str(collided_objects)) + + # Log data. + pos = [obs[0],obs[2],obs[4]] + rpy = [obs[6],obs[7],obs[8]] + vel = [obs[1],obs[3],obs[5]] + bf_rates = [obs[9],obs[10],obs[11]] + # logger.log(drone=0, + # timestamp=i/CTRL_FREQ, + # state=np.hstack([pos, np.zeros(4), rpy, vel, bf_rates, np.sqrt(action/env.KF)]) + # ) + + # Synchronize the GUI. + if config.quadrotor_config.gui: + sync(i-episode_start_iter, ep_start, CTRL_DT) + + # If an episode is complete, reset the environment. + if done: + + # Plot logging (comment as desired). 
+ # if not test: + # logger.plot(comment="get_start-episode-"+str(episodes_count), autoclose=True) + + # # CSV save. + # logger.save_as_csv(comment="get_start-episode-"+str(episodes_count)) + + # Update the controller internal state and models. + ctrl.interEpisodeLearn() + + # Append episode stats. + # if info['current_target_gate_id'] == -1: + # gates_passed = num_of_gates + # else: + # gates_passed = info['current_target_gate_id'] + # if config.quadrotor_config.done_on_collision and info["collision"][1]: + # termination = 'COLLISION' + # elif config.quadrotor_config.done_on_completion and info['task_completed']: + # termination = 'TASK COMPLETION' + # elif config.quadrotor_config.done_on_violation and info['constraint_violation']: + # termination = 'CONSTRAINT VIOLATION' + # else: + # termination = 'MAX EPISODE DURATION' + # if ctrl.interstep_learning_occurrences != 0: + # interstep_learning_avg = ctrl.interstep_learning_time/ctrl.interstep_learning_occurrences + # else: + # interstep_learning_avg = ctrl.interstep_learning_time + # episode_stats = [ + # '[yellow]Flight time (s): '+str(curr_time), + # '[yellow]Reason for termination: '+termination, + # '[green]Gates passed: '+str(gates_passed), + # '[green]Total reward: '+str(cumulative_reward), + # '[red]Number of collisions: '+str(collisions_count), + # '[red]Number of constraint violations: '+str(violations_count), + # '[white]Total and average interstep learning time (s): '+str(ctrl.interstep_learning_time)+', '+str(interstep_learning_avg), + # '[white]Interepisode learning time (s): '+str(ctrl.interepisode_learning_time), + # ] + # stats.append(episode_stats) + + # Create a new logger. + # logger = Logger(logging_freq_hz=CTRL_FREQ) + + + # Reset/update counters. + episodes_count += 1 + if episodes_count > config.num_episodes: + break + cumulative_reward = 0 + collisions_count = 0 + collided_objects = set() + violations_count = 0 + ctrl.interEpisodeReset() + + # Reset the environment. + if config.use_firmware: + # Re-initialize firmware. + new_initial_obs, _ = firmware_wrapper.reset() + else: + new_initial_obs, _ = env.reset() + first_ep_iteration = True + + if config.verbose: + print(str(episodes_count)+'-th reset.') + print('Reset obs' + str(new_initial_obs)) + + episode_start_iter = i+1 + ep_start = time.time() + + # Close the environment and print timing statistics. + env.close() + elapsed_sec = time.time() - START + print(str("\n{:d} iterations (@{:d}Hz) and {:d} episodes in {:.2f} sec, i.e. {:.2f} steps/sec for a {:.2f}x speedup.\n" + .format(i, + env.CTRL_FREQ, + config.num_episodes, + elapsed_sec, + i/elapsed_sec, + (i*CTRL_DT)/elapsed_sec + ) + )) + + # Print episodes summary. 
+ tree = Tree("Summary") + for idx, ep in enumerate(stats): + ep_tree = tree.add('Episode ' + str(idx+1)) + for val in ep: + ep_tree.add(val) + print('\n\n') + print(tree) + print('\n\n') + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/competition/level0.yaml b/competition/level0.yaml index d2e910690..d536a2154 100644 --- a/competition/level0.yaml +++ b/competition/level0.yaml @@ -5,9 +5,9 @@ # | `level0.yaml` | **Yes** | *No* | *No* | *No* | Perfect knowledge | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 1000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -50,14 +50,14 @@ quadrotor_config: init_r: 0 # Nominal (pre-randomization) positions of gates and obstacles - gates: + gates: # high = 1 , low=0.525 yaw : 0 means 0°,1.57 means90°. 
edge is 0.45 [ # x, y, z, r, p, y, type (0: `tall` obstacle, 1: `low` obstacle) [0.5, -2.5, 0, 0, 0, -1.57, 0], [2, -1.5, 0, 0, 0, 0, 1], [0, 0.2, 0, 0, 0, 1.57, 1], [-0.5, 1.5, 0, 0, 0, 0, 0] ] - obstacles: + obstacles: # z default = 1.15 [ # x, y, z, r, p, y [1.5, -2.5, 0, 0, 0, 0], [0.5, -1, 0, 0, 0, 0], diff --git a/competition/level1.yaml b/competition/level1.yaml index 30fb937c8..ad44d6ba3 100644 --- a/competition/level1.yaml +++ b/competition/level1.yaml @@ -5,9 +5,9 @@ # | `level1.yaml` | **Yes** | **Yes** | *No* | *No** | Adaptive | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 1000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py diff --git a/competition/level2.yaml b/competition/level2.yaml index c03d78447..8ed7375f6 100644 --- a/competition/level2.yaml +++ b/competition/level2.yaml @@ -5,9 +5,9 @@ # | `level2.yaml` | **Yes** | **Yes** | **Yes** | *No* | Learning, re-planning | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 2000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a 
constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py diff --git a/competition/level3.yaml b/competition/level3.yaml index f897113eb..6c4579e26 100644 --- a/competition/level3.yaml +++ b/competition/level3.yaml @@ -5,9 +5,9 @@ # | `level3.yaml` | **Yes** | **Yes** | **Yes** | **Yes** | Robustness | # Configuration -num_episodes: 5 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) +num_episodes: 1000 # Customizable number of episodes (NOTE: solutions that require fewer episode will receive higher scores) use_firmware: True # Whether to use pycffirmware or not (NOTE: only set to False for debugging purposes) -verbose: True # Boolean passed to the Controller class, can be used to turn on and off additional printouts +verbose: False # Boolean passed to the Controller class, can be used to turn on and off additional printouts # Note: DSL's flyable/usable space in Toronto extends from -3 to +3 meters in x and y and 0 and 2 meters in z # Thus, only scenarios that meet these constraints are suitable for sim2real experiments @@ -23,9 +23,9 @@ quadrotor_config: pyb_freq: 500 # If `use_firmware` == True, must be multiple of 500, otherwise it must be multiple of ctrl_freq episode_len_sec: 33 # Maximum episode duration, in seconds - done_on_violation: True # Whether `done` becomes True when a constraint is violated + done_on_violation: False # Whether `done` becomes True when a constraint is violated done_on_completion: True # Whether `done` becomes True when the task is completed (traveled all gates and 2'' hover at stabilization_goal) - done_on_collision: True # Whether `done` becomes True when a collision happens + done_on_collision: False # Whether `done` becomes True when a collision happens cost: competition # Sparse reward function, if you desire, re-implement it in Quadrotor._get_reward() # github.com/utiasDSL/safe-control-gym/blob/beta-iros-competition/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py diff --git a/competition/safetyplusplus_folder/log_utils.py b/competition/safetyplusplus_folder/log_utils.py new file mode 100755 index 000000000..c62be32fa --- /dev/null +++ b/competition/safetyplusplus_folder/log_utils.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +__author__ = 'MICROYU' + +import json + +color2num = dict( + gray=30, + red=31, + green=32, + yellow=33, + blue=34, + magenta=35, + cyan=36, + white=37, + crimson=38 +) + +def colorize(string, color, bold=False, highlight=False): + """ + Colorize a string. + + This function was originally written by John Schulman. + """ + attr = [] + num = color2num[color] + if highlight: num += 10 + attr.append(str(num)) + if bold: attr.append('1') + return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) + +def is_json_serializable(v): + try: + json.dumps(v) + return True + except: + return False + +def convert_json(obj): + """ Convert obj to a version which can be serialized with JSON. 
""" + if is_json_serializable(obj): + return obj + else: + if isinstance(obj, dict): + return {convert_json(k): convert_json(v) + for k,v in obj.items()} + + if isinstance(obj, tuple): + return (convert_json(x) for x in obj) + + elif isinstance(obj, list): + return [convert_json(x) for x in obj] + + elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): + return convert_json(obj.__name__) + + elif hasattr(obj,'__dict__') and obj.__dict__: + obj_dict = {convert_json(k): convert_json(v) + for k,v in obj.__dict__.items() if not str(k).startswith('_')} + return {'__dict__': obj_dict} + + return str(obj) + +def save_config(exp_name, config, log_dir): + config_json = convert_json(config) + if exp_name is not None: + config_json['exp_name'] = exp_name + output = json.dumps(config_json, separators=(',',':\t'), indent=4, sort_keys=True) + print(colorize('Saving config:', color='cyan', bold=True)) + print(colorize(output, color='cyan', bold=True)) + with open(log_dir + "/config.json", 'w') as out: + out.write(output) \ No newline at end of file diff --git a/competition/safetyplusplus_folder/networks.py b/competition/safetyplusplus_folder/networks.py new file mode 100755 index 000000000..bdd05d290 --- /dev/null +++ b/competition/safetyplusplus_folder/networks.py @@ -0,0 +1,166 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +num=256 +class Encoder(nn.Module): + def __init__(self, in_channels): + super(Encoder, self).__init__() + # (5,23,23) + self.conv1=nn.Conv2d(in_channels,16,3) # + self.conv2=nn.Conv2d(16,32,3) # + def forward(self,x): + # import pdb;pdb.set_trace() + in_size = x.size(0) + out = self.conv1(x) + out = F.relu(out) + out = F.max_pool2d(out, 2, 2) # (16,10,10) + out = self.conv2(out) + out = F.relu(out) + out = F.max_pool2d(out,2,2) + out = torch.mean(out,dim=1,keepdim=True) + out = out.view(in_size,-1) # 扁平化flat然后传入全连接层 + return out + + +class Actor(nn.Module): + def __init__(self, state_dim,in_channels,out_channels, action_dim, max_action): + super(Actor, self).__init__() + + self.conv1=nn.Conv2d(in_channels,16,3) # + self.conv2=nn.Conv2d(16,32,3) # + + self.l1 = nn.Linear(state_dim+out_channels, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, action_dim) + + self.max_action = max_action + + + def forward(self, all_state): + global_state=all_state[0] + local_state=all_state[1] + + + in_size = local_state.size(0) + out_local = self.conv1(local_state) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local, 2, 2) + out_local = self.conv2(out_local) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local,2,2) + out_local = torch.mean(out_local,dim=1,keepdim=True) + out_local = out_local.view(in_size,-1) # 扁平化flat然后传入全连接层 + + all_feature=torch.concat([global_state,out_local],1) + + out = F.relu(self.l1(all_feature)) + out = F.relu(self.l2(out)) + return self.max_action * torch.tanh(self.l3(out)) + + +class C_Critic(nn.Module): + def __init__(self, state_dim, action_dim): + super(C_Critic, self).__init__() + + self.l1 = nn.Linear(state_dim + action_dim, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, 1) + + + def forward(self, state, action): + q = F.relu(self.l1(torch.cat([state, action], 1))) + q = F.relu(self.l2(q)) + return self.l3(q) + + +class Critic(nn.Module): + def __init__(self, state_dim,in_channels,out_channels, action_dim): + super(Critic, self).__init__() + + # CNN + self.conv1=nn.Conv2d(in_channels,16,3) # + self.conv2=nn.Conv2d(16,32,3) # + + # Q1 architecture + self.l1 = 
nn.Linear(state_dim + action_dim + out_channels, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, 1) + + # Q2 architecture + self.l4 = nn.Linear(state_dim + action_dim + out_channels , num) + self.l5 = nn.Linear(num, num) + self.l6 = nn.Linear(num, 1) + + + def forward(self, all_state, action): + global_state=all_state[0] + local_state=all_state[1] + in_size = local_state.size(0) + out_local = self.conv1(local_state) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local, 2, 2) # (16,10,10) + out_local = self.conv2(out_local) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local,2,2) + out_local = torch.mean(out_local,dim=1,keepdim=True) + out_local = out_local.view(in_size,-1) # 扁平化flat然后传入全连接层 + # import pdb;pdb.set_trace() + all_feature=torch.concat([global_state,out_local],1) + + sa = torch.cat([all_feature, action], 1) + + q1 = F.relu(self.l1(sa)) + q1 = F.relu(self.l2(q1)) + q1 = self.l3(q1) + + q2 = F.relu(self.l4(sa)) + q2 = F.relu(self.l5(q2)) + q2 = self.l6(q2) + return q1, q2 + + + def Q1(self, all_state, action): + global_state=all_state[0] + local_state=all_state[1] + in_size = local_state.size(0) + out_local = self.conv1(local_state) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local, 2, 2) # (16,10,10) + out_local = self.conv2(out_local) + out_local = F.relu(out_local) + out_local = F.max_pool2d(out_local,2,2) + out_local = torch.mean(out_local,dim=1,keepdim=True) + out_local = out_local.view(in_size,-1) # 扁平化flat然后传入全连接层 + # import pdb;pdb.set_trace() + all_feature=torch.concat([global_state,out_local],1) + + sa = torch.cat([all_feature, action], 1) + + q1 = F.relu(self.l1(sa)) + q1 = F.relu(self.l2(q1)) + q1 = self.l3(q1) + return q1 + + +# predict state-wise \lamda(s) +class MultiplerNet(nn.Module): + def __init__(self, state_dim): + super(MultiplerNet, self).__init__() + + self.l1 = nn.Linear(state_dim, num) + self.l2 = nn.Linear(num, num) + self.l3 = nn.Linear(num, 1) + + + def forward(self, state): + a = F.relu(self.l1(state)) + a = F.relu(self.l2(a)) + #return F.relu(self.l3(a)) + return F.softplus(self.l3(a)) # lagrangian multipliers can not be negative + +# import numpy as np +# encoder=Encoder(5) +# a=np.zeros([1,5,23,23]) +# encoder=encoder.float() +# out=encoder(torch.tensor(a).float()) diff --git a/competition/safetyplusplus_folder/plus_logger.py b/competition/safetyplusplus_folder/plus_logger.py new file mode 100755 index 000000000..cd8c99444 --- /dev/null +++ b/competition/safetyplusplus_folder/plus_logger.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +__author__ = 'Linrui Zhang' + +import csv +import os +import json, time +import numpy as np +from safetyplusplus_folder import log_utils as lu + +class Logger(): + def __init__(self, log_dir="./logs", exp_name=None, env_name=None, seed=0, config=None, filename='evaluator.csv', debug=False): + self.exp_name = exp_name + self.env_name = env_name + self.seed = seed + self.previous_log_time = time.time() + self.start_log_time = self.previous_log_time + self.debug = debug + + if debug: + print(lu.colorize(f"\nDebug mode is activate !!!\nLog will NOT be saved !!!\n", 'red', bold=True)) + else: + num_exps = 0 + self.log_dir = f"./{log_dir}/{exp_name.replace('-', '_')}_{env_name.replace('-', '_')}-seed{seed}" + while True: + if os.path.exists(f"{self.log_dir}-{str(num_exps)}/"): + num_exps += 1 + else: + self.log_dir += f"-{str(num_exps)}/" + os.makedirs(self.log_dir) + break + self.csv_file = open(self.log_dir + '/' + filename, 'w', 
encoding='utf8') + # header={"t_start": time.time(), 'env_id' : env_name, 'exp_name': exp_name, 'seed': seed} + # header = '# {} \n'.format(json.dumps(header)) + # self.csv_file.write(header) + self.logger = csv.DictWriter(self.csv_file, fieldnames=('mean_score', 'total_steps', 'std_score', 'max_score', 'min_score')) + self.logger.writeheader() + self.csv_file.flush() + + if config != None: + lu.save_config(exp_name, config, self.log_dir) + + + def update(self, score, total_steps): + ''' + Score is a list + ''' + current_log_time = time.time() + avg_score = np.mean(score) + std_score = np.std(score) + max_score = np.max(score) + min_score = np.min(score) + + print(lu.colorize(f"\nTime: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, Time spent from previous logger: {(current_log_time - self.previous_log_time):.3f} s", 'yellow', bold=True)) + print(lu.colorize(f"Evaluation over {len(score)} episodes after {total_steps}:", 'yellow', bold=True)) + print(lu.colorize(f"Avg: {avg_score:.3f} Std: {std_score:.3f} Max: {max_score:.3f} Min: {min_score:.3f}\n", 'yellow', bold=True)) + self.previous_log_time = current_log_time + + if not self.debug: + epinfo = {"mean_score": avg_score, "total_steps": total_steps, "std_score": std_score, "max_score": max_score, "min_score": min_score} + self.logger.writerow(epinfo) + self.csv_file.flush() + + +class SafeLogger(): + def __init__(self, log_dir="./logs", exp_name=None, env_name=None, seed=0,config=None, filename="logger.csv", fieldnames=[], debug=False): + self.exp_name = exp_name + self.env_name = env_name + self.seed = seed + self.previous_log_time = time.time() + self.start_log_time = self.previous_log_time + self.debug = debug + self.fieldnames = ["total_steps"] + fieldnames + + if debug: + print(lu.colorize(f"\nDebug mode is activate !!!\nLog will NOT be saved !!!\n", 'red', bold=True)) + else: + num_exps = 0 + self.log_dir = f"./{log_dir}/{exp_name.replace('-', '_')}_{env_name.replace('-', '_')}-seed{seed}" + while True: + if os.path.exists(f"{self.log_dir}-{str(num_exps)}/"): + num_exps += 1 + else: + self.log_dir += f"-{str(num_exps)}/" + os.makedirs(self.log_dir) + break + self.csv_file = open(self.log_dir + '/' + filename, 'w', encoding='utf8') + # header={"t_start": time.time(), 'env_id' : env_name, 'exp_name': exp_name, 'seed': seed} + # header = '# {} \n'.format(json.dumps(header)) + # self.csv_file.write(header) + self.logger = csv.DictWriter(self.csv_file, fieldnames=(self.fieldnames)) + self.logger.writeheader() + self.csv_file.flush() + + if config != None: + lu.save_config(exp_name, config, self.log_dir) + + + def update(self, fieldvalues, total_steps): + epinfo = {} + current_log_time = time.time() + fieldvalues = [total_steps] + [int(current_log_time - self.previous_log_time)]+ fieldvalues + print(lu.colorize(f"\nTime: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, Time spent from previous logger: {(current_log_time - self.previous_log_time):.3f} s", 'blue', bold=True)) + print(lu.colorize(f"CustomLogger with fileds: {self.fieldnames}", 'blue', bold=True)) + print(lu.colorize(f"fieldvalues: {fieldvalues}\n", 'blue', bold=True)) + self.previous_log_time = current_log_time + + if not self.debug: + for filedname, filedvalue in zip(self.fieldnames, fieldvalues): + epinfo.update({filedname: filedvalue}) + self.logger.writerow(epinfo) + self.csv_file.flush() + \ No newline at end of file diff --git a/competition/safetyplusplus_folder/replay_buffer.py b/competition/safetyplusplus_folder/replay_buffer.py new file mode 100755 
index 000000000..59be13c59 --- /dev/null +++ b/competition/safetyplusplus_folder/replay_buffer.py @@ -0,0 +1,311 @@ +import numpy as np +import torch + + +class SimpleReplayBufferIros(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.phrase=4 + self.one_phrase_max_size= int(max_size/self.phrase) + self.ptr = [0,0,0,0] + + self.size = [0,0,0,0] + + self.state = np.zeros(( self.phrase,self.one_phrase_max_size, state_dim)) + self.action = np.zeros(( self.phrase,self.one_phrase_max_size, action_dim)) + self.next_state = np.zeros(( self.phrase,self.one_phrase_max_size, state_dim)) + self.reward = np.zeros(( self.phrase,self.one_phrase_max_size, 1)) + self.not_done = np.zeros(( self.phrase,self.one_phrase_max_size, 1)) + + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + + def add(self,phrase, state, action, next_state, reward, done): + ptr=self.ptr[phrase] + self.state[phrase][ptr] = state + self.action[phrase][ptr] = action + self.next_state[phrase][ptr] = next_state + self.reward[phrase][ptr] = reward + self.not_done[phrase][ptr] = 1. - done + + self.ptr[phrase] = (self.ptr[phrase] + 1) % self.one_phrase_max_size + self.size[phrase] = min(self.size[phrase] + 1, self.one_phrase_max_size) + + + def sample(self, batch_size): + # inverse sample , give the end phrase the top priotity + size_3= 64 if self.size[3] > batch_size/4 else self.size[3] + size_2= 64 if self.size[2] > batch_size/4 else self.size[2] + size_1= 64 if self.size[1] > batch_size/4 else self.size[1] + size_0= batch_size -size_1 - size_2-size_3 + + ind0 = np.random.randint(0, self.size[0], size=size_0) + ind1 = np.random.randint(0, self.size[1], size=size_1) + ind2 = np.random.randint(0, self.size[2], size=size_2) + ind3 = np.random.randint(0, self.size[3], size=size_3) + + if size_0 !=0: + state=self.state[0][ind0] + action=self.action[0][ind0] + next_state=self.next_state[0][ind0] + reward=self.reward[0][ind0] + not_done=self.not_done[0][ind0] + if size_1!=0: + state=np.vstack((state,self.state[1][ind1])) + action=np.vstack((action,self.action[1][ind1])) + next_state=np.vstack((next_state,self.next_state[1][ind1])) + reward=np.vstack((reward,self.reward[1][ind1])) + not_done=np.vstack((not_done,self.not_done[1][ind1])) + if size_2!=0: + state=np.vstack((state,self.state[2][ind2])) + action=np.vstack((action,self.action[2][ind2])) + next_state=np.vstack((next_state,self.next_state[2][ind2])) + reward=np.vstack((reward,self.reward[2][ind2])) + not_done=np.vstack((not_done,self.not_done[2][ind2])) + if size_3!=0: + state=np.vstack((state,self.state[3][ind3])) + action=np.vstack((action,self.action[3][ind3])) + next_state=np.vstack((next_state,self.next_state[3][ind3])) + reward=np.vstack((reward,self.reward[3][ind3])) + not_done=np.vstack((not_done,self.not_done[3][ind3])) + return ( + torch.FloatTensor(state).to(self.device), + torch.FloatTensor(action).to(self.device), + torch.FloatTensor(next_state).to(self.device), + torch.FloatTensor(reward).to(self.device), + torch.FloatTensor(not_done).to(self.device), + ) + +class SimpleReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda:0" if 
torch.cuda.is_available() else "cpu") + + + def add(self, state, action, next_state, reward, done): + self.state[self.ptr] = state + self.action[self.ptr] = action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.not_done[self.ptr] = 1. - done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) + +class IrosReplayBuffer(object): + def __init__(self, global_state_dim,local_state_shape ,action_dim, max_size=int(1e5),load=True): + self.max_size = max_size + + z=local_state_shape[0] + x=local_state_shape[1] + y=local_state_shape[2] + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.global_state = torch.zeros((max_size, global_state_dim)).to(self.device) + self.local_state = torch.zeros((max_size, z,x,y)).to(self.device) + self.action = torch.zeros((max_size, action_dim)).to(self.device) + self.next_global_state = torch.zeros((max_size, global_state_dim)).to(self.device) + self.next_local_state = torch.zeros((max_size, z,x,y)).to(self.device) + self.reward = torch.zeros((max_size, 1)).to(self.device) + self.not_done = torch.zeros((max_size, 1)).to(self.device) + self.load=load + self.pretrain_size=int(1e4) + if self.load : + self.load_buffer() + self.ptr = self.pretrain_size + self.size = self.pretrain_size + else: + self.ptr = 0 + self.size = 0 + + def add(self, global_state,local_state, action, next_global_state,next_local_state, reward, done): + + self.global_state[self.ptr] = torch.FloatTensor(global_state) + self.local_state[self.ptr] = torch.FloatTensor(local_state) + self.action[self.ptr] = torch.FloatTensor(action) + self.next_global_state[self.ptr] = torch.FloatTensor(next_global_state) + self.next_local_state[self.ptr] = torch.FloatTensor(next_local_state) + self.reward[self.ptr] = torch.FloatTensor(np.array([reward])) + self.not_done[self.ptr] = torch.FloatTensor(np.array([1. 
- done])) + if self.load: + self.ptr = (self.ptr + 1) if self.ptr + 1 !=self.max_size else self.pretrain_size + else: + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + # import pdb;pdb.set_trace() + # print(ind) + return (self.global_state[ind],self.local_state[ind],self.action[ind],self.next_global_state[ind],self.next_local_state[ind],self.reward[ind],self.not_done[ind]) + + def write(self): + np.save("buffer_npy/global_state.npy",np.array(self.global_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/local_state.npy",np.array(self.local_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/action.npy",np.array(self.action[0:self.pretrain_size].cpu())) + np.save("buffer_npy/next_global_state.npy",np.array(self.next_global_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/next_local_state.npy",np.array(self.next_local_state[0:self.pretrain_size].cpu())) + np.save("buffer_npy/reward.npy",np.array(self.reward[0:self.pretrain_size].cpu())) + np.save("buffer_npy/not_done.npy",np.array(self.not_done[0:self.pretrain_size].cpu())) + + def load_buffer(self): + expert_global_state=torch.FloatTensor(np.load("buffer_npy/global_state.npy")) + expert_local_state=torch.FloatTensor(np.load("buffer_npy/local_state.npy")) + expert_action=torch.FloatTensor(np.load("buffer_npy/action.npy")) + expert_next_global_state=torch.FloatTensor(np.load("buffer_npy/next_global_state.npy")) + expert_next_local_state=torch.FloatTensor(np.load("buffer_npy/next_local_state.npy")) + expert_reward=torch.FloatTensor(np.load("buffer_npy/reward.npy")) + expert_not_done=torch.FloatTensor(np.load("buffer_npy/not_done.npy")) + self.global_state[0:self.pretrain_size] = expert_global_state + self.local_state[0:self.pretrain_size] = expert_local_state + self.action[0:self.pretrain_size] = expert_action + self.next_global_state[0:self.pretrain_size] = expert_next_global_state + self.next_local_state[0:self.pretrain_size] = expert_next_local_state + self.reward[0:self.pretrain_size] = expert_reward + self.not_done[0:self.pretrain_size] = expert_not_done + print("load ok") + + +class CostReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.cost = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + def add(self, state, action, next_state, reward, cost, done): + self.state[self.ptr] = state + self.action[self.ptr] = action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.cost[self.ptr] = cost + self.not_done[self.ptr] = 1. 
- done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.cost[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) + +class RecReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.raw_action = np.zeros((max_size, action_dim)) + self.rec_action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.cost = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + def add(self, state, raw_action, rec_action, next_state, reward, cost, done): + self.state[self.ptr] = state + self.raw_action[self.ptr] = raw_action + self.rec_action[self.ptr] = rec_action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.cost[self.ptr] = cost + self.not_done[self.ptr] = 1. - done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.raw_action[ind]).to(self.device), + torch.FloatTensor(self.rec_action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.cost[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) + +class SafetyLayerReplayBuffer(object): + def __init__(self, state_dim, action_dim, max_size=int(1e6)): + self.max_size = max_size + self.ptr = 0 + self.size = 0 + + self.state = np.zeros((max_size, state_dim)) + self.action = np.zeros((max_size, action_dim)) + self.next_state = np.zeros((max_size, state_dim)) + self.reward = np.zeros((max_size, 1)) + self.cost = np.zeros((max_size, 1)) + self.prev_cost = np.zeros((max_size, 1)) + self.not_done = np.zeros((max_size, 1)) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + + def add(self, state, action, next_state, reward, cost, prev_cost,done): + self.state[self.ptr] = state + self.action[self.ptr] = action + self.next_state[self.ptr] = next_state + self.reward[self.ptr] = reward + self.cost[self.ptr] = cost + self.prev_cost[self.ptr] = prev_cost + self.not_done[self.ptr] = 1. 
- done + + self.ptr = (self.ptr + 1) % self.max_size + self.size = min(self.size + 1, self.max_size) + + + def sample(self, batch_size): + ind = np.random.randint(0, self.size, size=batch_size) + return ( + torch.FloatTensor(self.state[ind]).to(self.device), + torch.FloatTensor(self.action[ind]).to(self.device), + torch.FloatTensor(self.next_state[ind]).to(self.device), + torch.FloatTensor(self.reward[ind]).to(self.device), + torch.FloatTensor(self.cost[ind]).to(self.device), + torch.FloatTensor(self.prev_cost[ind]).to(self.device), + torch.FloatTensor(self.not_done[ind]).to(self.device), + ) \ No newline at end of file diff --git a/competition/safetyplusplus_folder/safetyplusplus.py b/competition/safetyplusplus_folder/safetyplusplus.py new file mode 100755 index 000000000..18d5e3094 --- /dev/null +++ b/competition/safetyplusplus_folder/safetyplusplus.py @@ -0,0 +1,225 @@ +import copy +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from safetyplusplus_folder.networks import C_Critic,Critic,Actor + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class TD3Usl(object): + def __init__( + self, + state_dim, + action_dim, + max_action, + kappa, + delta, + eta=0.05, + K = 20, + rew_discount=0.99, + cost_discount=0.99, + tau=0.005, + policy_noise=0.2, + noise_clip=0.5, + policy_freq=2, + expl_noise = 0.1, + ): + + self.actor = Actor(state_dim, action_dim, max_action).to(device) + self.actor_target = copy.deepcopy(self.actor) + self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=3e-4) + + self.critic = Critic(state_dim, action_dim).to(device) + self.critic_target = copy.deepcopy(self.critic) + self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=3e-4) + + self.action_dim = action_dim + self.max_action = max_action + self.rew_discount = rew_discount + self.cost_discount = cost_discount + self.tau = tau + self.policy_noise = policy_noise + self.noise_clip = noise_clip + self.policy_freq = policy_freq + + self.expl_noise = expl_noise + + self.total_it = 0 + + self.C_critic = C_Critic(state_dim, action_dim).to(device) + self.C_critic_target = copy.deepcopy(self.C_critic) + self.C_critic_optimizer = torch.optim.Adam(self.C_critic.parameters(), lr=3e-4) + + + self.eta = eta + self.K = K + self.kappa = kappa + self.delta = delta + + + def select_action(self, state,use_usl=False, usl_iter=20,exploration=False): + state = torch.FloatTensor(state.reshape(1, -1)).to(device) + action = self.actor(state).cpu().data.numpy().flatten() + if exploration: + noise = np.random.normal(0, self.max_action * self.expl_noise, size=self.action_dim) + action = (action + noise).clip(-self.max_action, self.max_action) + if use_usl: + action = self.safety_correction(state,action,eta=self.eta,Niter=usl_iter) + return action + + def pred_cost(self, state, action): + state = torch.FloatTensor(state.reshape(1, -1)).to(device) + action = torch.FloatTensor(action.reshape(1, -1)).to(device) + return self.C_critic(state,action).item() + + def safety_correction(self,state,action,eta,Niter,verbose=False): + if not torch.is_tensor(state): + state = torch.tensor(state.reshape(1, -1),requires_grad=False).float().to(device) + if not torch.is_tensor(action): + action = torch.tensor(action.reshape(1, -1),requires_grad=True).float().to(device) + pred = self.C_critic_target(state,action) + if pred.item() <= self.delta: + return torch.clamp(action,-self.max_action,self.max_action).cpu().data.numpy().flatten() + else: + for i in range(Niter): 
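+                # Added commentary: this is the unrolled safety-correction loop. Each iteration
+                # backpropagates the target cost critic C(s, a) with respect to the action,
+                # normalizes the gradient by its largest component Z, and moves the action by a
+                # step of size `eta` away from predicted constraint violation; the loop exits
+                # early once the predicted cost falls below `delta` or the action leaves the
+                # admissible range.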
+ if max(np.abs(action.cpu().data.numpy().flatten())) > self.max_action: + break + action.retain_grad() + self.C_critic_target.zero_grad() + pred = self.C_critic_target(state,action) + pred.backward(retain_graph=True) + if verbose and i % 5 == 0: + print(f'a{i} = {action.cpu().data.numpy().flatten()}, C = {pred.item()}') + if pred.item() <= self.delta: + break + Z = np.max(np.abs(action.grad.cpu().data.numpy().flatten())) + action = action - eta * action.grad / (Z + 1e-8) + #print(i,pred.item()) + return torch.clamp(action,-self.max_action,self.max_action).cpu().data.numpy().flatten() + + def train(self, replay_buffer, batch_size=256): + # Sample replay buffer + state, action, next_state, reward, cost, not_done = replay_buffer.sample(batch_size) + + # Compute the target C value + target_C = self.C_critic_target(next_state, self.actor_target(next_state)) + target_C = cost + (not_done * self.cost_discount * target_C).detach() + + # Get current C estimate + current_C = self.C_critic(state, action) + + # Compute critic loss + C_critic_loss = F.mse_loss(current_C, target_C) + + # Optimize the critic + self.C_critic_optimizer.zero_grad() + C_critic_loss.backward() + self.C_critic_optimizer.step() + + with torch.no_grad(): + # Select action according to policy and add clipped noise + noise = ( + torch.randn_like(action) * self.policy_noise + ).clamp(-self.noise_clip, self.noise_clip) + + next_action = ( + self.actor_target(next_state) + noise + ).clamp(-self.max_action, self.max_action) + + # Compute the target Q value + target_Q1, target_Q2 = self.critic_target(next_state, next_action) + target_Q = torch.min(target_Q1, target_Q2) + target_Q = reward + not_done * self.rew_discount * target_Q + + # Get current Q estimates + current_Q1, current_Q2 = self.critic(state, action) + + # Compute critic loss + critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) + + # Optimize the critic + self.critic_optimizer.zero_grad() + critic_loss.backward() + self.critic_optimizer.step() + + + # Delayed policy updates + if self.total_it % self.policy_freq == 0: + + # Compute actor loss + action = self.actor(state) + actor_loss = ( + - self.critic.Q1(state, action) \ + + self.kappa * F.relu(self.C_critic(state, action) - self.delta) \ + ).mean() + + # Optimize the actor + self.actor_optimizer.zero_grad() + actor_loss.backward() + self.actor_optimizer.step() + + # Update the frozen target models + for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + for param, target_param in zip(self.C_critic.parameters(), self.C_critic_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + + + def save(self, filename): + torch.save(self.critic.state_dict(), filename + "_critic") + torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer") + + torch.save(self.C_critic.state_dict(), filename + "_C_critic") + torch.save(self.C_critic_optimizer.state_dict(), filename + "_C_critic_optimizer") + + torch.save(self.actor.state_dict(), filename + "_actor") + torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer") + + + def load(self, filename): + self.critic.load_state_dict(torch.load(filename + "_critic")) + 
self.critic_optimizer.load_state_dict(torch.load(filename + "_critic_optimizer")) + self.critic_target = copy.deepcopy(self.critic) + + self.C_critic.load_state_dict(torch.load(filename + "_C_critic")) + self.C_critic_optimizer.load_state_dict(torch.load(filename + "_C_critic_optimizer")) + self.C_critic_target = copy.deepcopy(self.C_critic) + + self.actor.load_state_dict(torch.load(filename + "_actor")) + self.actor_optimizer.load_state_dict(torch.load(filename + "_actor_optimizer")) + self.actor_target = copy.deepcopy(self.actor) + + +# Runs policy for X episodes and returns average reward +# A fixed seed is used for the eval environment +def eval_policy(policy, eval_env, seed, flag, eval_episodes=5,use_usl=False,usl_iter=20): + avg_reward = 0. + avg_cost = 0. + for _ in range(eval_episodes): + if flag == 'constraint_violation': + reset_info, done = eval_env.reset(), False + state = reset_info[0] + else: + state, done = eval_env.reset(), False + while not done: + action = policy.select_action(np.array(state),use_usl=use_usl, usl_iter=usl_iter) + state, reward, done, info = eval_env.step(action) + avg_reward += reward + if info[flag]!=0: + avg_cost += 1 + + avg_reward /= eval_episodes + avg_cost /= eval_episodes + + print("---------------------------------------") + print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f} Cost {avg_cost:.3f}.\ + Use Unrolling Safety Layer : {use_usl}") + print("---------------------------------------") + return avg_reward,avg_cost \ No newline at end of file diff --git a/competition/safetyplusplus_folder/slam.py b/competition/safetyplusplus_folder/slam.py new file mode 100644 index 000000000..e80a0cd74 --- /dev/null +++ b/competition/safetyplusplus_folder/slam.py @@ -0,0 +1,260 @@ +import numpy as np +from matplotlib import pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import cv2 +import time + +class SLAM(): + def __init__(self,gs=0.05) -> None: + # slam建图范围 + self.X_MIN = -3.5 + self.X_MAX = 3.5 + self.Y_MIN = -3.5 + self.Y_MAX = 3.5 + self.Z_MIN = 0.0 + self.Z_MAX = 2.0 + # 栅格/元胞大小 grid shape = 5cm * 5cm * 5cm + self.gs = gs + # 障碍物初始位置 + self.obstacles = [ + [1.5, -2.5, 0, 0, 0, 0], + [0.5, -1, 0, 0, 0, 0], + [1.5, 0, 0, 0, 0, 0], + [-1, 0, 0, 0, 0, 0] + ] + # 障碍物高度 + self.obstacle_height = 1.05 + # 门的初始位置,用不上,info里有更精确的 + self.gates = [ + [0.5, -2.5, 0, 0, 0, -1.57, 0], + [2, -1.5, 0, 0, 0, 0, 1], + [0, 0.2, 0, 0, 0, 1.57, 1], + [-0.5, 1.5, 0, 0, 0, 0, 0] + ] + # 门的尺寸 + self.gate_tall = 1.0 + self.gate_low = 0.525 + self.edge = 0.45/2 + + def reset_occ_map(self): + # reset the occupation map + self.occ_map = np.zeros(( + round((self.Z_MAX -self.Z_MIN)/self.gs), + round((self.X_MAX -self.X_MIN)/self.gs), + round((self.Y_MAX -self.Y_MIN)/self.gs), + ),dtype=int) + + # # 不用初始化门 + # for gate in self.gates: + # self._set_gate_occupied(gate[:-1],gate[-1]) + + # 初始化障碍物 + for obstacle in self.obstacles: + self._set_obstacle_occupied(obstacle) + + self.current_id = -1 + + def update_occ_map(self,info): + # env reset 时,info的信息里没有这项 + # 另外,视野里没有时,无需更新 + if not info.get('current_target_gate_in_range'): + return + else: + if info.get('current_target_gate_id') > self.current_id: + self.current_id = info.get('current_target_gate_id') + self._set_gate_occupied(info.get('current_target_gate_pos'),info.get('current_target_gate_type')) + self.plot_occ_map(str(self.current_id+1)) + print(self.current_id) + + def _set_pos_occupied(self,x,y,z): + # 设置绝对坐标(x,y,z)处为障碍占据 + assert self.X_MIN <= x <= self.X_MAX \ + and self.Y_MIN <= y <= self.Y_MAX \ + and 
self.Z_MIN <= z <= self.Z_MAX + x_idx = round((x-self.X_MIN)/self.gs) + y_idx = round((y-self.Y_MIN)/self.gs) + z_idx = round((z-self.Z_MIN)/self.gs) + self.occ_map[z_idx][x_idx][y_idx] = -1 + + def _set_obstacle_occupied(self,obstacle): + # 设置(x,y)处的柱子为障碍占据 + x,y,_,_,_,_, = obstacle + for x_ in np.arange(x-0.05,x+0.1,0.05): + for y_ in np.arange(y-0.05,y+0.1,0.05): + z = 0 + while z <= self.obstacle_height: + self._set_pos_occupied(x_,y_,z) + z += self.gs + + # 这个地方不对,并不是只有0度和90度,门的角度yaw也会随机 + # 目前只是简单的按照绝对值做了处理, + def _set_gate_occupied(self,gate,gate_type=0): + # 设置(x,y)处类型为type的门为障碍占据 + x,y,_,_,_,direct = gate + height = self.gate_tall if gate_type == 0 else self.gate_low + z = 0 + # 基座 + while z <= height - self.edge-0.05: + self._set_pos_occupied(x,y,z) + z += self.gs + # 下门框 + while z <= height - self.edge+0.05: + if abs(direct) <= 0.15: + for tmp_x in np.arange(x-self.edge,x+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(tmp_x,y,z) + else: + for tmp_y in np.arange(y-self.edge,y+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(x,tmp_y,z) + z += self.gs + # 左右门框 + while z <= height + self.edge-0.05: + if abs(direct) <= 0.15: + self._set_pos_occupied(x-self.edge,y,z) + self._set_pos_occupied(x+self.edge,y,z) + else: + self._set_pos_occupied(x,y-self.edge,z) + self._set_pos_occupied(x,y+self.edge,z) + z += self.gs + # 上门框 + while z <= height + self.edge+0.05: + if abs(direct) <= 0.15: + for tmp_x in np.arange(x-self.edge,x+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(tmp_x,y,z) + else: + for tmp_y in np.arange(y-self.edge,y+self.edge+self.gs,self.gs/2): + self._set_pos_occupied(x,tmp_y,z) + z += self.gs + + # 绘制 + def plot_occ_map(self,name='test'): + xs = [] + ys = [] + zs = [] + C,W,H= self.occ_map.shape + for z in range(C): + for x in range(W): + for y in range(H): + if self.occ_map[z][x][y] == -1 : + xs.append(x*self.gs+self.X_MIN) + ys.append(y*self.gs+self.Y_MIN) + zs.append(z*self.gs+self.Z_MIN) + + ax = plt.subplot(111, projection='3d') # 创建一个三维的绘图工程 + # 将数据点分成三部分画,在颜色上有区分度 + ax.scatter(xs, ys, zs, c='black') # 绘制数据点 + # plt.xlim((self.X_MIN, self.X_MAX)) + # plt.ylim((self.Y_MIN, self.Y_MAX)) + # plt.savefig(name+'.png') + + # 根据所在高度,生成2d 障碍物图像 + # TODO 出现问题,在刚开始训练的时候会超过约定的边界, + def generate_obs_img(self,obs,name='test',save=False): + x,y,z=obs[0],obs[2],obs[4] + x_idx = round((x-self.X_MIN)/self.gs) + y_idx = round((y-self.Y_MIN)/self.gs) + z_idx = round((z-self.Z_MIN)/self.gs) + if save: + occ_map_2d = self.occ_map[z_idx] * 255 + try: + occ_map_2d[x_idx][y_idx] = 125 + except: + print('out of region') + cv2.imwrite('obs/'+name+'.png',occ_map_2d) + z_max=len(self.occ_map) - 1 + return self.occ_map[z_idx] if z_idx<=z_max else self.occ_map[z_max] + + def generate_3obs_img(self,obs,target_vector,name='test',save=False): + + z_center=2 + center=11 + tall,height,width=[(2*z_center+1),(2*center+1),(2*center+1)] + # tall, height,width. 
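# A minimal, self-contained sketch (not part of the patch above) of the
# world-to-grid conversion that _set_pos_occupied relies on: with bounds of
# +/-3.5 m in x/y, 0..2 m in z and 0.05 m cells, a metric position maps to
# round((coord - min) / gs), and occ_map is indexed in (z, x, y) order.
import numpy as np

def world_to_index(x, y, z, gs=0.05, x_min=-3.5, y_min=-3.5, z_min=0.0):
    """Convert a metric position to (z, x, y) cell indices of the occupancy map."""
    return (int(round((z - z_min) / gs)),
            int(round((x - x_min) / gs)),
            int(round((y - y_min) / gs)))

# The note above _set_gate_occupied points out that gate yaw is not limited to
# 0 or 90 degrees; one possible fix (a hypothetical helper, not the code used
# here) samples the crossbar along the gate's local axis using cos/sin of the
# yaw, with the same 0.225 m half-width (self.edge):
def crossbar_cells(x, y, z, yaw, half_width=0.225, gs=0.05):
    """Cells covered by a horizontal gate bar centred at (x, y, z) with heading yaw."""
    return [world_to_index(x + s * np.cos(yaw), y + s * np.sin(yaw), z)
            for s in np.arange(-half_width, half_width + gs, gs / 2)]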
智能体是在最中间的位置 + obs_img=np.zeros([(2*z_center+1),(2*center+1),(2*center+1)]) + + x,y,z=obs[0],obs[2],obs[4] + x_idx = round((x-self.X_MIN)/self.gs) + y_idx = round((y-self.Y_MIN)/self.gs) + z_idx = round((z-self.Z_MIN)/self.gs) + + z_low=z_idx-z_center + z_top=z_idx+z_center + z_max=len(self.occ_map) - 1 + # fill_z_idx=0 + if z_low<0: # z_low<0 z_top>0 + fill_z_idx=0 + for z_index in range(z_low,0,1): + obs_img[fill_z_idx]=- np.ones([(2*center+1),(2*center+1)]) + fill_z_idx+=1 + for z_index in range(0,z_top+1,1): + if x_idx>=center and x_idx + center <=len(self.occ_map[1])-1 and y_idx>=center and y_idx+center <=len(self.occ_map[1][1])-1: + for x in range(0,(2*center+1)): + obs_img[fill_z_idx][x]=self.occ_map[z_index][x_idx-center+x][y_idx-center:y_idx+(center+1)] + fill_z_idx+=1 + elif z_top<=z_max: # z_low>0,z_top <= z_max + fill_z_idx=0 + if x_idx>=center and x_idx + center <=len(self.occ_map[1])-1 and y_idx>=center and y_idx+center <=len(self.occ_map[1][1])-1: + for z_index in range(z_low,z_top+1,1): + for x in range(0,(2*center+1)): + obs_img[fill_z_idx][x]=self.occ_map[z_index][x_idx-center+x][y_idx-center:y_idx+(center+1)] + fill_z_idx+=1 + + else : # z_low>0,z_top > z_max + fill_z_idx=0 + for z_index in range(z_low,z_max+1,1): + if x_idx>=center and x_idx + center <=len(self.occ_map[1])-1 and y_idx>=center and y_idx+center <=len(self.occ_map[1][1])-1: + for x in range(0,(2*center+1)): + obs_img[fill_z_idx][x]=self.occ_map[z_index][x_idx-center+x][y_idx-center:y_idx+(center+1)] + fill_z_idx+=1 + while fill_z_idx <2*z_center+1: + obs_img[fill_z_idx]=- np.ones([(2*center+1),(2*center+1)]) + fill_z_idx+=1 + + # fill target_vector way 2 + if target_vector[2]>=0: + for _ in range(z_center,2*z_center+1): + if target_vector[0]>=0 : + if target_vector[1]>=0: + obs_img[_][center:center*2+1,center:center*2+1]=np.where(obs_img[_][center:center*2+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][center:center*2+1,0:center+1]=np.where(obs_img[_][center:center*2+1,0:center+1]==-1,-1,0.5) + else: + if target_vector[1]>=0: + obs_img[_][0:center+1,center:center*2+1]=np.where(obs_img[_][0:center+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][0:center+1,0:center+1]=np.where(obs_img[_][0:center+1,0:center+1]==-1,-1,0.5) + else: + for _ in range(0,z_center+1): + if target_vector[0]>=0 : + if target_vector[1]>=0: + obs_img[_][center:center*2+1,center:center*2+1]=np.where(obs_img[_][center:center*2+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][center:center*2+1,0:center+1]=np.where(obs_img[_][center:center*2+1,0:center+1]==-1,-1,0.5) + else: + if target_vector[1]>=0: + obs_img[_][0:center+1,center:center*2+1]=np.where(obs_img[_][0:center+1,center:center*2+1]==-1,-1,0.5) + else: + obs_img[_][0:center+1,0:center+1]=np.where(obs_img[_][0:center+1,0:center+1]==-1,-1,0.5) + obs_img[z_center-1:z_center+2,center-1:center+2,center-1:center+2]+=0.2 + obs_img[z_center][center][center]+=0.2 + # if save: + # i=0 + # obs_img = (obs_img * 255 + 255)/2 + # for z_index in range(len(obs_img)) : + # occ_map_2d = self.occ_map[z_idx-z_center + z_index if z_idx-z_center + z_index>=0 else 0] * 255 + 255 + # occ_map_2d [x_idx][y_idx] = 125 + # obs_img[z_index][center][center]=125 + # cv2.imwrite('obs2/'+str(i)+'.png',obs_img[z_index]) + # cv2.imwrite('obs2/'+str(i)+'_occ.png',occ_map_2d) + # i+=1 + # import pdb;pdb.set_trace() + + return obs_img + +# if __name__ == '__main__': +# m_slam = SLAM() +# m_slam.reset_occ_map() +# m_slam.plot_occ_map('test') + # for i in range(len(m_slam.occ_map)): + # occ_map_2d = 
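# generate_3obs_img above builds a (2*z_center+1, 2*center+1, 2*center+1) =
# (5, 23, 23) egocentric crop of occ_map centred on the drone, filling z-layers
# that fall outside the map with -1 and biasing the quadrant containing the
# target direction with 0.5. A padded-array sketch of the cropping step
# (illustrative only; it also treats out-of-map x/y cells as -1, whereas the
# code above leaves an out-of-range x/y window at zero):
import numpy as np

def local_crop(occ_map, z_idx, x_idx, y_idx, z_half=2, xy_half=11):
    """Return a (2*z_half+1, 2*xy_half+1, 2*xy_half+1) window centred on the agent."""
    padded = np.pad(occ_map,
                    ((z_half, z_half), (xy_half, xy_half), (xy_half, xy_half)),
                    mode="constant", constant_values=-1)
    return padded[z_idx:z_idx + 2 * z_half + 1,
                  x_idx:x_idx + 2 * xy_half + 1,
                  y_idx:y_idx + 2 * xy_half + 1]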
m_slam.occ_map[i] * 255 + # time.sleep(0.01) + # cv2.imwrite('test/'+str(int(round(time.time() * 1000)))+'.png',occ_map_2d) \ No newline at end of file diff --git a/competition/safetyplusplus_folder/unconstrained.py b/competition/safetyplusplus_folder/unconstrained.py new file mode 100755 index 000000000..56a2e0d2e --- /dev/null +++ b/competition/safetyplusplus_folder/unconstrained.py @@ -0,0 +1,159 @@ +import copy +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from safetyplusplus_folder.networks import Critic,Actor ,Encoder + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class TD3(object): + def __init__( + self, + state_dim, + action_dim, + max_action, + rew_discount=0.99, + tau=0.005, + policy_noise=0.2, + noise_clip=0.5, + policy_freq=2, + expl_noise = 0.1, + ): + local_feature_map_in_channels=5 + example_encoder=Encoder(local_feature_map_in_channels) + a=np.zeros([1,local_feature_map_in_channels,23,23]) + example_encoder=example_encoder.float() + self.local_feature_map_out_channels=example_encoder(torch.tensor(a).float()).shape[1] + + + self.actor = Actor(state_dim,local_feature_map_in_channels,self.local_feature_map_out_channels, action_dim, max_action).to(device) + self.actor_target = copy.deepcopy(self.actor) + self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=3e-4) + + self.critic = Critic(state_dim,local_feature_map_in_channels,self.local_feature_map_out_channels, action_dim).to(device) + self.critic_target = copy.deepcopy(self.critic) + self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=3e-4) + torch.set_num_threads(10) + self.action_dim = action_dim + self.max_action = max_action + self.rew_discount = rew_discount + self.tau = tau + self.policy_noise = policy_noise + self.noise_clip = noise_clip + self.policy_freq = policy_freq + + self.expl_noise = expl_noise + self.total_it = 0 + + + + def select_action(self,all_state,exploration=False): + global_state=torch.FloatTensor(all_state[0]).unsqueeze(0).to(device) + local_state=torch.FloatTensor(all_state[1]).unsqueeze(0).to(device) + action = self.actor([global_state,local_state]).cpu().data.numpy().flatten() + if exploration: + noise = np.random.normal(0, self.max_action * self.expl_noise, size=self.action_dim) + action = (action + noise).clip(-self.max_action, self.max_action) + return action + + + def train(self, replay_buffer, batch_size=256,train_nums=200): + for _ in range(train_nums): + self.total_it+=1 + # Sample replay buffer + global_state,local_state, action, next_global_state,next_local_state, reward, not_done = replay_buffer.sample(batch_size) + # import pdb;pdb.set_trace() + with torch.no_grad(): + # Select action according to policy and add clipped noise + noise = ( + torch.randn_like(action) * self.policy_noise + ).clamp(-self.noise_clip, self.noise_clip) + + next_action = ( + self.actor_target([next_global_state,next_local_state]) + noise + ).clamp(-self.max_action, self.max_action) + + # Compute the target Q value + target_Q1, target_Q2 = self.critic_target([next_global_state,next_local_state], next_action) + target_Q = torch.min(target_Q1, target_Q2) + target_Q = reward + not_done * self.rew_discount * target_Q + + # Get current Q estimates + current_Q1, current_Q2 = self.critic([global_state,local_state], action) + + # Compute critic loss + critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) + + # Optimize the critic + self.critic_optimizer.zero_grad() + 
critic_loss.backward() + self.critic_optimizer.step() + + + # Delayed policy updates + if self.total_it % self.policy_freq == 0: + + # Compute actor loss + action = self.actor([global_state,local_state]) + actor_loss = - self.critic.Q1([global_state,local_state], action).mean() + + # Optimize the actor + self.actor_optimizer.zero_grad() + actor_loss.backward() + self.actor_optimizer.step() + + # Update the frozen target models + for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + + for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): + target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) + # import pdb;pdb.set_trace() + + + + def save(self, filename): + torch.save(self.critic.state_dict(), f"{filename}_critic") + torch.save(self.critic_optimizer.state_dict(), f"{filename}_critic_optimizer") + + torch.save(self.actor.state_dict(), f"{filename}_actor") + torch.save(self.actor_optimizer.state_dict(), f"{filename}_actor_optimizer") + + + def load(self, filename): + self.critic.load_state_dict(torch.load(f"{filename}_critic",map_location=lambda storage, loc: storage.cuda(0))) + self.critic_optimizer.load_state_dict(torch.load(f"{filename}_critic_optimizer",map_location=lambda storage, loc: storage.cuda(0))) + self.critic_target = copy.deepcopy(self.critic) + + self.actor.load_state_dict(torch.load(f"{filename}_actor",map_location=lambda storage, loc: storage.cuda(0))) + self.actor_optimizer.load_state_dict(torch.load(f"{filename}_actor_optimizer",map_location=lambda storage, loc: storage.cuda(0))) + self.actor_target = copy.deepcopy(self.actor) + + +# Runs policy for X episodes and returns average reward +# A fixed seed is used for the eval environment +# def eval_policy(policy, eval_env, seed, flag, eval_episodes=5): +# avg_reward = 0. +# avg_cost = 0. 
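# unconstrained.py above is a TD3 agent whose actor and critic consume both the
# global state vector and the (5, 23, 23) local occupancy crop. Its target
# networks are updated with the usual Polyak average,
# target <- tau * online + (1 - tau) * target, with tau = 0.005 by default.
# A standalone sketch of that update (illustrative, not the class method itself):
import torch

@torch.no_grad()
def soft_update(online: torch.nn.Module, target: torch.nn.Module, tau: float = 0.005):
    """Move each target parameter a small step toward the online network."""
    for p, tp in zip(online.parameters(), target.parameters()):
        tp.mul_(1.0 - tau).add_(tau * p)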
+# for _ in range(eval_episodes): +# if flag == 'constraint_violation': +# reset_info, done = eval_env.reset(), False +# state = reset_info[0] +# else: +# state, done = eval_env.reset(), False +# while not done: +# action = policy.select_action(np.array(state)) +# state, reward, done, info = eval_env.step(action) +# avg_reward += reward +# if info[flag]!=0: +# avg_cost += 1 + +# avg_reward /= eval_episodes +# avg_cost /= eval_episodes + +# print("---------------------------------------") +# print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f} Cost {avg_cost:.3f}.") +# print("---------------------------------------") +# return avg_reward,avg_cost \ No newline at end of file diff --git a/competition/submit_models/1016_01_1800_actor b/competition/submit_models/1016_01_1800_actor new file mode 100644 index 000000000..b23a81f57 Binary files /dev/null and b/competition/submit_models/1016_01_1800_actor differ diff --git a/competition/submit_models/1016_01_1800_actor_optimizer b/competition/submit_models/1016_01_1800_actor_optimizer new file mode 100644 index 000000000..694cdace3 Binary files /dev/null and b/competition/submit_models/1016_01_1800_actor_optimizer differ diff --git a/competition/submit_models/1016_01_1800_critic b/competition/submit_models/1016_01_1800_critic new file mode 100644 index 000000000..c126acb3a Binary files /dev/null and b/competition/submit_models/1016_01_1800_critic differ diff --git a/competition/submit_models/1016_01_1800_critic_optimizer b/competition/submit_models/1016_01_1800_critic_optimizer new file mode 100644 index 000000000..e909cce38 Binary files /dev/null and b/competition/submit_models/1016_01_1800_critic_optimizer differ diff --git a/competition/utils.py b/competition/utils.py new file mode 100644 index 000000000..6affe40bb --- /dev/null +++ b/competition/utils.py @@ -0,0 +1,95 @@ +import numpy as np + + + +def get_state(obs,info,NOMINAL_OBSTACLES,NOMINAL_GATES): + # state info : obs_info(3) + goal_info(3) + all_gate_info(1 + 16) + all_obstacle_info(12) = 35 + # x,y,z 3 + current_x=obs[0] + current_y=obs[2] + current_z=obs[4] + + # obstacle info + # all_obstacles_pos=np.array(NOMINAL_OBSTACLES)[:,0:3] + # for one_obstacle_info in all_obstacles_pos: + # one_obstacle_info[2] = 1.05 # quadrotor.py reset() + + # agent_pos = np.array([current_x, current_y, current_z]) + # dis = [sum((i - agent_pos) ** 2) for i in all_obstacles_pos] + # visual_index = [i for i in range(len(dis)) if dis[i] < 0.75] + # visual_obstacles = obstacles[visual_index] + + # obstacles_state = np.zeros(8) + # x_agent = agent_pos[0] + # y_agent = agent_pos[1] + # for obstacle in visual_obstacles: + # x = obstacle[0] + # y = obstacle[1] + # delta_x = x - x_agent + # delta_y = y - y_agent + # if delta_y * delta_x == 0: + # if delta_y == 0: + # if delta_x >= 0: + # obstacles_state[2] = 1 + # else: + # obstacles_state[6] = 1 + # elif delta_x == 0: + # if delta_y >= 0: + # obstacles_state[0] = 1 + # else: + # obstacles_state[4] = 1 + # else: + # if abs(delta_x) / abs(delta_y) >= 1.71: # gen hao 3 + # if delta_x > 0: + # obstacles_state[2] = 1 + # else: + # obstacles_state[6] = 1 + # elif abs(delta_x) / abs(delta_y) <= 1 / 1.71: # gen hao 3 + # if delta_y > 0: + # obstacles_state[0] = 1 + # else: + # obstacles_state[4] = 1 + # else: + # if delta_y * delta_x > 0: + # if delta_y > 0 and delta_x > 0: + # obstacles_state[1] = 1 + # elif delta_y < 0 and delta_x < 0: + # obstacles_state[5] = 1 + # else: + # if delta_y > 0 and delta_x < 0: + # obstacles_state[7] = 1 + # elif delta_y < 0 
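# The commented-out block above bins nearby obstacles into 8 bearing sectors
# around the drone; "gen hao 3" means sqrt(3) (~1.73, approximated as 1.71), so
# the cardinal sectors span 60 degrees and the diagonal ones 30 degrees. An
# equal-width (45 degree) variant via atan2 is sketched below; the sector
# numbering (0 = +y, increasing toward +x) mirrors the block above but is
# otherwise an assumption:
import numpy as np

def bearing_sector(delta_x, delta_y, n_sectors=8):
    """Index of the sector containing the direction (delta_x, delta_y)."""
    angle = np.arctan2(delta_x, delta_y) % (2 * np.pi)   # 0 at +y, growing toward +x
    width = 2 * np.pi / n_sectors
    return int(((angle + width / 2) // width) % n_sectors)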
and delta_x > 0: + # obstacles_state[3] = 1 + + + # [0.5, -2.5, 0, 0, 0, -1.57, 0],[2, -1.5, 0, 0, 0, 0, 1],[0, 0.2, 0, 0, 0, 1.57, 1],[-0.5, 1.5, 0, 0, 0, 0, 0] + #-> [0.5,-2.5,1,-1.57 , 2,-1.5,0.525,0 , 0,0.2,0.525,1.57, -0.5,1.5,1,0 ] + # all_gates_pos=np.array(NOMINAL_GATES) + # for one_gate_info in all_gates_pos: + # one_gate_info[2]=1 if one_gate_info[6] == 0 else 0.525 + # all_gates_pos=all_gates_pos[:,[0,1,2,5]].flatten() + + if info !={}: + # goal [x,y,z] + if info['current_target_gate_id'] == -1 : + current_goal_pos = goal_pos + current_target_gate_in_range= 0 + else : + # 1 or 0 1 + current_target_gate_in_range= 1 if info['current_target_gate_in_range'] == True else 0 + # [x,y,z,r] exactly if current_target_gate_in_range==1 else [x,y,z,y] noisy (r,p is zero ,ignored ) + current_target_gate_pos = info['current_target_gate_pos'] + if current_target_gate_pos[2]==0: # means that current_target_gate_in_range is False, add default height. + current_target_gate_pos[2]=1 if info['current_target_gate_type'] == 0 else 0.525 + current_target_gate_pos=np.array(current_target_gate_pos)[[0,1,2,5]] + current_goal_pos=current_target_gate_pos[:3] + + else : + current_target_gate_in_range= 0 + current_target_gate_pos = np.zeros(4) + current_goal_pos=np.zeros(3) + state=np.array([current_x,current_y,current_z,current_goal_pos[0]-current_x,current_goal_pos[1]-current_y,current_goal_pos[2]-current_z,current_target_gate_in_range,info['current_target_gate_id']]) + # state=np.append(state,all_obstacles_pos) + # state=np.append(state,all_gates_pos) + # state=np.append(state,current_target_gate_pos) + return state diff --git a/figures/terminal0.png b/figures/terminal0.png deleted file mode 100644 index 5068ab3f5..000000000 Binary files a/figures/terminal0.png and /dev/null differ diff --git a/figures/terminal1.png b/figures/terminal1.png deleted file mode 100644 index 224da45de..000000000 Binary files a/figures/terminal1.png and /dev/null differ diff --git a/poetry-core b/poetry-core new file mode 160000 index 000000000..50f5fd3a7 --- /dev/null +++ b/poetry-core @@ -0,0 +1 @@ +Subproject commit 50f5fd3a79e40b9ecd6be8a19a283c5fa3f3eb10 diff --git a/pyproject.toml b/pyproject.toml index c1a91f2b8..6f7b0c950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,5 +31,5 @@ rich = "^12.5.1 " [tool.poetry.dev-dependencies] [build-system] -requires = ["poetry-core @ git+https://github.com/python-poetry/poetry-core.git@master"] +requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/safe_control_gym/controllers/firmware/firmware_wrapper.py b/safe_control_gym/controllers/firmware/firmware_wrapper.py index 8de5ff7be..22102f33b 100644 --- a/safe_control_gym/controllers/firmware/firmware_wrapper.py +++ b/safe_control_gym/controllers/firmware/firmware_wrapper.py @@ -219,7 +219,8 @@ def step(self, sim_time, action): ''' self._process_command_queue(sim_time) - + total_reward=0 + break_violation_nums=0 # Draws setpoint for debugging purposes if self.verbose: if self.last_visualized_setpoint is not None: @@ -230,11 +231,14 @@ def step(self, sim_time, action): [self.setpoint.position.x, self.setpoint.position.y, self.setpoint.position.z], p.getQuaternionFromEuler([0,0,0]), physicsClientId=self.pyb_client) - + i=1 + # 500Hz 17-18 while self.tick / self.firmware_freq < sim_time + self.ctrl_dt: + # Step the environment and print all returned information. 
obs, reward, done, info = self.env.step(action) - + total_reward+=reward + break_violation_nums += info['constraint_violation'] # Get state values from pybullet cur_pos=np.array([obs[0], obs[2], obs[4]]) # global coord, m cur_vel=np.array([obs[1], obs[3], obs[5]]) # global coord, m/s @@ -292,7 +296,9 @@ def step(self, sim_time, action): done = True self.action = action - return obs, reward, done, info, action + i+=1 + # info['constraint_violation']=break_violation_nums + return obs, total_reward, done, info, action def _update_initial_state(self, obs): @@ -494,6 +500,7 @@ def sendFullStateCmd(self, pos, vel, acc, yaw, rpy_rate, timestep): rpy_rate (list): roll, pitch, yaw rates (rad/s) timestep (float): simulation time when command is sent (s) """ + self.command_queue += [['_sendFullStateCmd', [pos, vel, acc, yaw, rpy_rate, timestep]]] @@ -631,7 +638,7 @@ def sendStopCmd(self): """ self.command_queue += [['_sendStopCmd', []]] def _sendStopCmd(self): - print(f"INFO_{self.tick}: Stop command sent.") + # print(f"INFO_{self.tick}: Stop command sent.") firm.crtpCommanderHighLevelStop() self.full_state_cmd_override = False @@ -647,7 +654,7 @@ def sendGotoCmd(self, pos, yaw, duration_s, relative): """ self.command_queue += [['_sendGotoCmd', [pos, yaw, duration_s, relative]]] def _sendGotoCmd(self, pos, yaw, duration_s, relative): - print(f"INFO_{self.tick}: Go to command sent.") + # print(f"INFO_{self.tick}: Go to command sent.") firm.crtpCommanderHighLevelGoTo(*pos, yaw, duration_s, relative) self.full_state_cmd_override = False diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 613472ef4..4962a8204 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -353,6 +353,10 @@ def __init__(self, else: self.DONE_ON_COMPLETION = False + + ## + self.last_state=None + def reset(self): """(Re-)initializes the environment to start an episode. diff --git a/safe_control_gym/utils/utils.py b/safe_control_gym/utils/utils.py index 2dad183cb..9f45f7f90 100644 --- a/safe_control_gym/utils/utils.py +++ b/safe_control_gym/utils/utils.py @@ -163,7 +163,7 @@ def set_device_from_config(config): """ use_cuda = config.use_gpu and torch.cuda.is_available() - config.device = "cuda" if use_cuda else "cpu" + config.device = "cuda:0" if use_cuda else "cpu" def save_video(name, frames, fps=20):
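# The firmware_wrapper.step changes above accumulate the reward over every
# inner firmware-rate env step (the "500Hz 17-18" comment suggests roughly
# 17-18 firmware ticks per control step) and count constraint violations, then
# return total_reward instead of only the last sub-step's reward; the line that
# would write break_violation_nums back into info is still commented out.
# A stripped-down sketch of that accumulation, assuming a hypothetical `env`
# with the same step() signature and n_substeps >= 1:
def accumulated_step(env, action, n_substeps):
    """Run n_substeps env steps with one action, summing reward and violations."""
    assert n_substeps >= 1
    total_reward, violations = 0.0, 0
    for _ in range(n_substeps):
        obs, reward, done, info = env.step(action)
        total_reward += reward
        violations += int(info.get("constraint_violation", 0))
        if done:
            break
    return obs, total_reward, done, info, violations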