-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.yaml
80 lines (73 loc) · 2.46 KB
/
test.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
---
# Air Hockey Table Parameters
#
# NOTE(review): the source this was recovered from had its indentation
# stripped, so the nesting below was reconstructed from the section comments
# and key names. Verify it against the code that loads this config.
air_hockey:
  simulator_params:
    num_paddles: 1  # TODO: how to add multiple paddles — change number of robots?
    num_pucks: 1  # TODO: more pucks needed? Find where puck reset happens
    num_blocks: 0  # TODO: confirm what consumes this
    num_obstacles: 0  # TODO: confirm what consumes this
    num_targets: 0  # TODO: confirm what consumes this
    absorb_target: false  # TODO: document why this exists
    # Table surface geom, cf. model XML line 57:
    # <geom name="surface" type="box" material="white" size="length width 0.0505" pos="0 0 -0.0505"
    length: 1.064
    width: 0.609
    # Puck geom, cf. model XML line 102:
    # <geom pos="0 0 -0.2" name="puck" type="cylinder" material="red" size="puck_radius 0.009" condim="4" priority="0" group="1"/>
    puck_radius: 0.03175
    # round_gripper.py: ask michael, this is likely an issue
    paddle_radius: 0.0508
    block_width: 0.0254  # TODO: confirm what consumes this
    force_scaling: 1000  # TODO: document units/meaning
    paddle_damping: 3  # TODO: document units/meaning
    # Puck slide-joint damping, cf. model XML lines 97-98:
    # <joint name="puck_x" type="slide" axis="1 0 0" damping="0.01" limited="false"/>
    # <joint name="puck_y" type="slide" axis="0 1 0" damping="0.01" limited="false"/>
    puck_damping: 0.08
    paddle_density: 2500  # TODO: document units/meaning
    puck_density: 250  # TODO: document units/meaning
    render_size: 360  # TODO: how to customize this
    gravity: -0.5  # TODO: find where this is configured in the simulator
    max_force_timestep: 100  # max force we can apply at one timestep
    # NOTE(review): a duplicate `render_size: 360` key was removed here —
    # duplicate keys are invalid YAML 1.2 and most parsers silently keep
    # only the last occurrence.
  simulator: box2d  # or robosuite
  max_timesteps: 300
  # reward_type: 'goal_position_velocity'
  task: 'puck_touch'
  goal_max_x_velocity: 1  # min is -goal_max_x_velocity
  goal_min_y_velocity: 1
  goal_max_y_velocity: 5
  terminate_on_out_of_bounds: true
  terminate_on_enemy_goal: true
  terminate_on_puck_stop: false
  truncate_rew: -1
  wall_bumping_rew: -1
  direction_change_rew: -0.05
  horizontal_vel_rew: -0.1
  diagonal_motion_rew: -0.1
  stand_still_rew: 0.01

# Training Parameters
n_training_steps: 1000000
model_save_filepath: model  # will be saved same dir as tb_log_dir
vec_normalize_save_filepath: vec_normalize.pkl
tb_log_dir: baseline_models
tb_log_name: air_hockey_agent
gamma: 0.99
seed: [0, 1, 2, 3, 4]
n_threads: 1
algorithm: ppo
n_eval_eps: 30
eval_freq: 5000
# this parameter is only used when evaluating demonstrations
print_reward: false

# None of the below are integrated, but examples of what could be added to the config file
# num_envs: 16
# num_steps: 2048
# num_epochs: 10
# num_minibatches: 32
# learning_rate: 3e-4
# gamma: 0.99
# gae_lambda: 0.95
# clip_range: 0.2
# vf_coef: 0.5
# ent_coef: 0.01
# max_grad_norm: 0.5
# value_clip: true
# log_interval: 10
# save_interval: 100