TEM branch merger #120

Open · wants to merge 80 commits into base: main

Changes from all commits (80 commits)
004bff2
Add options for Windows and Unix OS in README
LukeHollingsworth Jul 31, 2023
86d403b
Merge branch 'main' of https://github.com/SainsburyWellcomeCentre/Neu…
LukeHollingsworth Aug 9, 2023
f84c0b3
Merge branch 'main' of https://github.com/SainsburyWellcomeCentre/Neu…
LukeHollingsworth Aug 15, 2023
29b678c
adding experimental runs to TEM
LukeHollingsworth Aug 18, 2023
e36c848
batch environment example working with Simple2D
Aug 18, 2023
29f1dfb
default argument of BatchEnvironment() set to DiscreteObjectEnvironme…
Aug 18, 2023
f634212
default argument of BatchEnvironment() set to DiscreteObjectEnvironme…
Aug 18, 2023
ad5cab6
merge main
ClementineDomine Aug 19, 2023
2b0f9d8
Update README.md - Centered logo
JarvisDevon Aug 19, 2023
034e685
debugging state density plot
Aug 20, 2023
60abefa
Merge branch 'whittington_2020' of https://github.com/SainsburyWellco…
Aug 20, 2023
68422a1
pre-commit changes
Aug 20, 2023
55d740e
change TEM imports to not require torch install
LukeHollingsworth Aug 20, 2023
2f78dee
note on installing dependencies on zsh shell
LukeHollingsworth Aug 20, 2023
3fae384
merged main into whittington_2020
LukeHollingsworth Aug 20, 2023
a60d64b
introduce logging of training accuracies
LukeHollingsworth Aug 20, 2023
ff12f40
pre-commit changes
LukeHollingsworth Aug 20, 2023
5c4fd53
added comments to TEM run file
LukeHollingsworth Aug 20, 2023
82a34d7
merge from main
Aug 21, 2023
48058c3
batch trajectories and grids plotted
Aug 21, 2023
4cd1f7a
Simple2D & DiscreteObject examples added for BatchEnvironment
Aug 21, 2023
a8b07cf
attempting to fix large file problem
Jun 27, 2024
ccc584a
running TEM tests
Jun 28, 2024
f67b4c2
slurm updated
Jun 28, 2024
978f001
slurm updated
Jun 28, 2024
348a161
slurm change
Jun 28, 2024
aebb6fb
huge 50K run added
Jul 4, 2024
c5762df
huge 50K run added
Jul 4, 2024
723c16d
state density and history bugs sorted
Jul 5, 2024
0aab239
TEM state density bugs fixed
Jul 9, 2024
1672921
big high density run added
Jul 9, 2024
7128313
small TEM run
Jul 11, 2024
eab0cdf
state density mismatch fixed
Jul 11, 2024
36f5da1
small training run (without width 2) added
Jul 12, 2024
18c4abb
medium size run added
Jul 15, 2024
5bc718b
problem with state assignment fixed
Jul 15, 2024
ccb394e
reduced slurm memory pool
Jul 16, 2024
94b8ac8
reduced slurm memory pool
Jul 16, 2024
7de0832
updated test
Jul 16, 2024
5d33231
pre-commit run on all files
Jul 16, 2024
0d277b1
is the cluster broken or is it just me?
Jul 16, 2024
74990a6
trying cpu slurm
Jul 16, 2024
ca1c310
trying cpu slurm
Jul 16, 2024
a03ada0
trying cpu slurm
Jul 16, 2024
92de616
looped walks added
Jul 19, 2024
0e93183
looping walk
Jul 19, 2024
a681c6b
cpu slurm added
Jul 19, 2024
5b22e32
cpu slurm added
Jul 19, 2024
78cb5bb
trying to fix slurm bug
Jul 25, 2024
2c01eac
big memory run with longer walks
Jul 29, 2024
078d41f
new training config
Jul 30, 2024
ab297dc
formatted
Aug 5, 2024
cc0ad77
full var walks added
Aug 5, 2024
709fc82
trailing whitespace
Sep 3, 2024
0ead088
full length training
Sep 3, 2024
1fe31ab
recent TEM updates
Oct 7, 2024
2a95433
minor update
Oct 15, 2024
d2bdd14
test push
Oct 16, 2024
853d185
black precommit changes
Oct 29, 2024
4afe24c
precommit black
Nov 6, 2024
d423f89
pre-merge
Nov 20, 2024
3351dd8
premerge to main
Nov 22, 2024
d2cb6c3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 22, 2024
3921cf6
Merge remote-tracking branch 'origin/main' into whittington_2020
Nov 27, 2024
a6911d9
Merge branch 'whittington_2020' of https://github.com/SainsburyWellco…
Nov 27, 2024
2993e06
starting the cleaning process
Dec 2, 2024
62df4d6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 2, 2024
bba6902
fixed arena examples bug
Dec 23, 2024
ab59665
Merge branch 'whittington_2020' of https://github.com/SainsburyWellco…
Dec 23, 2024
8b062e7
retrigger checks
rodrigcd Dec 23, 2024
774a67f
SimpleDiscreteAgent added
Dec 31, 2024
3b5c1ac
Merge branch 'whittington_2020' of https://github.com/SainsburyWellco…
Dec 31, 2024
67b9c53
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 31, 2024
37cdc5a
fixing obs hist length
rodrigcd Jan 8, 2025
4d15210
disabling default plt show and fixing bugs on tests
rodrigcd Jan 8, 2025
6bd3651
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 8, 2025
c00cc09
removing tem logs from test
rodrigcd Jan 8, 2025
9a4fe58
fixing live render code for matplotlib 3.10
rodrigcd Jan 8, 2025
d872e91
limiting to python>=3.10
rodrigcd Jan 8, 2025
fad1ccd
limiting to python>=3.10
rodrigcd Jan 8, 2025
2 changes: 1 addition & 1 deletion README.md
@@ -104,7 +104,7 @@ pip install NeuralPlayground==0.0.5

If you want to contribute to the project, get the latest development version
from GitHub, and install it in editable mode, including the "dev" dependencies:

#### Unix (Linux and macOS)
Collaborator:

Is this really necessary?

Collaborator Author:

I think so. I had some problems early on cloning the dev version of NeuralPlayground on both Windows and macOS. If this has been fixed, then this is redundant and I'll change it.

```bash
git clone https://github.com/SainsburyWellcomeCentre/NeuralPlayground/ --single-branch
cd NeuralPlayground
```
4,680 changes: 4,680 additions & 0 deletions examples/agent_examples/custom_sim/run.log

Large diffs are not rendered by default.

75 changes: 42 additions & 33 deletions examples/agent_examples/whittington_2020_example.ipynb

Large diffs are not rendered by default.

96 changes: 55 additions & 41 deletions examples/agent_examples/whittington_2020_run.py
@@ -14,7 +14,7 @@
from neuralplayground.experiments import Sargolini2006Data

simulation_id = "TEM_custom_sim"
save_path = os.path.join(os.getcwd(), "results_sim")
save_path = os.path.join(os.getcwd(), "examples", "agent_examples", "results_sim")
# save_path = os.path.join(os.getcwd(), "examples", "agent_examples", "trained_results")
agent_class = Whittington2020
env_class = BatchEnvironment
@@ -23,56 +23,56 @@
params = parameters.parameters()
full_agent_params = params.copy()

# Set the x and y limits for the arena
arena_x_limits = [
[-5, 5],
[-4, 4],
[-5, 5],
[-6, 6],
[-4, 4],
[-5, 5],
[-6, 6],
[-5, 5],
[-4, 4],
[-5, 5],
[-6, 6],
[-5, 5],
[-4, 4],
[-5, 5],
[-6, 6],
[-5, 5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
]
arena_y_limits = [
[-5, 5],
[-4, 4],
[-5, 5],
[-6, 6],
[-4, 4],
[-5, 5],
[-6, 6],
[-5, 5],
[-4, 4],
[-5, 5],
[-6, 6],
[-5, 5],
[-4, 4],
[-5, 5],
[-6, 6],
[-5, 5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
[-2.5, 2.5],
]

room_widths = [int(np.diff(arena_x_limits)[i]) for i in range(len(arena_x_limits))]
room_depths = [int(np.diff(arena_y_limits)[i]) for i in range(len(arena_y_limits))]

# Set parameters for the environment that generates observations
discrete_env_params = {
"environment_name": "DiscreteObject",
"state_density": 1,
"n_objects": params["n_x"],
"agent_step_size": 1,
"agent_step_size": 1, # Note: this must be 1 / state density
"use_behavioural_data": False,
"data_path": None,
"experiment_class": Sargolini2006Data,
}

# Set parameters for the batch environment
env_params = {
"environment_name": "BatchEnvironment",
"batch_size": 16,
@@ -81,19 +81,33 @@
"env_class": DiscreteObjectEnvironment,
"arg_env_params": discrete_env_params,
}

# If behavioural data are used, set arena limits to those from Sargolini et al. 2006, reduce state density to 1/4
state_densities = [discrete_env_params["state_density"] for _ in range(env_params["batch_size"])]
if discrete_env_params["use_behavioural_data"]:
arena_x_limits = [[-50, 50] for _ in range(env_params["batch_size"])]
arena_y_limits = [[-50, 50] for _ in range(env_params["batch_size"])]
state_densities = [0.25] * env_params["batch_size"]

room_widths = [int(np.diff(arena_x_limits)[i]) for i in range(env_params["batch_size"])]
room_depths = [int(np.diff(arena_y_limits)[i]) for i in range(env_params["batch_size"])]

# Set parameters for the agent
agent_params = {
"model_name": "Whittington2020",
"save_name": str(simulation_id)[4:],
"params": full_agent_params,
"batch_size": env_params["batch_size"],
"room_widths": room_widths,
"room_depths": room_depths,
"state_densities": [discrete_env_params["state_density"]] * env_params["batch_size"],
"use_behavioural_data": False,
"state_densities": state_densities,
"use_behavioural_data": discrete_env_params["use_behavioural_data"],
}

# Full model training consists of 20000 episodes
training_loop_params = {"n_episode": 10, "params": full_agent_params}
training_loop_params = {"n_episode": 5000, "params": full_agent_params, "random_state": False, "custom_state": [0.0, 0.0]}

# Create the training simulation object
sim = SingleSim(
simulation_id=simulation_id,
agent_class=agent_class,
@@ -104,7 +118,7 @@
training_loop_params=training_loop_params,
)

# print(sim)
# Run the simulation
print("Running sim...")
sim.run_sim(save_path)
print("Sim finished.")
301 changes: 230 additions & 71 deletions examples/arena_examples/arena_examples.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion examples/comparisons_examples/comparison_board.ipynb
@@ -783,7 +783,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10.12 ('NPG-env')",
"language": "python",
"name": "python3"
},
1 change: 1 addition & 0 deletions neuralplayground/agents/__init__.py
@@ -2,5 +2,6 @@
from .agent_core import AgentCore, RandomAgent, LevyFlightAgent, RatMovementAgent
from .stachenfeld_2018 import Stachenfeld2018
from .weber_2018 import Weber2018
from .discrete_agent import SimpleDiscreteAgent

# from .whittington_2020 import Whittington2020
2 changes: 1 addition & 1 deletion neuralplayground/agents/agent_core.py
@@ -81,7 +81,7 @@ def act(self, obs, policy_func=None):
action = np.random.normal(scale=self.agent_step_size, size=(2,))

self.obs_history.append(obs)
if len(self.obs_history) >= 1000: # reset every 1000
if len(self.obs_history) >= 1000: # max length 1000
self.obs_history.pop(0)
if policy_func is not None:
return policy_func(obs)
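The revised comment clarifies that `obs_history` is capped rather than periodically cleared: each call appends the new observation and drops the oldest one once the list reaches 1000 entries. A rough equivalent, as a sketch only, is a fixed-size deque:

```python
from collections import deque

# Sketch: a bounded history similar in spirit to the append / pop(0) pattern above.
obs_history = deque(maxlen=1000)
for step in range(2500):
    obs_history.append(step)  # once full, the oldest entry is discarded automatically

assert len(obs_history) == 1000 and obs_history[0] == 1500
```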
113 changes: 113 additions & 0 deletions neuralplayground/agents/discrete_agent.py
@@ -0,0 +1,113 @@
import numpy as np

from .agent_core import AgentCore


class SimpleDiscreteAgent(AgentCore):
"""
A simplified single-environment discrete agent, loosely mirroring TEM’s
approach to picking actions and checking whether the environment
actually moved.
"""

def __init__(
self,
agent_name: str = "SimpleDiscreteAgent",
**model_kwargs,
):
"""
Parameters
----------
room_width : int
Width (in discrete states) of the environment
room_depth : int
Depth (in discrete states) of the environment
state_density : float
Number of discrete states per unit distance (usually 1 / step_size)
agent_name : str
Agent's name
"""
super().__init__(agent_name=agent_name)
self.room_width = model_kwargs["room_width"]
self.room_depth = model_kwargs["room_depth"]
self.state_density = model_kwargs["state_density"]
# Discrete actions: stay, up, down, right, left
self.poss_actions = [[0, 0], [0, 1], [0, -1], [1, 0], [-1, 0]]

# For storing trajectory
self.walk_actions = []
self.obs_history = []

# Keep track of previous observation/action so we know if the environment actually moved.
self.prev_observation = None
self.prev_action = [0, 0]
self.n_walk = 0

def reset(self):
"""
Reset the agent’s history and counters.
"""
super().reset()
self.walk_actions = []
self.obs_history = []
self.prev_observation = None
self.prev_action = [0, 0]
self.n_walk = 0

def act(self, observation, policy_func=None):
"""
Decide on the next action. If the environment did not change state
(i.e. we got the same position as before, and we tried to move),
then pick a new random action. Otherwise, record the old observation and action.

Parameters
----------
observation : list or np.ndarray
Typically [state_index, object_info, (x,y)] for a discrete environment.
The first element (observation[0]) is the discrete state index.

policy_func : callable, optional
Unused here. Included only for compatibility.

Returns
-------
action : list
Chosen discrete action [dx, dy]
"""
# If this is our first time calling act, initialise
if self.prev_observation is None:
self.prev_observation = observation
self.prev_action = self.action_policy()
return self.prev_action

# Check if environment actually moved to a new state
curr_state_idx = observation[0]
prev_state_idx = self.prev_observation[0]

if curr_state_idx == prev_state_idx and self.prev_action != [0, 0]:
# The environment didn't move from last action, so pick a new random action
new_action = self.action_policy()
else:
# The environment did move, so record old obs/action before picking the next action
self.walk_actions.append(self.prev_action)
self.obs_history.append(self.prev_observation)
self.n_walk += 1
new_action = self.action_policy()

self.prev_observation = observation
self.prev_action = new_action
return new_action

def action_policy(self):
"""
Random action policy that selects an action from [stay, up, down, right, left].
"""
idx = np.random.choice(len(self.poss_actions))
return self.poss_actions[idx]

def update(self):
"""
Update the agent's internal state after a walk is completed.
"""
self.n_walk = 0
return None
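A minimal usage sketch for `SimpleDiscreteAgent` (assumptions: the branch is installed so the new import from `neuralplayground.agents` resolves, observations follow the `[state_index, object_info, (x, y)]` format described in the docstring, and the toy loop below stands in for a real `DiscreteObjectEnvironment`):

```python
import numpy as np

from neuralplayground.agents import SimpleDiscreteAgent

# Hypothetical stand-in for a discrete environment: observations are built by hand
# as [state_index, object_info, (x, y)], matching the format named in the docstring.
agent = SimpleDiscreteAgent(room_width=10, room_depth=10, state_density=1)
agent.reset()

state_idx, position = 0, np.array([0.0, 0.0])
for _ in range(100):
    observation = [state_idx, None, position.copy()]
    action = agent.act(observation)
    # Pretend the environment applies the action; here every move succeeds.
    position = position + np.array(action)
    state_idx = int(position[0]) * 10 + int(position[1])  # toy state indexing

print(len(agent.obs_history), "observations recorded")
```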