riiswa · db7894 · May 4, 2024 · May 4, 2024 · May 5, 2024
diff --git a/cartpole_results_reinforce.png b/cartpole_results_reinforce.png
diff --git a/config-reinforce.yaml b/config-reinforce.yaml
@@ -0,0 +1,17 @@
+defaults:
+  - override hydra/launcher: joblib
+env_id: CartPole-v1
+batch_size: 256
+n_episodes: 500
+warm_up_episodes: 50
+episode_discount: 0.05
+gamma: 0.99
+train_steps: 5
+target_update_freq: 10
+learning_rate: 1e-2
+width: 8
+grid: 5
+method: "KAN"
+seed: 0
+log_interval: 10
+rtg: False
diff --git a/efficient_kan.py b/efficient_kan.py
@@ -0,0 +1,272 @@
+# From https://github.com/Blealtan/efficient-kan/tree/master
+
+import torch
+import torch.nn.functional as F
+
+
+class KANLinear(torch.nn.Module):
+    def __init__(
+        self,
+        in_features,
+        out_features,
+        grid_size=5,
+        spline_order=3,
+        scale_noise=0.1,
+        scale_base=1.0,
+        scale_spline=1.0,
+        enable_standalone_scale_spline=True,
+        base_activation=torch.nn.SiLU,
+        grid_eps=0.02,
+        grid_range=[-1, 1],
+    ):
+        super(KANLinear, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.grid_size = grid_size
+        self.spline_order = spline_order
+
+        h = (grid_range[1] - grid_range[0]) / grid_size
+        grid = (
+            (
+                torch.arange(-spline_order, grid_size + spline_order + 1) * h
+                + grid_range[0]
+            )
+            .expand(in_features, -1)
+            .contiguous()
+        )
+        self.register_buffer("grid", grid)
+
+        self.base_weight = torch.nn.Parameter(torch.Tensor(out_features, in_features))
+        self.spline_weight = torch.nn.Parameter(
+            torch.Tensor(out_features, in_features, grid_size + spline_order)
+        )
+        if enable_standalone_scale_spline:
+            self.spline_scaler = torch.nn.Parameter(
+                torch.Tensor(out_features, in_features)
+            )
+
+        self.scale_noise = scale_noise
+        self.scale_base = scale_base
+        self.scale_spline = scale_spline
+        self.enable_standalone_scale_spline = enable_standalone_scale_spline
+        self.base_activation = base_activation()
+        self.grid_eps = grid_eps
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        torch.nn.init.constant_(self.base_weight, self.scale_base)
+        with torch.no_grad():
+            noise = (
+                (
+                    torch.rand(self.grid_size + 1, self.in_features, self.out_features)
+                    - 1 / 2
+                )
+                * self.scale_noise
+                / self.grid_size
+            )
+            self.spline_weight.data.copy_(
+                (self.scale_spline if not self.enable_standalone_scale_spline else 1.0)
+                * self.curve2coeff(
+                    self.grid.T[self.spline_order : -self.spline_order],
+                    noise,
+                )
+            )
+            if self.enable_standalone_scale_spline:
+                torch.nn.init.constant_(self.spline_scaler, self.scale_spline)
+
+    def b_splines(self, x: torch.Tensor):
+        """
+        Compute the B-spline bases for the given input tensor.
+
+        Args:
+            x (torch.Tensor): Input tensor of shape (batch_size, in_features).
+
+        Returns:
+            torch.Tensor: B-spline bases tensor of shape (batch_size, in_features, grid_size + spline_order).
+        """
+        assert x.dim() == 2 and x.size(1) == self.in_features
+
+        grid: torch.Tensor = (
+            self.grid
+        )  # (in_features, grid_size + 2 * spline_order + 1)
+        x = x.unsqueeze(-1)
+        bases = ((x >= grid[:, :-1]) & (x < grid[:, 1:])).to(x.dtype)
+        for k in range(1, self.spline_order + 1):
+            bases = (
+                (x - grid[:, : -(k + 1)])
+                / (grid[:, k:-1] - grid[:, : -(k + 1)])
+                * bases[:, :, :-1]
+            ) + (
+                (grid[:, k + 1 :] - x)
+                / (grid[:, k + 1 :] - grid[:, 1:(-k)])
+                * bases[:, :, 1:]
+            )
+
+        assert bases.size() == (
+            x.size(0),
+            self.in_features,
+            self.grid_size + self.spline_order,
+        )
+        return bases.contiguous()
+
+    def curve2coeff(self, x: torch.Tensor, y: torch.Tensor):
+        """
+        Compute the coefficients of the curve that interpolates the given points.
+
+        Args:
+            x (torch.Tensor): Input tensor of shape (batch_size, in_features).
+            y (torch.Tensor): Output tensor of shape (batch_size, in_features, out_features).
+
+        Returns:
+            torch.Tensor: Coefficients tensor of shape (out_features, in_features, grid_size + spline_order).
+        """
+        assert x.dim() == 2 and x.size(1) == self.in_features
+        assert y.size() == (x.size(0), self.in_features, self.out_features)
+
+        A = self.b_splines(x).transpose(
+            0, 1
+        )  # (in_features, batch_size, grid_size + spline_order)
+        B = y.transpose(0, 1)  # (in_features, batch_size, out_features)
+        solution = torch.linalg.lstsq(
+            A, B
+        ).solution  # (in_features, grid_size + spline_order, out_features)
+        result = solution.permute(
+            2, 0, 1
+        )  # (out_features, in_features, grid_size + spline_order)
+
+        assert result.size() == (
+            self.out_features,
+            self.in_features,
+            self.grid_size + self.spline_order,
+        )
+        return result.contiguous()
+
+    def forward(self, x: torch.Tensor):
+        assert x.dim() == 2 and x.size(1) == self.in_features
+
+        base_output = F.linear(self.base_activation(x), self.base_weight)
+        spline_output = F.linear(
+            self.b_splines(x).view(x.size(0), -1),
+            self.spline_weight.view(self.out_features, -1),
+        )
+        return base_output + spline_output
+
+    @torch.no_grad()
+    def update_grid(self, x: torch.Tensor, margin=0.01):
+        assert x.dim() == 2 and x.size(1) == self.in_features
+        batch = x.size(0)
+
+        splines = self.b_splines(x)  # (batch, in, coeff)
+        splines = splines.permute(1, 0, 2)  # (in, batch, coeff)
+        orig_coeff = self.spline_weight  # (out, in, coeff)
+        orig_coeff = orig_coeff.permute(1, 2, 0)  # (in, coeff, out)
+        unreduced_spline_output = torch.bmm(splines, orig_coeff)  # (in, batch, out)
+        unreduced_spline_output = unreduced_spline_output.permute(
+            1, 0, 2
+        )  # (batch, in, out)
+
+        # sort each channel individually to collect data distribution
+        x_sorted = torch.sort(x, dim=0)[0]
+        grid_adaptive = x_sorted[
+            torch.linspace(
+                0, batch - 1, self.grid_size + 1, dtype=torch.int64, device=x.device
+            )
+        ]
+
+        uniform_step = (x_sorted[-1] - x_sorted[0] + 2 * margin) / self.grid_size
+        grid_uniform = (
+            torch.arange(
+                self.grid_size + 1, dtype=torch.float32, device=x.device
+            ).unsqueeze(1)
+            * uniform_step
+            + x_sorted[0]
+            - margin
+        )
+
+        grid = self.grid_eps * grid_uniform + (1 - self.grid_eps) * grid_adaptive
+        grid = torch.concatenate(
+            [
+                grid[:1]
+                - uniform_step
+                * torch.arange(self.spline_order, 0, -1, device=x.device).unsqueeze(1),
+                grid,
+                grid[-1:]
+                + uniform_step
+                * torch.arange(1, self.spline_order + 1, device=x.device).unsqueeze(1),
+            ],
+            dim=0,
+        )
+
+        self.grid.copy_(grid.T)
+        self.spline_weight.data.copy_(self.curve2coeff(x, unreduced_spline_output))
+
+    def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
+        """
+        Compute the regularization loss.
+
+        This is a crude approximation of the L1 regularization described in the
+        paper: the original requires computing absolute values and entropy from the
+        expanded (batch, in_features, out_features) intermediate tensor, which is hidden
+        behind the F.linear call when we want a memory-efficient implementation.
+
+        Here the L1 term is computed as the mean absolute value of the spline
+        weights. The authors' implementation also includes this term, in addition to
+        the sample-based regularization.
+        """
+        l1_fake = self.spline_weight.abs().mean(-1)
+        regularization_loss_activation = l1_fake.sum()
+        p = l1_fake / regularization_loss_activation
+        regularization_loss_entropy = -torch.sum(p * p.log())
+        return (
+            regularize_activation * regularization_loss_activation
+            + regularize_entropy * regularization_loss_entropy
+        )
+
+
+class EfficientKAN(torch.nn.Module):
+    def __init__(
+        self,
+        layers_hidden,
+        grid_size=5,
+        spline_order=3,
+        scale_noise=0.1,
+        scale_base=1.0,
+        scale_spline=1.0,
+        base_activation=torch.nn.SiLU,
+        grid_eps=0.02,
+        grid_range=[-1, 1],
+    ):
+        super(EfficientKAN, self).__init__()
+        self.grid_size = grid_size
+        self.spline_order = spline_order
+
+        self.layers = torch.nn.ModuleList()
+        for in_features, out_features in zip(layers_hidden, layers_hidden[1:]):
+            self.layers.append(
+                KANLinear(
+                    in_features,
+                    out_features,
+                    grid_size=grid_size,
+                    spline_order=spline_order,
+                    scale_noise=scale_noise,
+                    scale_base=scale_base,
+                    scale_spline=scale_spline,
+                    base_activation=base_activation,
+                    grid_eps=grid_eps,
+                    grid_range=grid_range,
+                )
+            )
+
+    def forward(self, x: torch.Tensor, update_grid=False):
+        for layer in self.layers:
+            if update_grid:
+                layer.update_grid(x)
+            x = layer(x)
+        return x
+
+    def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0):
+        return sum(
+            layer.regularization_loss(regularize_activation, regularize_entropy)
+            for layer in self.layers
+        )
diff --git a/experiment.sh b/experiment.sh
@@ -1,5 +1,7 @@
 #!/bin/sh
 echo "Starting experiments with MLP..."
-python kaqn.py --multirun seed="range(32)" method=MLP width=32
+python3.9 kaqn.py --multirun seed="range(8)" method=MLP width=32
 echo "Starting experiments with KAN..."
-python kaqn.py --multirun seed="range(32)"
+python3.9 kaqn.py --multirun seed="range(8)"
+
+# TODO: can these multirun sweeps run in parallel (e.g. via a Hydra launcher)?
diff --git a/plot.py b/plot.py
@@ -4,22 +4,40 @@
 import glob
 
 
-def plot_results(algo1, algo2):
+def plot_results(rl_algo, algo1, algo2, algo3):
     files_algo1 = glob.glob(f"results/{algo1}_*.csv")
     files_algo2 = glob.glob(f"results/{algo2}_*.csv")
+    files_algo3 = glob.glob(f"results/{algo3}_*.csv")
 
     df_algo1 = pd.concat((pd.read_csv(file) for file in files_algo1))
     df_algo2 = pd.concat((pd.read_csv(file) for file in files_algo2))
+    df_algo3 = pd.concat((pd.read_csv(file) for file in files_algo3))
+
+    # NOTE(review): presumably only algo1's CSVs carry an extra 'epoch' column
+    # (confirm against whatever writes results/). Drop it before the integer
+    # cast, and ignore its absence rather than raising KeyError.
+    df_algo1 = df_algo1.drop(columns=['epoch'], errors='ignore')
+
+    # Cast each frame to int exactly once; the groupby('episode') calls below
+    # then operate on integer episode keys.
+    df_algo1 = df_algo1.astype(int)
+    df_algo2 = df_algo2.astype(int)
+    df_algo3 = df_algo3.astype(int)
 
     median_algo1 = df_algo1.groupby('episode')['length'].median()
     median_algo2 = df_algo2.groupby('episode')['length'].median()
+    median_algo3 = df_algo3.groupby('episode')['length'].median()
+
     quantile_25_algo1 = df_algo1.groupby('episode')['length'].quantile(0.25)
     quantile_75_algo1 = df_algo1.groupby('episode')['length'].quantile(0.75)
     quantile_25_algo2 = df_algo2.groupby('episode')['length'].quantile(0.25)
     quantile_75_algo2 = df_algo2.groupby('episode')['length'].quantile(0.75)
+    quantile_25_algo3 = df_algo3.groupby('episode')['length'].quantile(0.25)
+    quantile_75_algo3 = df_algo3.groupby('episode')['length'].quantile(0.75)
 
     best_algo1 = df_algo1.groupby('episode')['length'].max()
     best_algo2 = df_algo2.groupby('episode')['length'].max()
+    best_algo3 = df_algo3.groupby('episode')['length'].max()
 
     plt.figure(figsize=(6, 4))
 
@@ -28,14 +46,19 @@ def plot_results(algo1, algo2):
 
     plt.plot(median_algo2.index, median_algo2, label=f"{algo2}", color='red')
     plt.fill_between(median_algo2.index, quantile_25_algo2, quantile_75_algo2, alpha=0.3, color='red')
+
+    plt.plot(median_algo3.index, median_algo3, label=f"{algo3}", color='green')
+    plt.fill_between(median_algo3.index, quantile_25_algo3, quantile_75_algo3, alpha=0.3, color='green')
 
     plt.plot(best_algo1.index, best_algo1, label=f"{algo1} (Best)", color='blue', marker='*', markersize=10, markevery=10, lw=2)
 
     plt.plot(best_algo2.index, best_algo2, label=f"{algo2} (Best)", color='red', marker='*', markersize=10, markevery=10, lw=2)
 
+    plt.plot(best_algo3.index, best_algo3, label=f"{algo3} (Best)", color='green', marker='*', markersize=10, markevery=10, lw=2)
+
     plt.xlabel('Episode')
     plt.ylabel('Episode Length')
-    plt.title(f'DDQN comparison with {algo1} and {algo2}')
+    plt.title(f'{rl_algo} comparison with {algo1} and {algo2} and {algo3}')
     plt.legend()
     plt.grid(True)
     plt.tight_layout()
@@ -44,8 +67,8 @@ def plot_results(algo1, algo2):
 
 
 if __name__ == "__main__":
-    if len(sys.argv) != 3:
-        print("Usage: python script.py <algo1> <algo2>")
+    if len(sys.argv) != 5:
+        print("Usage: python script.py <rl_algo> <algo1> <algo2> <algo3>")
         sys.exit(1)
 
     plot_results(*sys.argv[1:])