"""Benchmark multipers invariants over a grid of datasets and slicer settings.

Extracted from the patch "feat: added a benchmark file" (commit 9b2fd00,
David Loiseaux, Fri, 4 Oct 2024), which creates ``benchmarks/stuff.py``.

For every combination of point count, dataset, complex, invariant, degree,
vineyard flag, dtype and column type, the script builds a slicer from a
sampled point cloud, times one invariant computation with ``timeit`` and
finally writes all collected timings to ``benchmark_v<mp.__version__>.csv``.
"""

import shutil
from collections.abc import Callable, Sequence
from importlib.metadata import version
from itertools import product
from timeit import timeit

import gudhi as gd
import numpy as np
import pandas as pd

import multipers as mp
import multipers.grids as mpg
import multipers.ml.point_clouds as mmp
from multipers.data import noisy_annulus, orbit, three_annulus
from multipers.ml.convolutions import DTM, KDE
from multipers.slicer import Slicer_type, available_columns

# Fixed seed so the random base points of the "slice" benchmark are repeatable.
np.random.seed(0)

# Dataset name -> factory taking the requested number of points ``n``.
# The annulus datasets split ``n`` into roughly 2/3 and the remaining 1/3.
available_dataset: dict[str, Callable] = {
    "orbit35": lambda n: orbit(n=n, r=3.5),
    "orbit41": lambda n: orbit(n=n, r=4.1),
    "orbit43": lambda n: orbit(n=n, r=4.3),
    "na": lambda n: noisy_annulus(n1=(m := int((2 / 3) * n)), n2=n - m),
    "3a": lambda n: three_annulus(num_pts=(m := int((2 / 3) * n)), num_outliers=n - m),
}


# Axes of the benchmark grid; every combination is visited by the main loop.
datasets: Sequence[str] = list(available_dataset.keys())
degrees: Sequence[int] = [0, 1]
num_pts: Sequence[int] = [200]
complexes = ["delaunay", "rips"]
invariants = ["mma", "slice", "hilbert", "rank"]
vineyard = ["vine", "novine"]
num_lines = 50  # number of lines used by the "slice" benchmark
num_repetition = 5  # number of calls timed by ``timeit`` per configuration
timings: dict[tuple, float] = {}  # configuration tuple -> elapsed seconds
available_dtype = [np.float64]


def fill_timing(arg, f):
    """Time ``f`` and store the result in ``timings`` under the key ``arg``.

    Args:
        arg: Hashable description of the benchmarked configuration; used as
            the dictionary key and as the label of the progress line.
        f: Zero-argument callable to benchmark.

    The stored value is what ``timeit(f, number=num_repetition)`` returns,
    i.e. the total time in seconds over ``num_repetition`` calls, not the
    mean. A line ``<arg> ..... <seconds>`` padded with dots up to the terminal
    width is printed as progress output.
    """
    timings[arg] = timeit(f, number=num_repetition)
    terminal_width = shutil.get_terminal_size().columns
    # Use the parameter, not the module-level loop variable ``args``.
    left = str(arg)
    right = f"{timings[arg]:.4f}"
    # A non-positive count simply produces no dots on narrow terminals.
    dots = terminal_width - (len(left) + len(right) + 2)
    print(f"{left} {'.' * dots} {right}")


for args in product(
    num_pts,
    datasets,
    complexes,
    invariants,
    degrees,
    vineyard,
    available_dtype,
    available_columns,
):
    n, dataset, cplx, inv, degree, vine, dtype, col = args
    use_vineyard = vine == "vine"

    if inv == "mma" and not use_vineyard:
        # The "mma" benchmark always rebuilds the slicer with vineyard=True,
        # so the "novine" combination would only duplicate the "vine" row.
        # (The original test ``and vine`` was always true because ``vine`` is
        # a non-empty string.)
        continue

    pts = np.asarray(available_dataset[dataset](n))
    s: Slicer_type = mmp.PointCloud2FilteredComplex(
        complex=cplx,
        bandwidths=[0.2],
        num_collapses=2,
        output_type="slicer",
        reduce_degrees=[degree],
        expand_dim=degree + 1,
    ).fit_transform([pts])[0][0]
    # NOTE(review): the original passed ``col`` positionally after keyword
    # arguments, which is a SyntaxError. ``column_type`` is assumed to be the
    # matching keyword of ``mp.Slicer`` -- confirm against the installed
    # multipers version.
    s = mp.Slicer(s, vineyard=use_vineyard, dtype=dtype, column_type=col)
    box = mpg.compute_bounding_box(s)
    s.minpres_degree = -1  # makes it non-minpres again

    if inv == "mma":
        f = lambda: mp.module_approximation(mp.Slicer(s, vineyard=True))
    elif inv == "slice":
        # Random base points drawn inside the bounding box, one per line;
        # every line uses the all-ones direction.
        basepoints = np.random.uniform(
            low=box[None, :, 0],
            high=box[None, :, 1],
            size=(num_lines, s.num_parameters),
        )
        directions = [np.ones(s.num_parameters)] * num_lines
        f = lambda: s.persistence_on_lines(basepoints, directions)
    elif inv == "hilbert":
        grid = mpg.compute_grid(s, resolution=50, strategy="regular")
        f = lambda: mp.signed_measure(s, grid=grid, degree=degree, invariant="hilbert")
    elif inv == "rank":
        grid = mpg.compute_grid(s, resolution=20, strategy="regular")
        f = lambda: mp.signed_measure(s, grid=grid, degree=degree, invariant="rank")
    else:
        raise ValueError(f"Invariant {inv} is not benchmarkable.")

    # The lambdas above are called right away, within this iteration, so
    # their late-bound captures (s, grid, degree, ...) are the current ones.
    try:
        fill_timing(args, f)
    except ValueError:
        # Best effort: report the failing configuration and keep going.
        print("invalid args", args, "with function", f)


# Keys of ``timings`` are the full 8-element configuration tuples (including
# the column type), so each row is the key followed by its timing.
pd.DataFrame(
    [(*config, elapsed) for config, elapsed in timings.items()],
    columns=[
        "npts",
        "dataset",
        "complex",
        "inv",
        "degree",
        "vine",
        "dtype",
        "column",
        "timing",
    ],
).to_csv(f"benchmark_v{mp.__version__}.csv", index=False)