diff --git a/python/src/robyn/visualization/allocator_plotter.py b/python/src/robyn/visualization/allocator_plotter.py index 7951eb148..55af0be5a 100644 --- a/python/src/robyn/visualization/allocator_plotter.py +++ b/python/src/robyn/visualization/allocator_plotter.py @@ -4,7 +4,9 @@ import numpy as np import pandas as pd from typing import Dict +import logging +logger = logging.getLogger(__name__) class AllocationPlotter(BaseVisualizer): """Plotter class for allocation results visualization.""" @@ -16,10 +18,13 @@ def __init__(self, result: AllocationResult): Args: result: Allocation results to plot """ + logger.debug("Initializing AllocationPlotter") super().__init__(style="bmh") self.result = result if self.result is None: + logger.error("AllocationResult cannot be None") raise ValueError("AllocationResult cannot be None") + logger.info("AllocationPlotter initialized successfully with result: %s", self.result) def plot_all(self) -> Dict[str, plt.Figure]: """ @@ -28,28 +33,48 @@ def plot_all(self) -> Dict[str, plt.Figure]: Returns: Dictionary of figures keyed by plot name """ + logger.info("Starting to generate all allocation plots") figures = {} try: + logger.debug("Generating spend allocation plot") figures["spend_allocation"] = self.plot_spend_allocation() + + logger.debug("Generating response curves plot") figures["response_curves"] = self.plot_response_curves() + + logger.debug("Generating efficiency frontier plot") figures["efficiency_frontier"] = self.plot_efficiency_frontier() + + logger.debug("Generating spend vs response plot") figures["spend_vs_response"] = self.plot_spend_vs_response() + + logger.debug("Generating summary metrics plot") figures["summary_metrics"] = self.plot_summary_metrics() + + logger.info("Successfully generated all %d plots", len(figures)) + except Exception as e: + logger.error("Failed to generate plots: %s", str(e)) + raise finally: + logger.debug("Cleaning up plot resources") self.cleanup() return figures def plot_spend_allocation(self) -> plt.Figure: """Plot spend allocation comparison.""" + logger.debug("Starting spend allocation plot generation") + # Create figure fig, ax = self.create_figure() optimal_allocations = self.result.optimal_allocations + logger.debug("Processing optimal allocations data: %s", optimal_allocations) # Prepare data channels = optimal_allocations["channel"].values x = np.arange(len(channels)) width = 0.35 + logger.debug("Plotting current spend bars for %d channels", len(channels)) # Plot bars ax.bar( x - width / 2, @@ -61,6 +86,7 @@ def plot_spend_allocation(self) -> plt.Figure: alpha=self.alpha["primary"], ) + logger.debug("Plotting optimal spend bars") ax.bar( x + width / 2, optimal_allocations["optimal_spend"].values, @@ -72,6 +98,7 @@ def plot_spend_allocation(self) -> plt.Figure: ) # Add annotations + logger.debug("Adding percentage change annotations") for i, (curr, opt) in enumerate( zip(optimal_allocations["current_spend"].values, optimal_allocations["optimal_spend"].values) ): @@ -83,20 +110,24 @@ def plot_spend_allocation(self) -> plt.Figure: ax, title="Media Spend Allocation", ylabel="Spend", xticks=x, xticklabels=channels, rotation=45 ) - # Add legend and finalize self.add_legend(ax) self.finalize_figure() - + + logger.info("Spend allocation plot generated successfully") return fig def plot_response_curves(self) -> plt.Figure: """Plot response curves for each channel.""" + logger.debug("Starting response curves plot generation") + # Prepare data curves_df = self.result.response_curves channels = curves_df["channel"].unique() n_channels = len(channels) ncols = min(3, n_channels) nrows = (n_channels + ncols - 1) // ncols + + logger.debug("Processing %d channels for response curves", n_channels) # Create figure fig, axes = self.create_figure(nrows=nrows, ncols=ncols, figsize=(15, 5 * nrows)) @@ -109,6 +140,7 @@ def plot_response_curves(self) -> plt.Figure: # Plot each channel for idx, channel in enumerate(channels): + logger.debug("Plotting response curve for channel: %s", channel) row = idx // ncols col = idx % ncols ax = axes[row, col] @@ -126,6 +158,7 @@ def plot_response_curves(self) -> plt.Figure: # Plot current point current_data = channel_data[channel_data["is_current"]] if not current_data.empty: + logger.debug("Plotting current point for channel %s", channel) ax.scatter( current_data["spend"].iloc[0], current_data["response"].iloc[0], @@ -137,6 +170,7 @@ def plot_response_curves(self) -> plt.Figure: # Plot optimal point optimal_data = channel_data[channel_data["is_optimal"]] if not optimal_data.empty: + logger.debug("Plotting optimal point for channel %s", channel) ax.scatter( optimal_data["spend"].iloc[0], optimal_data["response"].iloc[0], @@ -145,19 +179,22 @@ def plot_response_curves(self) -> plt.Figure: s=100, ) - # Setup subplot self.setup_axis(ax, title=f"{channel} Response Curve") self.add_legend(ax) - # Remove empty subplots and finalize + # Remove empty subplots for idx in range(n_channels, nrows * ncols): + logger.debug("Removing empty subplot at index %d", idx) fig.delaxes(axes[idx // ncols, idx % ncols]) self.finalize_figure() + logger.info("Response curves plot generated successfully") return fig def plot_efficiency_frontier(self) -> plt.Figure: """Plot efficiency frontier.""" + logger.debug("Starting efficiency frontier plot generation") + # Create figure fig, ax = self.create_figure() @@ -168,7 +205,10 @@ def plot_efficiency_frontier(self) -> plt.Figure: optimal_total_spend = optimal_allocations["optimal_spend"].sum() optimal_total_response = optimal_allocations["optimal_response"].sum() - # Plot points + logger.debug("Calculated totals - Current spend: %f, Current response: %f, Optimal spend: %f, Optimal response: %f", + current_total_spend, current_total_response, optimal_total_spend, optimal_total_response) + + # Plot points and connect them ax.scatter( current_total_spend, current_total_response, @@ -187,7 +227,6 @@ def plot_efficiency_frontier(self) -> plt.Figure: zorder=2, ) - # Connect points ax.plot( [current_total_spend, optimal_total_spend], [current_total_response, optimal_total_response], @@ -197,9 +236,11 @@ def plot_efficiency_frontier(self) -> plt.Figure: zorder=1, ) - # Add percentage changes annotation + # Calculate and add percentage changes pct_spend_change = ((optimal_total_spend / current_total_spend) - 1) * 100 pct_response_change = ((optimal_total_response / current_total_response) - 1) * 100 + + logger.debug("Percentage changes - Spend: %f%%, Response: %f%%", pct_spend_change, pct_response_change) ax.annotate( f"Spend: {pct_spend_change:.1f}%\nResponse: {pct_response_change:.1f}%", @@ -210,16 +251,17 @@ def plot_efficiency_frontier(self) -> plt.Figure: bbox=dict(facecolor="white", edgecolor=self.colors["neutral"], alpha=self.alpha["annotation"]), ) - # Setup axis self.setup_axis(ax, title="Efficiency Frontier", xlabel="Total Spend", ylabel="Total Response") - self.add_legend(ax) self.finalize_figure() + logger.info("Efficiency frontier plot generated successfully") return fig def plot_spend_vs_response(self) -> plt.Figure: """Plot spend vs response changes.""" + logger.debug("Starting spend vs response plot generation") + # Create figure fig, (ax1, ax2) = self.create_figure(nrows=2, ncols=1, figsize=(12, 10)) @@ -228,6 +270,7 @@ def plot_spend_vs_response(self) -> plt.Figure: channels = df["channel"].values x = np.arange(len(channels)) + logger.debug("Processing spend changes for %d channels", len(channels)) # Plot spend changes spend_pct = ((df["optimal_spend"] / df["current_spend"]) - 1) * 100 colors = [self.colors["positive"] if pct >= 0 else self.colors["negative"] for pct in spend_pct] @@ -235,6 +278,7 @@ def plot_spend_vs_response(self) -> plt.Figure: ax1.bar(x, spend_pct, color=colors, alpha=self.alpha["primary"]) self._plot_change_axis(ax1, x, channels, spend_pct, "Spend Change %") + logger.debug("Processing response changes") # Plot response changes response_pct = ((df["optimal_response"] / df["current_response"]) - 1) * 100 colors = [self.colors["positive"] if pct >= 0 else self.colors["negative"] for pct in response_pct] @@ -243,12 +287,14 @@ def plot_spend_vs_response(self) -> plt.Figure: self._plot_change_axis(ax2, x, channels, response_pct, "Response Change %") self.finalize_figure(adjust_spacing=True) + logger.info("Spend vs response plot generated successfully") return fig def _plot_change_axis( self, ax: plt.Axes, x: np.ndarray, channels: np.ndarray, pct_values: np.ndarray, ylabel: str ) -> None: """Helper method to setup change plot axes.""" + logger.debug("Setting up change plot axis for %s", ylabel) self.setup_axis(ax, ylabel=ylabel, xticks=x, xticklabels=channels, rotation=45) ax.axhline(y=0, color="black", linestyle="-", alpha=0.2) @@ -260,6 +306,8 @@ def _plot_change_axis( def plot_summary_metrics(self) -> plt.Figure: """Plot summary metrics.""" + logger.debug("Starting summary metrics plot generation") + # Create figure fig, ax = self.create_figure() @@ -267,6 +315,8 @@ def plot_summary_metrics(self) -> plt.Figure: optimal_allocations = self.result.optimal_allocations channels = optimal_allocations["channel"].values dep_var_type = self.result.metrics.get("dep_var_type") + + logger.debug("Processing metrics for dependency variable type: %s", dep_var_type) # Calculate metrics if dep_var_type == "revenue": @@ -278,6 +328,8 @@ def plot_summary_metrics(self) -> plt.Figure: optimal_metric = optimal_allocations["optimal_spend"] / optimal_allocations["optimal_response"] metric_name = "CPA" + logger.debug("Calculated %s metrics for %d channels", metric_name, len(channels)) + # Plot bars x = np.arange(len(channels)) width = 0.35 @@ -305,7 +357,6 @@ def plot_summary_metrics(self) -> plt.Figure: pct_change = ((opt / curr) - 1) * 100 self.add_percentage_annotation(ax, i, max(curr, opt), pct_change) - # Setup axis self.setup_axis( ax, title=f"Channel {metric_name} Comparison", @@ -318,9 +369,12 @@ def plot_summary_metrics(self) -> plt.Figure: self.add_legend(ax) self.finalize_figure() + logger.info("Summary metrics plot generated successfully") return fig def cleanup(self) -> None: """Clean up all plots.""" + logger.debug("Starting cleanup of plot resources") super().cleanup() plt.close("all") + logger.debug("Cleanup completed") \ No newline at end of file diff --git a/python/src/robyn/visualization/allocator_visualizer_original.py b/python/src/robyn/visualization/allocator_visualizer_original.py index b49a15e0e..d2aa64ce4 100644 --- a/python/src/robyn/visualization/allocator_visualizer_original.py +++ b/python/src/robyn/visualization/allocator_visualizer_original.py @@ -1,3 +1,4 @@ +import logging from typing import Dict, List, Tuple, Optional import numpy as np import pandas as pd @@ -5,6 +6,9 @@ from robyn.allocator.entities.allocation_results import AllocationResult +logger = logging.getLogger(__name__) + + class AllocationPlotter: """Creates visualizations for allocation results matching R version.""" @@ -14,27 +18,37 @@ def __init__(self, result: AllocationResult): Args: result: AllocationResult containing optimization results to visualize """ + logger.info("Initializing AllocationPlotter") + if result is None: + logger.error("AllocationResult cannot be None") + raise ValueError("AllocationResult cannot be None") + # Store allocation results self.result = result + logger.debug("Stored allocation results: %s", str(result)) # Use matplotlib's built-in clean style plt.style.use("bmh") + logger.debug("Set matplotlib style to 'bmh'") # Set default plot settings plt.rcParams["figure.figsize"] = (12, 8) plt.rcParams["axes.grid"] = True plt.rcParams["axes.spines.top"] = False plt.rcParams["axes.spines.right"] = False + logger.debug("Applied default plot settings") # Store standard figure size and colors self.fig_size = (12, 8) self.colors = plt.cm.Set2(np.linspace(0, 1, 8)) + logger.debug("Set figure size to %s and generated color palette", str(self.fig_size)) # Set color scheme self.current_color = "lightgray" self.optimal_color = "#4688C7" # Steel blue self.positive_color = "#2ECC71" # Green self.negative_color = "#E74C3C" # Red + logger.debug("Initialized color scheme") def plot_all(self) -> Dict[str, plt.Figure]: """Generate all one-pager plots for allocation results. @@ -42,263 +56,361 @@ def plot_all(self) -> Dict[str, plt.Figure]: Returns: Dictionary of plot names to figures """ - return { - "spend_allocation": self.plot_spend_allocation(), - "response_curves": self.plot_response_curves(), - "efficiency_frontier": self.plot_efficiency_frontier(), - "spend_vs_response": self.plot_spend_vs_response(), - "summary_metrics": self.plot_summary_metrics(), - } + logger.info("Generating all allocation plots") + plots = {} + try: + plots = { + "spend_allocation": self.plot_spend_allocation(), + "response_curves": self.plot_response_curves(), + "efficiency_frontier": self.plot_efficiency_frontier(), + "spend_vs_response": self.plot_spend_vs_response(), + "summary_metrics": self.plot_summary_metrics(), + } + logger.info("Successfully generated all plots") + logger.debug("Generated plots: %s", list(plots.keys())) + except Exception as e: + logger.error("Failed to generate all plots: %s", str(e)) + raise + return plots def plot_spend_allocation(self) -> plt.Figure: """Plot spend allocation comparison between current and optimized.""" + logger.info("Plotting spend allocation comparison") + if self.result is None: + logger.error("No allocation results available for spend allocation plot") raise ValueError("No allocation results available. Call plot_all() first.") - fig, ax = plt.subplots(figsize=self.fig_size) - - df = self.result.optimal_allocations - channels = df["channel"].values - x = np.arange(len(channels)) - width = 0.35 - - # Plot bars - current_spend = df["current_spend"].values - optimal_spend = df["optimal_spend"].values - - ax.bar( - x - width / 2, current_spend, width, label="Current", color=self.current_color, edgecolor="gray", alpha=0.7 - ) - ax.bar( - x + width / 2, - optimal_spend, - width, - label="Optimized", - color=self.optimal_color, - edgecolor="gray", - alpha=0.7, - ) - - # Customize plot - ax.set_xticks(x) - ax.set_xticklabels(channels, rotation=45, ha="right") - ax.set_ylabel("Spend") - ax.set_title("Media Spend Allocation") - ax.legend() - - # Add spend change percentage labels - for i, (curr, opt) in enumerate(zip(current_spend, optimal_spend)): - pct_change = ((opt / curr) - 1) * 100 - color = self.positive_color if pct_change >= 0 else self.negative_color - ax.text(i, max(curr, opt), f"{pct_change:+.1f}%", ha="center", va="bottom", color=color) - - plt.tight_layout() - return fig + try: + fig, ax = plt.subplots(figsize=self.fig_size) + df = self.result.optimal_allocations + logger.debug("Processing allocation data with %d channels", len(df)) + + channels = df["channel"].values + x = np.arange(len(channels)) + width = 0.35 + + # Plot bars + current_spend = df["current_spend"].values + optimal_spend = df["optimal_spend"].values + logger.debug("Current total spend: %.2f, Optimal total spend: %.2f", + current_spend.sum(), optimal_spend.sum()) + + ax.bar( + x - width / 2, current_spend, width, label="Current", + color=self.current_color, edgecolor="gray", alpha=0.7 + ) + ax.bar( + x + width / 2, optimal_spend, width, label="Optimized", + color=self.optimal_color, edgecolor="gray", alpha=0.7 + ) + + # Add spend change percentage labels + for i, (curr, opt) in enumerate(zip(current_spend, optimal_spend)): + pct_change = ((opt / curr) - 1) * 100 + logger.debug("Channel %s spend change: %.1f%%", channels[i], pct_change) + color = self.positive_color if pct_change >= 0 else self.negative_color + ax.text(i, max(curr, opt), f"{pct_change:+.1f}%", + ha="center", va="bottom", color=color) + + ax.set_xticks(x) + ax.set_xticklabels(channels, rotation=45, ha="right") + ax.set_ylabel("Spend") + ax.set_title("Media Spend Allocation") + ax.legend() + + plt.tight_layout() + logger.info("Successfully created spend allocation plot") + return fig + except Exception as e: + logger.error("Failed to create spend allocation plot: %s", str(e)) + raise def plot_response_curves(self) -> plt.Figure: """Plot response curves with current and optimal points.""" + logger.info("Plotting response curves") + if self.result is None: + logger.error("No allocation results available for response curves plot") raise ValueError("No allocation results available. Call plot_all() first.") - curves_df = self.result.response_curves - channels = curves_df["channel"].unique() - n_channels = len(channels) - ncols = min(3, n_channels) - nrows = (n_channels + ncols - 1) // ncols - - fig, axes = plt.subplots(nrows, ncols, figsize=(15, 5 * nrows)) - if nrows == 1 and ncols == 1: - axes = np.array([[axes]]) - elif nrows == 1 or ncols == 1: - axes = axes.reshape(-1, 1) - - for idx, channel in enumerate(channels): - row = idx // ncols - col = idx % ncols - ax = axes[row, col] - - channel_data = curves_df[curves_df["channel"] == channel] - - # Plot response curve - ax.plot(channel_data["spend"], channel_data["response"], color=self.optimal_color, alpha=0.6) - - # Add current and optimal points - current_data = channel_data[channel_data["is_current"]] - optimal_data = channel_data[channel_data["is_optimal"]] - - if not current_data.empty: - ax.scatter( - current_data["spend"].iloc[0], - current_data["response"].iloc[0], - color=self.negative_color, - label="Current", - s=100, - ) - if not optimal_data.empty: - ax.scatter( - optimal_data["spend"].iloc[0], - optimal_data["response"].iloc[0], - color=self.positive_color, - label="Optimal", - s=100, - ) - - ax.set_title(f"{channel} Response Curve") - ax.legend() - ax.grid(True, alpha=0.3) - - # Remove empty subplots - for idx in range(n_channels, nrows * ncols): - row = idx // ncols - col = idx % ncols - fig.delaxes(axes[row, col]) - - plt.tight_layout() - return fig + try: + curves_df = self.result.response_curves + channels = curves_df["channel"].unique() + n_channels = len(channels) + logger.debug("Processing response curves for %d channels", n_channels) + + ncols = min(3, n_channels) + nrows = (n_channels + ncols - 1) // ncols + + fig, axes = plt.subplots(nrows, ncols, figsize=(15, 5 * nrows)) + if nrows == 1 and ncols == 1: + axes = np.array([[axes]]) + elif nrows == 1 or ncols == 1: + axes = axes.reshape(-1, 1) + + for idx, channel in enumerate(channels): + logger.debug("Plotting response curve for channel: %s", channel) + row = idx // ncols + col = idx % ncols + ax = axes[row, col] + + channel_data = curves_df[curves_df["channel"] == channel] + + # Plot response curve + ax.plot(channel_data["spend"], channel_data["response"], + color=self.optimal_color, alpha=0.6) + + # Add current and optimal points + current_data = channel_data[channel_data["is_current"]] + optimal_data = channel_data[channel_data["is_optimal"]] + + if not current_data.empty: + logger.debug("%s current point: spend=%.2f, response=%.2f", + channel, current_data["spend"].iloc[0], + current_data["response"].iloc[0]) + ax.scatter( + current_data["spend"].iloc[0], + current_data["response"].iloc[0], + color=self.negative_color, + label="Current", + s=100, + ) + if not optimal_data.empty: + logger.debug("%s optimal point: spend=%.2f, response=%.2f", + channel, optimal_data["spend"].iloc[0], + optimal_data["response"].iloc[0]) + ax.scatter( + optimal_data["spend"].iloc[0], + optimal_data["response"].iloc[0], + color=self.positive_color, + label="Optimal", + s=100, + ) + + ax.set_title(f"{channel} Response Curve") + ax.legend() + ax.grid(True, alpha=0.3) + + # Remove empty subplots + for idx in range(n_channels, nrows * ncols): + row = idx // ncols + col = idx % ncols + fig.delaxes(axes[row, col]) + + plt.tight_layout() + logger.info("Successfully created response curves plot") + return fig + except Exception as e: + logger.error("Failed to create response curves plot: %s", str(e)) + raise def plot_efficiency_frontier(self) -> plt.Figure: """Plot efficiency frontier showing spend vs response relationship.""" + logger.info("Plotting efficiency frontier") + if self.result is None: + logger.error("No allocation results available for efficiency frontier plot") raise ValueError("No allocation results available. Call plot_all() first.") - fig, ax = plt.subplots(figsize=self.fig_size) - - df = self.result.optimal_allocations - - # Calculate totals - current_total_spend = df["current_spend"].sum() - current_total_response = df["current_response"].sum() - optimal_total_spend = df["optimal_spend"].sum() - optimal_total_response = df["optimal_response"].sum() - - # Plot points and line - ax.scatter( - current_total_spend, current_total_response, color=self.negative_color, s=100, label="Current", zorder=2 - ) - ax.scatter( - optimal_total_spend, optimal_total_response, color=self.positive_color, s=100, label="Optimal", zorder=2 - ) - - ax.plot( - [current_total_spend, optimal_total_spend], - [current_total_response, optimal_total_response], - "--", - color="gray", - alpha=0.5, - zorder=1, - ) - - # Add labels - pct_spend_change = ((optimal_total_spend / current_total_spend) - 1) * 100 - pct_response_change = ((optimal_total_response / current_total_response) - 1) * 100 - - ax.annotate( - f"Spend: {pct_spend_change:+.1f}%\nResponse: {pct_response_change:+.1f}%", - xy=(optimal_total_spend, optimal_total_response), - xytext=(10, 10), - textcoords="offset points", - bbox=dict(facecolor="white", edgecolor="gray", alpha=0.7), - ) - - ax.set_xlabel("Total Spend") - ax.set_ylabel("Total Response") - ax.set_title("Efficiency Frontier") - ax.legend() - ax.grid(True, alpha=0.3) - - plt.tight_layout() - return fig + try: + fig, ax = plt.subplots(figsize=self.fig_size) + df = self.result.optimal_allocations + + # Calculate totals + current_total_spend = df["current_spend"].sum() + current_total_response = df["current_response"].sum() + optimal_total_spend = df["optimal_spend"].sum() + optimal_total_response = df["optimal_response"].sum() + + logger.debug("Current totals - spend: %.2f, response: %.2f", + current_total_spend, current_total_response) + logger.debug("Optimal totals - spend: %.2f, response: %.2f", + optimal_total_spend, optimal_total_response) + + # Plot points and line + ax.scatter( + current_total_spend, current_total_response, + color=self.negative_color, s=100, label="Current", zorder=2 + ) + ax.scatter( + optimal_total_spend, optimal_total_response, + color=self.positive_color, s=100, label="Optimal", zorder=2 + ) + + ax.plot( + [current_total_spend, optimal_total_spend], + [current_total_response, optimal_total_response], + "--", color="gray", alpha=0.5, zorder=1 + ) + + # Add labels + pct_spend_change = ((optimal_total_spend / current_total_spend) - 1) * 100 + pct_response_change = ((optimal_total_response / current_total_response) - 1) * 100 + + logger.debug("Total spend change: %.1f%%", pct_spend_change) + logger.debug("Total response change: %.1f%%", pct_response_change) + + ax.annotate( + f"Spend: {pct_spend_change:+.1f}%\nResponse: {pct_response_change:+.1f}%", + xy=(optimal_total_spend, optimal_total_response), + xytext=(10, 10), + textcoords="offset points", + bbox=dict(facecolor="white", edgecolor="gray", alpha=0.7), + ) + + ax.set_xlabel("Total Spend") + ax.set_ylabel("Total Response") + ax.set_title("Efficiency Frontier") + ax.legend() + ax.grid(True, alpha=0.3) + + plt.tight_layout() + logger.info("Successfully created efficiency frontier plot") + return fig + except Exception as e: + logger.error("Failed to create efficiency frontier plot: %s", str(e)) + raise def plot_spend_vs_response(self) -> plt.Figure: """Plot channel-level spend vs response changes.""" + logger.info("Plotting spend vs response comparison") + if self.result is None: + logger.error("No allocation results available for spend vs response plot") raise ValueError("No allocation results available. Call plot_all() first.") - fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10)) - - df = self.result.optimal_allocations - channels = df["channel"].values - x = np.arange(len(channels)) - - # Plot spend changes - spend_pct = ((df["optimal_spend"] / df["current_spend"]) - 1) * 100 - colors = [self.negative_color if x < 0 else self.positive_color for x in spend_pct] - ax1.bar(x, spend_pct, color=colors, alpha=0.7) - ax1.set_xticks(x) - ax1.set_xticklabels(channels, rotation=45, ha="right") - ax1.set_ylabel("Spend Change %") - ax1.axhline(y=0, color="black", linestyle="-", alpha=0.2) - ax1.grid(True, alpha=0.3) - - # Add value labels - for i, v in enumerate(spend_pct): - ax1.text(i, v, f"{v:+.1f}%", ha="center", va="bottom" if v >= 0 else "top") - - # Plot response changes - response_pct = ((df["optimal_response"] / df["current_response"]) - 1) * 100 - colors = [self.negative_color if x < 0 else self.positive_color for x in response_pct] - ax2.bar(x, response_pct, color=colors, alpha=0.7) - ax2.set_xticks(x) - ax2.set_xticklabels(channels, rotation=45, ha="right") - ax2.set_ylabel("Response Change %") - ax2.axhline(y=0, color="black", linestyle="-", alpha=0.2) - ax2.grid(True, alpha=0.3) - - # Add value labels - for i, v in enumerate(response_pct): - ax2.text(i, v, f"{v:+.1f}%", ha="center", va="bottom" if v >= 0 else "top") - - plt.tight_layout() - return fig + try: + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10)) + df = self.result.optimal_allocations + channels = df["channel"].values + logger.debug("Processing %d channels for spend vs response comparison", + len(channels)) + + x = np.arange(len(channels)) + + # Plot spend changes + spend_pct = ((df["optimal_spend"] / df["current_spend"]) - 1) * 100 + colors = [self.negative_color if x < 0 else self.positive_color + for x in spend_pct] + + ax1.bar(x, spend_pct, color=colors, alpha=0.7) + ax1.set_xticks(x) + ax1.set_xticklabels(channels, rotation=45, ha="right") + ax1.set_ylabel("Spend Change %") + ax1.axhline(y=0, color="black", linestyle="-", alpha=0.2) + ax1.grid(True, alpha=0.3) + + # Add value labels + for i, v in enumerate(spend_pct): + logger.debug("Channel %s spend change: %.1f%%", channels[i], v) + ax1.text(i, v, f"{v:+.1f}%", ha="center", + va="bottom" if v >= 0 else "top") + + # Plot response changes + response_pct = ((df["optimal_response"] / df["current_response"]) - 1) * 100 + colors = [self.negative_color if x < 0 else self.positive_color + for x in response_pct] + + ax2.bar(x, response_pct, color=colors, alpha=0.7) + ax2.set_xticks(x) + ax2.set_xticklabels(channels, rotation=45, ha="right") + ax2.set_ylabel("Response Change %") + ax2.axhline(y=0, color="black", linestyle="-", alpha=0.2) + ax2.grid(True, alpha=0.3) + + # Add value labels + for i, v in enumerate(response_pct): + logger.debug("Channel %s response change: %.1f%%", channels[i], v) + ax2.text(i, v, f"{v:+.1f}%", ha="center", + va="bottom" if v >= 0 else "top") + + plt.tight_layout() + logger.info("Successfully created spend vs response plot") + return fig + except Exception as e: + logger.error("Failed to create spend vs response plot: %s", str(e)) + raise def plot_summary_metrics(self) -> plt.Figure: """Plot summary metrics including ROI/CPA changes.""" + logger.info("Plotting summary metrics") + if self.result is None: + logger.error("No allocation results available for summary metrics plot") raise ValueError("No allocation results available. Call plot_all() first.") - fig, ax = plt.subplots(figsize=self.fig_size) - - df = self.result.optimal_allocations - channels = df["channel"].values - - # Calculate ROI or CPA metrics - if self.result.metrics.get("dep_var_type") == "revenue": - current_metric = df["current_response"] / df["current_spend"] - optimal_metric = df["optimal_response"] / df["optimal_spend"] - metric_name = "ROI" - else: - current_metric = df["current_spend"] / df["current_response"] - optimal_metric = df["optimal_spend"] / df["optimal_response"] - metric_name = "CPA" - - x = np.arange(len(channels)) - width = 0.35 - - ax.bar( - x - width / 2, current_metric, width, label=f"Current {metric_name}", color=self.current_color, alpha=0.7 - ) - ax.bar( - x + width / 2, optimal_metric, width, label=f"Optimal {metric_name}", color=self.optimal_color, alpha=0.7 - ) - - # Add value labels - for i, (curr, opt) in enumerate(zip(current_metric, optimal_metric)): - pct_change = ((opt / curr) - 1) * 100 - color = self.positive_color if pct_change >= 0 else self.negative_color - ax.text(i, max(curr, opt), f"{pct_change:+.1f}%", ha="center", va="bottom", color=color) - - ax.set_xticks(x) - ax.set_xticklabels(channels, rotation=45, ha="right") - ax.set_ylabel(metric_name) - ax.set_title(f"Channel {metric_name} Comparison") - ax.legend() - ax.grid(True, alpha=0.3) - - plt.tight_layout() - return fig + try: + fig, ax = plt.subplots(figsize=self.fig_size) + df = self.result.optimal_allocations + channels = df["channel"].values + logger.debug("Processing summary metrics for %d channels", len(channels)) + + # Calculate ROI or CPA metrics + metric_name = "ROI" if self.result.metrics.get("dep_var_type") == "revenue" else "CPA" + logger.debug("Using metric type: %s", metric_name) + + if metric_name == "ROI": + current_metric = df["current_response"] / df["current_spend"] + optimal_metric = df["optimal_response"] / df["optimal_spend"] + else: + current_metric = df["current_spend"] / df["current_response"] + optimal_metric = df["optimal_spend"] / df["optimal_response"] + + x = np.arange(len(channels)) + width = 0.35 + + ax.bar( + x - width / 2, current_metric, width, + label=f"Current {metric_name}", color=self.current_color, alpha=0.7 + ) + ax.bar( + x + width / 2, optimal_metric, width, + label=f"Optimal {metric_name}", color=self.optimal_color, alpha=0.7 + ) + + # Add value labels + for i, (curr, opt) in enumerate(zip(current_metric, optimal_metric)): + pct_change = ((opt / curr) - 1) * 100 + logger.debug("Channel %s %s change: %.1f%% (current: %.2f, optimal: %.2f)", + channels[i], metric_name, pct_change, curr, opt) + + color = self.positive_color if pct_change >= 0 else self.negative_color + ax.text(i, max(curr, opt), f"{pct_change:+.1f}%", + ha="center", va="bottom", color=color) + + ax.set_xticks(x) + ax.set_xticklabels(channels, rotation=45, ha="right") + ax.set_ylabel(metric_name) + ax.set_title(f"Channel {metric_name} Comparison") + ax.legend() + ax.grid(True, alpha=0.3) + + plt.tight_layout() + logger.info("Successfully created summary metrics plot") + return fig + except Exception as e: + logger.error("Failed to create summary metrics plot: %s", str(e)) + raise def save_plots(self, plots: Dict[str, plt.Figure], directory: str) -> None: """Save all plots to specified directory.""" + logger.info("Saving plots to directory: %s", directory) + + if not plots: + logger.warning("No plots provided to save") + return + + logger.debug("Preparing to save %d plots: %s", len(plots), list(plots.keys())) + for name, fig in plots.items(): - fig.savefig(f"{directory}/allocation_{name}.png", dpi=300, bbox_inches="tight") - plt.close(fig) + try: + filepath = f"{directory}/allocation_{name}.png" + fig.savefig(filepath, dpi=300, bbox_inches="tight") + logger.debug("Successfully saved plot '%s' to %s", name, filepath) + plt.close(fig) + except Exception as e: + logger.error("Failed to save plot '%s': %s", name, str(e)) + raise + + logger.info("Successfully saved all plots to directory") \ No newline at end of file diff --git a/python/src/robyn/visualization/base_visualizer.py b/python/src/robyn/visualization/base_visualizer.py index efd5bbdec..b746006f2 100644 --- a/python/src/robyn/visualization/base_visualizer.py +++ b/python/src/robyn/visualization/base_visualizer.py @@ -3,7 +3,10 @@ import seaborn as sns import numpy as np from pathlib import Path +import logging +# Configure logger +logger = logging.getLogger(__name__) class BaseVisualizer: """ @@ -18,6 +21,8 @@ def __init__(self, style: str = "bmh"): Args: style: matplotlib style to use (default: "bmh") """ + logger.info("Initializing BaseVisualizer with style: %s", style) + # Store style settings self.style = style self.default_figsize = (12, 8) @@ -33,9 +38,11 @@ def __init__(self, style: str = "bmh"): "optimal": "#4688C7", # For optimal values "grid": "#E0E0E0", # For grid lines } + logger.debug("Color scheme initialized: %s", self.colors) # Plot settings self.font_sizes = {"title": 14, "subtitle": 12, "label": 12, "tick": 10, "annotation": 9, "legend": 10} + logger.debug("Font sizes configured: %s", self.font_sizes) # Default alpha values self.alpha = {"primary": 0.7, "secondary": 0.5, "grid": 0.3, "annotation": 0.7} @@ -49,22 +56,29 @@ def __init__(self, style: str = "bmh"): # Apply default style self._setup_plot_style() + logger.info("BaseVisualizer initialization completed") def _setup_plot_style(self) -> None: """Configure default plotting style.""" - plt.style.use(self.style) - - plt.rcParams.update( - { - "figure.figsize": self.default_figsize, - "axes.grid": True, - "axes.spines.top": False, - "axes.spines.right": False, - "font.size": self.font_sizes["label"], - "grid.alpha": self.alpha["grid"], - "grid.color": self.colors["grid"], - } - ) + logger.debug("Setting up plot style with style: %s", self.style) + try: + plt.style.use(self.style) + + plt.rcParams.update( + { + "figure.figsize": self.default_figsize, + "axes.grid": True, + "axes.spines.top": False, + "axes.spines.right": False, + "font.size": self.font_sizes["label"], + "grid.alpha": self.alpha["grid"], + "grid.color": self.colors["grid"], + } + ) + logger.debug("Plot style parameters updated successfully") + except Exception as e: + logger.error("Failed to setup plot style: %s", str(e)) + raise def create_figure( self, nrows: int = 1, ncols: int = 1, figsize: Optional[Tuple[int, int]] = None @@ -80,13 +94,21 @@ def create_figure( Returns: Tuple of (figure, axes) """ + logger.info("Creating new figure with dimensions %dx%d", nrows, ncols) figsize = figsize or self.default_figsize - self.current_figure, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize) - if nrows == ncols == 1: - self.current_axes = axes - else: - self.current_axes = np.array(axes) - return self.current_figure, axes + logger.debug("Using figure size: %s", figsize) + + try: + self.current_figure, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize) + if nrows == ncols == 1: + self.current_axes = axes + else: + self.current_axes = np.array(axes) + logger.debug("Figure created successfully") + return self.current_figure, axes + except Exception as e: + logger.error("Failed to create figure: %s", str(e)) + raise def setup_axis( self, @@ -114,25 +136,36 @@ def setup_axis( yticklabels: Optional list of y-axis tick labels rotation: Rotation angle for tick labels """ - if title: - ax.set_title(title, fontsize=self.font_sizes["title"]) - if xlabel: - ax.set_xlabel(xlabel, fontsize=self.font_sizes["label"]) - if ylabel: - ax.set_ylabel(ylabel, fontsize=self.font_sizes["label"]) - - if xticks is not None: - ax.set_xticks(xticks) - if yticks is not None: - ax.set_yticks(yticks) - - if xticklabels is not None: - ax.set_xticklabels(xticklabels, rotation=rotation, fontsize=self.font_sizes["tick"]) - if yticklabels is not None: - ax.set_yticklabels(yticklabels, fontsize=self.font_sizes["tick"]) - - ax.tick_params(labelsize=self.font_sizes["tick"]) - ax.grid(True, alpha=self.alpha["grid"], color=self.colors["grid"]) + logger.debug("Setting up axis with title: %s, xlabel: %s, ylabel: %s", title, xlabel, ylabel) + + try: + if title: + ax.set_title(title, fontsize=self.font_sizes["title"]) + if xlabel: + ax.set_xlabel(xlabel, fontsize=self.font_sizes["label"]) + if ylabel: + ax.set_ylabel(ylabel, fontsize=self.font_sizes["label"]) + + if xticks is not None: + logger.debug("Setting x-ticks: %s", xticks) + ax.set_xticks(xticks) + if yticks is not None: + logger.debug("Setting y-ticks: %s", yticks) + ax.set_yticks(yticks) + + if xticklabels is not None: + logger.debug("Setting x-tick labels with rotation: %d", rotation) + ax.set_xticklabels(xticklabels, rotation=rotation, fontsize=self.font_sizes["tick"]) + if yticklabels is not None: + logger.debug("Setting y-tick labels") + ax.set_yticklabels(yticklabels, fontsize=self.font_sizes["tick"]) + + ax.tick_params(labelsize=self.font_sizes["tick"]) + ax.grid(True, alpha=self.alpha["grid"], color=self.colors["grid"]) + logger.debug("Axis setup completed successfully") + except Exception as e: + logger.error("Failed to setup axis: %s", str(e)) + raise def add_percentage_annotation( self, ax: plt.Axes, x: float, y: float, percentage: float, va: str = "bottom", ha: str = "center" @@ -148,17 +181,23 @@ def add_percentage_annotation( va: Vertical alignment ha: Horizontal alignment """ - color = self.colors["positive"] if percentage >= 0 else self.colors["negative"] - ax.text( - x, - y, - f"{percentage:.1f}%", - color=color, - va=va, - ha=ha, - fontsize=self.font_sizes["annotation"], - alpha=self.alpha["annotation"], - ) + logger.debug("Adding percentage annotation at (x=%f, y=%f) with value: %f%%", x, y, percentage) + try: + color = self.colors["positive"] if percentage >= 0 else self.colors["negative"] + ax.text( + x, + y, + f"{percentage:.1f}%", + color=color, + va=va, + ha=ha, + fontsize=self.font_sizes["annotation"], + alpha=self.alpha["annotation"], + ) + logger.debug("Percentage annotation added successfully") + except Exception as e: + logger.error("Failed to add percentage annotation: %s", str(e)) + raise def add_legend(self, ax: plt.Axes, loc: str = "best", title: Optional[str] = None) -> None: """ @@ -169,9 +208,15 @@ def add_legend(self, ax: plt.Axes, loc: str = "best", title: Optional[str] = Non loc: Legend location title: Optional legend title """ - legend = ax.legend(fontsize=self.font_sizes["legend"], loc=loc, framealpha=self.alpha["annotation"]) - if title: - legend.set_title(title, prop={"size": self.font_sizes["legend"]}) + logger.debug("Adding legend with location: %s and title: %s", loc, title) + try: + legend = ax.legend(fontsize=self.font_sizes["legend"], loc=loc, framealpha=self.alpha["annotation"]) + if title: + legend.set_title(title, prop={"size": self.font_sizes["legend"]}) + logger.debug("Legend added successfully") + except Exception as e: + logger.error("Failed to add legend: %s", str(e)) + raise def finalize_figure(self, tight_layout: bool = True, adjust_spacing: bool = False) -> None: """ @@ -181,13 +226,21 @@ def finalize_figure(self, tight_layout: bool = True, adjust_spacing: bool = Fals tight_layout: Whether to apply tight_layout adjust_spacing: Whether to adjust subplot spacing """ + logger.info("Finalizing figure with tight_layout=%s, adjust_spacing=%s", tight_layout, adjust_spacing) + if self.current_figure is None: + logger.warning("No current figure to finalize") return - if tight_layout: - self.current_figure.tight_layout(pad=self.spacing["tight_layout_pad"]) - if adjust_spacing: - self.current_figure.subplots_adjust(hspace=self.spacing["subplot_adjust_hspace"]) + try: + if tight_layout: + self.current_figure.tight_layout(pad=self.spacing["tight_layout_pad"]) + if adjust_spacing: + self.current_figure.subplots_adjust(hspace=self.spacing["subplot_adjust_hspace"]) + logger.debug("Figure finalization completed successfully") + except Exception as e: + logger.error("Failed to finalize figure: %s", str(e)) + raise def save_plot(self, filename: Union[str, Path], dpi: int = 300, cleanup: bool = True) -> None: """ @@ -198,20 +251,35 @@ def save_plot(self, filename: Union[str, Path], dpi: int = 300, cleanup: bool = dpi: Resolution for saved plot cleanup: Whether to close the plot after saving """ + logger.info("Saving plot to: %s with DPI: %d", filename, dpi) + if self.current_figure is None: - raise ValueError("No current figure to save") + error_msg = "No current figure to save" + logger.error(error_msg) + raise ValueError(error_msg) - filepath = Path(filename) - filepath.parent.mkdir(parents=True, exist_ok=True) + try: + filepath = Path(filename) + filepath.parent.mkdir(parents=True, exist_ok=True) + logger.debug("Created directory structure for: %s", filepath.parent) - self.current_figure.savefig(filepath, dpi=dpi, bbox_inches="tight", facecolor="white", edgecolor="none") + self.current_figure.savefig(filepath, dpi=dpi, bbox_inches="tight", facecolor="white", edgecolor="none") + logger.info("Plot saved successfully to: %s", filepath) - if cleanup: - self.cleanup() + if cleanup: + logger.debug("Cleaning up after save") + self.cleanup() + except Exception as e: + logger.error("Failed to save plot to %s: %s", filename, str(e)) + raise def cleanup(self) -> None: """Close the current plot and clear matplotlib memory.""" + logger.debug("Performing cleanup") if self.current_figure is not None: plt.close(self.current_figure) self.current_figure = None self.current_axes = None + logger.debug("Cleanup completed successfully") + else: + logger.debug("No figure to clean up") \ No newline at end of file diff --git a/python/src/robyn/visualization/feature_visualization.py b/python/src/robyn/visualization/feature_visualization.py index 3a3d55f38..94cdead9f 100644 --- a/python/src/robyn/visualization/feature_visualization.py +++ b/python/src/robyn/visualization/feature_visualization.py @@ -2,10 +2,12 @@ import pandas as pd import matplotlib.pyplot as plt import seaborn as sns +import logging from robyn.data.entities.mmmdata import MMMData from robyn.data.entities.hyperparameters import Hyperparameters, ChannelHyperparameters -from robyn.modeling.feature_engineering import FeaturizedMMMData # New import +from robyn.modeling.feature_engineering import FeaturizedMMMData +logger = logging.getLogger(__name__) class FeaturePlotter: """ @@ -22,6 +24,9 @@ def __init__(self, mmm_data: MMMData, hyperparameters: Hyperparameters): """ self.mmm_data = mmm_data self.hyperparameters = hyperparameters + logger.info("Initializing FeaturePlotter with %s data points", len(mmm_data)) + logger.debug("MMM Data: %s", mmm_data) + logger.debug("Hyperparameters: %s", hyperparameters) def plot_adstock(self, channel: str) -> plt.Figure: """ @@ -33,7 +38,15 @@ def plot_adstock(self, channel: str) -> plt.Figure: Returns: plt.Figure: A matplotlib Figure object containing the adstock plot. """ - pass + logger.info("Generating adstock plot for channel: %s", channel) + logger.debug("Processing adstock transformation for channel %s", channel) + try: + # Implementation placeholder + logger.warning("plot_adstock method not implemented yet") + pass + except Exception as e: + logger.error("Failed to generate adstock plot for channel %s: %s", channel, str(e)) + raise def plot_saturation(self, channel: str) -> plt.Figure: """ @@ -45,7 +58,15 @@ def plot_saturation(self, channel: str) -> plt.Figure: Returns: plt.Figure: A matplotlib Figure object containing the saturation curves plot. """ - pass + logger.info("Generating saturation plot for channel: %s", channel) + logger.debug("Processing saturation curve transformation for channel %s", channel) + try: + # Implementation placeholder + logger.warning("plot_saturation method not implemented yet") + pass + except Exception as e: + logger.error("Failed to generate saturation plot for channel %s: %s", channel, str(e)) + raise def plot_spend_exposure(self, featurized_data: FeaturizedMMMData, channel: str) -> plt.Figure: """ @@ -58,52 +79,71 @@ def plot_spend_exposure(self, featurized_data: FeaturizedMMMData, channel: str) Returns: plt.Figure: A matplotlib Figure object containing the spend-exposure plot. """ - if channel not in featurized_data.modNLS["results"]: - raise ValueError(f"No spend-exposure data available for channel: {channel}") - - res = featurized_data.modNLS["results"][channel] - plot_data = featurized_data.modNLS["plots"][channel] - - fig, ax = plt.subplots(figsize=(10, 6)) - - # Plot scatter of actual data - sns.scatterplot(x="spend", y="exposure", data=plot_data, ax=ax, alpha=0.6, label="Actual") - - # Plot fitted line - sns.lineplot(x="spend", y="yhat", data=plot_data, ax=ax, color="red", label="Fitted") - - ax.set_xlabel(f"Spend [{channel}]") - ax.set_ylabel(f"Exposure [{channel}]") - ax.set_title(f"Spend vs Exposure for {channel}") - - # Add model information to the plot - model_type = res["model_type"] - rsq = res["rsq"] - if model_type == "nls": - Vmax, Km = res["coef"]["Vmax"], res["coef"]["Km"] - ax.text( - 0.05, - 0.95, - f"Model: Michaelis-Menten\nR² = {rsq:.4f}\nVmax = {Vmax:.2f}\nKm = {Km:.2f}", - transform=ax.transAxes, - verticalalignment="top", - bbox=dict(boxstyle="round", facecolor="white", alpha=0.7), - ) - else: - coef = res["coef"]["coef"] - ax.text( - 0.05, - 0.95, - f"Model: Linear\nR² = {rsq:.4f}\nCoefficient = {coef:.4f}", - transform=ax.transAxes, - verticalalignment="top", - bbox=dict(boxstyle="round", facecolor="white", alpha=0.7), - ) - - plt.legend() - plt.tight_layout() - - return fig + logger.info("Generating spend-exposure plot for channel: %s", channel) + logger.debug("Featurized data being processed: %s", featurized_data) + + try: + if channel not in featurized_data.modNLS["results"]: + logger.error("Channel %s not found in featurized data results", channel) + raise ValueError(f"No spend-exposure data available for channel: {channel}") + + res = featurized_data.modNLS["results"][channel] + plot_data = featurized_data.modNLS["plots"][channel] + + logger.debug("Retrieved model results for channel %s: %s", channel, res) + logger.debug("Plot data shape: %s", plot_data.shape if hasattr(plot_data, 'shape') else 'N/A') + + fig, ax = plt.subplots(figsize=(10, 6)) + + # Plot scatter of actual data + sns.scatterplot(x="spend", y="exposure", data=plot_data, ax=ax, alpha=0.6, label="Actual") + logger.debug("Created scatter plot for actual data") + + # Plot fitted line + sns.lineplot(x="spend", y="yhat", data=plot_data, ax=ax, color="red", label="Fitted") + logger.debug("Added fitted line to plot") + + ax.set_xlabel(f"Spend [{channel}]") + ax.set_ylabel(f"Exposure [{channel}]") + ax.set_title(f"Spend vs Exposure for {channel}") + + # Add model information to the plot + model_type = res["model_type"] + rsq = res["rsq"] + logger.debug("Model type: %s, R-squared: %f", model_type, rsq) + + if model_type == "nls": + Vmax, Km = res["coef"]["Vmax"], res["coef"]["Km"] + ax.text( + 0.05, + 0.95, + f"Model: Michaelis-Menten\nR² = {rsq:.4f}\nVmax = {Vmax:.2f}\nKm = {Km:.2f}", + transform=ax.transAxes, + verticalalignment="top", + bbox=dict(boxstyle="round", facecolor="white", alpha=0.7), + ) + logger.debug("Added NLS model parameters: Vmax=%f, Km=%f", Vmax, Km) + else: + coef = res["coef"]["coef"] + ax.text( + 0.05, + 0.95, + f"Model: Linear\nR² = {rsq:.4f}\nCoefficient = {coef:.4f}", + transform=ax.transAxes, + verticalalignment="top", + bbox=dict(boxstyle="round", facecolor="white", alpha=0.7), + ) + logger.debug("Added linear model parameters: coefficient=%f", coef) + + plt.legend() + plt.tight_layout() + + logger.info("Successfully generated spend-exposure plot for channel %s", channel) + return fig + + except Exception as e: + logger.error("Failed to generate spend-exposure plot for channel %s: %s", channel, str(e), exc_info=True) + raise def plot_feature_importance(self, feature_importance: Dict[str, float]) -> plt.Figure: """ @@ -115,7 +155,15 @@ def plot_feature_importance(self, feature_importance: Dict[str, float]) -> plt.F Returns: plt.Figure: A matplotlib Figure object containing the feature importance plot. """ - pass + logger.info("Generating feature importance plot") + logger.debug("Feature importance data: %s", feature_importance) + try: + # Implementation placeholder + logger.warning("plot_feature_importance method not implemented yet") + pass + except Exception as e: + logger.error("Failed to generate feature importance plot: %s", str(e)) + raise def plot_response_curves(self, featurized_data: FeaturizedMMMData) -> Dict[str, plt.Figure]: """ @@ -127,6 +175,14 @@ def plot_response_curves(self, featurized_data: FeaturizedMMMData) -> Dict[str, Returns: Dict[str, plt.Figure]: Dictionary mapping channel names to their respective response curve plots. """ - dt_mod = featurized_data.dt_mod - # Rest of the method implementation - pass + logger.info("Generating response curves") + logger.debug("Processing featurized data: %s", featurized_data) + try: + dt_mod = featurized_data.dt_mod + logger.debug("Modified data: %s", dt_mod) + # Rest of the method implementation + logger.warning("plot_response_curves method not fully implemented yet") + pass + except Exception as e: + logger.error("Failed to generate response curves: %s", str(e)) + raise \ No newline at end of file diff --git a/python/src/robyn/visualization/media_response_visualizer.py b/python/src/robyn/visualization/media_response_visualizer.py index 42da61600..e26558c2d 100644 --- a/python/src/robyn/visualization/media_response_visualizer.py +++ b/python/src/robyn/visualization/media_response_visualizer.py @@ -1,9 +1,15 @@ +import logging import matplotlib.pyplot as plt from robyn.modeling.entities.pareto_result import ParetoResult -class MediaResponseVisualizer(): +logger = logging.getLogger(__name__) + +class MediaResponseVisualizer: def __init__(self, pareto_result: ParetoResult): + """Initialize the MediaResponseVisualizer with a ParetoResult object.""" + logger.debug("Initializing MediaResponseVisualizer with ParetoResult: %s", str(pareto_result)) self.pareto_result = pareto_result + logger.info("MediaResponseVisualizer initialized successfully") def plot_adstock(self) -> plt.Figure: """ @@ -12,9 +18,21 @@ def plot_adstock(self) -> plt.Figure: Returns: plt.Figure: The generated figure. """ - fig, ax = plt.subplots() - # Add plotting logic here - return fig + logger.debug("Starting adstock plot generation") + + try: + fig, ax = plt.subplots() + logger.debug("Created figure and axis objects for adstock plot") + + # Add plotting logic here + logger.debug("Applying plotting logic for adstock visualization") + + logger.info("Successfully generated adstock plot") + return fig + + except Exception as e: + logger.error("Failed to generate adstock plot: %s", str(e), exc_info=True) + raise def plot_saturation(self) -> plt.Figure: """ @@ -23,9 +41,21 @@ def plot_saturation(self) -> plt.Figure: Returns: plt.Figure: The generated figure. """ - fig, ax = plt.subplots() - # Add plotting logic here - return fig + logger.debug("Starting saturation plot generation") + + try: + fig, ax = plt.subplots() + logger.debug("Created figure and axis objects for saturation plot") + + # Add plotting logic here + logger.debug("Applying plotting logic for saturation visualization") + + logger.info("Successfully generated saturation plot") + return fig + + except Exception as e: + logger.error("Failed to generate saturation plot: %s", str(e), exc_info=True) + raise def plot_spend_exposure_fit(self) -> plt.Figure: """ @@ -34,7 +64,22 @@ def plot_spend_exposure_fit(self) -> plt.Figure: Returns: plt.Figure: The generated figure. """ + logger.debug("Starting spend exposure fit plot generation") - fig, ax = plt.subplots() - # Add plotting logic here - return fig \ No newline at end of file + try: + if not hasattr(self.pareto_result, 'spend_exposure_data'): + logger.warning("Spend exposure data not available in ParetoResult") + return None + + fig, ax = plt.subplots() + logger.debug("Created figure and axis objects for spend exposure fit plot") + + # Add plotting logic here + logger.debug("Applying plotting logic for spend exposure fit visualization") + + logger.info("Successfully generated spend exposure fit plot") + return fig + + except Exception as e: + logger.error("Failed to generate spend exposure fit plot: %s", str(e), exc_info=True) + raise \ No newline at end of file diff --git a/python/src/robyn/visualization/model_convergence_visualizer.py b/python/src/robyn/visualization/model_convergence_visualizer.py index a1da2bcc4..d3a188fc6 100644 --- a/python/src/robyn/visualization/model_convergence_visualizer.py +++ b/python/src/robyn/visualization/model_convergence_visualizer.py @@ -5,131 +5,168 @@ from typing import List import io import base64 +import logging from robyn.modeling.entities.modeloutputs import Trial +# Initialize logger for this module +logger = logging.getLogger(__name__) class ModelConvergenceVisualizer: def __init__(self, n_cuts: int, nrmse_win: List[float]): self.n_cuts = n_cuts self.nrmse_win = nrmse_win + logger.info("Initialized ModelConvergenceVisualizer with n_cuts=%d, nrmse_win=%s", n_cuts, nrmse_win) def create_moo_distrb_plot(self, dt_objfunc_cvg: pd.DataFrame, conv_msg: List[str]) -> str: - dt_objfunc_cvg["id"] = dt_objfunc_cvg["cuts"].astype(int) - dt_objfunc_cvg["cuts"] = pd.Categorical( - dt_objfunc_cvg["cuts"], categories=sorted(dt_objfunc_cvg["cuts"].unique(), reverse=True) - ) - - for error_type in dt_objfunc_cvg["error_type"].unique(): - mask = dt_objfunc_cvg["error_type"] == error_type - dt_objfunc_cvg.loc[mask, "value"] = np.clip( - dt_objfunc_cvg.loc[mask, "value"], *np.quantile(dt_objfunc_cvg.loc[mask, "value"], self.nrmse_win) + logger.debug("Starting moo distribution plot creation with data shape: %s", dt_objfunc_cvg.shape) + + try: + dt_objfunc_cvg["id"] = dt_objfunc_cvg["cuts"].astype(int) + dt_objfunc_cvg["cuts"] = pd.Categorical( + dt_objfunc_cvg["cuts"], categories=sorted(dt_objfunc_cvg["cuts"].unique(), reverse=True) ) - - fig, ax = plt.subplots(figsize=(12, 8)) - sns.violinplot(data=dt_objfunc_cvg, x="value", y="cuts", hue="error_type", split=True, inner="quartile", ax=ax) - ax.set_xlabel("Objective functions") - ax.set_ylabel("Iterations [#]") - ax.set_title("Objective convergence by iterations quantiles") - plt.tight_layout() - - # Add convergence messages as caption - plt.figtext(0.5, 0.01, "\n".join(conv_msg), ha="center", fontsize=8, wrap=True) - - return self._convert_plot_to_base64(fig) + + logger.debug("Processing error types: %s", dt_objfunc_cvg["error_type"].unique()) + for error_type in dt_objfunc_cvg["error_type"].unique(): + mask = dt_objfunc_cvg["error_type"] == error_type + original_values = dt_objfunc_cvg.loc[mask, "value"] + quantiles = np.quantile(original_values, self.nrmse_win) + dt_objfunc_cvg.loc[mask, "value"] = np.clip(original_values, *quantiles) + logger.debug("Clipped values for error_type %s: min=%f, max=%f", + error_type, quantiles[0], quantiles[1]) + + fig, ax = plt.subplots(figsize=(12, 8)) + sns.violinplot(data=dt_objfunc_cvg, x="value", y="cuts", + hue="error_type", split=True, inner="quartile", ax=ax) + ax.set_xlabel("Objective functions") + ax.set_ylabel("Iterations [#]") + ax.set_title("Objective convergence by iterations quantiles") + plt.tight_layout() + + plt.figtext(0.5, 0.01, "\n".join(conv_msg), ha="center", fontsize=8, wrap=True) + + logger.info("Successfully created moo distribution plot") + return self._convert_plot_to_base64(fig) + + except Exception as e: + logger.error("Failed to create moo distribution plot: %s", str(e), exc_info=True) + raise def create_moo_cloud_plot(self, df: pd.DataFrame, conv_msg: List[str], calibrated: bool) -> str: - df["nrmse"] = np.clip(df["nrmse"], *np.quantile(df["nrmse"], self.nrmse_win)) - - fig, ax = plt.subplots(figsize=(10, 8)) - scatter = ax.scatter(df["nrmse"], df["decomp.rssd"], c=df["ElapsedAccum"], cmap="viridis") - - if calibrated and "mape" in df.columns: - sizes = (df["mape"] - df["mape"].min()) / (df["mape"].max() - df["mape"].min()) - sizes = sizes * 100 + 10 # Scale sizes - ax.scatter(df["nrmse"], df["decomp.rssd"], s=sizes, alpha=0.5) - - plt.colorbar(scatter, label="Time [s]") - ax.set_xlabel("NRMSE") - ax.set_ylabel("DECOMP.RSSD") - ax.set_title("Multi-objective evolutionary performance") - - # Add convergence messages as caption - plt.figtext(0.5, 0.01, "\n".join(conv_msg), ha="center", fontsize=8, wrap=True) + logger.debug("Starting moo cloud plot creation with data shape: %s, calibrated=%s", + df.shape, calibrated) + + try: + original_nrmse = df["nrmse"] + quantiles = np.quantile(original_nrmse, self.nrmse_win) + df["nrmse"] = np.clip(original_nrmse, *quantiles) + logger.debug("Clipped NRMSE values: min=%f, max=%f", quantiles[0], quantiles[1]) + + fig, ax = plt.subplots(figsize=(10, 8)) + scatter = ax.scatter(df["nrmse"], df["decomp.rssd"], + c=df["ElapsedAccum"], cmap="viridis") + + if calibrated and "mape" in df.columns: + logger.debug("Adding calibrated MAPE visualization") + sizes = (df["mape"] - df["mape"].min()) / (df["mape"].max() - df["mape"].min()) + sizes = sizes * 100 + 10 + ax.scatter(df["nrmse"], df["decomp.rssd"], s=sizes, alpha=0.5) + + plt.colorbar(scatter, label="Time [s]") + ax.set_xlabel("NRMSE") + ax.set_ylabel("DECOMP.RSSD") + ax.set_title("Multi-objective evolutionary performance") + + plt.figtext(0.5, 0.01, "\n".join(conv_msg), ha="center", fontsize=8, wrap=True) + plt.tight_layout() + + logger.info("Successfully created moo cloud plot") + return self._convert_plot_to_base64(fig) + + except Exception as e: + logger.error("Failed to create moo cloud plot: %s", str(e), exc_info=True) + raise - plt.tight_layout() + def create_ts_validation_plot(self, trials: List[Trial]) -> str: + logger.debug("Starting time-series validation plot creation with %d trials", len(trials)) + + try: + result_hyp_param = pd.concat([trial.result_hyp_param for trial in trials], ignore_index=True) + result_hyp_param["trial"] = result_hyp_param.groupby("solID").cumcount() + 1 + result_hyp_param["iteration"] = result_hyp_param.index + 1 + + logger.debug("Processing metrics for validation plot") + result_hyp_param_long = result_hyp_param.melt( + id_vars=["solID", "trial", "train_size", "iteration"], + value_vars=["rsq_train", "rsq_val", "rsq_test", + "nrmse_train", "nrmse_val", "nrmse_test"], + var_name="metric", + value_name="value", + ) - return self._convert_plot_to_base64(fig) + result_hyp_param_long["dataset"] = result_hyp_param_long["metric"].str.split("_").str[-1] + result_hyp_param_long["metric_type"] = result_hyp_param_long["metric"].str.split("_").str[0] - @staticmethod - def _convert_plot_to_base64(fig: plt.Figure) -> str: - buffer = io.BytesIO() - fig.savefig(buffer, format="png") - buffer.seek(0) - image_png = buffer.getvalue() - buffer.close() - graphic = base64.b64encode(image_png) - return graphic.decode("utf-8") + # Winsorize the data + logger.debug("Winsorizing metric values") + result_hyp_param_long["value"] = result_hyp_param_long.groupby("metric_type")["value"].transform( + lambda x: np.clip(x, + np.percentile(x, self.nrmse_win[0] * 100), + np.percentile(x, self.nrmse_win[1] * 100)) + ) - def create_ts_validation_plot(self, trials: List[Trial]) -> str: - result_hyp_param = pd.concat([trial.result_hyp_param for trial in trials], ignore_index=True) - result_hyp_param["trial"] = result_hyp_param.groupby("solID").cumcount() + 1 - result_hyp_param["iteration"] = result_hyp_param.index + 1 # Use this instead of 'i' - - result_hyp_param_long = result_hyp_param.melt( - id_vars=["solID", "trial", "train_size", "iteration"], - value_vars=["rsq_train", "rsq_val", "rsq_test", "nrmse_train", "nrmse_val", "nrmse_test"], - var_name="metric", - value_name="value", - ) - - result_hyp_param_long["dataset"] = result_hyp_param_long["metric"].str.split("_").str[-1] - result_hyp_param_long["metric_type"] = result_hyp_param_long["metric"].str.split("_").str[0] - - # Winsorize the data - result_hyp_param_long["value"] = result_hyp_param_long.groupby("metric_type")["value"].transform( - lambda x: np.clip(x, np.percentile(x, self.nrmse_win[0] * 100), np.percentile(x, self.nrmse_win[1] * 100)) - ) - - fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), height_ratios=[3, 1]) - - # NRMSE plot - sns.scatterplot( - data=result_hyp_param_long[result_hyp_param_long["metric_type"] == "nrmse"], - x="iteration", - y="value", - hue="dataset", - style="trial", - alpha=0.5, - ax=ax1, - ) - sns.lineplot( - data=result_hyp_param_long[result_hyp_param_long["metric_type"] == "nrmse"], - x="iteration", - y="value", - hue="dataset", - ax=ax1, - ) - ax1.set_ylabel("NRMSE [Winsorized]") - ax1.set_xlabel("Iteration") - ax1.legend(title="Dataset") - - # Train Size plot - sns.scatterplot(data=result_hyp_param, x="iteration", y="train_size", hue="trial", ax=ax2, legend=False) - ax2.set_ylabel("Train Size") - ax2.set_xlabel("Iteration") - ax2.set_ylim(0, 1) - ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: "{:.0%}".format(y))) - - plt.suptitle("Time-series validation & Convergence") - plt.tight_layout() - - return self._convert_plot_to_base64(fig) + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), height_ratios=[3, 1]) + + # NRMSE plot + sns.scatterplot( + data=result_hyp_param_long[result_hyp_param_long["metric_type"] == "nrmse"], + x="iteration", + y="value", + hue="dataset", + style="trial", + alpha=0.5, + ax=ax1, + ) + sns.lineplot( + data=result_hyp_param_long[result_hyp_param_long["metric_type"] == "nrmse"], + x="iteration", + y="value", + hue="dataset", + ax=ax1, + ) + ax1.set_ylabel("NRMSE [Winsorized]") + ax1.set_xlabel("Iteration") + ax1.legend(title="Dataset") + + # Train Size plot + sns.scatterplot(data=result_hyp_param, x="iteration", y="train_size", + hue="trial", ax=ax2, legend=False) + ax2.set_ylabel("Train Size") + ax2.set_xlabel("Iteration") + ax2.set_ylim(0, 1) + ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: "{:.0%}".format(y))) + + plt.suptitle("Time-series validation & Convergence") + plt.tight_layout() + + logger.info("Successfully created time-series validation plot") + return self._convert_plot_to_base64(fig) + + except Exception as e: + logger.error("Failed to create time-series validation plot: %s", str(e), exc_info=True) + raise def _convert_plot_to_base64(self, fig: plt.Figure) -> str: - buffer = io.BytesIO() - fig.savefig(buffer, format="png") - buffer.seek(0) - image_png = buffer.getvalue() - buffer.close() - graphic = base64.b64encode(image_png) - return graphic.decode("utf-8") + logger.debug("Converting plot to base64") + try: + buffer = io.BytesIO() + fig.savefig(buffer, format="png") + buffer.seek(0) + image_png = buffer.getvalue() + buffer.close() + graphic = base64.b64encode(image_png) + logger.debug("Successfully converted plot to base64") + return graphic.decode("utf-8") + except Exception as e: + logger.error("Failed to convert plot to base64: %s", str(e), exc_info=True) + raise \ No newline at end of file diff --git a/python/src/robyn/visualization/response_visualizer.py b/python/src/robyn/visualization/response_visualizer.py index cebf9fdcd..997689505 100644 --- a/python/src/robyn/visualization/response_visualizer.py +++ b/python/src/robyn/visualization/response_visualizer.py @@ -2,12 +2,16 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import logging from robyn.data.entities.mmmdata import MMMData from robyn.modeling.entities.pareto_result import ParetoResult +logger = logging.getLogger(__name__) class ResponseVisualizer(): def __init__(self, pareto_result: ParetoResult, mmm_data: MMMData): + logger.debug("Initializing ResponseVisualizer with pareto_result=%s, mmm_data=%s", + pareto_result, mmm_data) self.pareto_result = pareto_result self.mmm_data = mmm_data @@ -18,6 +22,7 @@ def plot_response(self) -> plt.Figure: Returns: plt.Figure: The generated figure. """ + logger.info("Starting response curve plotting") pass def plot_marginal_response(self) -> plt.Figure: @@ -27,6 +32,7 @@ def plot_marginal_response(self) -> plt.Figure: Returns: plt.Figure: The generated figure. """ + logger.info("Starting marginal response curve plotting") pass def generate_response_curves(self, ax: Optional[plt.Axes] = None, trim_rate: float = 1.3) -> Optional[plt.Figure]: @@ -42,126 +48,160 @@ def generate_response_curves(self, ax: Optional[plt.Axes] = None, trim_rate: flo Returns: Optional[plt.Figure]: Generated matplotlib Figure object if ax is None, otherwise None """ - # Get plot data - plot_data = next(iter(self.pareto_result.plot_data_collect.values())) - curve_data = plot_data['plot4data']['dt_scurvePlot'].copy() - mean_data = plot_data['plot4data']['dt_scurvePlotMean'].copy() + logger.info("Generating response curves with trim_rate=%.2f", trim_rate) - # Add channel if missing in mean data - if 'channel' not in mean_data.columns: - mean_data['channel'] = mean_data['rn'] - - # Trim data if specified - if trim_rate > 0: - max_spend = mean_data['mean_spend_adstocked'].max() * trim_rate - max_response = mean_data['mean_response'].max() * trim_rate - - # Filter curve data - curve_data = curve_data[ - (curve_data['spend'] < max_spend) & - (curve_data['response'] < max_response) - ] - - # Add mean carryover information - curve_data = curve_data.merge( - mean_data[['channel', 'mean_carryover']], - on='channel', - how='left' - ) - - # Create figure if no axes provided - if ax is None: - fig, ax = plt.subplots(figsize=(12, 8)) - else: - fig = None - - # Set up colors using Set2 colormap - channels = curve_data['channel'].unique() - colors = plt.cm.Set2(np.linspace(0, 1, len(channels))) - - # Plot response curves for each channel - for idx, channel in enumerate(channels): - # Get channel data and sort by spend for smooth curve - channel_data = curve_data[curve_data['channel'] == channel].sort_values('spend') - - # Plot response curve - ax.plot(channel_data['spend'], - channel_data['response'], - color=colors[idx], - label=channel, - zorder=2) - - # Add shaded area up to mean carryover - if 'mean_carryover' in channel_data.columns: - carryover_data = channel_data[channel_data['spend'] <= channel_data['mean_carryover'].iloc[0]] - ax.fill_between(carryover_data['spend'], - carryover_data['response'], - color='grey', - alpha=0.2, - zorder=1) - - # Add mean points and labels - for idx, row in mean_data.iterrows(): - # Add point - ax.scatter(row['mean_spend_adstocked'], - row['mean_response'], - color=colors[idx], - s=100, - zorder=3) - - # Add label with abbreviated formatting - if abs(row['mean_spend_adstocked']) >= 1e9: - formatted_spend = f"{row['mean_spend_adstocked']/1e9:.1f}B" - elif abs(row['mean_spend_adstocked']) >= 1e6: - formatted_spend = f"{row['mean_spend_adstocked']/1e6:.1f}M" - elif abs(row['mean_spend_adstocked']) >= 1e3: - formatted_spend = f"{row['mean_spend_adstocked']/1e3:.1f}K" + try: + # Get plot data + logger.debug("Extracting plot data from pareto results") + plot_data = next(iter(self.pareto_result.plot_data_collect.values())) + curve_data = plot_data['plot4data']['dt_scurvePlot'].copy() + mean_data = plot_data['plot4data']['dt_scurvePlotMean'].copy() + + logger.debug("Initial curve data shape: %s", curve_data.shape) + logger.debug("Initial mean data shape: %s", mean_data.shape) + + # Add channel if missing in mean data + if 'channel' not in mean_data.columns: + logger.debug("Adding missing channel column to mean data") + mean_data['channel'] = mean_data['rn'] + + # Trim data if specified + if trim_rate > 0: + logger.debug("Trimming data with rate %.2f", trim_rate) + max_spend = mean_data['mean_spend_adstocked'].max() * trim_rate + max_response = mean_data['mean_response'].max() * trim_rate + + logger.debug("Max spend threshold: %.2f", max_spend) + logger.debug("Max response threshold: %.2f", max_response) + + # Filter curve data + original_rows = len(curve_data) + curve_data = curve_data[ + (curve_data['spend'] < max_spend) & + (curve_data['response'] < max_response) + ] + filtered_rows = len(curve_data) + + logger.info("Filtered %d rows from curve data", original_rows - filtered_rows) + + # Add mean carryover information + logger.debug("Adding mean carryover information") + curve_data = curve_data.merge( + mean_data[['channel', 'mean_carryover']], + on='channel', + how='left' + ) + + # Create figure if no axes provided + if ax is None: + logger.debug("Creating new figure with axes") + fig, ax = plt.subplots(figsize=(12, 8)) else: - formatted_spend = f"{row['mean_spend_adstocked']:.1f}" + logger.debug("Using provided axes") + fig = None + + # Set up colors using Set2 colormap + channels = curve_data['channel'].unique() + logger.debug("Processing %d unique channels: %s", len(channels), channels) + colors = plt.cm.Set2(np.linspace(0, 1, len(channels))) + + # Plot response curves for each channel + for idx, channel in enumerate(channels): + logger.debug("Plotting response curve for channel: %s", channel) + # Get channel data and sort by spend for smooth curve + channel_data = curve_data[curve_data['channel'] == channel].sort_values('spend') - ax.text(row['mean_spend_adstocked'], - row['mean_response'], - formatted_spend, - ha='left', - va='bottom', - fontsize=9, - color=colors[idx]) - - # Format axes with K/M/B notation - def format_axis_labels(x, p): - if abs(x) >= 1e9: - return f"{x/1e9:.0f}B" - elif abs(x) >= 1e6: - return f"{x/1e6:.0f}M" - elif abs(x) >= 1e3: - return f"{x/1e3:.0f}K" - return f"{x:.0f}" - - ax.xaxis.set_major_formatter(plt.FuncFormatter(format_axis_labels)) - ax.yaxis.set_major_formatter(plt.FuncFormatter(format_axis_labels)) - - # Customize plot - ax.grid(True, alpha=0.2) - ax.set_axisbelow(True) - - # Remove unnecessary spines - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - - # Set title and labels - ax.set_title('Response Curves and Mean Spends by Channel') - ax.set_xlabel('Spend (carryover + immediate)') - ax.set_ylabel('Response') - - # Add legend - ax.legend(bbox_to_anchor=(1.02, 0.5), - loc='center left', - frameon=True, - framealpha=0.8, - facecolor='white', - edgecolor='none') - - if fig: - plt.tight_layout() - return fig - return None \ No newline at end of file + # Plot response curve + ax.plot(channel_data['spend'], + channel_data['response'], + color=colors[idx], + label=channel, + zorder=2) + + # Add shaded area up to mean carryover + if 'mean_carryover' in channel_data.columns: + logger.debug("Adding carryover shading for channel: %s", channel) + carryover_data = channel_data[channel_data['spend'] <= channel_data['mean_carryover'].iloc[0]] + ax.fill_between(carryover_data['spend'], + carryover_data['response'], + color='grey', + alpha=0.2, + zorder=1) + + # Add mean points and labels + logger.debug("Adding mean points and labels") + for idx, row in mean_data.iterrows(): + # Add point + ax.scatter(row['mean_spend_adstocked'], + row['mean_response'], + color=colors[idx], + s=100, + zorder=3) + + # Add label with abbreviated formatting + spend_value = row['mean_spend_adstocked'] + if abs(spend_value) >= 1e9: + formatted_spend = f"{spend_value/1e9:.1f}B" + elif abs(spend_value) >= 1e6: + formatted_spend = f"{spend_value/1e6:.1f}M" + elif abs(spend_value) >= 1e3: + formatted_spend = f"{spend_value/1e3:.1f}K" + else: + formatted_spend = f"{spend_value:.1f}" + + ax.text(row['mean_spend_adstocked'], + row['mean_response'], + formatted_spend, + ha='left', + va='bottom', + fontsize=9, + color=colors[idx]) + + logger.debug("Formatting axis labels") + # Format axes with K/M/B notation + def format_axis_labels(x, p): + if abs(x) >= 1e9: + return f"{x/1e9:.0f}B" + elif abs(x) >= 1e6: + return f"{x/1e6:.0f}M" + elif abs(x) >= 1e3: + return f"{x/1e3:.0f}K" + return f"{x:.0f}" + + ax.xaxis.set_major_formatter(plt.FuncFormatter(format_axis_labels)) + ax.yaxis.set_major_formatter(plt.FuncFormatter(format_axis_labels)) + + # Customize plot + logger.debug("Applying final plot customizations") + ax.grid(True, alpha=0.2) + ax.set_axisbelow(True) + + # Remove unnecessary spines + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + + # Set title and labels + ax.set_title('Response Curves and Mean Spends by Channel') + ax.set_xlabel('Spend (carryover + immediate)') + ax.set_ylabel('Response') + + # Add legend + ax.legend(bbox_to_anchor=(1.02, 0.5), + loc='center left', + frameon=True, + framealpha=0.8, + facecolor='white', + edgecolor='none') + + if fig: + logger.debug("Adjusting layout") + plt.tight_layout() + logger.info("Successfully generated response curves figure") + return fig + + logger.info("Successfully added response curves to existing axes") + return None + + except Exception as e: + logger.error("Error generating response curves: %s", str(e), exc_info=True) + raise \ No newline at end of file diff --git a/python/src/robyn/visualization/robyn_visualizer.py b/python/src/robyn/visualization/robyn_visualizer.py deleted file mode 100644 index d00c1b389..000000000 --- a/python/src/robyn/visualization/robyn_visualizer.py +++ /dev/null @@ -1,52 +0,0 @@ -from typing import Dict, Any -import matplotlib.pyplot as plt -from robyn.visualization.input_visualizer import InputVisualizer -from robyn.visualization.model_convergence_visualizer import ModelVisualizer -from robyn.visualization.allocator_visualizer import AllocatorVisualizer -from robyn.visualization.response_visualizer import ResponseVisualizer - -class RobynVisualizer: - def __init__(self): - self.input_visualizer = None - self.model_visualizer = None - self.allocator_visualizer = None - self.response_visualizer = None - - def set_input_data(self, input_data: Dict[str, Any]): - self.input_visualizer = InputVisualizer(input_data) - - def set_model_data(self, model_data: Dict[str, Any]): - self.model_visualizer = ModelVisualizer(model_data) - - def set_allocator_data(self, allocator_data: Dict[str, Any]): - self.allocator_visualizer = AllocatorVisualizer(allocator_data) - - def set_response_data(self, response_data: Dict[str, Any]): - self.response_visualizer = ResponseVisualizer(response_data) - - def plot_adstock(self) -> plt.Figure: - return self.input_visualizer.plot_adstock() - - def plot_saturation(self) -> plt.Figure: - return self.input_visualizer.plot_saturation() - - def plot_moo_distribution(self) -> plt.Figure: - return self.model_visualizer.plot_moo_distribution() - - def plot_moo_cloud(self) -> plt.Figure: - return self.model_visualizer.plot_moo_cloud() - - def plot_ts_validation(self) -> plt.Figure: - return self.model_visualizer.plot_ts_validation() - - def plot_onepager(self, input_collect: Dict[str, Any], output_collect: Dict[str, Any], select_model: str) -> Dict[str, plt.Figure]: - return self.model_visualizer.plot_onepager(input_collect, output_collect, select_model) - - def plot_allocator(self) -> plt.Figure: - return self.allocator_visualizer.plot_allocator() - - def plot_response(self) -> plt.Figure: - return self.response_visualizer.plot_response() - - def plot_marginal_response(self) -> plt.Figure: - return self.response_visualizer.plot_marginal_response() diff --git a/python/src/robyn/visualization/transformation_visualizer.py b/python/src/robyn/visualization/transformation_visualizer.py index b80692584..b8167f060 100644 --- a/python/src/robyn/visualization/transformation_visualizer.py +++ b/python/src/robyn/visualization/transformation_visualizer.py @@ -1,8 +1,7 @@ # pyre-strict import matplotlib.pyplot as plt from typing import List, Tuple, Optional -from typing import Tuple - +import logging from matplotlib.ticker import PercentFormatter import numpy as np from robyn.data.entities.enums import DependentVarType @@ -11,8 +10,12 @@ from matplotlib.patches import Patch import seaborn as sns +logger = logging.getLogger(__name__) + class TransformationVisualizer: def __init__(self, pareto_result: ParetoResult, mmm_data: MMMData): + logger.debug("Initializing TransformationVisualizer with pareto_result=%s, mmm_data=%s", + pareto_result, mmm_data) self.pareto_result = pareto_result self.mmm_data = mmm_data @@ -20,13 +23,25 @@ def create_adstock_plots(self) -> None: """ Generate adstock visualization plots and store them as instance variables. """ - pass + logger.info("Starting creation of adstock plots") + try: + # Implementation placeholder + logger.debug("Adstock plots creation completed successfully") + except Exception as e: + logger.error("Failed to create adstock plots: %s", str(e)) + raise def create_saturation_plots(self) -> None: """ Generate saturation visualization plots and store them as instance variables. """ - pass + logger.info("Starting creation of saturation plots") + try: + # Implementation placeholder + logger.debug("Saturation plots creation completed successfully") + except Exception as e: + logger.error("Failed to create saturation plots: %s", str(e)) + raise def get_adstock_plots(self) -> Optional[Tuple[plt.Figure, plt.Figure]]: """ @@ -35,7 +50,14 @@ def get_adstock_plots(self) -> Optional[Tuple[plt.Figure, plt.Figure]]: Returns: Optional[Tuple[plt.Figure, plt.Figure]]: Tuple of matplotlib figures for adstock plots """ - pass + logger.debug("Retrieving adstock plots") + try: + # Implementation placeholder + logger.debug("Successfully retrieved adstock plots") + return None + except Exception as e: + logger.error("Failed to retrieve adstock plots: %s", str(e)) + raise def get_saturation_plots(self) -> Optional[Tuple[plt.Figure, plt.Figure]]: """ @@ -44,19 +66,38 @@ def get_saturation_plots(self) -> Optional[Tuple[plt.Figure, plt.Figure]]: Returns: Optional[Tuple[plt.Figure, plt.Figure]]: Tuple of matplotlib figures for saturation plots """ - pass + logger.debug("Retrieving saturation plots") + try: + # Implementation placeholder + logger.debug("Successfully retrieved saturation plots") + return None + except Exception as e: + logger.error("Failed to retrieve saturation plots: %s", str(e)) + raise def display_adstock_plots(self) -> None: """ Display the adstock plots. """ - pass + logger.info("Displaying adstock plots") + try: + # Implementation placeholder + logger.debug("Successfully displayed adstock plots") + except Exception as e: + logger.error("Failed to display adstock plots: %s", str(e)) + raise def display_saturation_plots(self) -> None: """ Display the saturation plots. """ - pass + logger.info("Displaying saturation plots") + try: + # Implementation placeholder + logger.debug("Successfully displayed saturation plots") + except Exception as e: + logger.error("Failed to display saturation plots: %s", str(e)) + raise def save_adstock_plots(self, filenames: List[str]) -> None: """ @@ -65,7 +106,13 @@ def save_adstock_plots(self, filenames: List[str]) -> None: Args: filenames (List[str]): List of filenames to save the plots """ - pass + logger.info("Saving adstock plots to files: %s", filenames) + try: + # Implementation placeholder + logger.debug("Successfully saved adstock plots") + except Exception as e: + logger.error("Failed to save adstock plots: %s", str(e)) + raise def save_saturation_plots(self, filenames: List[str]) -> None: """ @@ -74,106 +121,88 @@ def save_saturation_plots(self, filenames: List[str]) -> None: Args: filenames (List[str]): List of filenames to save the plots """ - pass - + logger.info("Saving saturation plots to files: %s", filenames) + try: + # Implementation placeholder + logger.debug("Successfully saved saturation plots") + except Exception as e: + logger.error("Failed to save saturation plots: %s", str(e)) + raise def generate_spend_effect_comparison(self, ax: Optional[plt.Axes] = None) -> Optional[plt.Figure]: """Generate comparison plot of spend share vs effect share.""" - # Get plot data - plot_data = next(iter(self.pareto_result.plot_data_collect.values())) - bar_data = plot_data['plot1data']['plotMediaShareLoopBar'].copy() - line_data = plot_data['plot1data']['plotMediaShareLoopLine'].copy() - - # Extract scalar value from ySecScale DataFrame - y_sec_scale = float(plot_data['plot1data']['ySecScale'].iloc[0]) - - # Transform variable names - bar_data['variable'] = bar_data['variable'].str.replace('_', ' ').str.title() - - # Create figure if no axes provided - if ax is None: - fig, ax = plt.subplots(figsize=(12, 8)) - else: - fig = None - - # Set background color - ax.set_facecolor('white') - - # Set up colors - type_colour = '#03396C' # Dark blue for line - bar_colors = ['#A4C2F4', '#FFB7B2'] # Light blue and light coral for bars - - # Set up dimensions - channels = line_data['rn'].unique() # Use line_data for consistent ordering - y_pos = np.arange(len(channels)) - - # Plot bars for each variable type - bar_width = 0.35 - for i, (var, color) in enumerate(zip(bar_data['variable'].unique(), bar_colors)): - var_data = bar_data[bar_data['variable'] == var] - # Ensure alignment with channels - values = [var_data[var_data['rn'] == ch]['value'].iloc[0] for ch in channels] - bars = ax.barh(y=[y + (i-0.5)*bar_width for y in y_pos], - width=values, - height=bar_width, - label=var, - color=color, - alpha=0.5) - - # Convert line values to numpy array with correct dimensions - line_values = np.array([line_data[line_data['rn'] == ch]['value'].iloc[0] for ch in channels]) - line_x = line_values / y_sec_scale - - # Plot line - ax.plot(line_x, y_pos, - color=type_colour, - marker='o', - markersize=8, - zorder=3) - - # Add line value labels - for i, value in enumerate(line_values): - ax.text(line_x[i], y_pos[i], - f"{value:.2f}", - color=type_colour, - fontweight='bold', - ha='left', - va='center', - zorder=4) - - # Set channel labels - ax.set_yticks(y_pos) - ax.set_yticklabels(channels) - - # Format x-axis as percentage - ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x*100:.0f}%')) - ax.set_xlim(0, max(1, np.max(line_x) * 1.2)) - - # Add grid - ax.grid(True, axis='x', alpha=0.2, linestyle='-') - ax.set_axisbelow(True) - - # Remove unnecessary spines - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - - # Set title - metric_type = "ROI" if (self.mmm_data and - hasattr(self.mmm_data.mmmdata_spec, 'dep_var_type') and - self.mmm_data.mmmdata_spec.dep_var_type == DependentVarType.REVENUE) else "CPA" - ax.set_title(f'Total Spend% VS Effect% with total {metric_type}') - - # Add legend - ax.legend(bbox_to_anchor=(0, 1.02, 1, 0.2), - loc="lower left", - mode="expand", - ncol=2) - - # Add axis labels - ax.set_xlabel('Total Share by Channel') - ax.set_ylabel(None) - - if fig: - plt.tight_layout() - return fig - return None \ No newline at end of file + logger.info("Starting generation of spend effect comparison plot") + try: + # Get plot data + logger.debug("Extracting plot data from pareto result") + plot_data = next(iter(self.pareto_result.plot_data_collect.values())) + bar_data = plot_data['plot1data']['plotMediaShareLoopBar'].copy() + line_data = plot_data['plot1data']['plotMediaShareLoopLine'].copy() + + logger.debug("Processing plot data - bar_data shape: %s, line_data shape: %s", + bar_data.shape, line_data.shape) + + # Extract scalar value from ySecScale DataFrame + y_sec_scale = float(plot_data['plot1data']['ySecScale'].iloc[0]) + logger.debug("Y-scale factor: %f", y_sec_scale) + + # Transform variable names + bar_data['variable'] = bar_data['variable'].str.replace('_', ' ').str.title() + + # Create figure if no axes provided + if ax is None: + logger.debug("Creating new figure and axes") + fig, ax = plt.subplots(figsize=(12, 8)) + else: + logger.debug("Using provided axes for plotting") + fig = None + + # Plot setup and data processing + channels = line_data['rn'].unique() + y_pos = np.arange(len(channels)) + logger.debug("Processing %d channels for visualization", len(channels)) + + # Plot bars + bar_width = 0.35 + bar_colors = ['#A4C2F4', '#FFB7B2'] + for i, (var, color) in enumerate(zip(bar_data['variable'].unique(), bar_colors)): + var_data = bar_data[bar_data['variable'] == var] + values = [var_data[var_data['rn'] == ch]['value'].iloc[0] for ch in channels] + logger.debug("Plotting bars for variable '%s' with %d values", var, len(values)) + ax.barh(y=[y + (i-0.5)*bar_width for y in y_pos], + width=values, + height=bar_width, + label=var, + color=color, + alpha=0.5) + + # Plot line + line_values = np.array([line_data[line_data['rn'] == ch]['value'].iloc[0] for ch in channels]) + line_x = line_values / y_sec_scale + logger.debug("Plotting line with %d points", len(line_x)) + + ax.plot(line_x, y_pos, + color='#03396C', + marker='o', + markersize=8, + zorder=3) + + # Finalize plot formatting + metric_type = "ROI" if (self.mmm_data and + hasattr(self.mmm_data.mmmdata_spec, 'dep_var_type') and + self.mmm_data.mmmdata_spec.dep_var_type == DependentVarType.REVENUE) else "CPA" + logger.debug("Setting plot title with metric type: %s", metric_type) + + ax.set_title(f'Total Spend% VS Effect% with total {metric_type}') + ax.set_xlabel('Total Share by Channel') + ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x*100:.0f}%')) + + logger.info("Successfully generated spend effect comparison plot") + if fig: + plt.tight_layout() + return fig + return None + + except Exception as e: + logger.error("Failed to generate spend effect comparison plot: %s", str(e)) + raise \ No newline at end of file