yashasvini121 · yashasvini121 · Oct 30, 2024 · Oct 29, 2024 · Oct 29, 2024 · Oct 30, 2024
diff --git a/App.py b/App.py
@@ -176,6 +176,22 @@
             Ideal for travel, business meetings, and language learning, breaking down language barriers effortlessly.
         """
     },
+    {
+        "name": "Business Performance Forecaster",
+        "description": "Forecast business profits based on various investment areas for better financial planning and budget allocation.",
+        "details": """
+            ### Overview  
+            The Business Performance Forecaster predicts company profit based on investment in R&D, administration, and marketing, using machine learning to analyze investment patterns and optimize budget allocation.
+
+            ### Key Features  
+            - **Profit Prediction**: Provides an estimated profit based on investment data.
+            - **Investment Analysis**: Evaluates how different spending areas impact overall profit.
+            - **Multi-Input Support**: Accounts for multiple variables like R&D, administration, and marketing expenses.
+
+            ### Use Cases  
+            Useful for companies looking to plan budgets, assess the impact of investments, and improve decision-making processes in financial forecasting.
+        """
+    }
 ]
 
 # Define shades of blue for calculators

diff --git a/form_configs/business_performance_forecasting.json b/form_configs/business_performance_forecasting.json
@@ -0,0 +1,31 @@
+{
+  "Business Forecast Form": {
+    "R&D Spend": {
+      "type": "number",
+      "min_value": 0.0,
+      "default_value": 100000.0,
+      "step": 1000.0,
+      "field_name": "RnD_Spend"
+    },
+    "Administration": {
+      "type": "number",
+      "min_value": 0.0,
+      "default_value": 50000.0,
+      "step": 1000.0,
+      "field_name": "Administration"
+    },
+    "Marketing Spend": {
+      "type": "number",
+      "min_value": 0.0,
+      "default_value": 100000.0,
+      "step": 1000.0,
+      "field_name": "Marketing_Spend"
+    },
+    "State": {
+      "type": "dropdown",
+      "options": ["New York", "California", "Florida"],
+      "default_value": "New York",
+      "field_name": "State"
+    }
+  }
+}
diff --git a/models/business_performance_forecasting/data/50_Startups.csv b/models/business_performance_forecasting/data/50_Startups.csv
@@ -0,0 +1,51 @@
+R&D Spend,Administration,Marketing Spend,State,Profit
+165349.2,136897.8,471784.1,New York,192261.83
+162597.7,151377.59,443898.53,California,191792.06
+153441.51,101145.55,407934.54,Florida,191050.39
+144372.41,118671.85,383199.62,New York,182901.99
+142107.34,91391.77,366168.42,Florida,166187.94
+131876.9,99814.71,362861.36,New York,156991.12
+134615.46,147198.87,127716.82,California,156122.51
+130298.13,145530.06,323876.68,Florida,155752.6
+120542.52,148718.95,311613.29,New York,152211.77
+123334.88,108679.17,304981.62,California,149759.96
+101913.08,110594.11,229160.95,Florida,146121.95
+100671.96,91790.61,249744.55,California,144259.4
+93863.75,127320.38,249839.44,Florida,141585.52
+91992.39,135495.07,252664.93,California,134307.35
+119943.24,156547.42,256512.92,Florida,132602.65
+114523.61,122616.84,261776.23,New York,129917.04
+78013.11,121597.55,264346.06,California,126992.93
+94657.16,145077.58,282574.31,New York,125370.37
+91749.16,114175.79,294919.57,Florida,124266.9
+86419.7,153514.11,0,New York,122776.86
+76253.86,113867.3,298664.47,California,118474.03
+78389.47,153773.43,299737.29,New York,111313.02
+73994.56,122782.75,303319.26,Florida,110352.25
+67532.53,105751.03,304768.73,Florida,108733.99
+77044.01,99281.34,140574.81,New York,108552.04
+64664.71,139553.16,137962.62,California,107404.34
+75328.87,144135.98,134050.07,Florida,105733.54
+72107.6,127864.55,353183.81,New York,105008.31
+66051.52,182645.56,118148.2,Florida,103282.38
+65605.48,153032.06,107138.38,New York,101004.64
+61994.48,115641.28,91131.24,Florida,99937.59
+61136.38,152701.92,88218.23,New York,97483.56
+63408.86,129219.61,46085.25,California,97427.84
+55493.95,103057.49,214634.81,Florida,96778.92
+46426.07,157693.92,210797.67,California,96712.8
+46014.02,85047.44,205517.64,New York,96479.51
+28663.76,127056.21,201126.82,Florida,90708.19
+44069.95,51283.14,197029.42,California,89949.14
+20229.59,65947.93,185265.1,New York,81229.06
+38558.51,82982.09,174999.3,California,81005.76
+28754.33,118546.05,172795.67,California,78239.91
+27892.92,84710.77,164470.71,Florida,77798.83
+23640.93,96189.63,148001.11,California,71498.49
+15505.73,127382.3,35534.17,New York,69758.98
+22177.74,154806.14,28334.72,California,65200.33
+1000.23,124153.04,1903.93,New York,64926.08
+1315.46,115816.21,297114.46,Florida,49490.75
+0,135426.92,0,California,42559.73
+542.05,51743.15,0,New York,35673.41
+0,116983.8,45173.06,California,14681.4
diff --git a/models/business_performance_forecasting/model.py b/models/business_performance_forecasting/model.py
@@ -0,0 +1,14 @@
+import pickle
+import os
+# Both artifacts live in the `saved_models` directory next to this module.
+_saved_models_dir = os.path.join(os.path.dirname(__file__), 'saved_models')
+model_path = os.path.join(_saved_models_dir, 'model.pkl')
+scaler_path = os.path.join(_saved_models_dir, 'scaler.pkl')
+
+
+def _unpickle(path):
+    """Return the object stored in the pickle file at `path`."""
+    with open(path, 'rb') as handle:
+        return pickle.load(handle)
+
+
+def load_model_and_scaler():
+    """Load the pickled model and preprocessing object from disk.
+
+    Returns:
+        tuple: `(model, scaler)`, the objects unpickled from `model_path`
+        and `scaler_path`, read in that order.
+    """
+    return _unpickle(model_path), _unpickle(scaler_path)
diff --git a/models/business_performance_forecasting/notebooks/Business_forecasting.py b/models/business_performance_forecasting/notebooks/Business_forecasting.py
@@ -0,0 +1,79 @@
+import numpy as np 
+import pandas as pd 
+import pickle
+import os
+import matplotlib.pyplot as plt
+from sklearn.metrics import r2_score
+# Load the data
+# NOTE(review): the path is resolved against the current working directory,
+# so the script presumably has to be run from a folder that contains
+# 50_Startups.csv - confirm against where the CSV is actually stored.
+df = pd.read_csv('50_Startups.csv')
+# Every column except the last is a feature; the last column is the target.
+X = df.iloc[:, :-1].values
+y = df.iloc[:, -1].values
+
+# Preprocessing - Encoding categorical data
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import OneHotEncoder
+# One-hot encode the feature column at index 3 and pass the remaining
+# columns through unchanged.
+ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
+X = np.array(ct.fit_transform(X))
+
+# Splitting the dataset
+# 20% of the rows are held out for testing; random_state=0 fixes the split.
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+# Training the model
+from sklearn.linear_model import LinearRegression
+model = LinearRegression()
+model.fit(X_train, y_train)
+
+# Make predictions
+y_pred = model.predict(X_test)
+
+# Print predictions alongside actual values
+# Each printed row is [predicted, actual], shown with two decimals.
+np.set_printoptions(precision=2)
+print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)), axis=1))
+
+# Both artifacts are written to the current working directory.
+model_path = os.path.abspath("model.pkl")
+scaler_path = os.path.abspath("scaler.pkl")
+
+# Save the model and preprocessing objects
+with open(model_path, 'wb') as model_file:
+    pickle.dump(model, model_file)
+
+# The object stored as scaler.pkl is the fitted ColumnTransformer (`ct`),
+# i.e. the one-hot encoding step, not a numeric scaler.
+with open(scaler_path, 'wb') as scaler_file:
+    pickle.dump(ct, scaler_file)
+
+print("Model and preprocessing objects saved successfully!")
+
+def save_evaluation_to_pickle(train_X, train_Y, test_X, test_Y, output_file="evaluation_results.pkl"):
+    """Score the fitted module-level `model` and persist the results.
+
+    Computes R^2 on the training and test sets, saves an actual-vs-predicted
+    scatter plot of the test set as `actual_vs_predicted.png`, and pickles a
+    dict with the keys `Train_R2`, `Test_R2` and `plot_file` to `output_file`.
+
+    Args:
+        train_X: Training features, passed to `model.predict`.
+        train_Y: Training targets.
+        test_X: Test features, passed to `model.predict`.
+        test_Y: Test targets; must support `.min()` and `.max()`.
+        output_file: Path of the pickle file to write.
+    """
+    # Predict from the arguments instead of reusing the module-level `y_pred`,
+    # so the score and the plot always describe the data that was passed in.
+    test_pred = model.predict(test_X)
+
+    # Calculate R^2 score
+    train_r2 = r2_score(train_Y, model.predict(train_X))
+    test_r2 = r2_score(test_Y, test_pred)
+
+    # Create plot
+    fig, ax = plt.subplots(figsize=(10, 6))
+    ax.scatter(test_Y, test_pred, alpha=0.6, color='blue', label='Predicted')
+    # The diagonal marks where predicted == actual.
+    ax.plot([test_Y.min(), test_Y.max()], [test_Y.min(), test_Y.max()], 'r--', label='Perfect Prediction')
+    ax.set_xlabel("Actual")
+    ax.set_ylabel("Predicted")
+    ax.set_title("Actual vs Predicted Values (Test Set)")
+    ax.legend()
+    ax.grid(True)
+
+    # Save the plot as a PNG file
+    plot_file = "actual_vs_predicted.png"
+    fig.savefig(plot_file)
+    # Release the figure once it is on disk; pyplot keeps it alive otherwise.
+    plt.close(fig)
+
+    # Package results
+    results = {
+        "Train_R2": train_r2,
+        "Test_R2": test_r2,
+        "plot_file": plot_file  # Save the plot file path
+    }
+
+    # Save results to a pickle file
+    with open(output_file, "wb") as f:
+        pickle.dump(results, f)
+
+    print(f"Evaluation and plot data saved to {output_file}")
+    print(f"Plot saved as {plot_file}")
+# Run this function once to generate the evaluation file
+save_evaluation_to_pickle(X_train, y_train, X_test, y_test)
diff --git a/models/business_performance_forecasting/predict.py b/models/business_performance_forecasting/predict.py
@@ -0,0 +1,40 @@
+import os
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import pickle
+from models.business_performance_forecasting.model import load_model_and_scaler  # Import the function from model.py
+
+# Define the prediction function
+def get_prediction(RnD_Spend, Administration, Marketing_Spend, State):
+    """Predict profit for a single set of form inputs.
+
+    Args:
+        RnD_Spend: R&D spend (numeric).
+        Administration: Administration spend (numeric).
+        Marketing_Spend: Marketing spend (numeric).
+        State: State name as a string.
+
+    Returns:
+        The first element of `model.predict(...)` for the single input row.
+    """
+    # Load the model and the preprocessing object saved alongside it
+    model, scaler = load_model_and_scaler()
+
+    # Build the row with dtype=object so the numbers stay numeric and only
+    # State is a string. Without an explicit dtype NumPy coerces the whole
+    # mixed row to strings.
+    # NOTE(review): assumes the preprocessing object was fitted on columns in
+    # this same order (R&D, Administration, Marketing, State) - confirm.
+    input_data = np.array(
+        [[RnD_Spend, Administration, Marketing_Spend, State]], dtype=object
+    )
+
+    # Apply the preprocessing, then cast to float for the regressor
+    scaled_data = scaler.transform(input_data)
+    scaled_data = scaled_data.astype(float)
+
+    # Make prediction using the loaded model
+    prediction = model.predict(scaled_data)
+
+    return prediction[0]  # Return the predicted profit
+
+
+class ModelEvaluation:
+    """Expose the evaluation metrics stored in a pickle next to this module."""
+
+    def __init__(self):
+        # The metrics are read from saved_models/evaluation_results.pkl,
+        # located relative to this file.
+        module_dir = os.path.dirname(__file__)
+        pickle_location = os.path.join(module_dir, 'saved_models', 'evaluation_results.pkl')
+        with open(pickle_location, "rb") as stored:
+            loaded = pickle.load(stored)
+        self.metrics = loaded
+        print("Loaded metrics:", self.metrics)
+
+    def evaluate(self):
+        """Return `(metrics, None, None, None)`.
+
+        NOTE(review): the three `None` values are presumably placeholders for
+        plots that this model does not provide - confirm against the caller.
+        """
+        return (self.metrics, None, None, None)
+
+def model_details():
+    """Return a new `ModelEvaluation` instance."""
+    return ModelEvaluation()
+
diff --git a/models/business_performance_forecasting/saved_models/evaluation_results.pkl b/models/business_performance_forecasting/saved_models/evaluation_results.pkl
diff --git a/models/business_performance_forecasting/saved_models/model.pkl b/models/business_performance_forecasting/saved_models/model.pkl
diff --git a/models/business_performance_forecasting/saved_models/scaler.pkl b/models/business_performance_forecasting/saved_models/scaler.pkl
diff --git a/page_handler.py b/page_handler.py
@@ -85,11 +85,12 @@ def render_model_details(self, model_module,tab):
 
 			# Display the scatter plot for predicted vs actual values
 			#used clear_figure to clear the plot once displayed to avoid conflict 
-			st.subheader("Model Prediction Plot")
-			st.pyplot(prediction_plot, clear_figure=True)
-
-			st.subheader("Error Plot")
-			st.pyplot(error_plot, clear_figure=True)
-
-			st.subheader("Model Performance Plot")
-			st.pyplot(performance_plot, clear_figure=True)
+			if prediction_plot!=None:
+				st.subheader("Model Prediction Plot")
+				st.pyplot(prediction_plot, clear_figure=True)
+			if error_plot!=None:
+				st.subheader("Error Plot")
+				st.pyplot(error_plot, clear_figure=True)
+			if performance_plot!=None:
+				st.subheader("Model Performance Plot")
+				st.pyplot(performance_plot, clear_figure=True)
diff --git a/pages/Business_Performance_Forecasting.py b/pages/Business_Performance_Forecasting.py
@@ -0,0 +1,4 @@
+from page_handler import PageHandler
+
+page_handler = PageHandler("pages/pages.json")
+page_handler.render_page("Business Performance Forecasting")
diff --git a/pages/pages.json b/pages/pages.json
@@ -195,7 +195,30 @@
         "description": "This model uses a dataset containing demographic and health-related factors to predict the cost of insurance. The features include age, sex, BMI, children, smoker status, and region, with predictions made using the Random Forest algorithm for accurate results. Ensemble techniques like XGBoost will also be used to further enhance the prediction accuracy."
       }
     ]
+  },
+  "Business Performance Forecasting": {
+    "title": "Business Performance Forecasting",
+    "page_title": "Business Performance Forecasting",
+    "page_icon": "\ud83c\udf3e",
+    "model_predict_file_path": "models/business_performance_forecasting/predict.py",
+    "model_function": "get_prediction",
+    "model_detail_function": "model_details",
+    "form_config_path": "form_configs/business_performance_forecasting.json",
+    "tabs": [
+      {
+        "name": "Business Forecast Form",
+        "type": "form",
+        "form_name": "Business Forecast Form"
+      },
+      {
+        "name": "Model Details",
+        "type": "model_details",
+        "problem_statement": "The Business Performance Forecasting model predicts future profits based on R&D spend, administration costs, marketing spend, and state. By utilizing machine learning, this tool assists businesses in making informed decisions about resource allocation.",
+        "description": "This model employs a dataset with features including R&D spend, administration costs, marketing spend, and geographic location to forecast profits. The predictions are generated using regression techniques, ensuring accuracy and reliability for business strategy planning."
+      }
+    ]
   }
+
 }