Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Business performance forecasting #197

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions App.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,22 @@
Ideal for travel, business meetings, and language learning, breaking down language barriers effortlessly.
"""
},
{
"name": "Business Performance Forecaster",
"description": "Forecast business profits based on various investment areas for better financial planning and budget allocation.",
"details": """
### Overview
The Business Performance Forecaster predicts company profit based on investment in R&D, administration, and marketing, using machine learning to analyze investment patterns and optimize budget allocation.

### Key Features
- **Profit Prediction**: Provides an estimated profit based on investment data.
- **Investment Analysis**: Evaluates how different spending areas impact overall profit.
- **Multi-Input Support**: Accounts for multiple variables like R&D, administration, and marketing expenses.

### Use Cases
Useful for companies looking to plan budgets, assess the impact of investments, and improve decision-making processes in financial forecasting.
"""
}
]

# Define shades of blue for calculators
Expand Down
31 changes: 31 additions & 0 deletions form_configs/business_performance_forecasting.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"Business Forecast Form": {
"R&D Spend": {
"type": "number",
"min_value": 0.0,
"default_value": 100000.0,
"step": 1000.0,
"field_name": "RnD_Spend"
},
"Administration": {
"type": "number",
"min_value": 0.0,
"default_value": 50000.0,
"step": 1000.0,
"field_name": "Administration"
},
"Marketing Spend": {
"type": "number",
"min_value": 0.0,
"default_value": 100000.0,
"step": 1000.0,
"field_name": "Marketing_Spend"
},
"State": {
"type": "dropdown",
"options": ["New York", "California", "Florida"],
"default_value": "New York",
"field_name": "State"
}
}
}
51 changes: 51 additions & 0 deletions models/business_performance_forecasting/data/50_Startups.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
R&D Spend,Administration,Marketing Spend,State,Profit
165349.2,136897.8,471784.1,New York,192261.83
162597.7,151377.59,443898.53,California,191792.06
153441.51,101145.55,407934.54,Florida,191050.39
144372.41,118671.85,383199.62,New York,182901.99
142107.34,91391.77,366168.42,Florida,166187.94
131876.9,99814.71,362861.36,New York,156991.12
134615.46,147198.87,127716.82,California,156122.51
130298.13,145530.06,323876.68,Florida,155752.6
120542.52,148718.95,311613.29,New York,152211.77
123334.88,108679.17,304981.62,California,149759.96
101913.08,110594.11,229160.95,Florida,146121.95
100671.96,91790.61,249744.55,California,144259.4
93863.75,127320.38,249839.44,Florida,141585.52
91992.39,135495.07,252664.93,California,134307.35
119943.24,156547.42,256512.92,Florida,132602.65
114523.61,122616.84,261776.23,New York,129917.04
78013.11,121597.55,264346.06,California,126992.93
94657.16,145077.58,282574.31,New York,125370.37
91749.16,114175.79,294919.57,Florida,124266.9
86419.7,153514.11,0,New York,122776.86
76253.86,113867.3,298664.47,California,118474.03
78389.47,153773.43,299737.29,New York,111313.02
73994.56,122782.75,303319.26,Florida,110352.25
67532.53,105751.03,304768.73,Florida,108733.99
77044.01,99281.34,140574.81,New York,108552.04
64664.71,139553.16,137962.62,California,107404.34
75328.87,144135.98,134050.07,Florida,105733.54
72107.6,127864.55,353183.81,New York,105008.31
66051.52,182645.56,118148.2,Florida,103282.38
65605.48,153032.06,107138.38,New York,101004.64
61994.48,115641.28,91131.24,Florida,99937.59
61136.38,152701.92,88218.23,New York,97483.56
63408.86,129219.61,46085.25,California,97427.84
55493.95,103057.49,214634.81,Florida,96778.92
46426.07,157693.92,210797.67,California,96712.8
46014.02,85047.44,205517.64,New York,96479.51
28663.76,127056.21,201126.82,Florida,90708.19
44069.95,51283.14,197029.42,California,89949.14
20229.59,65947.93,185265.1,New York,81229.06
38558.51,82982.09,174999.3,California,81005.76
28754.33,118546.05,172795.67,California,78239.91
27892.92,84710.77,164470.71,Florida,77798.83
23640.93,96189.63,148001.11,California,71498.49
15505.73,127382.3,35534.17,New York,69758.98
22177.74,154806.14,28334.72,California,65200.33
1000.23,124153.04,1903.93,New York,64926.08
1315.46,115816.21,297114.46,Florida,49490.75
0,135426.92,0,California,42559.73
542.05,51743.15,0,New York,35673.41
0,116983.8,45173.06,California,14681.4
14 changes: 14 additions & 0 deletions models/business_performance_forecasting/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pickle
import os
# Both pickled artifacts live in a "saved_models" folder next to this module.
_SAVED_MODELS_DIR = os.path.join(os.path.dirname(__file__), 'saved_models')
model_path = os.path.join(_SAVED_MODELS_DIR, 'model.pkl')
scaler_path = os.path.join(_SAVED_MODELS_DIR, 'scaler.pkl')


# Load the saved model and scaler
def load_model_and_scaler():
    """Unpickle and return the objects stored at ``model_path`` and ``scaler_path``.

    Returns:
        A ``(model, scaler)`` tuple: the object unpickled from ``model_path``
        followed by the object unpickled from ``scaler_path``.

    Raises:
        OSError: if either file cannot be opened.
    """
    loaded = []
    # Read the files one after the other, model first, then the scaler.
    for pickle_path in (model_path, scaler_path):
        with open(pickle_path, 'rb') as handle:
            loaded.append(pickle.load(handle))

    model, scaler = loaded
    return model, scaler
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
# scikit-learn pieces used by the training steps below
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Read the dataset from the current working directory: every column except
# the last is a feature, the last column is the regression target.
df = pd.read_csv('50_Startups.csv')
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# One-hot encode the feature at column index 3 and pass the remaining
# columns through untouched.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [3])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit an ordinary least-squares regression and predict on the held-out rows.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Show predictions (first column) next to the actual values (second column).
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

# Output locations, resolved against the current working directory.
model_path = os.path.abspath("model.pkl")
scaler_path = os.path.abspath("scaler.pkl")

# Persist the fitted regressor, then the fitted column transformer (which is
# what ends up in "scaler.pkl").
for target_path, artifact in ((model_path, model), (scaler_path, ct)):
    with open(target_path, 'wb') as out_file:
        pickle.dump(artifact, out_file)

print("Model and preprocessing objects saved successfully!")

def save_evaluation_to_pickle(train_X, train_Y, test_X, test_Y, output_file="evaluation_results.pkl"):
    """Score the module-level ``model`` and persist the metrics plus a plot.

    Computes R^2 on the training and test data, draws an actual-vs-predicted
    scatter plot for the test data, saves the plot as
    ``actual_vs_predicted.png`` (relative path) and pickles a dict with the
    keys ``"Train_R2"``, ``"Test_R2"`` and ``"plot_file"`` to ``output_file``.

    Args:
        train_X: Feature matrix the model is scored against for ``Train_R2``.
        train_Y: Targets matching ``train_X``.
        test_X: Feature matrix used for ``Test_R2`` and the scatter plot.
        test_Y: Targets matching ``test_X``; must support ``.min()`` /
            ``.max()`` (e.g. a NumPy array).
        output_file: Path of the pickle file that receives the results dict.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Predict from the arguments instead of reusing the module-level y_pred,
    # so the metrics and the plot always describe the data that was passed in.
    train_pred = model.predict(train_X)
    test_pred = model.predict(test_X)

    # Calculate R^2 scores
    train_r2 = r2_score(train_Y, train_pred)
    test_r2 = r2_score(test_Y, test_pred)

    # Create plot
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(test_Y, test_pred, alpha=0.6, color='blue', label='Predicted')
    # Diagonal reference line: points on it are predicted exactly.
    ax.plot([test_Y.min(), test_Y.max()], [test_Y.min(), test_Y.max()], 'r--', label='Perfect Prediction')
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Actual vs Predicted Values (Test Set)")
    ax.legend()
    ax.grid(True)

    # Save the plot as a PNG file, then release the figure so repeated calls
    # do not accumulate open figures.
    plot_file = "actual_vs_predicted.png"
    fig.savefig(plot_file)
    plt.close(fig)

    # Package results
    results = {
        "Train_R2": train_r2,
        "Test_R2": test_r2,
        "plot_file": plot_file  # Save the plot file path
    }

    # Save results to a pickle file
    with open(output_file, "wb") as f:
        pickle.dump(results, f)

    print(f"Evaluation and plot data saved to {output_file}")
    print(f"Plot saved as {plot_file}")
# Generate the evaluation pickle and plot for the train/test split created
# above. This is a top-level call, so it runs every time the script is executed.
save_evaluation_to_pickle(X_train, y_train, X_test, y_test)
40 changes: 40 additions & 0 deletions models/business_performance_forecasting/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from models.business_performance_forecasting.model import load_model_and_scaler # Import the function from model.py

# Define the prediction function
def get_prediction(RnD_Spend, Administration, Marketing_Spend, State):
    """Return the model's prediction for one set of inputs.

    The four arguments are packed into a single row, run through the
    transformer returned as ``scaler`` by ``load_model_and_scaler`` and then
    passed to the loaded model.

    Args:
        RnD_Spend: R&D spend value.
        Administration: Administration spend value.
        Marketing_Spend: Marketing spend value.
        State: State name (presumably one of the categories seen at training
            time; verify against the fitted encoder).

    Returns:
        The first element of the array returned by ``model.predict``.
    """
    # The model and transformer are re-read from disk on every call.
    estimator, transformer = load_model_and_scaler()

    # One row, columns in the order: R&D, administration, marketing, state.
    row = np.array([[RnD_Spend, Administration, Marketing_Spend, State]])

    # Transform the row, then cast everything to float before predicting.
    features = transformer.transform(row).astype(float)

    return estimator.predict(features)[0]  # Return the predicted profit


class ModelEvaluation:
    """Evaluation metrics loaded from ``saved_models/evaluation_results.pkl``."""

    def __init__(self):
        """Unpickle the metrics file next to this module and print its contents."""
        module_dir = os.path.dirname(__file__)
        metrics_file = os.path.join(module_dir, 'saved_models', 'evaluation_results.pkl')
        # Load evaluation metrics from the pickle file
        with open(metrics_file, "rb") as handle:
            self.metrics = pickle.load(handle)
        print("Loaded metrics:", self.metrics)

    def evaluate(self):
        """Return ``(metrics, None, None, None)``.

        The three ``None`` entries are presumably placeholders for plots this
        model does not provide -- confirm against the caller.
        """
        return self.metrics, None, None, None

def model_details():
    """Create and return a new ``ModelEvaluation`` instance."""
    return ModelEvaluation()

Binary file not shown.
Binary file not shown.
Binary file not shown.
17 changes: 9 additions & 8 deletions page_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@ def render_model_details(self, model_module,tab):

# Display the scatter plot for predicted vs actual values
#used clear_figure to clear the plot once displayed to avoid conflict
st.subheader("Model Prediction Plot")
st.pyplot(prediction_plot, clear_figure=True)

st.subheader("Error Plot")
st.pyplot(error_plot, clear_figure=True)

st.subheader("Model Performance Plot")
st.pyplot(performance_plot, clear_figure=True)
if prediction_plot!=None:
st.subheader("Model Prediction Plot")
st.pyplot(prediction_plot, clear_figure=True)
if error_plot!=None:
st.subheader("Error Plot")
st.pyplot(error_plot, clear_figure=True)
if performance_plot!=None:
st.subheader("Model Performance Plot")
st.pyplot(performance_plot, clear_figure=True)
4 changes: 4 additions & 0 deletions pages/Business_Performance_Forecasting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from page_handler import PageHandler

# Build the handler from the page configuration file, then render the page
# named "Business Performance Forecasting" (presumably looked up as a key in
# pages/pages.json -- confirm in PageHandler).
page_handler = PageHandler("pages/pages.json")
page_handler.render_page("Business Performance Forecasting")
23 changes: 23 additions & 0 deletions pages/pages.json
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,30 @@
"description": "This model uses a dataset containing demographic and health-related factors to predict the cost of insurance. The features include age, sex, BMI, children, smoker status, and region, with predictions made using the Random Forest algorithm for accurate results. Ensemble techniques like XGBoost will also be used to further enhance the prediction accuracy."
}
]
},
"Business Performance Forecasting": {
"title": "Business Performance Forecasting",
"page_title": "Business Performance Forecasting",
"page_icon": "\ud83c\udf3e",
"model_predict_file_path": "models/business_performance_forecasting/predict.py",
"model_function": "get_prediction",
"model_detail_function": "model_details",
"form_config_path": "form_configs/business_performance_forecasting.json",
"tabs": [
{
"name": "Business Forecast Form",
"type": "form",
"form_name": "Business Forecast Form"
},
{
"name": "Model Details",
"type": "model_details",
"problem_statement": "The Business Performance Forecasting model predicts future profits based on R&D spend, administration costs, marketing spend, and state. By utilizing machine learning, this tool assists businesses in making informed decisions about resource allocation.",
"description": "This model employs a dataset with features including R&D spend, administration costs, marketing spend, and geographic location to forecast profits. The predictions are generated using regression techniques, ensuring accuracy and reliability for business strategy planning."
}
]
}

}


Loading