Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Car Price Prediction #123

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions App.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,44 @@
st.write(
"- **Parkinson's Disease Detector**: Assess your risk of Parkinson's Disease with advanced machine learning algorithms."
)
# Landing-page bullet advertising the Car Price Predictor.
# NOTE(review): models/car_price/model.py in this change trains only on year,
# km_driven, fuel, seller_type and transmission; "make", "model" and
# "condition" are not model inputs there. Confirm the wording.
st.write(

"- **Car Price Predictor**: Predict the market price of a car based on features such as make, model, year, mileage, and condition."

)



# Car Price Predictor Section: collapsible panel with an introduction, a
# dataset description and a list of input variables.

with st.expander("Car Price Predictor - More Information"):
# High-level description of what the predictor does.
st.subheader("Introduction")
st.write(
"""
The Car Price Predictor estimates the market value of a vehicle based on various input features, including make, model, year, mileage, and condition. This tool leverages machine learning algorithms to provide accurate pricing estimates tailored to the current automotive market.
"""
)
# Dataset section
st.subheader("Car Price Dataset")
st.write(
"""
The dataset used for this model contains historical sales data for vehicles, including various features that influence car prices. The goal is to predict the selling price based on characteristics that are commonly evaluated in the market.
"""
)
# Input features section
# NOTE(review): this list does not match the feature columns selected in
# models/car_price/model.py (year, km_driven, fuel, seller_type, transmission),
# and "miles" disagrees with the form config's "Mileage (in kilometers)".
# TODO: align the text with the model's actual inputs.
st.subheader("Additional Variable Information")
st.write(
"""
- **Make**: The brand of the car (e.g., Toyota, Ford).
- **Model**: The specific model of the car (e.g., Camry, Mustang).
- **Year**: The manufacturing year of the vehicle.
- **Mileage**: The total distance driven by the car, usually measured in miles.
- **Condition**: The state of the vehicle (e.g., new, used, certified pre-owned).
- **Engine Size**: The size of the car's engine, often measured in liters.
- **Fuel Type**: The type of fuel the car uses (e.g., gasoline, diesel, electric).
- **Transmission**: The type of transmission (e.g., automatic, manual).
"""
)
# Parkinson's Disease Detector Section
with st.expander("Parkinson's Disease Detector - More Information"):
st.subheader("Introduction")
Expand Down
66 changes: 66 additions & 0 deletions form_configs/car_price_prediction.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{
"Car Price Form": {
"Car Age (in years)": {
"field_name": "car_age",
"type": "number",
"min_value": 0,
"max_value": 20,
"default_value": 5,
"step": 1
},
"Mileage (in kilometers)": {
"field_name": "mileage",
"type": "number",
"min_value": 0,
"max_value": 300000,
"default_value": 50000,
"step": 1000
},
"Engine Size (in liters)": {
"field_name": "engine_size",
"type": "float",
"min_value": 0.5,
"max_value": 5.0,
"default_value": 1.5,
"step": 0.1
},
"Fuel Type": {
"field_name": "fuel_type",
"type": "dropdown",
"options": ["Petrol", "Diesel", "Electric", "Hybrid"],
"default_value": "Petrol"
},
"Transmission Type": {
"field_name": "transmission",
"type": "dropdown",
"options": ["Automatic", "Manual"],
"default_value": "Automatic"
},
"Number of Previous Owners": {
"field_name": "previous_owners",
"type": "number",
"min_value": 0,
"max_value": 5,
"default_value": 1,
"step": 1
},
"Car Brand": {
"field_name": "car_brand",
"type": "dropdown",
"options": ["Toyota", "Honda", "Ford", "BMW", "Mercedes"],
"default_value": "Toyota"
},
"Condition of the Car": {
"field_name": "condition",
"type": "dropdown",
"options": ["New", "Like New", "Used", "Certified Pre-Owned"],
"default_value": "Used"
},
"Color": {
"field_name": "color",
"type": "dropdown",
"options": ["Red", "Blue", "Black", "White", "Silver", "Gray"],
"default_value": "Black"
}
}
}
8,129 changes: 8,129 additions & 0 deletions models/car_price/data/car_data.csv

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions models/car_price/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# model.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.preprocessing import LabelEncoder

def load_and_preprocess_data(file_path):
    """Read the car sales CSV and return an 80/20 train/test split.

    The CSV is loaded with pandas and narrowed to the columns the model uses.
    The categorical columns (``fuel``, ``seller_type``, ``transmission``) are
    replaced by integer codes from ``LabelEncoder``. The remaining columns
    form the feature matrix and ``selling_price`` is the target.

    Args:
        file_path: Location of the CSV, passed straight to ``pandas.read_csv``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` produced by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=42``.
    """
    relevant_columns = ['year', 'selling_price', 'km_driven', 'fuel', 'seller_type', 'transmission']
    feature_columns = ['year', 'km_driven', 'fuel', 'seller_type', 'transmission']

    # Keep only the columns that take part in training.
    frame = pd.read_csv(file_path)[relevant_columns]

    # Encode each categorical column independently; the encoders themselves
    # are not kept, only the resulting integer codes.
    for column in ('fuel', 'seller_type', 'transmission'):
        frame[column] = LabelEncoder().fit_transform(frame[column])

    features = frame[feature_columns]
    target = frame['selling_price']

    # Fixed seed so the split is reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    return X_train, X_test, y_train, y_test

def train_linear_regression(X_train, y_train):
    """Fit an ordinary least-squares ``LinearRegression`` and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.

    Returns:
        The fitted ``LinearRegression`` estimator.
    """
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned directly.
    return LinearRegression().fit(X_train, y_train)

def train_lasso_regression(X_train, y_train):
    """Fit a ``Lasso`` regressor (``alpha=0.1``) and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Training target values.

    Returns:
        The fitted ``Lasso`` estimator.
    """
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned directly.
    return Lasso(alpha=0.1).fit(X_train, y_train)
34 changes: 34 additions & 0 deletions models/car_price/modelEvaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# modelEvaluation.py
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

def evaluate_model(model, X_test, y_test):
    """Predict on the test set, print MSE and R², and return the predictions.

    Args:
        model: Any fitted object exposing ``predict(X)``.
        X_test: Feature matrix to predict on.
        y_test: True target values matching ``X_test``.

    Returns:
        Whatever ``model.predict(X_test)`` returned.
    """
    y_pred = model.predict(X_test)

    # Compute both metrics before reporting either of them.
    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

    print(f'Mean Squared Error: {mse}')
    print(f'R² Score: {r2}')
    return y_pred

def plot_predictions(y_test, predictions, model_name):
    """Show a scatter plot of predicted versus actual selling prices.

    A dashed red diagonal marks where predictions equal the actual values.
    Both axes share the same limits: the range of ``y_test`` widened by 1000
    on each side.

    Args:
        y_test: Actual selling prices; must provide ``min()`` and ``max()``.
        predictions: Predicted selling prices, aligned with ``y_test``.
        model_name: Label used as the prefix of the plot title.
    """
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=y_test, y=predictions, alpha=0.6)

    # The diagonal and both axis ranges all derive from the actual-price span.
    lowest, highest = y_test.min(), y_test.max()
    axis_bounds = (lowest - 1000, highest + 1000)

    plt.plot([lowest, highest], [lowest, highest], '--r')  # Diagonal line
    plt.title(f'{model_name} Predictions vs Actual Selling Prices')
    plt.xlabel('Actual Selling Prices')
    plt.ylabel('Predicted Selling Prices')
    plt.xlim(*axis_bounds)
    plt.ylim(*axis_bounds)
    plt.grid()
    plt.show()

def plot_feature_importance(model, feature_names):
    """Show a horizontal bar chart of the model's coefficients per feature.

    Nothing is drawn when the model has no ``coef_`` attribute (or it is
    ``None``).

    Args:
        model: Fitted estimator, expected to expose ``coef_``.
        feature_names: Labels for the bars, in the same order as ``coef_``.
    """
    coefficients = getattr(model, 'coef_', None)
    if coefficients is None:
        # No coefficients available, so there is nothing to plot.
        return

    plt.figure(figsize=(10, 6))
    sns.barplot(x=coefficients, y=feature_names)
    plt.title('Feature Importance')
    plt.xlabel('Coefficient Value')
    plt.ylabel('Features')
    plt.show()
29 changes: 29 additions & 0 deletions models/car_price/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# predict.py
from model import load_and_preprocess_data, train_linear_regression, train_lasso_regression
from modelEvaluation import evaluate_model, plot_predictions, plot_feature_importance

def main():
    """Train and evaluate the car price models, then plot the results.

    Loads the dataset shipped next to this script, fits a linear regression
    and a Lasso regression, prints their metrics, plots predicted versus
    actual prices for both, and finally plots the Lasso coefficients.

    NOTE(review): pages/pages.json names ``get_prediction`` and
    ``model_details`` as entry points of this module, but neither is defined
    here. Confirm what the page handler expects.
    """
    from pathlib import Path

    # Resolve the CSV relative to this file rather than the current working
    # directory, so the script works no matter where it is launched from.
    file_path = Path(__file__).resolve().parent / 'data' / 'car_data.csv'

    # Load and preprocess data
    X_train, X_test, y_train, y_test = load_and_preprocess_data(file_path)

    # Train models
    lr_model = train_linear_regression(X_train, y_train)
    lasso_model = train_lasso_regression(X_train, y_train)

    # Evaluate models
    print("Evaluating Linear Regression:")
    lr_predictions = evaluate_model(lr_model, X_test, y_test)
    plot_predictions(y_test, lr_predictions, "Linear Regression")

    print("\nEvaluating Lasso Regression:")
    lasso_predictions = evaluate_model(lasso_model, X_test, y_test)
    plot_predictions(y_test, lasso_predictions, "Lasso Regression")

    # Plot feature importance for Lasso Regression
    plot_feature_importance(lasso_model, X_train.columns)


if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions pages/Car_price_Predictor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from page_handler import PageHandler

# Streamlit page entry point for the Car Price Predictor.
# The handler is constructed with the shared page registry file;
# presumably it reads the page definitions from it (verify in page_handler).
page_handler = PageHandler("pages/pages.json")
# Render the page registered under the "Car Price Predictor" key of pages.json.
page_handler.render_page("Car Price Predictor")
25 changes: 24 additions & 1 deletion pages/pages.json
Original file line number Diff line number Diff line change
Expand Up @@ -150,5 +150,28 @@
"type": "model_details"
}
]
}

},
"Car Price Predictor": {
"title": "Car Price Predictor",
"page_title": "Car Price Predictor",
"page_icon": "\ud83d\ude97",
"model_predict_file_path": "models/car_price/predict.py",
"model_function": "get_prediction",
"model_detail_function": "model_details",
"form_config_path": "form_configs/car_price_prediction.json",
"tabs": [
{
"name": "Car Price Estimator",
"type": "form",
"form_name": "Car Price Form"
},
{
"name": "Model Details",
"type": "model_details",
"problem_statement": "This model predicts car prices based on various features such as brand, model, year, mileage, and condition.",
"description": "Using a regression algorithm, this model analyzes historical sales data to provide accurate price estimations for cars based on their characteristics."
}
]
}
}