From 4baf2b3404f45db7f9238a929e6182c1d504c298 Mon Sep 17 00:00:00 2001
From: Simran Shaikh
Date: Fri, 8 Nov 2024 22:46:12 +0530
Subject: [PATCH] added app.py file for #209

---
 pages/bitcoin_price_prediction_lstm.py | 213 +++++++++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 pages/bitcoin_price_prediction_lstm.py

diff --git a/pages/bitcoin_price_prediction_lstm.py b/pages/bitcoin_price_prediction_lstm.py
new file mode 100644
index 00000000..a0fef6c4
--- /dev/null
+++ b/pages/bitcoin_price_prediction_lstm.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+"""Bitcoin Price Prediction LSTM.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1kinADIkfmyvxsBWbJpmNRlJSqLCH1MgE
+
+# Implementation of LSTM on Bitcoin Dataset
+"""
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import tensorflow as tf
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, r2_score
+from keras.models import Sequential
+from keras.layers import Dense, LSTM, Dropout
+from tabulate import tabulate
+import warnings
+
+warnings.filterwarnings("ignore")
+
+# Load the data
+BTC = pd.read_csv("BTC-USD.csv")
+
+print(BTC.columns)
+
+# Convert the 'Date' column to datetime and set it as the index
+BTC['Date'] = pd.to_datetime(BTC['Date'])
+BTC.set_index('Date', inplace=True)
+
+print(BTC.head())
+
+# Check for null values
+print(BTC.isnull().sum())
+
+# Plot the closing prices
+plt.figure(figsize=(10, 6))
+plt.plot(BTC['Close'], label='BTC Close')
+plt.title('BTC Closing Prices')
+plt.xlabel('Date')
+plt.ylabel('Closing Price')
+plt.legend()
+plt.show()
+
+# Time series scatter plot of the closing prices
+plt.figure(figsize=(10, 6))
+sns.scatterplot(x=BTC.index, y=BTC['Close'])
+plt.title('Time Series Scatter Plot of BTC Closing Prices')
+plt.xlabel('Date')
+plt.ylabel('Closing Price')
+plt.show()
+
+# Prepare the data for modeling: scale the closing prices to [0, 1]
+data = BTC[['Close']].values
+scaler = MinMaxScaler(feature_range=(0, 1))
+scaled_data = scaler.fit_transform(data)
+
+# Split the data into training and testing sets (70/30, chronological)
+train_size = int(len(scaled_data) * 0.7)
+train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]
+
+# Build sliding-window datasets: each sample X is time_step consecutive
+# closes, paired with the next close as the target Y
+def create_dataset(dataset, time_step):
+    X, Y = [], []
+    for i in range(len(dataset) - time_step - 1):
+        a = dataset[i:(i + time_step), 0]
+        X.append(a)
+        Y.append(dataset[i + time_step, 0])
+    return np.array(X), np.array(Y)
+
+# Create the training and testing datasets
+time_step = 50
+X_train, y_train = create_dataset(train_data, time_step)
+X_test, y_test = create_dataset(test_data, time_step)
+
+# Reshape the data to (samples, time steps, features) for the LSTM layers
+X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
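+# Optional sanity check of the windowing logic above (a minimal sketch,
+# not required for the pipeline): with 10 samples and time_step=3,
+# create_dataset should yield 10 - 3 - 1 = 6 windows of length 3.
+_demo = np.arange(10, dtype=float).reshape(-1, 1)
+_demo_X, _demo_y = create_dataset(_demo, time_step=3)
+assert _demo_X.shape == (6, 3) and _demo_y.shape == (6,)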
+# Create the LSTM model
+model = Sequential()
+model.add(LSTM(200, return_sequences=True, input_shape=(time_step, 1)))
+model.add(Dropout(0.4))
+model.add(LSTM(160, return_sequences=False))
+model.add(Dense(50))
+model.add(Dense(1))
+
+# Compile the model
+model.compile(optimizer='adam', loss='mean_squared_error',
+              metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])
+
+# Train the model
+history = model.fit(X_train, y_train, batch_size=32, epochs=50,
+                    validation_data=(X_test, y_test))
+
+# Make predictions
+train_predict = model.predict(X_train)
+test_predict = model.predict(X_test)
+
+# Inverse transform the predictions back to the price scale
+train_predict = scaler.inverse_transform(train_predict)
+test_predict = scaler.inverse_transform(test_predict)
+
+# Inverse transform the original target values
+original_y_train = scaler.inverse_transform([y_train])
+original_y_test = scaler.inverse_transform([y_test])
+
+# Align the predicted values with their positions in the full series
+train_predict_plot = np.empty_like(scaled_data)
+train_predict_plot[:, :] = np.nan
+train_predict_plot[time_step:len(train_predict) + time_step, :] = train_predict
+
+test_predict_plot = np.empty_like(scaled_data)
+test_predict_plot[:, :] = np.nan
+test_predict_plot[len(train_predict) + (time_step * 2) + 1:len(scaled_data) - 1, :] = test_predict
+
+plt.figure(figsize=(10, 6))
+plt.plot(scaler.inverse_transform(scaled_data), label='Actual')
+plt.plot(train_predict_plot, label='Train Predict')
+plt.plot(test_predict_plot, label='Test Predict')
+plt.title('Actual vs Predicted Values')
+plt.xlabel('Date')
+plt.ylabel('Closing Price')
+plt.legend()
+plt.show()
+
+# Plot the model loss and MAPE over epochs
+plt.figure(figsize=(12, 6))
+
+# Plot loss
+plt.subplot(1, 2, 1)
+plt.plot(history.history['loss'], label='Training Loss')
+plt.plot(history.history['val_loss'], label='Validation Loss')
+plt.title('Model Loss (MSE) Over Epochs')
+plt.xlabel('Epochs')
+plt.ylabel('Loss (MSE)')
+plt.legend()
+
+# Plot MAPE (computed on the scaled targets)
+plt.subplot(1, 2, 2)
+plt.plot(history.history['mean_absolute_percentage_error'], label='Training MAPE')
+plt.plot(history.history['val_mean_absolute_percentage_error'], label='Validation MAPE')
+plt.title('Model MAPE Over Epochs')
+plt.xlabel('Epochs')
+plt.ylabel('MAPE (%)')
+plt.legend()
+
+plt.tight_layout()
+plt.show()
+
+# Plot true vs predicted closing prices on the test segment
+plt.figure(figsize=(12, 6))
+plt.plot(scaler.inverse_transform(scaled_data), label="True")
+plt.plot(test_predict_plot, label="Test Predicted")
+plt.title("True vs Predicted BTC Close Prices")
+plt.legend()
+plt.show()
+
+# Calculate R², RMSE, MAE, MSE, median absolute error (MedAE) and
+# mean-normalized RMSE (NRMSE) for the training and testing sets
+train_r2 = r2_score(original_y_train[0], train_predict[:, 0])
+test_r2 = r2_score(original_y_test[0], test_predict[:, 0])
+
+train_rmse = np.sqrt(mean_squared_error(original_y_train[0], train_predict[:, 0]))
+test_rmse = np.sqrt(mean_squared_error(original_y_test[0], test_predict[:, 0]))
+
+train_mae = mean_absolute_error(original_y_train[0], train_predict[:, 0])
+test_mae = mean_absolute_error(original_y_test[0], test_predict[:, 0])
+
+train_mse = mean_squared_error(original_y_train[0], train_predict[:, 0])
+test_mse = mean_squared_error(original_y_test[0], test_predict[:, 0])
+
+train_medae = median_absolute_error(original_y_train[0], train_predict[:, 0])
+test_medae = median_absolute_error(original_y_test[0], test_predict[:, 0])
+
+train_nrmse = train_rmse / np.mean(original_y_train)
+test_nrmse = test_rmse / np.mean(original_y_test)
+
+print(f'Train R²: {train_r2}')
+print(f'Test R²: {test_r2}')
+
+print("Training RMSE: ", train_rmse)
+print("Testing RMSE: ", test_rmse)
+
+print("Training MAE: ", train_mae)
+print("Testing MAE: ", test_mae)
+
+print("Training MSE: ", train_mse)
+print("Testing MSE: ", test_mse)
+
+print("Training MedAE: ", train_medae)
+print("Testing MedAE: ", test_medae)
+
+print("Training NRMSE: ", train_nrmse)
+print("Testing NRMSE: ", test_nrmse)
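+# Optional: MAPE on the original price scale (a minimal sketch). The MAPE
+# tracked in history above is computed on the min-max scaled targets, where
+# values near zero can inflate the percentage; this version avoids that.
+test_mape = np.mean(np.abs((original_y_test[0] - test_predict[:, 0]) / original_y_test[0])) * 100
+print(f"Testing MAPE (%): {test_mape:.2f}")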
f"{train_r2:.4f}", f"{test_r2:.4f}"], + ["RMSE", f"{train_rmse:.4f}", f"{test_rmse:.4f}"], + ["MSE", f"{train_mse:.4f}", f"{test_mse:.4f}"], + ["MAE", f"{train_mae:.4f}", f"{test_mae:.4f}"], + ["MATE", f"{train_mate:.4f}", f"{test_mate:.4f}"], + ["SMATE", f"{train_smate:.4f}", f"{test_smate:.4f}"] +] + +print(tabulate(table, headers="firstrow", tablefmt="grid")) + + +