From 4baf2b3404f45db7f9238a929e6182c1d504c298 Mon Sep 17 00:00:00 2001
From: Simran Shaikh
Date: Fri, 8 Nov 2024 22:46:12 +0530
Subject: [PATCH] added app.py file for #209

---
 pages/bitcoin_price_prediction_lstm.py | 213 +++++++++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 pages/bitcoin_price_prediction_lstm.py

diff --git a/pages/bitcoin_price_prediction_lstm.py b/pages/bitcoin_price_prediction_lstm.py
new file mode 100644
index 00000000..a0fef6c4
--- /dev/null
+++ b/pages/bitcoin_price_prediction_lstm.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+"""Bitcoin Price Prediction LSTM.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1kinADIkfmyvxsBWbJpmNRlJSqLCH1MgE
+
+# Implementation of LSTM on Bitcoin Dataset
+"""
+
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import tensorflow as tf
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, r2_score
+from keras.models import Sequential
+from keras.layers import Dense, LSTM, Dropout
+from tabulate import tabulate
+import warnings
+
+warnings.filterwarnings("ignore")
+
+# Load the data
+BTC = pd.read_csv("BTC-USD.csv")
+
+print(BTC.columns)
+
+# Convert the 'Date' column to datetime and set it as the index
+BTC['Date'] = pd.to_datetime(BTC['Date'])
+BTC.set_index('Date', inplace=True)
+
+print(BTC.head())
+
+# Check for null values
+print(BTC.isnull().sum())
+
+# Plot the closing prices
+plt.figure(figsize=(10, 6))
+plt.plot(BTC['Close'], label='BTC Close')
+plt.title('BTC Closing Prices')
+plt.xlabel('Date')
+plt.ylabel('Closing Price')
+plt.legend()
+plt.show()
+
+# Time series scatter plot of the closing prices
+plt.figure(figsize=(10, 6))
+sns.scatterplot(x=BTC.index, y=BTC['Close'])
+plt.title('Time Series Scatter Plot of BTC Closing Prices')
+plt.xlabel('Date')
+plt.ylabel('Closing Price')
+plt.show()
+
+# Prepare the data for modeling: scale the closing prices to [0, 1]
+data = BTC[['Close']].values
+scaler = MinMaxScaler(feature_range=(0, 1))
+scaled_data = scaler.fit_transform(data)
+
+# Split the data into training and testing sets (70/30, chronological)
+train_size = int(len(scaled_data) * 0.7)
+train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]
+
+# Build sliding-window datasets: each sample X is time_step consecutive
+# closes, paired with the next close as the target Y
+def create_dataset(dataset, time_step):
+    X, Y = [], []
+    for i in range(len(dataset) - time_step - 1):
+        a = dataset[i:(i + time_step), 0]
+        X.append(a)
+        Y.append(dataset[i + time_step, 0])
+    return np.array(X), np.array(Y)
+
+# Create the training and testing datasets
+time_step = 50
+X_train, y_train = create_dataset(train_data, time_step)
+X_test, y_test = create_dataset(test_data, time_step)
+
+# Reshape the data to (samples, time steps, features) for the LSTM layers
+X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
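+# Optional sanity check of the windowing logic above (a minimal sketch,
+# not required for the pipeline): with 10 samples and time_step=3,
+# create_dataset should yield 10 - 3 - 1 = 6 windows of length 3.
+_demo = np.arange(10, dtype=float).reshape(-1, 1)
+_demo_X, _demo_y = create_dataset(_demo, time_step=3)
+assert _demo_X.shape == (6, 3) and _demo_y.shape == (6,)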
+# Create the LSTM model
+model = Sequential()
+model.add(LSTM(200, return_sequences=True, input_shape=(time_step, 1)))
+model.add(Dropout(0.4))
+model.add(LSTM(160, return_sequences=False))
+model.add(Dense(50))
+model.add(Dense(1))
+
+# Compile the model
+model.compile(optimizer='adam', loss='mean_squared_error',
+              metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])
+
+# Train the model
+history = model.fit(X_train, y_train, batch_size=32, epochs=50,
+                    validation_data=(X_test, y_test))
+
+# Make predictions
+train_predict = model.predict(X_train)
+test_predict = model.predict(X_test)
+
+# Inverse transform the predictions back to the price scale
+train_predict = scaler.inverse_transform(train_predict)
+test_predict = scaler.inverse_transform(test_predict)
+
+# Inverse transform the original target values
+original_y_train = scaler.inverse_transform([y_train])
+original_y_test = scaler.inverse_transform([y_test])
+
+# Align the predicted values with their positions in the full series
+train_predict_plot = np.empty_like(scaled_data)
+train_predict_plot[:, :] = np.nan
+train_predict_plot[time_step:len(train_predict) + time_step, :] = train_predict
+
+test_predict_plot = np.empty_like(scaled_data)
+test_predict_plot[:, :] = np.nan
+test_predict_plot[len(train_predict) + (time_step * 2) + 1:len(scaled_data) - 1, :] = test_predict
+
+plt.figure(figsize=(10, 6))
+plt.plot(scaler.inverse_transform(scaled_data), label='Actual')
+plt.plot(train_predict_plot, label='Train Predict')
+plt.plot(test_predict_plot, label='Test Predict')
+plt.title('Actual vs Predicted Values')
+plt.xlabel('Date')
+plt.ylabel('Closing Price')
+plt.legend()
+plt.show()
+
+# Plot the model loss and MAPE over epochs
+plt.figure(figsize=(12, 6))
+
+# Plot loss
+plt.subplot(1, 2, 1)
+plt.plot(history.history['loss'], label='Training Loss')
+plt.plot(history.history['val_loss'], label='Validation Loss')
+plt.title('Model Loss (MSE) Over Epochs')
+plt.xlabel('Epochs')
+plt.ylabel('Loss (MSE)')
+plt.legend()
+
+# Plot MAPE (computed on the scaled targets)
+plt.subplot(1, 2, 2)
+plt.plot(history.history['mean_absolute_percentage_error'], label='Training MAPE')
+plt.plot(history.history['val_mean_absolute_percentage_error'], label='Validation MAPE')
+plt.title('Model MAPE Over Epochs')
+plt.xlabel('Epochs')
+plt.ylabel('MAPE (%)')
+plt.legend()
+
+plt.tight_layout()
+plt.show()
+
+# Plot true vs predicted closing prices on the test segment
+plt.figure(figsize=(12, 6))
+plt.plot(scaler.inverse_transform(scaled_data), label="True")
+plt.plot(test_predict_plot, label="Test Predicted")
+plt.title("True vs Predicted BTC Close Prices")
+plt.legend()
+plt.show()
+
+# Calculate R², RMSE, MAE, MSE, median absolute error (MedAE) and
+# mean-normalized RMSE (NRMSE) for the training and testing sets
+train_r2 = r2_score(original_y_train[0], train_predict[:, 0])
+test_r2 = r2_score(original_y_test[0], test_predict[:, 0])
+
+train_rmse = np.sqrt(mean_squared_error(original_y_train[0], train_predict[:, 0]))
+test_rmse = np.sqrt(mean_squared_error(original_y_test[0], test_predict[:, 0]))
+
+train_mae = mean_absolute_error(original_y_train[0], train_predict[:, 0])
+test_mae = mean_absolute_error(original_y_test[0], test_predict[:, 0])
+
+train_mse = mean_squared_error(original_y_train[0], train_predict[:, 0])
+test_mse = mean_squared_error(original_y_test[0], test_predict[:, 0])
+
+train_medae = median_absolute_error(original_y_train[0], train_predict[:, 0])
+test_medae = median_absolute_error(original_y_test[0], test_predict[:, 0])
+
+train_nrmse = train_rmse / np.mean(original_y_train)
+test_nrmse = test_rmse / np.mean(original_y_test)
+
+print(f'Train R²: {train_r2}')
+print(f'Test R²: {test_r2}')
+
+print("Training RMSE: ", train_rmse)
+print("Testing RMSE: ", test_rmse)
+
+print("Training MAE: ", train_mae)
+print("Testing MAE: ", test_mae)
+
+print("Training MSE: ", train_mse)
+print("Testing MSE: ", test_mse)
+
+print("Training MedAE: ", train_medae)
+print("Testing MedAE: ", test_medae)
+
+print("Training NRMSE: ", train_nrmse)
+print("Testing NRMSE: ", test_nrmse)
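+# Optional: MAPE on the original price scale (a minimal sketch). The MAPE
+# tracked in history above is computed on the min-max scaled targets, where
+# values near zero can inflate the percentage; this version avoids that.
+test_mape = np.mean(np.abs((original_y_test[0] - test_predict[:, 0]) / original_y_test[0])) * 100
+print(f"Testing MAPE (%): {test_mape:.2f}")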
f"{train_r2:.4f}", f"{test_r2:.4f}"], + ["RMSE", f"{train_rmse:.4f}", f"{test_rmse:.4f}"], + ["MSE", f"{train_mse:.4f}", f"{test_mse:.4f}"], + ["MAE", f"{train_mae:.4f}", f"{test_mae:.4f}"], + ["MATE", f"{train_mate:.4f}", f"{test_mate:.4f}"], + ["SMATE", f"{train_smate:.4f}", f"{test_smate:.4f}"] +] + +print(tabulate(table, headers="firstrow", tablefmt="grid")) + + +