Skip to content

Commit

Permalink
added app.py file for #209
Browse files Browse the repository at this point in the history
  • Loading branch information
SimranShaikh20 committed Nov 8, 2024
1 parent ebe2d4d commit 4baf2b3
Showing 1 changed file with 213 additions and 0 deletions.
213 changes: 213 additions & 0 deletions pages/bitcoin_price_prediction_lstm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
"""Bitcoin Price Prediction LSTM.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1kinADIkfmyvxsBWbJpmNRlJSqLCH1MgE
# Implementation of LSTM on Bitcoin Dataset
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error,r2_score
import warnings
import seaborn as sns
import tensorflow as tf

warnings.filterwarnings("ignore")

# Load the data
BTC = pd.read_csv("BTC-USD.csv")

print(BTC.columns)

# Convert the 'Date' column to datetime and set it as the index
BTC['Date'] = pd.to_datetime(BTC['Date'])
BTC.set_index('Date', inplace=True)

BTC.head()

#Checking null value
print(BTC.isnull().sum())

# Plot the closing prices
plt.figure(figsize=(10, 6))
plt.plot(BTC['Close'], label='BTC Close')
plt.title('BTC Closing Prices')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

# Time Series Scatter Plot
plt.figure(figsize=(10, 6))
sns.scatterplot(x=BTC.index, y=nifty_50_df['Close'])
plt.title('Time Series Scatter Plot of BTC Closing Prices')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.show()

# Prepare the data for modeling
data = BTC[['Close']].values
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Split the data into training and testing sets
train_size = int(len(scaled_data) * 0.7)
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

# Create a function to create datasets for training and testing
def create_dataset(dataset, time_step):
X, Y = [], []
for i in range(len(dataset) - time_step - 1):
a = dataset[i:(i + time_step), 0]
X.append(a)
Y.append(dataset[i + time_step, 0])
return np.array(X), np.array(Y)

# Create the training and testing datasets
time_step = 50
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

# Reshape the data for GRU layers
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Create the GRU model
model = Sequential()
model.add(LSTM(200, return_sequences=True, input_shape=(time_step, 1)))
model.add(Dropout(0.4))
model.add(LSTM(160, return_sequences=False))
model.add(Dense(50))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[tf.keras.metrics.MeanAbsolutePercentageError()])

# Train the model
history = model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_test, y_test))

# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform the predictions
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

# Inverse transform the original values
original_y_train = scaler.inverse_transform([y_train])
original_y_test = scaler.inverse_transform([y_test])

# Create plots for the predicted values
train_predict_plot = np.empty_like(scaled_data)
train_predict_plot[:, :] = np.nan
train_predict_plot[time_step:len(train_predict) + time_step, :] = train_predict

test_predict_plot = np.empty_like(scaled_data)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict) + (time_step * 2) + 1:len(scaled_data) - 1, :] = test_predict

plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(scaled_data), label='Actual')
plt.plot(train_predict_plot, label='Train Predict')
plt.plot(test_predict_plot, label='Test Predict')
plt.title('Actual vs Predicted Values')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

# Plot the model loss and MAPE over epochs
plt.figure(figsize=(12, 6))

# Plot Loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss (MSE) Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.legend()

# Plot MAPE
plt.subplot(1, 2, 2)
plt.plot(history.history['mean_absolute_percentage_error'], label='Training MAPE')
plt.plot(history.history['val_mean_absolute_percentage_error'], label='Validation MAPE')
plt.title('Model Accuracy (MAPE) Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('MAPE (%)')
plt.legend()

plt.tight_layout()
plt.show()

# Plot true vs predicted residuals
plt.figure(figsize=(12, 6))
plt.plot(scaler.inverse_transform(scaled_data), label="True")
plt.plot(test_predict_plot, label="Test Predicted")
plt.title("True vs Predicted BTC Close Prices")
plt.legend()
plt.show()

# Calculate R² score, rmse, mae, mse, mate, smate for training and testing sets
train_r2 = r2_score(original_y_train[0], train_predict[:, 0])
test_r2 = r2_score(original_y_test[0], test_predict[:, 0])

train_rmse = np.sqrt(mean_squared_error(original_y_train[0], train_predict[:, 0]))
test_rmse = np.sqrt(mean_squared_error(original_y_test[0], test_predict[:, 0]))

train_mae = mean_absolute_error(original_y_train[0], train_predict[:, 0])
test_mae = mean_absolute_error(original_y_test[0], test_predict[:, 0])

train_mse = mean_squared_error(original_y_train[0], train_predict[:, 0])
test_mse = mean_squared_error(original_y_test[0], test_predict[:, 0])

train_mate = median_absolute_error(original_y_train[0], train_predict[:, 0])
test_mate = median_absolute_error(original_y_test[0], test_predict[:, 0])

train_smate = np.sqrt(mean_squared_error(original_y_train[0], train_predict[:, 0])) / np.mean(original_y_train)
test_smate = np.sqrt(mean_squared_error(original_y_test[0], test_predict[:, 0])) / np.mean(original_y_test)

print(f'Train R²: {train_r2}')
print(f'Test R²: {test_r2}')

print("Training RMSE: ", train_rmse)
print("Testing RMSE: ", test_rmse)

print("Training MAE: ", train_mae)
print("Testing MAE: ", test_mae)

print("Training MSE: ", train_mse)
print("Testing MSE: ", test_mse)

print("Training MATE: ", train_mate)
print("Testing MATE: ", test_mate)

print("Training SMATE: ", train_smate)
print("Testing SMATE: ", test_smate)

from tabulate import tabulate
import numpy as np
# Create a table
table = [
["Metric", "Training", "Testing"],
["R² Score", f"{train_r2:.4f}", f"{test_r2:.4f}"],
["RMSE", f"{train_rmse:.4f}", f"{test_rmse:.4f}"],
["MSE", f"{train_mse:.4f}", f"{test_mse:.4f}"],
["MAE", f"{train_mae:.4f}", f"{test_mae:.4f}"],
["MATE", f"{train_mate:.4f}", f"{test_mate:.4f}"],
["SMATE", f"{train_smate:.4f}", f"{test_smate:.4f}"]
]

print(tabulate(table, headers="firstrow", tablefmt="grid"))



0 comments on commit 4baf2b3

Please sign in to comment.