'Keras Sequential model. Generating predictions on non-daily time intervals
I am trying to generate price predictions for cryptocurrencies using keras. I have it working on 1 day intervals using 1 day data from the Kraken API. My problem comes when I try to generate predictions on other time frames. 1hr 4hr and 1week. The prediction timestamps come out as daily regardless of the input data. For hourly the output dataframe is completely scrambled. These are screenshots of the output.This is correctly working
This is the scrambled 1 hour dataframe This is the 4hr dataframe with wrong timestamps
My code is as follows. Sorry it's not a minimum reproducible example.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, GRU, SimpleRNN, Conv1D
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, Normalizer
import os
import tensorflow as tf
import krakenex
from pykrakenapi import KrakenAPI
#Pulling a DataFrame from the API
api = krakenex.API()
k = KrakenAPI(api)
ohlc, last = k.get_ohlc_data("ETHUSD", interval=60)
df = ohlc
last_close = ohlc
last_close = last_close[['close']]
last_close = last_close.iloc[::-1]
last_price = last_close.close[-1]
print(last_price)
#Clipping Dataframe to only containg index anc close prices
df = df[['close']]
#Reversing dataframe order
df = df.iloc[::-1]
df.tail()
#LSTM works better if data is scaled but removing for a moment
scaler = MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns = df.columns, index=df.index)
#print(df.columns)
df.tail()
#Visualizing price action
df.plot(figsize=(14,8))
plt.title('ETH prices')
plt.ylabel('normalized prices')
plt.show()
def split_sequence(seq, n_steps_in, n_steps_out):
"""
Splits the unzivariate time sequence
"""
# Creating a list for both variables
X, y = [], []
for i in range(len(seq)):
# Finding the end of the current sequence
end = i + n_steps_in
out_end = end + n_steps_out
# Breaking out of the loop if we have exceeded the dataset's length
if out_end > len(seq):
break
# Splitting the sequences into: x = past prices and indicators, y = prices ahead
seq_x, seq_y = seq[i:end], seq[end:out_end]
X.append(seq_x)
y.append(seq_y)
return np.array(X), np.array(y)
#Some charting for accuracy tests
def visualize_results(results):
history = results.history
plt.figure(figsize=(14,8))
plt.plot(history['val_loss'])
plt.plot(history['loss'])
plt.legend(['val_loss', 'loss'])
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.show()
plt.figure(figsize=(14,8))
plt.plot(history['val_accuracy'])
plt.plot(history['accuracy'])
plt.legend(['val_accuracy', 'accuracy'])
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.show()
#number of periods to look back at
n_per_in = 680
#number of periods to predict
n_per_out = 20
n_features = 1
#making a sequence for predictions
X, y = split_sequence(list(df.close), n_per_in, n_per_out)
#Giving the model the proper dimensions
X = X.reshape(X.shape[0], X.shape[1], n_features)
# Instantiating the model
d = 0.2
activ = 'softsign'
model = Sequential()
#model.add(Conv1D(100, kernel_size=2, input_shape=(n_per_in, n_features), activation=activ))
model.add(LSTM(60, input_shape=(n_per_in, n_features), activation=activ))
model.add(Dense(units=n_per_out))
# Model summary
model.summary()
model.compile(loss = "mse", optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), metrics=['accuracy'])
res = model.fit(X,y, epochs=10, batch_size=4, validation_split=0.2)
plt.figure(figsize=(14, 8))
yhat = model.predict(X[-1].reshape(1,n_per_in,n_features)).tolist()[0]
yhat = np.array(yhat).reshape(-1,1).tolist()
actual = y[-1].reshape(-1,1)
print("predicted ", yhat)
plt.plot(yhat, label='predicted')
print('actuals ', actual.tolist())
plt.plot(actual.tolist(), label='actual')
plt.title("Predicted vs Actual")
plt.ylabel('price')
plt.legend()
plt.show()
yhat = model.predict(np.array(df.head(n_per_in)).reshape(1, n_per_in, n_features)).tolist()[0]
yhat = np.array(yhat).reshape(-1,1).tolist()
preds = pd.DataFrame(yhat, index=pd.date_range(start=df.index[-1], periods=len(yhat)), columns=df.columns)
print(preds)
periods = 30
actual = pd.DataFrame(df[['close']].tail(periods), index = df.close.tail(periods).index, columns= df.columns).append(preds.head(1))
plt.figure(figsize=(14,8))
plt.plot(actual, label='actuals')
plt.plot(preds, label='predictions')
plt.ylabel("price")
plt.xlabel('times')
plt.title(f'Forecasting the next {len(yhat)} days')
plt.legend()
plt.show()
How can I get the correct time frames in my output? I've gone to the docs and scoured google for hours and can't come up with a solution other than switching to prophet rather than Keras, and I don't want to do that.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
