LSTM time series prediction is not accurate
I'm using an LSTM to forecast the node_load1 metric from Prometheus. The training data has one value every 20 s, and the LSTM model predicts node_load1 for the next hour. The training data has shape [120000, 1]. A small portion of the training set is shown below (a minimal loading sketch follows the sample):
2022-04-20 03:30:00, 0.79000
2022-04-20 03:30:20, 0.71000
2022-04-20 03:30:40, 0.51000
2022-04-20 03:31:00, 0.44000
2022-04-20 03:31:20, 0.37000
2022-04-20 03:31:40, 0.42000
2022-04-20 03:32:00, 0.66000
2022-04-20 03:32:20, 0.69000
2022-04-20 03:32:40, 0.84000
2022-04-20 03:33:00, 0.87000
2022-04-20 03:33:20, 0.69000
2022-04-20 03:33:40, 0.50000
2022-04-20 03:34:00, 0.35000
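For context, here is a minimal sketch of loading samples like these into the `ds`-indexed DataFrame layout that the code below resamples. The file name and column names are assumptions, not part of the original post.

```python
# Minimal sketch (assumed file name and column layout, not from the original post):
# load the 20 s node_load1 samples into a DataFrame with a 'ds' timestamp column,
# matching the layout MetricPredictor.sample_data() resamples on.
import pandas as pd

df = pd.read_csv("node_load1.csv", header=None, names=["ds", "y"], parse_dates=["ds"])

# Resample the 20 s series to 120 s averages, mirroring sample_data(sample_freq='120s').
df_120s = df.resample("120s", label="right", on="ds").mean().reset_index()
print(df_120s.head())
```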
LSTM training code:
```python
# The original post omits the imports; these are the assumed dependencies.
# Metric is presumed to come from prometheus_api_client, as used in similar predictors.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from prometheus_api_client import Metric


class MetricPredictor:
    """docstring for Predictor."""

    model_name = "lstm"
    model_description = "Forecasted value from Lstm model"
    model = None
    predicted_df = None
    metric = None

    def __init__(self, metric, rolling_data_window_size="10d", number_of_feature=375,
                 validation_ratio=0.2, parameter_tuning=False, sample_freq='120s'):
        """Initialize the Metric object."""
        self.metric = Metric(metric, rolling_data_window_size)
        self.number_of_features = number_of_feature
        self.scalar = MinMaxScaler(feature_range=(0, 1))
        self.parameter_tuning = parameter_tuning
        self.validation_ratio = validation_ratio
        self.sample_freq = sample_freq

    def sample_data(self, data):
        """Resample the raw series to `sample_freq` by averaging each window."""
        data_sam = data.resample(self.sample_freq, label='right', on='ds').mean()
        data_sam = data_sam.reset_index()
        return data_sam

    def visualize_loss(self, history, title):
        """Plot the training and validation loss curves and save them to disk."""
        loss = history.history["loss"]
        val_loss = history.history["val_loss"]
        epochs = range(len(loss))
        plt.figure()
        plt.plot(epochs, loss, "b", label="Training loss")
        plt.plot(epochs, val_loss, "r", label="Validation loss")
        plt.title(title)
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig('./train_val_loss.png')

    def prepare_data(self, data):
        """Prepare the data for LSTM: sliding windows of `number_of_features`
        past values (X) and the next value (y)."""
        train_x = np.array(data[:, 1])[np.newaxis, :].T
        for i in range(self.number_of_features):
            train_x = np.concatenate((train_x, np.roll(data[:, 1], -i)[np.newaxis, :].T), axis=1)
        train_x = train_x[:train_x.shape[0] - self.number_of_features, :self.number_of_features]
        train_yt = np.roll(data[:, 1], -self.number_of_features + 1)
        train_y = train_yt[:train_yt.shape[0] - self.number_of_features + 1]
        # Reshape to (samples, timesteps=1, features) as expected by the LSTM layer.
        train_x = train_x.reshape(train_x.shape[0], 1, train_x.shape[1])
        return train_x, train_y

    def get_model(self, lstm_cell_count, dense_cell_count):
        """Build the model."""
        model = Sequential()
        model.add(LSTM(64, return_sequences=True, input_shape=(1, self.number_of_features)))
        model.add(Dropout(0.2))
        model.add(LSTM(lstm_cell_count))
        model.add(Dropout(0.2))
        model.add(Dense(dense_cell_count))
        model.add(Dense(1))
        return model

    def train(self, metric_data=None, prediction_duration=15, prediction_freq="15s", isLongPeriod=True):
        """Train the model."""
        if metric_data:
            # because the rolling_data_window_size is set, this df should not bloat
            self.metric += Metric(metric_data)

        # resample the data
        if isLongPeriod:
            data_sampled = self.sample_data(self.metric.metric_values)
            self.metric.metric_values = data_sampled
            self.metric.metric_values = self.metric.metric_values.bfill()

        # normalising
        metric_values_np = self.metric.metric_values.values
        scaled_np_arr = self.scalar.fit_transform(metric_values_np[:, 1].reshape(-1, 1))
        metric_values_np[:, 1] = scaled_np_arr.flatten()

        self.lstm_cell_count = 32
        self.dense_cell_count = 64
        model = self.get_model(self.lstm_cell_count, self.dense_cell_count)

        data_x, data_y = self.prepare_data(metric_values_np)
        adam = optimizers.Adam(learning_rate=0.0001)
        model.compile(loss='mean_squared_error', optimizer=adam)
        history = model.fit(np.asarray(data_x).astype(np.float32),
                            np.asarray(data_y).astype(np.float32),
                            epochs=50, batch_size=512,
                            validation_split=self.validation_ratio,
                            verbose=1, shuffle=False)
        self.visualize_loss(history, "Training and Validation Loss")

        # Recursive forecasting: feed each prediction back in as the newest value
        # of the input window.
        data_test = np.asarray(metric_values_np[-self.number_of_features:, 1]).astype(np.float32)
        print("number_of_features", self.number_of_features)
        forecast_values = []
        prev_value = data_test[-1]
        for i in range(prediction_duration):
            prediction = model.predict(data_test.reshape(1, 1, self.number_of_features)).flatten()[0]
            curr_pred_value = prediction
            scaled_final_value = self.scalar.inverse_transform(curr_pred_value.reshape(1, -1)).flatten()[0]
            forecast_values.append(scaled_final_value)
            data_test = np.roll(data_test, -1)
            data_test[-1] = curr_pred_value
            prev_value = data_test[-1]

        dataframe_cols = {"yhat": np.array(forecast_values)}

        # Simple +/- 4 * std bands around the forecast.
        upper_bound = np.array(
            [
                (forecast_values[i] + (np.std(forecast_values[:i]) * 4))
                for i in range(len(forecast_values))
            ]
        )
        upper_bound[0] = np.mean(forecast_values[0])  # to account for no std of a single value
        lower_bound = np.array(
            [
                (forecast_values[i] - (np.std(forecast_values[:i]) * 4))
                for i in range(len(forecast_values))
            ]
        )
        lower_bound[0] = np.mean(forecast_values[0])  # to account for no std of a single value
        dataframe_cols["yhat_upper"] = upper_bound
        dataframe_cols["yhat_lower"] = lower_bound

        # Index the forecast by future timestamps starting at the last observed time.
        data = self.metric.metric_values
        maximum_time = max(data["ds"])
        print("maximum_time", maximum_time)
        print("len forecast_values", len(forecast_values))
        dataframe_cols["timestamp"] = pd.date_range(
            maximum_time, periods=len(forecast_values), freq='20s'
        )
        if isLongPeriod:
            dataframe_cols["timestamp"] = pd.date_range(
                maximum_time, periods=len(forecast_values), freq='120s'
            )

        forecast = pd.DataFrame(data=dataframe_cols)
        forecast = forecast.set_index("timestamp")
        print(forecast)
        self.predicted_df = forecast
```
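For reference, here is a rough sketch of how the class above might be driven; the metric payload, label values, and call parameters are assumptions (the original post does not show how `train()` is invoked):

```python
# Rough usage sketch (assumed values, not from the original post).
# prometheus_api_client.Metric is presumed to accept a single metric dict in the
# Prometheus range-query result format: labels under "metric", [timestamp, value]
# pairs under "values".
raw_metric = {
    "metric": {"__name__": "node_load1", "instance": "node1:9100"},
    # ~3 days of synthetic 20 s samples, enough history for 375 features after resampling
    "values": [[1650425400 + 20 * i, str(0.5 + 0.3 * ((i % 30) / 30))] for i in range(13000)],
}

predictor = MetricPredictor(raw_metric, number_of_feature=375, sample_freq="120s")

# With isLongPeriod=True the series is resampled to 120 s, so 30 forecast steps cover one hour.
predictor.train(prediction_duration=30, isLongPeriod=True)
print(predictor.predicted_df[["yhat", "yhat_lower", "yhat_upper"]].head())
```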
The training data for the past 30 days is shown in the figure below:
[figure: node_load1 training data, past 30 days]
The LSTM's loss on the training and validation sets:
[figure: train loss and validation loss]
Comparison of the test-set prediction results with the real data (predicted vs. true values over 1 hour):
[figure: predicted vs. true values over 1 hour]
The LSTM model works well on the training and validation sets, but it doesn't work well on the test set. Could you please advise where the model can be improved?
Solution 1:[1]
There are a couple of syntactical issues with the code.
Application arguments should be separate from the body, and `pub` without a struct doesn't make sense either.
Unfortunately the documentation of their Rust interface is quite lacking (seems to be mostly "have a look at some examples then find out the rest through trial-and-error"). So I was unable to look up enough information to suggest a reasonably correct version.
Here are a couple more pointers:
- It's not clear what the input to this function is. You're referencing a `msg` object with a `sender` member there, but the only equivalent I could identify was the `&[AccountInfo]` argument, which identifies the invoking account.
- Alternatively, Solana programs receive a byte array of instruction data, which apparently can have any content encoded within it.
I would suggest starting with their Hello World example, playing around with it a bit, and continuing with your own app once you're more familiar with Rust syntax and Solana best practices.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | milgner |
