Stateful LSTM VAE: Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [batch_size, latent_dim]

I am solving a time series problem using an LSTM VAE (variational autoencoder). I have built my VAE model as below:

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, LSTM, Dense, Lambda, Dropout,
                                     LeakyReLU, RepeatVector, TimeDistributed)
from tensorflow.keras.optimizers import Adam

tf.compat.v1.disable_eager_execution()

class VAE:

    def __init__(self,
                 hidden_layer_units,
                 hidden_layer_leakyrelu_alphas,
                 hidden_layer_dropout_rates,
                 batch_size,
                 time_steps,
                 num_features,
                 is_stateful_learning):

        self.hidden_layer_units = hidden_layer_units
        self.hidden_layer_leakyrelu_alphas = hidden_layer_leakyrelu_alphas
        self.hidden_layer_dropout_rates = hidden_layer_dropout_rates
        self.encoder_num_layers = 0
        self.latent_space_dim = 0

        vae_total_layers = len(hidden_layer_units)
        if vae_total_layers > 0:
            self.encoder_num_layers = int((vae_total_layers - 1) / 2)
            self.latent_space_dim = self.hidden_layer_units[self.encoder_num_layers]

        self.batch_size = batch_size
        self.time_steps = time_steps
        self.num_features = num_features
        self.is_stateful_learning = is_stateful_learning

        self.encoder = None
        self.decoder = None
        self.model = None

        self.model_input = None
        self.model_output = None
        self.mu = None
        self.log_variance = None
        self.kulback_coef = 0.0001

        self._build()

    def summary(self):
        self.encoder.summary()
        self.decoder.summary()
        self.model.summary()

    def compile(self, learning_rate=0.001):
        optimizer = Adam(learning_rate=learning_rate)
        self.model.compile(optimizer=optimizer,
                           loss=self._calculate_combined_loss,
                           metrics=[self._calculate_reconstruction_loss, self._calculate_kl_loss])

    def _build(self):
        self._build_encoder()
        self._build_decoder()
        self._build_autoencoder()

    def _build_encoder(self):
        encoder_input = self._add_encoder_input()
        lstm_layers = self._add_encoder_lstm_layers(encoder_input)
        bottleneck = self._add_bottleneck(lstm_layers)
        self.model_input = encoder_input
        self.encoder = Model(encoder_input, bottleneck, name="encoder")

    def _build_decoder(self):
        decoder_input = self._add_decoder_input()
        repeater_layer = self._add_repeater_layer(decoder_input)
        lstm_layer = self._add_decoder_lstm_layer(repeater_layer)
        decoder_output = self._add_decoder_output(lstm_layer)
        self.decoder = Model(decoder_input, decoder_output, name="decoder")

    def _build_autoencoder(self):
        model_input = self.model_input
        encoder_output = self.encoder(model_input)
        model_output = self.decoder(encoder_output)
        self.model_output = model_output
        self.model = Model(model_input, model_output, name="autoencoder")

    def _add_encoder_input(self):
        if self.is_stateful_learning:
            x = Input(batch_shape=(self.batch_size, self.time_steps, self.num_features), name="encoder_input")
        else:
            x = Input(shape=(self.time_steps, self.num_features), name="encoder_input")
        return x

    def _add_encoder_lstm_layers(self, encoder_input):
        """ Create all lstm layers in encoder."""

        x = encoder_input
        for layer_index, units in enumerate(self.hidden_layer_units[:self.encoder_num_layers]):
            lstm_params = {}
            if layer_index < self.encoder_num_layers - 1:
                lstm_params["return_sequences"] = True

            if self.is_stateful_learning:
                lstm_params["stateful"] = True

            x = LSTM(units=units, **lstm_params)(x)
            x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_index])(x)
            x = Dropout(rate=self.hidden_layer_dropout_rates[layer_index])(x)
        return x

    def _add_bottleneck(self, x):
        """ add bottleneck with Guassian sampling (Dense layer)."""

        self.mu = Dense(self.latent_space_dim, name="mu")(x)
        self.log_variance = Dense(self.latent_space_dim, name="log_variance")(x)

        x = Lambda(self.sample_point_from_normal_distribution, name="encoder_output")([self.mu, self.log_variance])

        return x

    def sample_point_from_normal_distribution(self, args):
        mu, log_variance = args
        epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
        sampled_point = mu + K.exp(log_variance / 2) * epsilon
        return sampled_point

    def _add_decoder_input(self):
        if self.is_stateful_learning:
            x = Input(batch_shape=(self.batch_size, self.latent_space_dim), name="decoder_input")
        else:
            x = Input(shape=(self.latent_space_dim,), name="decoder_input")

        return x

    def _add_repeater_layer(self, decoder_input):
        return RepeatVector(self.time_steps)(decoder_input)

    def _add_decoder_lstm_layer(self, repeater_layer):

        x = repeater_layer
        for layer_index, units in enumerate(self.hidden_layer_units[self.encoder_num_layers + 1:]):
            lstm_params = {}
            if self.is_stateful_learning:
                # stateful build
                lstm_params = {'stateful': True, 'return_sequences': True}
            else:
                lstm_params["return_sequences"] = True

            layer_no = layer_index + self.encoder_num_layers + 1
            x = LSTM(units=units, **lstm_params)(x)
            x = LeakyReLU(alpha=self.hidden_layer_leakyrelu_alphas[layer_no])(x)
            x = Dropout(rate=self.hidden_layer_dropout_rates[layer_no])(x)

        return x

    def _add_decoder_output(self, lstm_layer):
        return TimeDistributed(Dense(1))(lstm_layer)

    def _calculate_combined_loss(self, y_target, y_predicted):
        reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
        kl_loss = self._calculate_kl_loss(y_target, y_predicted)
        combined_loss = reconstruction_loss + (self.kulback_coef * kl_loss)
        return combined_loss

    def _calculate_reconstruction_loss(self, y_target, y_predicted):
        error = y_target - y_predicted
        reconstruction_loss = K.mean(K.square(error), axis=1)
        return reconstruction_loss

    def _calculate_kl_loss(self, y_target, y_predicted):
        kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) - K.exp(self.log_variance), axis=1)
        return kl_loss


# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
                              number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
    vae = VAE(
        hidden_layer_units=lstm_layer_units,
        hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
        hidden_layer_dropout_rates=dropout_layer_rates,
        batch_size=number_of_sequences,
        time_steps=time_steps,
        num_features=data_dim,
        is_stateful_learning=is_stateful_learning
    )

    vae.compile(learning_rate)  # learning_rate is taken from the module-level configuration below
    vae.summary()

    return vae.model


The model training block looks as below:

import numpy as np
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

# configuration
nn_lstm_layer_units = [160, 3, 160]
nn_leakyrelu_layer_alphas = [0.0, 0.0, 0.0]
nn_dropout_layer_rates = [0.3, 0.0, 0.3]
batch_size = 96
win_length = 64
num_features = 6  # You can also use univariate time series data, with num_features = 1
epochs = 782
learning_rate = 0.0001
want_stateful_learning = True

# Build LSTM VAE model
model = build_lstm_neural_network(nn_lstm_layer_units, nn_leakyrelu_layer_alphas, nn_dropout_layer_rates, batch_size,
                                  win_length, num_features, want_stateful_learning)


TIME_STEPS = win_length
# Generated training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i: (i + time_steps)])
    return np.stack(output)
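
For illustration only (these numbers are not from the original data), the sliding window above turns an array of shape (N, num_features) into (N - time_steps + 1, time_steps, num_features) overlapping sequences:

# Hypothetical example: 1000 rows of 6 features gives 1000 - 64 + 1 = 937 windows
demo_values = np.random.rand(1000, 6).astype("float32")
demo_windows = create_sequences(demo_values, time_steps=64)
print(demo_windows.shape)  # (937, 64, 6)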


x_train = create_sequences(x_train)
x_val = create_sequences(x_val)

callbacks = []
unfit_train_record_count = 0
unfit_val_record_count = 0
if want_stateful_learning:
    # stateful learning

    # adjust train data size(should be in multiples of batch size)
    unfit_train_record_count = len(x_train) % batch_size
    unfit_val_record_count = len(x_val) % batch_size

    # Reset the states of the stateful model at the end of every epoch
    stateful_model_reset_states = LambdaCallback(on_epoch_end=lambda epoch, logs: model.reset_states())
    callbacks.append(stateful_model_reset_states)

early_stopping = EarlyStopping(monitor=monitor, patience=patience)
callbacks.append(early_stopping)

# Model training
history = model.fit(x=x_train[unfit_train_record_count:],
                    y=x_train[unfit_train_record_count:, :, [0]],
                    validation_data=(x_val[unfit_val_record_count:],
                                     x_val[unfit_val_record_count:, :, [0]]),
                    batch_size=batch_size, epochs=epochs,
                    shuffle=False, callbacks=callbacks)

The stateless mode of the model works as expected, but the stateful mode throws the error below:

1632/1632 [==============================] - ETA: 0s - loss: 0.2447 - _calculate_reconstruction_loss: 0.2447 - _calculate_kl_loss: 0.0326

tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
     [[{{node decoder_input}}]]
     [[metrics/_calculate_reconstruction_loss/Identity/_229]]
  (1) Invalid argument: You must feed a value for placeholder tensor 'decoder_input' with dtype float and shape [96,3]
     [[{{node decoder_input}}]]

Environment: Python 3.8.12, tensorflow-gpu 2.5, cuDNN 8.2.1.32.

I am not clear why the stateful model runs one epoch over the training data, but throws the error as soon as it starts to process the validation data.



Solution 1:[1]

I have had the same experience when the dataset and the loss function were not suitable. When I tried to reproduce it, the possible symptoms were no change in the loss value, a NaN loss, or an error during validation. The cause can be missing values, mismatched data, or neurons that never update; using TensorFlow 2.x makes this a lot easier to work with.

One possibility is mismatched validation data: training works, but validation raises an error.

Epoch 1/100
2022-01-23 21:04:59.846791: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - ETA: 0s - loss: 3.1866 - accuracy: 0.0000e+00
Traceback (most recent call last):

Another possibility is a mismatched loss function: the neurons may not be getting updated at all.

Epoch 1/100
2022-01-23 21:08:23.330068: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8100
1/1 [==============================] - 3s 3s/step - loss: 13.7138 - accuracy: 0.2000 - val_loss: 8.2133 - val_accuracy: 0.0000e+00
Epoch 2/100
1/1 [==============================] - 0s 65ms/step - loss: 7.7745 - accuracy: 0.0000e+00 - val_loss: 8.0456 - val_accuracy: 0.0000e+00

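In the stateful case from the question, one concrete way such a mismatch shows up is when a data split does not contain a whole number of fixed-size batches. A minimal sanity check, reusing the variable names from the question's training block, might look like:

# Hypothetical check: with a stateful model the batch size is fixed at build time,
# so both the training and validation splits must be exact multiples of it.
assert (len(x_train) - unfit_train_record_count) % batch_size == 0
assert (len(x_val) - unfit_val_record_count) % batch_size == 0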

Solution 2:[2]

I solved the problem by changing the loss calculation logic: instead of defining the reconstruction and KL loss functions inside the VAE class, I moved the loss calculation outside the class, as below.

# Build Variational AutoEncoder(VAE) LSTM Model:
def build_lstm_neural_network(lstm_layer_units=[], leakyrelu_layer_alphas=[], dropout_layer_rates=[],
                              number_of_sequences=32, time_steps=32, data_dim=1, is_stateful_learning=False):
    vae = VAE(
        hidden_layer_units=lstm_layer_units,
        hidden_layer_leakyrelu_alphas=leakyrelu_layer_alphas,
        hidden_layer_dropout_rates=dropout_layer_rates,
        batch_size=number_of_sequences,
        time_steps=time_steps,
        num_features=data_dim,
        is_stateful_learning=is_stateful_learning
    )

    # Add reconstruction loss
    error = vae.model_input - vae.model_output
    reconstruction_loss = K.mean(K.square(error))
    vae.model.add_loss(reconstruction_loss)
    vae.model.add_metric(reconstruction_loss, name='mse_loss', aggregation='mean')

    # Add KL loss (weighted by the same coefficient as kulback_coef in the VAE class)
    kl_loss = vae.kulback_coef * K.mean(
        -0.5 * K.sum(1 + vae.log_variance - K.square(vae.mu) - K.exp(vae.log_variance), axis=1),
        axis=0)
    vae.model.add_loss(kl_loss)
    vae.model.add_metric(kl_loss, name='kl_loss', aggregation='mean')

    # learning_rate comes from the module-level configuration; clipvalue can be added for gradient clipping
    optimizer = Adam(learning_rate=learning_rate)
    vae.model.compile(loss=None, optimizer=optimizer)
    vae.summary()

    return vae.model
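
Because the reconstruction and KL terms are now attached through add_loss and the model is compiled with loss=None, fit no longer needs an explicit target. A minimal training sketch under that assumption, reusing the configuration and callbacks from the question (not the author's exact call), would be:

# Sketch only: the losses are baked into the graph via add_loss, so no y is passed.
model = build_lstm_neural_network(nn_lstm_layer_units, nn_leakyrelu_layer_alphas,
                                  nn_dropout_layer_rates, batch_size,
                                  win_length, num_features, want_stateful_learning)

history = model.fit(x=x_train[unfit_train_record_count:],
                    validation_data=(x_val[unfit_val_record_count:], None),
                    batch_size=batch_size, epochs=epochs,
                    shuffle=False, callbacks=callbacks)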

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution sources:
[1] Solution 1: Jirayu Kaewprateep
[2] Solution 2: Sachin Savale