Tensorflow - SGD with momentum optimizer update fails for variable with dynamic shape

I am trying to create a variable with a dynamic shape and update it using SGD. Without momentum, the following code works:

import tensorflow as tf

x = tf.Variable(tf.random.normal((32,3)), shape=[None,3])

with tf.GradientTape() as tape:
  x.assign(tf.random.normal((20,3)))
  y = tf.reduce_sum(x)

grads = tape.gradient(y, x)
opt = tf.keras.optimizers.SGD(0.01)
opt.apply_gradients([[grads, x]])

But replacing the line opt = tf.keras.optimizers.SGD(0.01) with opt = tf.keras.optimizers.SGD(0.01, momentum=0.9) throws an error:

<ipython-input-6-66726ccd04f3> in <module>()
      9 grads = tape.gradient(y, x)
     10 opt = tf.keras.optimizers.SGD(0.01, momentum=0.9)
---> 11 opt.apply_gradients([[grads, x]])

5 frames
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in apply_gradients(self, grads_and_vars, name, experimental_aggregate_gradients)
    637       # Create iteration if necessary.
    638       with tf.init_scope():
--> 639         self._create_all_weights(var_list)
    640 
    641       if not grads_and_vars:

/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in _create_all_weights(self, var_list)
    823     _ = self.iterations
    824     self._create_hypers()
--> 825     self._create_slots(var_list)
    826 
    827   def __getattribute__(self, name):

/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/gradient_descent.py in _create_slots(self, var_list)
    117     if self._momentum:
    118       for var in var_list:
--> 119         self.add_slot(var, "momentum")
    120 
    121   def _prepare_local(self, var_device, var_dtype, apply_state):

/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in add_slot(self, var, slot_name, initializer, shape)
    913               dtype=var.dtype,
    914               trainable=False,
--> 915               initial_value=initial_value)
    916       backend.track_variable(weight)
    917       slot_dict[slot_name] = weight

/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
    151     except Exception as e:
    152       filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153       raise e.with_traceback(filtered_tb) from None
    154     finally:
    155       del filtered_tb

/usr/local/lib/python3.7/dist-packages/keras/initializers/initializers_v2.py in __call__(self, shape, dtype, **kwargs)
    143     if _PARTITION_SHAPE in kwargs:
    144       shape = kwargs[_PARTITION_SHAPE]
--> 145     return tf.zeros(shape, dtype)
    146 
    147 

ValueError: Cannot convert a partially known TensorShape (None, 3) to a Tensor.

How can I resolve this?



Solution 1:[1]

Instead of assigning the (20,3) shape inside the tf.GradientTape block, you can give the variable the concrete shape (20,3) at initialization. With a fully known static shape, the optimizer can create its momentum slot variable and the update succeeds.

import tensorflow as tf
x = tf.Variable(tf.random.normal((20,3)))
with tf.GradientTape() as tape:
   #x.assign(tf.random.normal((20,3)))
   y = tf.reduce_sum(x)
grads = tape.gradient(y, x)
opt = tf.keras.optimizers.SGD(0.01)
opt.apply_gradients([[grads, x]])

The output of the above code is: <tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

import tensorflow as tf
x = tf.Variable(tf.random.normal((20,3)))
with tf.GradientTape() as tape:
  #x.assign(tf.random.normal((20,3)))
  y = tf.reduce_sum(x)
grads = tape.gradient(y, x)
opt = tf.keras.optimizers.SGD(0.01,momentum=0.9)
opt.apply_gradients([[grads, x]])

The output of the above code is: <tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 TFer