TensorFlow - SGD with momentum optimizer update fails for variable with dynamic shape
I am trying to create a variable with a dynamic shape and update it using SGD. Without momentum, the following code works:
import tensorflow as tf
x = tf.Variable(tf.random.normal((32,3)), shape=[None,3])
with tf.GradientTape() as tape:
x.assign(tf.random.normal((20,3)))
y = tf.reduce_sum(x)
grads = tape.gradient(y, x)
opt = tf.keras.optimizers.SGD(0.01)
opt.apply_gradients([[grads, x]])
However, replacing the line `opt = tf.keras.optimizers.SGD(0.01)` with `opt = tf.keras.optimizers.SGD(0.01, momentum=0.9)` throws an error:
<ipython-input-6-66726ccd04f3> in <module>()
9 grads = tape.gradient(y, x)
10 opt = tf.keras.optimizers.SGD(0.01, momentum=0.9)
---> 11 opt.apply_gradients([[grads, x]])
5 frames
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in apply_gradients(self, grads_and_vars, name, experimental_aggregate_gradients)
637 # Create iteration if necessary.
638 with tf.init_scope():
--> 639 self._create_all_weights(var_list)
640
641 if not grads_and_vars:
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in _create_all_weights(self, var_list)
823 _ = self.iterations
824 self._create_hypers()
--> 825 self._create_slots(var_list)
826
827 def __getattribute__(self, name):
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/gradient_descent.py in _create_slots(self, var_list)
117 if self._momentum:
118 for var in var_list:
--> 119 self.add_slot(var, "momentum")
120
121 def _prepare_local(self, var_device, var_dtype, apply_state):
/usr/local/lib/python3.7/dist-packages/keras/optimizer_v2/optimizer_v2.py in add_slot(self, var, slot_name, initializer, shape)
913 dtype=var.dtype,
914 trainable=False,
--> 915 initial_value=initial_value)
916 backend.track_variable(weight)
917 slot_dict[slot_name] = weight
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
151 except Exception as e:
152 filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153 raise e.with_traceback(filtered_tb) from None
154 finally:
155 del filtered_tb
/usr/local/lib/python3.7/dist-packages/keras/initializers/initializers_v2.py in __call__(self, shape, dtype, **kwargs)
143 if _PARTITION_SHAPE in kwargs:
144 shape = kwargs[_PARTITION_SHAPE]
--> 145 return tf.zeros(shape, dtype)
146
147
ValueError: Cannot convert a partially known TensorShape (None, 3) to a Tensor.
How can I resolve this?
Solution 1:[1]
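Instead of assigning a tensor of shape (20,3) to the variable inside the tf.GradientTape block, initialize the variable with shape (20,3) directly so that its shape is fully defined. With momentum enabled, the optimizer creates a "momentum" slot by calling tf.zeros with the variable's shape, and that call fails when the shape is only partially known, such as (None, 3).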
import tensorflow as tf
x = tf.Variable(tf.random.normal((20,3)))
with tf.GradientTape() as tape:
#x.assign(tf.random.normal((20,3)))
y = tf.reduce_sum(x)
grads = tape.gradient(y, x)
opt = tf.keras.optimizers.SGD(0.01)
opt.apply_gradients([[grads, x]])
The output of the above code is: <tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>
With a fully defined shape, the same code also works when momentum is enabled:
import tensorflow as tf
x = tf.Variable(tf.random.normal((20,3)))
with tf.GradientTape() as tape:
#x.assign(tf.random.normal((20,3)))
y = tf.reduce_sum(x)
grads = tape.gradient(y, x)
opt = tf.keras.optimizers.SGD(0.01,momentum=0.9)
opt.apply_gradients([[grads, x]])
The output of the above code is: <tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | TFer |
