'InvalidArgumentError(): Incompatible shapes: [4,784] vs [784,4] [Op:Mul]
I am implementing some new ideas, but when I am running through the code it gives me the error:
InvalidArgumentError(): Incompatible shapes: [4,784] vs [784,4] [Op:Mul]
The code
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
### MNIST dataset
def load_dataset():
# load in the dataset
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()
# reshape dataset into a single channel
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))
# one hot encode target values
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)
return x_train, y_train, x_test, y_test
X_train, y_train, X_test, y_test = load_dataset()
def normalize(X):
X = X.astype("float32")
X = X / 255.0
return X
### shuffling the data and sample 1024 images
from random import sample
import pandas as pd
n = 1024
X_train, y_train = shuffle(X_train, y_train)
X_sample = X_train[:n]
# oneliner
y_sample = tf.one_hot(X_train, 10)
x = tf.constant(tf.reshape(normalize(X_sample), (n, 784)))
y = tf.constant(y_sample)
## Initialize the weights by adding noise to the weight weight = A + B*omega
A1_norm = tf.random.normal(shape=[784 * 4], stddev=1e-2)
A2_norm = tf.random.normal(shape=[4 * 4], stddev=1e-2)
A3_norm = tf.random.normal(shape=[4 * 4], stddev=1e-2)
A4_norm = tf.random.normal(shape=[4 * 10], stddev=1e-2)
# set them as variables do we can keep a look out for the gradient
A1 = tf.Variable(initial_value=tf.reshape(A1_norm, (784, 4)), trainable=True)
B1 = tf.Variable(initial_value=tf.ones([784, 4]), trainable=True)
A2 = tf.Variable(initial_value=tf.reshape(A2_norm, (4, 4)), trainable=True)
B2 = tf.Variable(initial_value=tf.ones([4, 4]), trainable=True)
A3 = tf.Variable(initial_value=tf.reshape(A3_norm, (4, 4)), trainable=True)
B3 = tf.Variable(initial_value=tf.ones([4, 4]), trainable=True)
A4 = tf.Variable(
initial_value=tf.reshape(A4_norm, (4, 10)), trainable=True
) # making it trainable we can calculate the gradient
B4 = tf.Variable(initial_value=tf.ones([4, 10]), trainable=True)
# Very interesting how we initialize the weights
P0 = tf.constant(np.diag(np.repeat(1e-4, 28)))
W1 = tf.constant(A1.numpy() * 0.0, dtype=tf.float32)
W2 = tf.constant(A2.numpy() * 0.0, dtype=tf.float32)
W3 = tf.constant(A3.numpy() * 0.0, dtype=tf.float32)
W4 = tf.constant(A4.numpy() * 0.0, dtype=tf.float32)
V = tf.constant(A4.numpy() * 0.0, dtype=tf.float32)
def f1(x, A1, W1, B1):
return tf.sigmoid(tf.matmul(x, (A1 + W1.T * B1)))
def f2(x, A2, W2, B2):
return tf.sigmoid(tf.matmul(x, (A2 + W2.T * B2)))
def f3(x, A3, W3, B3):
return tf.sigmoid(tf.matmul(x, (A3 + W3.T * B3)))
def obs(x, V, A4, B4):
return tf.nn.softmax(tf.matmul(x, A4 + V.T * B4))
# set a learning rate
learning_rate = 0.1
# the forward propagation
# https://www.tensorflow.org/guide/autodiff
def forward(x, y, W1, A1, B1, W2, A2, B2, W3, A3, B3, V, A4, B4):
with tf.GradientTape() as g: # keep a record of the g really interesting
g.watch(W1) # watch out for the weight
g.watch(x) # watch out for the input
h1 = f1(
x, A1, W1, B1
) # hidden layer 0 in the implementation of the forward propagation
dh1dx = g.batch_jacobian(h1, x)
dh1dW = tf.reshape(
g.jacobian(h1, W1), shape=list(n, h1.shape.as_list()[2], -1)
) # if you need a seperate gradient for each item
P1 = tf.matmul(tf.matmul(dh1dx, P0), dh1dx, transpose_b=True) + tf.matmul(
dh1dW, dh1dW, transpose_b=True
) # Q=I
with tf.GradientTape() as g:
g.watch(W2)
g.watch(h1)
h2 = f2(h1, W2, A2, B2) # mu.predict
dh2dh1 = g.batch_jacobian(h2, h1)
dh2dW = tf.reshape(
g.jacobian(h2, W2), shape=list(n, h2.shape.as_list()[2], -1)
) # input batch size = 1024, output size = 2, number of parameters = 2 x 2
P2 = tf.matmul(tf.matmul(dh2dh1, P1), dh2dh1, transpose_b=True) + tf.matmul(
dh2dW, dh2dW, transpose_b=True
) # Q=I
with tf.GradientTape() as g: # persistent=TRUE
g.watch(W3)
g.watch(h2)
h3 = f3(h2, W3, A3, B3)
dh3dh2 = g.batch_jacobian(
h3, h2
) # taken the jacobian of the h3 with respect to h2
dh3dW = tf.reshape(
g.jacobian(h3, W3), shape=list(n, h3.shape.as_list()[2], -1)
) # input batch size = 1024, output size = 2, number of parameters = 2 x 2
P3 = tf.matmul(tf.matmul(dh3dh2, P2), dh3dh2, transpose_b=True) + tf.matmul(
dh3dW, dh3dW, transpose_b=True
) # Q=I
with tf.GradientTape() as g:
g.watch(W4)
g.watch(h3)
y3 = obs(h3, W4, A4, B4)
dobsdh3 = g.batch_jacobian(y3, h3)
dobsdV = tf.reshape(
g.jacobian(y3, W4), shape=list(n, y3.shape.as_list()[2], -1)
)
tf.matmul(dh3dW, dh3dW, transpose_b=True) # Q=I
S3 = (
tf.matmul(tf.matmul(dobsdh3, P3), dobsdh3, transpose_b=True)
+ tf.matmul(dobsdV, dobsdV, transpose_b=True)
+ np.diag(np.repeat(1e-6, 10))
) # Q=I
# require detailed study
log_prob = tfp.distributions.Categorical(probs=y3).log_prob(y_train[:1024, :])
return log_prob, h1, P1, h2, P2, h3, P3, y3, S3
log_prob, h1, P1, h2, P2, h3, P3, y3, S3 = forward(
x, y, W1, A1, B1, W2, A2, B2, W3, A3, B3, V, A4, B4
)
def train(x, y, W1, A1, B1, W2, A2, B2, W3, A3, B3, V, A4, B4, learning_rate):
with tf.GradientTape() as g2:
log_prob, h1, P1, h2, P2, h3, P3, y3, S3 = forward(
x, y, W1, A1, B1, W2, A2, B2, W3, A3, B3, V, A4, B4
)
neg_log_prob = -1 * (tf.reduce_mean(log_prob))
print(neg_log_prob.numpy())
dloss = g2.gradient(neg_log_prob, list(A1, B1, A2, B2, A3, B3, A4, B4))
A1.assign_sub(learning_rate * dloss[[1]])
B1.assign_sub(learning_rate * dloss[[2]])
A2.assign_sub(learning_rate * dloss[[3]])
B2.assign_sub(learning_rate * dloss[[4]])
A3.assign_sub(learning_rate * dloss[[5]])
B3.assign_sub(learning_rate * dloss[[6]])
A4.assign_sub(learning_rate * dloss[[7]])
B4.assign_sub(learning_rate * dloss[[8]])
train(x, y, W1, A1, B1, W2, A2, B2, W3, A3, B3, W4, A4, B4, learning_rate)
Error:
Traceback (most recent call last): File "/home/southern/Documents/Kalman_Project/Kalman_filter.py", line 162, in log_prob, h1, P1, h2, P2, h3, P3, y3, S3 = forward( File "/home/southern/Documents/Kalman_Project/Kalman_filter.py", line 101, in forward h1 = f1( File "/home/southern/Documents/Kalman_Project/Kalman_filter.py", line 77, in f1 return tf.sigmoid(tf.matmul(x, (A1 + W1.T * B1))) File "/home/southern/.local/lib/python3.8/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler raise e.with_traceback(filtered_tb) from None File "/home/southern/.local/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 7186, in raise_from_not_ok_status raise core._status_to_exception(e) from None # pylint: disable=protected-access tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [4,784] vs. [784,4] [Op:Mul]
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
