'InvalidArgumentError: Matrix size-incompatible: In[0]: [2,761856], In[1]: [12288,4096] [[node vggish/fc1/fc1_1/Relu (defined at C:\... ]]
I am trying adapt Google's VGGish CNN (github repo: here), created for audio analysis. Off the shelf, this model extracts features from 1 second chunks of audio, puts these in a tuple with one-hot coding to represent class, and trains on these 1sec chunks. Any longer files are cut into 1sec chunks if input. I want to instead use 1min chunks.
I'm doing a toy test using two classes, with two 1min audio files for each class.
I made some changes (see code block 1 below) to the vggish_train_demo.py script to extract features from real audio, instead of the original scripts toy example. The network runs perfectly when I use this new code and enter my audio files into the model (code block 2) to train on 1sec chunks of these. However, I modified some parameters of the vggish_params script to extract features across a whole minute, and train on these instead:
NUM_FRAMES = 96*62 #60 sec/0.96 = 62 rounding down
EXAMPLE_WINDOW_SECONDS = 0.96*62 #formally 0.96
EXAMPLE_HOP_SECONDS = 0.96*62 #formally 0.96
I can generate features using my new code block 1 no problem. But, when I try to execute the model in code block 2, I now get the error (full error at bottom):
InvalidArgumentError: Matrix size-incompatible: In[0]: [2,761856], In[1]: [12288,4096]
[[node vggish/fc1/fc1_1/Relu
(defined at C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\layers\layers.py:1912)
]]
I can't figure out whats causing this and how to fix it? Can input these 1min chunks that are of a different size to the 1sec files? Any help appreciated!
data[0][0].shape using 1sec approach = (96, 64)
data[0][0].shape using 1min approach = (5952, 64)
Code block 1
def get_features(directory):
all_fs = os.listdir(directory) #list of all files in directory
audio_fs = [f for f in all_fs if '.wav' in f.lower() or '.mp3' in f.lower()]
for f in audio_fs:
#import mel_features
path = os.path.join(directory, f)
features_from_files_list = vggish_input_new.wavfile_to_examples(path)
features_from_files = np.array(features_from_files_list)
return features_from_files
def _get_examples_batch(dir1, dir2): #example using two classes, each stored in a seperate directory
#Get features from audio files and generate one-hot coding for each dir
dir1_feats = get_features(dir1)
dir1_labels = np.array([[1, 0]] * dir1_feats.shape[0])
dir2_feats = get_features(dir2)
dir2_labels = np.array([[0, 1]] * dir2_feats.shape[0])
#Shuffle (example, label) pairs across all classes.
all_examples = np.concatenate((dir1_feats, dir2_feats))
all_labels = np.concatenate((dir1_labels, dir2_labels))
labeled_examples = list(zip(all_examples, all_labels))
shuffle(labeled_examples)
#Separate and return the features and labels.
features = [example for (example, _) in labeled_examples]
labels = [label for (_, label) in labeled_examples]
return (features, labels)"""
_NUM_CLASSES = 2
dir1 = (r'C:\Users\class1')
dir2 = (r'C:\Users\class2')
#define sample rate of audio:
sr = 44100
data = _get_examples_batch(healthy, degraded)
Code block 2
#run using:
#python vggish_train_demo.py --num_batches 100
def main(_):
with tf.Graph().as_default(), tf.Session() as sess:
# Define VGGish.
embeddings = vggish_slim.define_vggish_slim(training=FLAGS.train_vggish)
# Define a shallow classification model and associated training ops on top
# of VGGish.
with tf.variable_scope('mymodel'):
# Add a fully connected layer with 100 units. Add an activation function
# to the embeddings since they are pre-activation.
num_units = 100
fc = slim.fully_connected(tf.nn.relu(embeddings), num_units)
# Add a classifier layer at the end, consisting of parallel logistic
# classifiers, one per class. This allows for multi-class tasks.
logits = slim.fully_connected(
fc, _NUM_CLASSES, activation_fn=None, scope='logits')
tf.sigmoid(logits, name='prediction')
# Add training ops.
with tf.variable_scope('train'):
global_step = tf.train.create_global_step()
# Labels are assumed to be fed as a batch multi-hot vectors, with
# a 1 in the position of each positive class label, and 0 elsewhere.
labels_input = tf.placeholder(
tf.float32, shape=(None, _NUM_CLASSES), name='labels')
# Cross-entropy label loss.
xent = tf.nn.sigmoid_cross_entropy_with_logits(
logits=logits, labels=labels_input, name='xent')
loss = tf.reduce_mean(xent, name='loss_op')
tf.summary.scalar('loss', loss)
# We use the same optimizer and hyperparameters as used to train VGGish.
optimizer = tf.train.AdamOptimizer(
learning_rate=vggish_params.LEARNING_RATE,
epsilon=vggish_params.ADAM_EPSILON)
train_op = optimizer.minimize(loss, global_step=global_step)
# Initialize all variables in the model, and then load the pre-trained
# VGGish checkpoint.
sess.run(tf.global_variables_initializer())
vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)
# The training loop.
features_input = sess.graph.get_tensor_by_name(
vggish_params.INPUT_TENSOR_NAME)
for _ in range(FLAGS.num_batches):
(features, labels) = data #or use: _get_examples_batch(dir1, dir2)
[num_steps, loss_value, _] = sess.run(
[global_step, loss, train_op],
feed_dict={features_input: features, labels_input: labels})
print('Step %d: loss %g' % (num_steps, loss_value))
if __name__ == '__main__':
tf.app.run()
Full error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1379 try:
-> 1380 return fn(*args)
1381 except errors.OpError as e:
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1362 self._extend_graph()
-> 1363 return self._call_tf_sessionrun(options, feed_dict, fetch_list,
1364 target_list, run_metadata)
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1455 run_metadata):
-> 1456 return tf_session.TF_SessionRun_wrapper(self._session, options, feed_dict,
1457 fetch_list, target_list,
InvalidArgumentError: Matrix size-incompatible: In[0]: [2,761856], In[1]: [12288,4096]
[[{{node vggish/fc1/fc1_1/Relu}}]]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_19908/2146454281.py in <module>
66
67 if __name__ == '__main__':
---> 68 tf.app.run()
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\platform\app.py in run(main, argv)
38 main = main or _sys.modules['__main__'].main
39
---> 40 _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\absl\app.py in run(main, argv, flags_parser)
310 callback()
311 try:
--> 312 _run_main(main, args)
313 except UsageError as error:
314 usage(shorthelp=True, detailed_error=error, exitcode=error.exitcode)
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\absl\app.py in _run_main(main, argv)
256 sys.exit(retval)
257 else:
--> 258 sys.exit(main(argv))
259
260
~\AppData\Local\Temp/ipykernel_19908/2146454281.py in main(_)
60 for _ in range(FLAGS.num_batches):
61 (features, labels) = data #or use: _get_examples_batch(dir1, dir2)
---> 62 [num_steps, loss_value, _] = sess.run(
63 [global_step, loss, train_op],
64 feed_dict={features_input: features, labels_input: labels})
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
968
969 try:
--> 970 result = self._run(None, fetches, feed_dict, options_ptr,
971 run_metadata_ptr)
972 if run_metadata:
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1191 # or if the call is a partial run that specifies feeds.
1192 if final_fetches or final_targets or (handle and feed_dict_tensor):
-> 1193 results = self._do_run(handle, final_targets, final_fetches,
1194 feed_dict_tensor, options, run_metadata)
1195 else:
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1371
1372 if handle is None:
-> 1373 return self._do_call(_run_fn, feeds, fetches, targets, options,
1374 run_metadata)
1375 else:
~\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1397 '\nsession_config.graph_options.rewrite_options.'
1398 'disable_meta_optimizer = True')
-> 1399 raise type(e)(node_def, op, message) # pylint: disable=no-value-for-parameter
1400
1401 def _extend_graph(self):
InvalidArgumentError: Matrix size-incompatible: In[0]: [2,761856], In[1]: [12288,4096]
[[node vggish/fc1/fc1_1/Relu
(defined at C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\layers\layers.py:1912)
]]
Errors may have originated from an input operation.
Input Source operations connected to node vggish/fc1/fc1_1/Relu:
In[0] vggish/fc1/fc1_1/BiasAdd (defined at C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\keras\layers\core.py:1250)
Operation defined at: (most recent call last)
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\runpy.py", line 197, in _run_module_as_main
>>> return _run_code(code, main_globals, None,
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\runpy.py", line 87, in _run_code
>>> exec(code, run_globals)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>> app.launch_new_instance()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
>>> app.start()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
>>> self.io_loop.start()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>> self.asyncio_loop.run_forever()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\asyncio\base_events.py", line 596, in run_forever
>>> self._run_once()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\asyncio\base_events.py", line 1890, in _run_once
>>> handle._run()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\asyncio\events.py", line 80, in _run
>>> self._context.run(self._callback, *self._args)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
>>> await self.process_one()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
>>> await dispatch(*args)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
>>> await result
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
>>> reply_content = await reply_content
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
>>> res = shell.run_cell(code, store_history=store_history, silent=silent)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
>>> return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
>>> result = self._run_cell(
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
>>> return runner(coro)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
>>> coro.send(None)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
>>> has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
>>> if (await self.run_code(code, result, async_=asy)):
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
>>> exec(code_obj, self.user_global_ns, self.user_ns)
>>>
>>> File "C:\Users\bw339\AppData\Local\Temp/ipykernel_19908/2146454281.py", line 68, in <module>
>>> tf.app.run()
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\absl\app.py", line 312, in run
>>> _run_main(main, args)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\absl\app.py", line 258, in _run_main
>>> sys.exit(main(argv))
>>>
>>> File "C:\Users\bw339\AppData\Local\Temp/ipykernel_19908/2146454281.py", line 8, in main
>>> embeddings = vggish_slim.define_vggish_slim(training=FLAGS.train_vggish)
>>>
>>> File "C:\Users\bw339\models\research\audioset\vggish\Untitled Folder\vggish_slim.py", line 102, in define_vggish_slim
>>> net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\layers\layers.py", line 2648, in repeat
>>> outputs = layer(outputs, *args, **kwargs)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\ops\arg_scope.py", line 184, in func_with_args
>>> return func(*args, **current_args)
>>>
>>> File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\layers\layers.py", line 1912, in fully_connected
>>> outputs = activation_fn(outputs)
>>>
Original stack trace for 'vggish/fc1/fc1_1/Relu':
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
app.start()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
self.io_loop.start()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
self.asyncio_loop.run_forever()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\asyncio\base_events.py", line 596, in run_forever
self._run_once()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\asyncio\base_events.py", line 1890, in _run_once
handle._run()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\asyncio\events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
await self.process_one()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
await dispatch(*args)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
await result
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
reply_content = await reply_content
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
result = self._run_cell(
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
return runner(coro)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\bw339\AppData\Local\Temp/ipykernel_19908/2146454281.py", line 68, in <module>
tf.app.run()
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\platform\app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\absl\app.py", line 312, in run
_run_main(main, args)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\absl\app.py", line 258, in _run_main
sys.exit(main(argv))
File "C:\Users\bw339\AppData\Local\Temp/ipykernel_19908/2146454281.py", line 8, in main
embeddings = vggish_slim.define_vggish_slim(training=FLAGS.train_vggish)
File "C:\Users\bw339\models\research\audioset\vggish\Untitled Folder\vggish_slim.py", line 102, in define_vggish_slim
net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\layers\layers.py", line 2648, in repeat
outputs = layer(outputs, *args, **kwargs)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\ops\arg_scope.py", line 184, in func_with_args
return func(*args, **current_args)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tf_slim\layers\layers.py", line 1912, in fully_connected
outputs = activation_fn(outputs)
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 10553, in relu
_, _, _op, _outputs = _op_def_library._apply_op_helper(
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 744, in _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\framework\ops.py", line 3697, in _create_op_internal
ret = Operation(
File "C:\Users\bw339\Anaconda3\envs\test-vggishtraining-env\lib\site-packages\tensorflow\python\framework\ops.py", line 2101, in __init__
self._traceback = tf_stack.extract_stack_for_node(self._c_op)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
