'How to skip problematic hyperparameter combinations when tuning models using Keras Tuner?

When using Keras Tuner, there doesn't seem to be a way to allow the skipping of a problematic combination of hyperparams. For example, the number of filters in a Conv1D layer may not be compatible with all values of pool size in the following MaxPooling1D layer and thus lead to an error in model building. However, this may not be known before running the tuner. Once the tuner is run, this will lead to an error that will terminate the whole tuning process. Is there a way to skip any hyperparam combinations that result in an error?

Sample code:

def model_builder(hp):
    model = Sequential()
    model.add(
        Embedding(
            input_dim=hp.Int(
                'vocab_size', 
                min_value=4000,
                max_value=10000,
                step=1000,
                default=4000
            ), 
            output_dim=hp.Choice(
                'embedding_dim',
                values=[32, 64, 128, 256],
                default=32
            ), 
            input_length=hp.Int(
                'max_length',
                min_value=50,
                max_value=200,
                step=10,
                default=50
            )
        )
    )
    model.add(
        Conv1D(
            filters=hp.Choice(
                'num_filters_1',
                values=[32, 64],
                default=32
            ), 
            kernel_size=hp.Choice(
                'kernel_size_1',
                values=[3, 5, 7, 9],
                default=7
            ),
            activation='relu'
        )
    )
    model.add(
        MaxPooling1D(
            pool_size=hp.Choice(
                'pool_size', 
                values=[3, 5],
                default=5
            )
        )
    )
    model.add(
        Conv1D(
            filters=hp.Choice(
                'num_filters_2',
                values=[32, 64],
                default=32
            ), 
            kernel_size=hp.Choice(
                'kernel_size_2',
                values=[3, 5, 7, 9],
                default=7
            ), 
            activation='relu'
        )
    )
    model.add(
        GlobalMaxPooling1D()
    )
    model.add(
        Dropout(
            rate=hp.Float(
                'dropout_1',
                min_value=0.0,
                max_value=0.5,
                default=0.5,
                step=0.05
            )
        )
    )
    model.add(
        Dense(
            units=hp.Int(
                'units',
                min_value=10,
                max_value=100,
                step=10,
                default=10
            ), 
            kernel_regularizer=tf.keras.regularizers.l2(
                hp.Float(
                    'regularizer_1',
                    min_value=1e-4,
                    max_value=1e-1,
                    sampling='LOG',
                    default=1e-2
                )
            ), 
            activation='relu'
        )
    )
    model.add(
        Dropout(
            hp.Float(
                'dropout_2',
                min_value=0.0,
                max_value=0.5,
                default=0.5,
                step=0.05
            )
        )
    )
    model.add(
        Dense(
            1, 
            kernel_regularizer=tf.keras.regularizers.l2(
                hp.Float(
                    'regularizer_2',
                    min_value=1e-4,
                    max_value=1e-1,
                    sampling='LOG',
                    default=1e-2
                )
            ), 
            activation='sigmoid'
        )
    )

    
    model.compile(
        loss='binary_crossentropy', 
        optimizer=hp.Choice(
            'optimizer',
            values=['rmsprop', 'adam', 'sgd']
        ), 
        metrics=['accuracy']
    )
    
    return model


tuner = kt.Hyperband(
    model_builder,
    objective='val_accuracy', 
    max_epochs=20,
    #factor=3,
    directory='my_dir',
    project_name='cec',
    seed=seed
)   

class ClearTrainingOutput(tf.keras.callbacks.Callback):
  def on_train_end(*args, **kwargs):
    IPython.display.clear_output(wait=True)
    
tuner.search(
    X_train, 
    y_train, 
    epochs=20, 
    validation_data=(X_test, y_test), 
    callbacks=[ClearTrainingOutput()]
)

The error message:

Epoch 1/3
WARNING:tensorflow:Model was constructed with shape (None, 150) for input Tensor("embedding_input:0", shape=(None, 150), dtype=float32), but it was called on an input with incompatible shape (32, 50).
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-62-16a1eae457d8> in <module>
      3     IPython.display.clear_output(wait=True)
      4 
----> 5 tuner.search(
      6     X_train,
      7     y_train,

~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/engine/base_tuner.py in search(self, *fit_args, **fit_kwargs)
    128 
    129             self.on_trial_begin(trial)
--> 130             self.run_trial(trial, *fit_args, **fit_kwargs)
    131             self.on_trial_end(trial)
    132         self.on_search_end()

~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/tuners/hyperband.py in run_trial(self, trial, *fit_args, **fit_kwargs)
    385             fit_kwargs['epochs'] = hp.values['tuner/epochs']
    386             fit_kwargs['initial_epoch'] = hp.values['tuner/initial_epoch']
--> 387         super(Hyperband, self).run_trial(trial, *fit_args, **fit_kwargs)
    388 
    389     def _build_model(self, hp):

~/anaconda3/envs/cec/lib/python3.8/site-packages/kerastuner/engine/multi_execution_tuner.py in run_trial(self, trial, *fit_args, **fit_kwargs)
     94 
     95             model = self.hypermodel.build(trial.hyperparameters)
---> 96             history = model.fit(*fit_args, **copied_fit_kwargs)
     97             for metric, epoch_values in history.history.items():
     98                 if self.oracle.objective.direction == 'min':

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
     64   def _method_wrapper(self, *args, **kwargs):
     65     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
---> 66       return method(self, *args, **kwargs)
     67 
     68     # Running inside `run_distribute_coordinator` already.

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
    846                 batch_size=batch_size):
    847               callbacks.on_train_batch_begin(step)
--> 848               tmp_logs = train_function(iterator)
    849               # Catch OutOfRangeError for Datasets of unknown size.
    850               # This blocks until the batch has finished executing.

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
    578         xla_context.Exit()
    579     else:
--> 580       result = self._call(*args, **kwds)
    581 
    582     if tracing_count == self._get_tracing_count():

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
    625       # This is the first call of __call__, so we have to initialize.
    626       initializers = []
--> 627       self._initialize(args, kwds, add_initializers_to=initializers)
    628     finally:
    629       # At this point we know that the initialization is complete (or less

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
    503     self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
    504     self._concrete_stateful_fn = (
--> 505         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
    506             *args, **kwds))
    507 

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2444       args, kwargs = None, None
   2445     with self._lock:
-> 2446       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2447     return graph_function
   2448 

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
   2775 
   2776       self._function_cache.missed.add(call_context_key)
-> 2777       graph_function = self._create_graph_function(args, kwargs)
   2778       self._function_cache.primary[cache_key] = graph_function
   2779       return graph_function, args, kwargs

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2655     arg_names = base_arg_names + missing_arg_names
   2656     graph_function = ConcreteFunction(
-> 2657         func_graph_module.func_graph_from_py_func(
   2658             self._name,
   2659             self._python_function,

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    979         _, original_func = tf_decorator.unwrap(python_func)
    980 
--> 981       func_outputs = python_func(*func_args, **func_kwargs)
    982 
    983       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
    439         # __wrapped__ allows AutoGraph to swap in a converted function. We give
    440         # the function a weak reference to itself to avoid a reference cycle.
--> 441         return weak_wrapped_fn().__wrapped__(*args, **kwds)
    442     weak_wrapped_fn = weakref.ref(wrapped_fn)
    443 

~/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:531 train_step  **
        y_pred = self(x, training=True)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py:277 call
        return super(Sequential, self).call(inputs, training=training, mask=mask)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:717 call
        return self._run_internal_graph(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py:888 _run_internal_graph
        output_tensors = layer(computed_tensors, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:927 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py:207 call
        outputs = self._convolution_op(inputs, self.kernel)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1106 __call__
        return self.conv_op(inp, filter)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:638 __call__
        return self.call(inp, filter)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:231 __call__
        return self.conv_op(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:220 _conv1d
        return conv1d(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574 new_func
        return func(*args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/util/deprecation.py:574 new_func
        return func(*args, **kwargs)
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/nn_ops.py:1655 conv1d
        result = gen_nn_ops.conv2d(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/ops/gen_nn_ops.py:965 conv2d
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py:742 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:3319 _create_op_internal
        ret = Operation(
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1816 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    /home/george/anaconda3/envs/cec/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:1657 _create_c_op
        raise ValueError(str(e))

    ValueError: Negative dimension size caused by subtracting 7 from 6 for '{{node sequential/conv1d_1/conv1d}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential/conv1d_1/conv1d/ExpandDims, sequential/conv1d_1/conv1d/ExpandDims_1)' with input shapes: [32,1,6,32], [1,7,32,32].



Solution 1:[1]

I myself have been looking for a solution to this problem for a very long time and found it. Yes, not very elegant, but it works. I'll leave it here, maybe it will help someone else. The point is to wrap the model construction in a try-except block and, if a Value Error occurs, build a highly simplified model. The important thing here is to create your own loss function that would return too large a loss value, which we could catch with our own callback function and stop training the model (code with an example for convolutional neural networks attached) P.S. It would be possible to make your own loss function return NaN, and catch it with a ready-made callback function TerminateOnNaN(). But for some reason keras-tuner thinks that NaN < any number, and therefore it will give the value NaN for best val_loss

def invalid_loss(y_true, y_pred):
    return keras.losses.BinaryCrossentropy()(y_true, y_pred) + 2000000


def invalid_model():
    model = keras.Sequential()

    model.add(layers.Input((input_shape)))

    model.add(layers.Resizing(height=2, width=2))

    model.add(layers.Conv2D(filters=1,
                            kernel_size=2,
                            activation='relu',
                           ))

    model.add(layers.GlobalMaxPooling2D())

    model.add(layers.Dense(units=output_shape,
                           activation="sigmoid",
                          ))

    model.compile(optimizer="Adam",
                  loss=invalid_loss,
                  metrics=[metrics.BinaryAccuracy()])

    return model


def build_model(hp):
    try:
       model = keras.Sequential()

       model.add(layers.Input((input_shape)))

       ...

       model.add(layers.Dense(units=output_shape, 
                              activation=dense_activation))

       model.compile(optimizer="Adam",
                     loss=losses.BinaryCrossentropy(),
                     metrics=[metrics.BinaryAccuracy()])
    
    except ValueError:
        model = invalid_model()

    return model

And here is an example of your own callback, which would stop training so as not to waste time on a "invalid" model

class EarlyStoppingByLoss(keras.callbacks.Callback):
    def __init__(self, max_loss):
        self.max_loss = max_loss

    def on_train_batch_end(self, batch, logs=None):
        if logs["loss"] >= self.max_loss:
            self.model.stop_training = True

You can also control oversized models (for example, if you use Flatten() when switching from convolutional layers to fully connected ones)

from keras.utils.layer_utils import count_params

class EarlyStoppingByModelSize(keras.callbacks.Callback):
    def __init__(self, max_size):
        self.max_size = max_size

    def on_train_begin(self, logs=None):
        trainable_count = count_params(self.model.trainable_weights)
        if trainable_count > self.max_size:
            self.model.stop_training = True

And, accordingly, at the end we add these callbacks to the list and use them when training the model

callbacks = []
callbacks.append(EarlyStoppingByLoss(900000))
callbacks.append(EarlyStoppingByModelSize(12000000))

tuner.search(x=x_train,
             y=y_train,
             epochs=epochs,
             validation_data=(x_test, y_test),
             callbacks=callbacks,
             verbose=1,
             batch_size=batch_size)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 NoobLock