'Target Data Missing from tensorflow fit()

So I have a problem when train deep learning with BERT with tensorflow which contain text dataset. So i want to fit() the model but got an error when training. I think it happen because the data_train did't have the label. But from my research It also same problem like SO question in here Same problem. Since it didn't have a answer is this a bug? The error is like this

ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.CategoricalCrossentropy object at 0x7fa707d96fd0>, and therefore expects target data to be provided in `fit()`.

My code like this

X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))

def generate_training_data(df, ids, masks, tokenizer):
    for i, text in tqdm(enumerate(df['text'])):
        tokenized_text = tokenizer.encode_plus(
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks

X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)

labels = np.zeros((len(df), 3))
labels[np.arange(len(df)), df['label'].values] = 1

dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))

def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks

dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)

p = 0.8
train_size = int((len(df)//16)*p)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

model = TFBertModel.from_pretrained('cahya/bert-base-indonesian-522M')
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')

bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(3, activation='softmax', name='output_layer')(intermediate_layer) # softmax -> calcs probs of classes

sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)

optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])

hist = sentiment_model.fit(

Solution 1:[1]

I spend a bit of time finding something we can update and I download the model from the websites.

[ Sample ]:

import tensorflow as tf
import tensorflow_text as text  # Registers the ops.
import tensorflow_hub as hub

import os
from os.path import exists

def generate_training_data(train_labels):
    input_ids = [ ]
    attn_masks = [ ]
    labels = [ ]
    for item in train_labels:
        input_ids.append( str(item) )
        attn_masks.append( int(1) )
        labels.append( item )

    attn_masks = tf.constant(attn_masks, shape=(1, len(attn_masks),1), dtype=tf.float32)
    labels = tf.constant(labels, shape=(1, len(labels),1), dtype=tf.int64)
    input_ids = tf.constant(input_ids, shape=(1, len(input_ids),1), dtype=tf.string)
    return input_ids, attn_masks, labels

def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks,
        'labels': labels
def build_classifier_model():
  text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
  encoder_inputs = preprocessing_layer(text_input)
  encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
  outputs = encoder(encoder_inputs)
  net = outputs['pooled_output']
  net = tf.keras.layers.Dropout(0.1)(net)
  net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
  return tf.keras.Model(text_input, net)
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

X_input_ids, X_attn_masks, labels = generate_training_data(train_labels)
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks))
options = tf.saved_model.LoadOptions(

text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='sentences')
preprocessor = hub.KerasLayer(export_dir)
encoder_inputs = preprocessor(text_input)
encoder = hub.KerasLayer( export_dir_2, trainable=False, load_options=options)
outputs = encoder(encoder_inputs)

intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(outputs['default'])
output_layer = tf.keras.layers.Dense(1, activation='softmax', name='output_layer')(intermediate_layer)
sentiment_model = tf.keras.Model(inputs=[text_input], outputs=output_layer)

optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])

hist = sentiment_model.fit(

[ Output ]:

outputs: KerasTensor(type_spec=TensorSpec(shape=(None, 512), dtype=tf.float32, name=None), name='keras_layer_1/StatefulPartitionedCall:0', description="created by layer 'keras_layer_1'")
Model: "model"
 Layer (type)                   Output Shape         Param #     Connected to
 sentences (InputLayer)         [(None,)]            0           []

 keras_layer (KerasLayer)       {'input_mask': (Non  0           ['sentences[0][0]']
                                e, 128),
                                (None, 128),
                                (None, 128)}

 keras_layer_1 (KerasLayer)     {'default': (None,   28763649    ['keras_layer[0][0]',
                                512),                             'keras_layer[0][1]',
                                 'encoder_outputs':               'keras_layer[0][2]']
                                 [(None, 128, 512),
                                 (None, 128, 512),
                                 (None, 128, 512),
                                 (None, 128, 512)],
                                 (None, 128, 512),
                                 'pooled_output': (
                                None, 512)}

 intermediate_layer (Dense)     (None, 512)          262656      ['keras_layer_1[0][0]']

 output_layer (Dense)           (None, 1)            513         ['intermediate_layer[0][0]']

Total params: 29,026,818
Trainable params: 263,169
Non-trainable params: 28,763,649
Epoch 1/2



