'calculate precision and recall for finetuned bert model using keras ( ValueError: Shapes (8, 2) and (8, 1) are incompatible)

I tried different ways of calculating precision and recall for the fine tuned model but the code show this error. /usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs) 1145 except Exception as e: # pylint:disable=broad-except 1146 if hasattr(e, "ag_error_metadata"): -> 1147 raise e.ag_error_metadata.to_exception(e) 1148 else: 1149 raise

ValueError: in user code:

File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function  *
    return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function  **
    outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step  **
    outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/transformers/modeling_tf_utils.py", line 922, in train_step
    self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py", line 459, in update_state
    metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 70, in decorated
    update_op = update_state_fn(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 178, in update_state_fn
    return ag_update_state(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/keras/metrics.py", line 1414, in update_state  **
    sample_weight=sample_weight)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py", line 619, in update_confusion_matrix_variables
    y_pred.shape.assert_is_compatible_with(y_true.shape)

ValueError: Shapes (8, 2) and (8, 1) are incompatible

This is my code:

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
import tensorflow as tf
from tensorflow.keras.losses import BinaryCrossentropy,SparseCategoricalCrossentropy
from transformers import TFAutoModelForSequenceClassification
import torch

import keras.backend as K

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

metrics = [
           # tf.keras.metrics.BinaryAccuracy(name='accuracy'),
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
             ]
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

model.compile(
optimizer="adam",
loss=SparseCategoricalCrossentropy(from_logits=False),
metrics=metrics,
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
kfold = KFold(n_splits=5, shuffle=True, random_state=3)  
scores = []

for (train_fold_index, val_fold_index) in KFold(n_splits=2,shuffle=True).split(tokenized_datasets['train']['input_ids'],tokenized_datasets['train']['labels']):

      train_fold= tokenized_datasets['train'].select(train_fold_index).to_tf_dataset( 
        columns=["attention_mask", "input_ids", "token_type_ids"],
        label_cols=["labels"],
        shuffle=True,
        collate_fn=data_collator,
        batch_size=8,
        
    )
      
      
      val_fold = tokenized_datasets['train'].select(val_fold_index).to_tf_dataset( 
        columns=["attention_mask", "input_ids", "token_type_ids"],
        label_cols=["labels"],
        shuffle=True,
        collate_fn=data_collator,
        batch_size=8,
    )

      history= model.fit(train_fold,
                          validation_data = val_fold
                          ,epochs=1
                          , callbacks=[callback]
                          #,batch_size=32 batch_size={{choice([16,32,128])}}
                          )

      #score_ev = model.evaluate(X_val_fold,y_val_fold)
      #precision = history.history['val_precision'][len(history.history['val_precision'])-1]
      #recall = history.history['val_recall'][len(history.history['val_recall'])-1]
      #score = 2*(precision*recall)/(precision+recall+K.epsilon())
      #scores.append(score)
      scores.append( history.history['val_accuracy'][len(history.history['val_accuracy'])-1])
avg_score = np.mean(scores)    
print('F1', avg_score)


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source