Loss function has negative values
I'm training a CNN for binary segmentation with a loss function that combines the Dice coefficient and binary cross-entropy. During training the loss becomes negative. The Dice term is always in the range 0-1, and the binary cross-entropy (I use a sigmoid output activation) should also be non-negative. The training images were standardized to zero mean and unit standard deviation; even when I instead normalize them to the range 0-1 the loss is still negative. I truncated the run after the first epoch below, but the loss is negative in all later epochs as well. Does anyone know why the loss is negative? Thanks for considering my request.
My code:
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Input, Conv2D, Conv2DTranspose, MaxPooling2D,
                                     BatchNormalization, ReLU, concatenate)
from tensorflow.keras.models import Model

def Unet(input_size1=(160, 160, 1), num_class=2, n_filt=32):
    input_model1 = Input(input_size1)
    # layer1 2D
    x1 = ReLU()(BatchNormalization()(Conv2D(n_filt, 3, padding='same', kernel_initializer='he_normal')(input_model1)))
    conv1 = ReLU()(BatchNormalization()(Conv2D(n_filt, 3, padding='same', kernel_initializer='he_normal')(x1)))
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    # layer2 2D
    conv2 = ReLU()(BatchNormalization()(Conv2D(n_filt*2, 3, padding='same', kernel_initializer='he_normal')(pool1)))
    conv2 = ReLU()(BatchNormalization()(Conv2D(n_filt*2, 3, padding='same', kernel_initializer='he_normal')(conv2)))
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    # layer3 2D
    conv3 = ReLU()(BatchNormalization()(Conv2D(n_filt*4, 3, padding='same', kernel_initializer='he_normal')(pool2)))
    conv3 = ReLU()(BatchNormalization()(Conv2D(n_filt*4, 3, padding='same', kernel_initializer='he_normal')(conv3)))
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    # layer4 2D
    conv4 = ReLU()(BatchNormalization()(Conv2D(n_filt*8, 3, padding='same', kernel_initializer='he_normal')(pool3)))
    conv4 = ReLU()(BatchNormalization()(Conv2D(n_filt*8, 3, padding='same', kernel_initializer='he_normal')(conv4)))
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    # layer5 2D
    conv5 = ReLU()(BatchNormalization()(Conv2D(n_filt*16, 3, padding='same', kernel_initializer='he_normal')(pool4)))
    conv5 = ReLU()(BatchNormalization()(Conv2D(n_filt*16, 3, padding='same', kernel_initializer='he_normal')(conv5)))
    conv_up5 = ReLU()(BatchNormalization()(Conv2DTranspose(num_class, 4, strides=(2, 2), padding='same', activation='relu', kernel_initializer='he_normal')(conv5)))
    merge6 = concatenate([conv_up5, conv4], axis=3)
    conv6 = ReLU()(BatchNormalization()(Conv2D(n_filt*8, 3, padding='same', kernel_initializer='he_normal')(merge6)))
    conv6 = ReLU()(BatchNormalization()(Conv2D(n_filt*8, 3, padding='same', kernel_initializer='he_normal')(conv6)))
    conv_up6 = ReLU()(BatchNormalization()(Conv2DTranspose(num_class, 4, strides=(2, 2), padding='same', activation='relu', kernel_initializer='he_normal')(conv6)))
    merge7 = concatenate([conv_up6, conv3], axis=3)
    conv7 = ReLU()(BatchNormalization()(Conv2D(n_filt*4, 3, padding='same', kernel_initializer='he_normal')(merge7)))
    conv7 = ReLU()(BatchNormalization()(Conv2D(n_filt*4, 3, padding='same', kernel_initializer='he_normal')(conv7)))
    conv_up7 = ReLU()(BatchNormalization()(Conv2DTranspose(num_class, 4, strides=(2, 2), padding='same', activation='relu', kernel_initializer='he_normal')(conv7)))
    merge8 = concatenate([conv_up7, conv2], axis=3)
    conv8 = ReLU()(BatchNormalization()(Conv2D(n_filt*2, 3, padding='same', kernel_initializer='he_normal')(merge8)))
    conv8 = ReLU()(BatchNormalization()(Conv2D(n_filt*2, 3, padding='same', kernel_initializer='he_normal')(conv8)))
    conv_up8 = ReLU()(BatchNormalization()(Conv2DTranspose(num_class, 4, strides=(2, 2), padding='same', activation='relu', kernel_initializer='he_normal')(conv8)))
    merge9 = concatenate([conv_up8, conv1], axis=3)
    conv9 = ReLU()(BatchNormalization()(Conv2D(n_filt, 3, padding='same', kernel_initializer='he_normal')(merge9)))
    conv9 = ReLU()(BatchNormalization()(Conv2D(n_filt, 3, padding='same', kernel_initializer='he_normal')(conv9)))
    if num_class > 2:
        # multi-class: one softmax channel per class
        output = Conv2D(num_class, 1, activation='softmax', padding='same', kernel_initializer='he_normal')(conv9)
    else:
        # binary: a single sigmoid channel
        output = Conv2D(num_class-1, 1, activation='sigmoid', padding='same', kernel_initializer='he_normal')(conv9)
    model = Model(inputs=input_model1, outputs=output)
    return model
model = Unet()
def dice_loss(delta=0.5, smooth=0.000001):
    def loss_function(y_true, y_pred):
        axis = identify_axis(y_true.get_shape())
        # Calculate true positives (tp), false negatives (fn) and false positives (fp)
        tp = K.sum(y_true * y_pred, axis=axis)
        fn = K.sum(y_true * (1-y_pred), axis=axis)
        fp = K.sum((1-y_true) * y_pred, axis=axis)
        # Calculate Dice score
        dice_class = (tp + smooth)/(tp + delta*fn + (1-delta)*fp + smooth)
        # Average class scores
        dice_loss = K.mean(1-dice_class)
        return dice_loss
    return loss_function

def combo_loss(alpha=0.5, beta=0.6):
    def loss_function(y_true, y_pred):
        dice = dice_loss()(y_true, y_pred)
        axis = identify_axis(y_true.get_shape())
        # Clip values to prevent division by zero error
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * (K.log(y_pred)) + (1-y_true) * (-K.log(1-y_pred))
        if beta is not None:
            beta_weight = np.array([beta, 1-beta])
            cross_entropy = beta_weight * cross_entropy
        # sum over classes
        cross_entropy = K.mean(K.sum(cross_entropy, axis=[-1]))
        if alpha is not None:
            combo_loss = (alpha * cross_entropy) - ((1 - alpha) * dice)
        else:
            combo_loss = cross_entropy - dice
        return combo_loss
    return loss_function
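# identify_axis (used above) and dice_coef (used in model.compile below) are not
# defined in this excerpt. A minimal sketch of the helpers the code appears to
# assume follows; these are common Keras segmentation-loss definitions and may
# differ from the originals.
def identify_axis(shape):
    # Spatial axes: (batch, H, W, C) -> [1, 2]; (batch, H, W, D, C) -> [1, 2, 3]
    if len(shape) == 4:
        return [1, 2]
    elif len(shape) == 5:
        return [1, 2, 3]
    raise ValueError('Expected a 4D or 5D tensor shape.')

def dice_coef(y_true, y_pred, smooth=1e-6):
    # Soft Dice coefficient averaged over the batch, used here only as a metric
    intersection = K.sum(y_true * y_pred, axis=[1, 2, 3])
    union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3])
    return K.mean((2. * intersection + smooth) / (union + smooth))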
# each img has zero mean and unit standard deviation
img_train = train_img[...,np.newaxis].astype('float32')
mask_train = train_label[...,np.newaxis].astype('float32')
img_val = val_img[...,np.newaxis].astype('float32')
mask_val = val_label[...,np.newaxis].astype('float32')
batch_size = 2
epochs = 10
initial_learning_rate = 1e-3
opt = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)
model.compile(optimizer=opt, loss=combo_loss(), metrics=[dice_coef])
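# stopping, checkpoint and callback are passed to model.fit below but are not
# defined in this excerpt; presumably they are standard Keras callbacks along
# these lines (the filename and settings here are placeholders, not the originals):
stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)
callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)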
results = model.fit(img_train, mask_train, batch_size=batch_size, epochs=epochs, validation_data=(img_val, mask_val), callbacks=[stopping, checkpoint, callback], shuffle=True)
Fitting model...
Epoch 1/10
23/23 [==============================] - ETA: 0s - loss: -0.0231 - dice_coef: 0.4117
Solution 1:[1]
The dice_loss seems wrong, if I remember correctly.
In your code the Dice coefficient is $TP/(TP+FN+FP)$, while it should be $2\,TP/(2\,TP+FN+FP)$.
Here is a better way to implement it:
# (PyTorch) Dice coefficient computed per channel across the whole batch
def dice_coeff(input, target):
    inputs = input.float()
    target = target.float()
    # sum over the batch and spatial dimensions, keeping the channel dimension
    noreducedim = [0] + list(range(2, len(inputs.shape)))
    intersect = (inputs * target).sum(dim=noreducedim)
    denominator = (inputs + target).sum(dim=noreducedim)
    dices = (2 * intersect + 1e-6) / (denominator + 1e-6)
    return dices
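As a quick sanity check (my own toy example, assuming PyTorch tensors shaped (batch, channels, H, W)):

import torch

# random "probability maps" and binary masks, shape (batch=2, channels=1, 4, 4)
pred = torch.rand(2, 1, 4, 4)
mask = (torch.rand(2, 1, 4, 4) > 0.5).float()
print(dice_coeff(pred, mask))   # one Dice value per channel, each in [0, 1]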
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | |
