ValueError: Found input variables with inconsistent numbers of samples: [204, 745]

Currently I am applying ensemble methods to four available deep learning models. However, when I try to perform a Sugeno ensemble, an error is raised about an inconsistent number of samples, even though all the models were run on the same dataset.

Did I make a mistake somewhere? The error is: ValueError: Found input variables with inconsistent numbers of samples: [204, 745].

The lines with errors are lines 73, 56 and 31.

def getfile(filename, root="../", class_counts=(204, 745)):
    """Load a headerless CSV of model outputs and build the class labels.

    The CSV path is formed by plain string concatenation,
    ``root + filename + '.csv'``, so any path separator must already be
    part of ``root`` or ``filename``.

    Args:
        filename: File name without the ``.csv`` extension.
        root: Prefix prepended to ``filename``.
        class_counts: Number of samples per class, in class order. Class
            ``i`` contributes ``class_counts[i]`` consecutive labels equal
            to ``i``. The default reproduces the previously hard-coded
            layout: 204 zeros followed by 745 ones.

    Returns:
        A ``(data, labels)`` tuple: the CSV contents as a NumPy array and a
        1-D integer label array of length ``sum(class_counts)``.

    Note:
        The labels are derived only from ``class_counts``; they are not
        checked against the number of rows read from the file, so callers
        must make sure the two agree.
    """
    path = root + filename + '.csv'
    data = np.asarray(pd.read_csv(path, header=None))

    # Explicit integer dtype keeps np.repeat valid even for an empty sequence.
    counts = np.asarray(class_counts, dtype=int)
    labels = np.repeat(np.arange(counts.size), counts)
    return data, labels

def predicting(ensemble_prob):
    """Return, for every row, the column index holding the largest score.

    Args:
        ensemble_prob: 2-D array with one row per sample and one column
            per class.

    Returns:
        A 1-D float array with one entry per row containing the index of
        that row's maximum. When several columns tie for the maximum, the
        first (lowest) index is returned.
    """
    # np.argmax reports the first occurrence of the maximum, which matches
    # the tie-breaking of a manual "first index equal to max" search. The
    # cast keeps the float result type that callers already receive.
    return np.argmax(ensemble_prob, axis=1).astype(float)

def metrics(labels, predictions, classes):
    """Print a classification summary for a set of predictions.

    Prints, in order: a classification report (four digits), the confusion
    matrix, the per-class accuracy (matrix diagonal divided by the matrix
    row sums) and the balanced accuracy score.

    Args:
        labels: Reference labels, passed as the first argument to every
            report helper.
        predictions: Predicted labels, passed as the second argument.
        classes: Display names forwarded as ``target_names`` to the
            classification report.

    Note:
        ``classification_report``, ``confusion_matrix`` and
        ``balanced_accuracy_score`` are not defined in this block;
        presumably they are the scikit-learn functions -- confirm against
        the file's imports.
    """
    print("Classification Report:")
    report = classification_report(
        labels, predictions, target_names=classes, digits=4
    )
    print(report)

    cm = confusion_matrix(labels, predictions)
    print("Confusion matrix:")
    print(cm)

    # Share of each matrix row that lies on the diagonal.
    per_class = cm.diagonal() / cm.sum(axis=1)
    print(f"\nClasswise Accuracy :{per_class}")

    balanced = balanced_accuracy_score(labels, predictions)
    print("\nBalanced Accuracy Score: ", balanced)

#Sugeno Integral
def ensemble_sugeno(labels,prob1,prob2,prob3,prob4):
    """Fuse four models' class scores with a Sugeno integral and report results.

    For every (sample, class) cell the four model scores are aggregated by
    ``sugeno_integral.sugeno_fuzzy_integral_generalized`` using a fixed
    measure. The class with the highest fused score becomes the prediction,
    after which the accuracy and a metrics summary are printed.

    Args:
        labels: 1-D array of reference labels, one per sample.
        prob1, prob2, prob3, prob4: 2-D score arrays, one per model. The
            output takes the shape of ``prob1``; the other three are
            indexed with the same (sample, class) positions.

    Raises:
        ValueError: If the number of labels differs from the number of
            rows in ``prob1``.
    """
    n_samples = prob1.shape[0]
    n_classes = prob1.shape[1]

    # Fail early with an explicit message instead of letting a length
    # mismatch surface later inside the reporting helpers.
    if labels.shape[0] != n_samples:
        raise ValueError(
            "Number of labels ({}) does not match number of prediction "
            "rows ({})".format(labels.shape[0], n_samples)
        )

    Y = np.zeros(prob1.shape, dtype=float)
    for sample in range(n_samples):
        for cls in range(n_classes):
            X = np.array([
                prob1[sample][cls],
                prob2[sample][cls],
                prob3[sample][cls],
                prob4[sample][cls],
            ])
            # Built fresh for every call: the integral helper is defined
            # elsewhere, so it is not assumed to leave its arguments intact.
            measure = np.array([1.5, 1.5, 0.01, 1.2])
            Y[sample][cls] = sugeno_integral.sugeno_fuzzy_integral_generalized(X, measure)

    sugeno_pred = predicting(Y)

    correct = np.count_nonzero(sugeno_pred == labels)
    total = labels.shape[0]

    print("Accuracy = ",correct/total)
    # NOTE(review): three display names are supplied here; presumably the
    # report helper needs this to match the number of classes actually
    # present in the labels/predictions -- confirm against the data.
    classes = ['Benign','Malignant','Normal']
    # metrics() takes (labels, predictions, classes): the reference labels
    # go first and the ensemble predictions second.
    metrics(labels, sugeno_pred, classes)

if __name__ == '__main__':
    # Command-line entry point: load the four per-model score files from
    # the given directory and run the Sugeno ensemble on them.
    cli = argparse.ArgumentParser()
    cli.add_argument('--data_directory', type=str, required=True,
                     help='Directory where data is stored')
    # Parsed but not used anywhere in this block.
    cli.add_argument('--epochs', type=int, default=25,
                     help='Number of epochs to run the models')
    data_dir = cli.parse_args().data_directory

    model_files = (
        "/Kaggle_vgg11",
        "/Kaggle_squeezenet",
        "/Kaggle_googlenet",
        "/Kaggle_wideresnet",
    )
    loaded = [getfile(name, root=data_dir) for name in model_files]

    # Only the labels returned alongside the first file are used.
    labels = loaded[0][1]
    prob1, prob2, prob3, prob4 = (scores for scores, _ in loaded)

    ensemble_sugeno(labels, prob1, prob2, prob3, prob4)


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source