'I have an error in my code that I cannot fix

I'm trying to write code that can help me better understand some weather data, but I cannot go ahead because of this error. The problem should be the loaded data.

---------------------------------------------------------------------------
Empty                                     Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    821             try:
--> 822                 tasks = self._ready_batches.get(block=False)
    823             except queue.Empty:

C:\ProgramData\Anaconda3\lib\queue.py in get(self, block, timeout)
    167                 if not self._qsize():
--> 168                     raise Empty
    169             elif timeout is None:

Empty: 

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16492/421023720.py in <module>
     37 for name, model in models:
     38         kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
---> 39         cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
     40         results.append(cv_results)
     41         names.append(name)

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    443     scorer = check_scoring(estimator, scoring=scoring)
    444 
--> 445     cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,
    446                                 scoring={'score': scorer}, cv=cv,
    447                                 n_jobs=n_jobs, verbose=verbose,

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    248     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
    249                         pre_dispatch=pre_dispatch)
--> 250     results = parallel(
    251         delayed(_fit_and_score)(
    252             clone(estimator), X, y, scorers, train, test, verbose, None,

C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1041             # remaining jobs.
   1042             self._iterating = False
-> 1043             if self.dispatch_one_batch(iterator):
   1044                 self._iterating = self._original_iterator is not None
   1045 

C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    831                 big_batch_size = batch_size * n_jobs
    832 
--> 833                 islice = list(itertools.islice(iterator, big_batch_size))
    834                 if len(islice) == 0:
    835                     return False

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in <genexpr>(.0)
    248     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
    249                         pre_dispatch=pre_dispatch)
--> 250     results = parallel(
    251         delayed(_fit_and_score)(
    252             clone(estimator), X, y, scorers, train, test, verbose, None,

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
    330                 .format(self.n_splits, n_samples))
    331 
--> 332         for train, test in super().split(X, y, groups):
    333             yield train, test
    334 

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
     78         X, y, groups = indexable(X, y, groups)
     79         indices = np.arange(_num_samples(X))
---> 80         for test_index in self._iter_test_masks(X, y, groups):
     81             train_index = indices[np.logical_not(test_index)]
     82             test_index = indices[test_index]

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _iter_test_masks(self, X, y, groups)
    691 
    692     def _iter_test_masks(self, X, y=None, groups=None):
--> 693         test_folds = self._make_test_folds(X, y)
    694         for i in range(self.n_splits):
    695             yield test_folds == i

C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _make_test_folds(self, X, y)
    643         allowed_target_types = ('binary', 'multiclass')
    644         if type_of_target_y not in allowed_target_types:
--> 645             raise ValueError(
    646                 'Supported target types are: {}. Got {!r} instead.'.format(
    647                     allowed_target_types, type_of_target_y))

ValueError: Supported target types are: ('binary', 'multiclass'). Got 'unknown' instead.

This is the error that I got. I'm attaching the entire code with data, can you take a look?

#load dataset
import pandas as pd
df = pd.read_csv (r'C:\Users\danie\OneDrive\Desktop\Prod_dataset.csv')
print (df)
#import libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Split-out validation dataset
import numpy as np
from sklearn.model_selection import train_test_split
array = df.values
X = array[:,1:60]
y = array[:,60]
X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1)

# Spot Check Algorithms
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))

# evaluate each model in turn
results = []
names = []
for name, model in models:
    kfold=StratifiedKFold(n_splits=10,random_state=1,shuffle=True)
    cv_results=cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy' )
    results.append(cv_results)
    names.append(name)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

This is the data from the Prod_dataset.xlsx file that I used.



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source