'I have an error in my code that I cannot fix
I'm trying to write a code that can help me to better understand some wheaters data but I cannot go ahead because this error : The problem sould be the loades data.
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
821 try:
--> 822 tasks = self._ready_batches.get(block=False)
823 except queue.Empty:
C:\ProgramData\Anaconda3\lib\queue.py in get(self, block, timeout)
167 if not self._qsize():
--> 168 raise Empty
169 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_16492/421023720.py in <module>
37 for name, model in models:
38 kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
---> 39 cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
40 results.append(cv_results)
41 names.append(name)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
443 scorer = check_scoring(estimator, scoring=scoring)
444
--> 445 cv_results = cross_validate(estimator=estimator, X=X, y=y, groups=groups,
446 scoring={'score': scorer}, cv=cv,
447 n_jobs=n_jobs, verbose=verbose,
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
248 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
249 pre_dispatch=pre_dispatch)
--> 250 results = parallel(
251 delayed(_fit_and_score)(
252 clone(estimator), X, y, scorers, train, test, verbose, None,
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1041 # remaining jobs.
1042 self._iterating = False
-> 1043 if self.dispatch_one_batch(iterator):
1044 self._iterating = self._original_iterator is not None
1045
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
831 big_batch_size = batch_size * n_jobs
832
--> 833 islice = list(itertools.islice(iterator, big_batch_size))
834 if len(islice) == 0:
835 return False
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in <genexpr>(.0)
248 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
249 pre_dispatch=pre_dispatch)
--> 250 results = parallel(
251 delayed(_fit_and_score)(
252 clone(estimator), X, y, scorers, train, test, verbose, None,
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
330 .format(self.n_splits, n_samples))
331
--> 332 for train, test in super().split(X, y, groups):
333 yield train, test
334
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in split(self, X, y, groups)
78 X, y, groups = indexable(X, y, groups)
79 indices = np.arange(_num_samples(X))
---> 80 for test_index in self._iter_test_masks(X, y, groups):
81 train_index = indices[np.logical_not(test_index)]
82 test_index = indices[test_index]
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _iter_test_masks(self, X, y, groups)
691
692 def _iter_test_masks(self, X, y=None, groups=None):
--> 693 test_folds = self._make_test_folds(X, y)
694 for i in range(self.n_splits):
695 yield test_folds == i
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in _make_test_folds(self, X, y)
643 allowed_target_types = ('binary', 'multiclass')
644 if type_of_target_y not in allowed_target_types:
--> 645 raise ValueError(
646 'Supported target types are: {}. Got {!r} instead.'.format(
647 allowed_target_types, type_of_target_y))
ValueError: Supported target types are: ('binary', 'multiclass'). Got 'unknown' instead.
This is the error that I got. I'm attaching the entire code with data, can you take a look?
#load dataset
import pandas as pd
df = pd.read_csv (r'C:\Users\danie\OneDrive\Desktop\Prod_dataset.csv')
print (df)
#import libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Split-out validation dataset
import numpy as np
from sklearn.model_selection import train_test_split
array = df.values
X = array[:,1:60]
y = array[:,60]
X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1)
# Spot Check Algorithms
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))
# evaluate each model in turn
results = []
names = []
for name, model in models:
kfold=StratifiedKFold(n_splits=10,random_state=1,shuffle=True)
cv_results=cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy' )
results.append(cv_results)
names.append(name)
print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
This is Prod_dataset.xlsx file data that I took.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
