'AxisError: axis 1 is out of bounds for array of dimension 1 using sklearn
I have trouble using OneVsRestClassifier and cross-validation from sklearn.
train, test = train_test_split(tickets, random_state=42, test_size=0.30, shuffle=True)
X_train = train[['TK_Poids_brut', 'TK_Poids_tare']]
y_train = train['TK_Qualite']
X_test = test[['TK_Poids_brut', 'TK_Poids_tare']]
y_test = test['TK_Qualite']
le = preprocessing.LabelEncoder()
y_train_tra = le.fit_transform(y_train)
printDataInfo(X_train,y_train_tra)
#The printDataInfo function is there just to display information about X and y
clf_OvR_SVC = OneVsRestClassifier(LinearSVC(random_state=0))
cross_v = cross_validate(clf_OvR_SVC, X_train, y_train_tra, error_score="raise",scoring=dict(ac=make_scorer(accuracy_score), roc=make_scorer(roc_auc_score, multi_class="ovr")), cv=5)
cross_v
When I do this I get the following error:
---------------------------------------------------------------------------
AxisError Traceback (most recent call last)
C:\TEMP/ipykernel_20332/2926737612.py in <module>
23
24 clf_OvR_SVC = OneVsRestClassifier(LinearSVC(random_state=0))
---> 25 cross_v = cross_validate(clf_OvR_SVC, X_train, y_train_tra ,error_score="raise",scoring=dict(ac=make_scorer(accuracy_score), roc=make_scorer(roc_auc_score, multi_class="ovr")), cv=5)
26 cross_v
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
248 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
249 pre_dispatch=pre_dispatch)
--> 250 results = parallel(
251 delayed(_fit_and_score)(
252 clone(estimator), X, y, scorers, train, test, verbose, None,
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1041 # remaining jobs.
1042 self._iterating = False
-> 1043 if self.dispatch_one_batch(iterator):
1044 self._iterating = self._original_iterator is not None
1045
~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
859 return False
860 else:
--> 861 self._dispatch(tasks)
862 return True
863
~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
777 with self._lock:
778 job_idx = len(self._jobs)
--> 779 job = self._backend.apply_async(batch, callback=cb)
780 # A job can complete so quickly than its callback is
781 # called before we get here, causing self._jobs to
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\Anaconda3\lib\site-packages\sklearn\utils\fixes.py in __call__(self, *args, **kwargs)
220 def __call__(self, *args, **kwargs):
221 with config_context(**self.config):
--> 222 return self.function(*args, **kwargs)
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
623
624 fit_time = time.time() - start_time
--> 625 test_scores = _score(estimator, X_test, y_test, scorer, error_score)
626 score_time = time.time() - start_time - fit_time
627 if return_train_score:
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer, error_score)
685 scores = scorer(estimator, X_test)
686 else:
--> 687 scores = scorer(estimator, X_test, y_test)
688 except Exception:
689 if error_score == 'raise':
~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in __call__(self, estimator, *args, **kwargs)
85 for name, scorer in self._scorers.items():
86 if isinstance(scorer, _BaseScorer):
---> 87 score = scorer._score(cached_call, estimator,
88 *args, **kwargs)
89 else:
~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in _score(self, method_caller, estimator, X, y_true, sample_weight)
240 **self._kwargs)
241 else:
--> 242 return self._sign * self._score_func(y_true, y_pred,
243 **self._kwargs)
244
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
61 extra_args = len(args) - len(all_args)
62 if extra_args <= 0:
---> 63 return f(*args, **kwargs)
64
65 # extra_args > 0
~\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)
535 if multi_class == 'raise':
536 raise ValueError("multi_class must be in ('ovo', 'ovr')")
--> 537 return _multiclass_roc_auc_score(y_true, y_score, labels,
538 multi_class, average, sample_weight)
539 elif y_type == "binary":
~\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in _multiclass_roc_auc_score(y_true, y_score, labels, multi_class, average, sample_weight)
593 """
594 # validation of the input y_score
--> 595 if not np.allclose(1, y_score.sum(axis=1)):
596 raise ValueError(
597 "Target scores need to be probabilities for multiclass "
~\Anaconda3\lib\site-packages\numpy\core\_methods.py in _sum(a, axis, dtype, out, keepdims, initial, where)
45 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
46 initial=_NoValue, where=True):
---> 47 return umr_sum(a, axis, dtype, out, keepdims, initial, where)
48
49 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,
AxisError: axis 1 is out of bounds for array of dimension 1
Here is the input data format: I already tried to put both in numpy array and I tried to reshape y in (6108,1) but I always get the same error.
type :
x: <class 'pandas.core.frame.DataFrame'>
y: <class 'numpy.ndarray'>
shape :
X: (6108, 2)
y: (6108,)
data :
x: TK_Poids_brut TK_Poids_tare
8436 14420 14160
7014 17160 12320
3931 28060 15040
6749 16680 14360
2984 10060 9100
... ... ...
5734 19700 15420
5191 25380 14620
5390 19460 14760
860 16160 14100
7270 15520 14500
[6108 rows x 2 columns]
y: [132 85 160 118 118 40 88 126 12 40 41 138 5 125 125 147 111 118
153 40 118 126 118 125 123 62 177 45 118 105 3 1 105 142 116 100
118 125 118 78 124 3 126 53 138 118 40 118 53 124 126 98 118 155
118 131 5 135 130 3 118 105 118 126 105 87 118 118 24 124 130 130
...
118 124 118 180 118 58 124 126 153 126 124 118 125 153 86 94 126 118
130 105 42 62 124 78]
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
