AxisError: axis 1 is out of bounds for array of dimension 1 using sklearn

I am having trouble using OneVsRestClassifier together with cross-validation (cross_validate) from sklearn.

train, test = train_test_split(tickets, random_state=42, test_size=0.30, shuffle=True)

X_train = train[['TK_Poids_brut', 'TK_Poids_tare']]
y_train = train['TK_Qualite']
X_test = test[['TK_Poids_brut', 'TK_Poids_tare']]
y_test = test['TK_Qualite']

le = preprocessing.LabelEncoder()
y_train_tra = le.fit_transform(y_train)

printDataInfo(X_train,y_train_tra)
#The printDataInfo function is there just to display information about X and y

clf_OvR_SVC = OneVsRestClassifier(LinearSVC(random_state=0))
cross_v = cross_validate(clf_OvR_SVC, X_train, y_train_tra, error_score="raise",scoring=dict(ac=make_scorer(accuracy_score), roc=make_scorer(roc_auc_score, multi_class="ovr")), cv=5)
cross_v

When I do this I get the following error:

---------------------------------------------------------------------------
AxisError                                 Traceback (most recent call last)
C:\TEMP/ipykernel_20332/2926737612.py in <module>
     23 
     24 clf_OvR_SVC = OneVsRestClassifier(LinearSVC(random_state=0))
---> 25 cross_v = cross_validate(clf_OvR_SVC, X_train, y_train_tra ,error_score="raise",scoring=dict(ac=make_scorer(accuracy_score), roc=make_scorer(roc_auc_score, multi_class="ovr")), cv=5)
     26 cross_v

~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    248     parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
    249                         pre_dispatch=pre_dispatch)
--> 250     results = parallel(
    251         delayed(_fit_and_score)(
    252             clone(estimator), X, y, scorers, train, test, verbose, None,

~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1041             # remaining jobs.
   1042             self._iterating = False
-> 1043             if self.dispatch_one_batch(iterator):
   1044                 self._iterating = self._original_iterator is not None
   1045 

~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    859                 return False
    860             else:
--> 861                 self._dispatch(tasks)
    862                 return True
    863 

~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    777         with self._lock:
    778             job_idx = len(self._jobs)
--> 779             job = self._backend.apply_async(batch, callback=cb)
    780             # A job can complete so quickly than its callback is
    781             # called before we get here, causing self._jobs to

~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    206     def apply_async(self, func, callback=None):
    207         """Schedule a func to be run"""
--> 208         result = ImmediateResult(func)
    209         if callback:
    210             callback(result)

~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    570         # Don't delay the application, to avoid keeping the input
    571         # arguments in memory
--> 572         self.results = batch()
    573 
    574     def get(self):

~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
    260         # change the default number of processes to -1
    261         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262             return [func(*args, **kwargs)
    263                     for func, args, kwargs in self.items]
    264 

~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    260         # change the default number of processes to -1
    261         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262             return [func(*args, **kwargs)
    263                     for func, args, kwargs in self.items]
    264 

~\Anaconda3\lib\site-packages\sklearn\utils\fixes.py in __call__(self, *args, **kwargs)
    220     def __call__(self, *args, **kwargs):
    221         with config_context(**self.config):
--> 222             return self.function(*args, **kwargs)

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
    623 
    624         fit_time = time.time() - start_time
--> 625         test_scores = _score(estimator, X_test, y_test, scorer, error_score)
    626         score_time = time.time() - start_time - fit_time
    627         if return_train_score:

~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer, error_score)
    685             scores = scorer(estimator, X_test)
    686         else:
--> 687             scores = scorer(estimator, X_test, y_test)
    688     except Exception:
    689         if error_score == 'raise':

~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in __call__(self, estimator, *args, **kwargs)
     85         for name, scorer in self._scorers.items():
     86             if isinstance(scorer, _BaseScorer):
---> 87                 score = scorer._score(cached_call, estimator,
     88                                       *args, **kwargs)
     89             else:

~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in _score(self, method_caller, estimator, X, y_true, sample_weight)
    240                                                  **self._kwargs)
    241         else:
--> 242             return self._sign * self._score_func(y_true, y_pred,
    243                                                  **self._kwargs)
    244 

~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     61             extra_args = len(args) - len(all_args)
     62             if extra_args <= 0:
---> 63                 return f(*args, **kwargs)
     64 
     65             # extra_args > 0

~\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in roc_auc_score(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)
    535         if multi_class == 'raise':
    536             raise ValueError("multi_class must be in ('ovo', 'ovr')")
--> 537         return _multiclass_roc_auc_score(y_true, y_score, labels,
    538                                          multi_class, average, sample_weight)
    539     elif y_type == "binary":

~\Anaconda3\lib\site-packages\sklearn\metrics\_ranking.py in _multiclass_roc_auc_score(y_true, y_score, labels, multi_class, average, sample_weight)
    593     """
    594     # validation of the input y_score
--> 595     if not np.allclose(1, y_score.sum(axis=1)):
    596         raise ValueError(
    597             "Target scores need to be probabilities for multiclass "

~\Anaconda3\lib\site-packages\numpy\core\_methods.py in _sum(a, axis, dtype, out, keepdims, initial, where)
     45 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
     46          initial=_NoValue, where=True):
---> 47     return umr_sum(a, axis, dtype, out, keepdims, initial, where)
     48 
     49 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,

AxisError: axis 1 is out of bounds for array of dimension 1

Here is the format of the input data. I have already tried converting both X and y to NumPy arrays, and I also tried reshaping y to (6108, 1), but I always get the same error.

type : 

x:  <class 'pandas.core.frame.DataFrame'>
y:  <class 'numpy.ndarray'>

shape : 

X:  (6108, 2)
y:  (6108,)

data : 

x:        TK_Poids_brut  TK_Poids_tare
8436          14420          14160
7014          17160          12320
3931          28060          15040
6749          16680          14360
2984          10060           9100
...             ...            ...
5734          19700          15420
5191          25380          14620
5390          19460          14760
860           16160          14100
7270          15520          14500

[6108 rows x 2 columns]
y:  [132  85 160 118 118  40  88 126  12  40  41 138   5 125 125 147 111 118
 153  40 118 126 118 125 123  62 177  45 118 105   3   1 105 142 116 100
 118 125 118  78 124   3 126  53 138 118  40 118  53 124 126  98 118 155
 118 131   5 135 130   3 118 105 118 126 105  87 118 118  24 124 130 130
...
 118 124 118 180 118  58 124 126 153 126 124 118 125 153  86  94 126 118
 130 105  42  62 124  78]

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

AxisError: axis 1 is out of bounds for array of dimension 1 using sklearn

Sources

Related Questions