'Sklearn: only size-1 arrays can be converted to Python scalars
When I tried to take the word_vector transformed from Chinese as the feature of sklearn,an error occurred. The shape of x_train and word_vector are (747,) and (1,100) and the latter's dtype is float64
for this question, I guess the type of the data may be different, but i tried to traverse all the data, it was ok ……
Here are the code:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
import SZ_function as sz
import gensim
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
def remove_stop_words(text):
stop_words = sz.get_step_words('notebook/HIT.txt')
text = text.split()
word_list = ''
for word in text:
if word not in stop_words:
word_list += word
word_list += ' '
return word_list
def pre_process(path):
data = pd.read_excel(path)
data['text'] = data['text'].apply(sz.remove_number_en)
data['text'] = data['text'].apply(sz.cut_words)
data['text'] = data['text'].apply(remove_stop_words)
data = data.replace(to_replace='', value='None')
data = data.replace(to_replace='None', value=np.nan).dropna()
return data
def create_corpus(data):
text = data['text']
return [sentences.split() for sentences in text]
def word_vec(corpus):
model = gensim.models.word2vec.Word2Vec(corpus)
return model
def get_sent_vec(sent,model,size):
vec = np.zeros(size).reshape((1,size))
count = 0
for word in sent[1:]:
try:
vec += model.wv[word].reshape((1,size))
count += 1
except:
continue
if count != 0:
vec /= count
return vec
if __name__ == '__main__':
data = pre_process('datasets_demo.xlsx')
corpus = create_corpus(data)
model = word_vec(corpus)
data['text']=data['text'].apply(get_sent_vec,model=model,size=100)
x_train,y_train,x_test,y_test = train_test_split(data['text'],data['label'])
estimator = MultinomialNB()
estimator.fit(x_train,y_train)
here are the all trackback:
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\12996\AppData\Local\Temp\jieba.cache
Loading model cost 0.628 seconds.
Prefix dict has been built successfully.
TypeError: only size-1 arrays can be converted to Python scalars
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-8366eff678ac>", line 1, in <module>
runfile('C:/Users/12996/Desktop/Tensorflow_/datasets_demo.py', wdir='C:/Users/12996/Desktop/Tensorflow_')
File "E:\pycharm\PyCharm 2022.1\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "E:\pycharm\PyCharm 2022.1\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/12996/Desktop/Tensorflow_/datasets_demo.py", line 66, in <module>
estimator.fit(x_train,y_train)
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\sklearn\naive_bayes.py", line 663, in fit
X, y = self._check_X_y(X, y)
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\sklearn\naive_bayes.py", line 523, in _check_X_y
return self._validate_data(X, y, accept_sparse="csr", reset=reset)
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\sklearn\base.py", line 581, in _validate_data
X, y = check_X_y(X, y, **check_params)
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\sklearn\utils\validation.py", line 976, in check_X_y
estimator=estimator,
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\sklearn\utils\validation.py", line 746, in check_array
array = np.asarray(array, order=order, dtype=dtype)
File "E:\Anaconda3\envs\tensorflow-gpu\lib\site-packages\pandas\core\series.py", line 857, in __array__
return np.asarray(self._values, dtype)
ValueError: setting an array element with a sequence.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
