'ValueError: Found array with 0 feature(s) (shape=(54, 0)) while a minimum of 1 is required
I want to perform PCA on a list of dataframes, such as cna. My code raised ValueError: Found array with 0 feature(s) (shape=(54, 0)) while a minimum of 1 is required.
class StringConverter(dict):
def __contains__(self, item):
return True
def __getitem__(self, item):
return str
def get(self, default=None):
return str
def get_pathways():
pathways = []
with open(dir + "hsa.txt", newline='') as csvfile:
freader = csv.reader(csvfile, delimiter='\t')
for row in freader:
pathways.append(row)
return pathways
class DataProcessing:
def __init__(self, data, header=0):
self.df = pd.read_csv(data, sep="\t", header=header)
def split_data(self):
X = self.df.iloc[:, :-1]
y = self.df.iloc[:, -1]
return X, y
def pca(self):
pca = PCA()
if np.any(np.isnan(self.df)):
pass
elif np.all(np.isfinite(self.df)):
pass
else:
pca.fit(self.df.iloc[1:, 3:])
self.pca_components = pca.components_
return self.pca_components
def main():
cna = DataProcessing(dir + "data_linear_cna.txt")
patients_source = DataProcessing(dir + "data_clinical_patient_reduced.txt", 0)
patients = {}
# get list of allowed patients
for index, row in patients_source.df.iterrows():
if row.OS_MONTHS != '[Not Available]':
if float(row.OS_MONTHS) > 2 * 12:
patients[row.PATIENT_ID] = 1
pathways = get_pathways()
#### Process the CNA data
q = 5
C = []
G = []
M = []
# find common samples
n = cna.df.shape[1]
common_samples = {}
cna_sample_index = {}
for i in range(2, n):
sample_name = list(cna.df)[i][0:-3]
if sample_name in patients:
common_samples[sample_name] = 1
cna_sample_index[sample_name] = i
ordered_common_samples = list(common_samples.keys())
ordered_common_samples.sort()
process_cna = True
if process_cna:
C = []
for i, p in enumerate(pathways):
allowed_symbols = {}
first = True
for s in p:
if first:
first = False
else:
if s != 'NA':
allowed_symbols[s] = 1
# going through each sample
B = []
allowed_columns = []
for s in ordered_common_samples:
B.append([])
allowed_columns.append(cna_sample_index[s])
Bi = 0
for index, row in cna.df.iterrows():
if row[0].upper() in allowed_symbols:
Bi2 = Bi
for c in allowed_columns:
B[Bi2].append(cna.df.iloc[index, c])
Bi2 = Bi2 + 1
pca_B = PCA()
pca_B.fit(B)
C.append(pca_B.transform(B)[:, 0:q])
cna.pca()
main()
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-59-d61a65bf4d9e> in <module>()
248
249
--> 250 main()
4 frames
/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
815 "Found array with %d feature(s) (shape=%s) while"
816 " a minimum of %d is required%s."
--> 817 % (n_features, array.shape, ensure_min_features, context)
818 )
819
ValueError: Found array with 0 feature(s) (shape=(54, 0)) while a minimum of 1 is required.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
