'How do I write a DataProcessing function that has an attribute to obtain the pandas dataframe index and column?
I defined a DataProcessing class before loading my data in load_data. I want to concatenate the meth27 and meth450 dataframes to form the meth dataframe.
Finally, I want to find the sets of columns and indexes of meth, mrna, and cna dataframes to keep only rows/columns that exist in all three dataframes.
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from reactome2py import analysis
import csv
directory = "./gbm_tcga/"
class DataProcessing:
def __init__(self, data):
self.df = pd.read_csv(data, sep="\t").drop("Entrez_Gene_Id", axis=1, errors="ignore")
self.df = self.df.loc[:, ~self.df.columns.duplicated()]
self.df = self.df.set_index("Hugo_Symbol")
self.df = self.df.sort_index()
def split_data(self):
X = self.df.iloc[:, :-1]
y = self.df.iloc[:, -1]
return X, y
def pca(self):
pca = PCA()
if np.any(np.isnan(self.df)):
pass
elif np.all(np.isfinite(self.df)):
pass
else:
pca.fit(self.df.iloc[1:, 3:])
self.pca_components = pca.components
return self.pca_components
def load_data():
meth27 = DataProcessing(directory + "data_methylation_hm27.txt")
meth450 = DataProcessing(directory + "data_methylation_hm450.txt")
meth = pd.concat([meth27.df, meth450.df], axis=1)
meth.to_csv(directory + "methylation_merged.csv")
cna = DataProcessing(directory + "data_linear_cna.txt")
mrna = DataProcessing(directory + "data_mrna_affymetrix_microarray_zscores_ref_all_samples.txt")
# Retrieve only omics cases and genes that exist in all three omics types
common_index = set(mrna.index) & set(meth.index) & set(cna.df.index)
common_column = set(mrna.columns) & set(meth.columns) & set(cna.columns)
common = mrna.loc[common_index, common_column]
return common
Traceback:
Traceback (most recent call last):
File "../main.py", line 66, in <module>
ld = load_data()
File "../main.py", line 60, in load_data
common_index = set(mrna.index) & set(meth.index) & set(cna.index)
AttributeError: 'DataProcessing' object has no attribute 'index'
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
