'How do I write a DataProcessing function that has an attribute to obtain the pandas dataframe index and column?

I defined a DataProcessing class before loading my data in load_data. I want to concatenate the meth27 and meth450 dataframes to form the meth dataframe. Finally, I want to find the sets of columns and indexes of meth, mrna, and cna dataframes to keep only rows/columns that exist in all three dataframes.

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from reactome2py import analysis
import csv

directory = "./gbm_tcga/"

class DataProcessing:

    def __init__(self, data):
        self.df = pd.read_csv(data, sep="\t").drop("Entrez_Gene_Id", axis=1, errors="ignore")
        self.df = self.df.loc[:, ~self.df.columns.duplicated()]
        self.df = self.df.set_index("Hugo_Symbol")
        self.df = self.df.sort_index()

    def split_data(self):
        X = self.df.iloc[:, :-1]
        y = self.df.iloc[:, -1]
        return X, y

    def pca(self):
        pca = PCA()
        if np.any(np.isnan(self.df)):
            pass
        elif np.all(np.isfinite(self.df)):
            pass
        else:
            pca.fit(self.df.iloc[1:, 3:])
            self.pca_components = pca.components
            return self.pca_components

def load_data():

    meth27 = DataProcessing(directory + "data_methylation_hm27.txt")
    meth450 = DataProcessing(directory + "data_methylation_hm450.txt")
    meth = pd.concat([meth27.df, meth450.df], axis=1)
    meth.to_csv(directory + "methylation_merged.csv")

    cna = DataProcessing(directory + "data_linear_cna.txt")
    mrna = DataProcessing(directory + "data_mrna_affymetrix_microarray_zscores_ref_all_samples.txt")

    # Retrieve only omics cases and genes that exist in all three omics types
    common_index = set(mrna.index) & set(meth.index) & set(cna.df.index)
    common_column = set(mrna.columns) & set(meth.columns) & set(cna.columns)
    common = mrna.loc[common_index, common_column]

    return common

Traceback:

Traceback (most recent call last):
  File "../main.py", line 66, in <module>
    ld = load_data()
  File "../main.py", line 60, in load_data
    common_index = set(mrna.index) & set(meth.index) & set(cna.index)
AttributeError: 'DataProcessing' object has no attribute 'index'


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source