TypeError: issubclass() arg 2 must be a class, a tuple of classes, or a union when adding an 'ignore_warnings' wrapper

I tried to create a backward-selection lasso regression model, but I ran into a strange issue. Here is my code:

import pandas as pd
from sklearn import preprocessing
import statsmodels.api as sm
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import mean_squared_error as mse

# settings
shift_n=1
previous = True
intercept=True
alpha=[0.01, 0.05, 0.1, 0.5, 1]
threshold=0.05
y_mark='Close.'
var_list = ['Ethereum', 'S&P 500', 'Nasdaq', 'DJ Composite', 'Gold', 'Copper', 'Silver',  'Crude Oil WTI', 'Natural Gas']
y_label = y_mark + 'Ethereum'
ETH_index = 69



# function to get df shift
def df_shift(dataset, y_label, shift_n = 1):
    df = dataset.copy()
    new_col = y_label + '_p'
    df[new_col] = df[y_label].shift(periods= shift_n)
    df = df.dropna()
    return df


# function to split df into trainset and testset
def split_df(df):
    token_index = df.ne(0).idxmax()
    if token_index > ETH_index:
        ind = token_index
    else:
        ind = ETH_index
    partition = df.index[ind + int((len(df) - ind)*0.8)]
    df = df[ind:].copy()
    trainset = df[df.index <= partition]
    testset = df[df.index > partition]
    return trainset, testset


# function to normalize df
def df_preprocessing(df, type='standardize'):
    X = df.values
    if type == 'standardize':
        std_scaler = preprocessing.StandardScaler().fit(X)
        x_scaled = std_scaler.transform(X)
        res = pd.DataFrame(x_scaled, columns=df.columns, index=df.index)
        return res, std_scaler
    elif type == 'minmax':
        minmax_scaler = preprocessing.MinMaxScaler().fit(X)
        x_scaled = minmax_scaler.transform(X)
        res = pd.DataFrame(x_scaled, columns=df.columns, index=df.index)
        return res, minmax_scaler


# function to get data for modelling
def get_data(df, y_label, preprocess='standardize', intercept=True):

    # 01 split X and Y
    X = df.loc[:, df.columns != y_label]
    Y = df.loc[:, df.columns == y_label]

    # 02 preprocess
    scaler = None
    if preprocess == 'standardize':
        X, scaler = df_preprocessing(X, type='standardize')
    if preprocess == 'minmax':
        X, scaler = df_preprocessing(X, type='minmax')

    # 03 add constant term
    if intercept == True:
        X = sm.add_constant(X)
    return X, Y, scaler


def alpha_search(x, y, alpha=[0.01, 0.05, 0.1, 0.5, 1], type='lasso'):
    if type == 'ridge':
        ridge_cv = RidgeCV(alphas=alpha)
        model_cv = ridge_cv.fit(x, y)
        return model_cv.alpha_
    if type == 'lasso':
        lasso_cv = LassoCV(alphas=alpha)
        model_cv = lasso_cv.fit(x, y)
        return model_cv.alpha_


def liner_model(X, Y, type='lasso', alpha=None):
    model = sm.OLS(Y, X)
    results_fu = model.fit()
    best_alpha = None
    if type == 'ridge':
        best_alpha = alpha_search(X, Y, alpha=alpha, type='ridge')
        model_ridge = model.fit_regularized(L1_wt=0, alpha=best_alpha, start_params=results_fu.params)
        ridge_result = sm.regression.linear_model.OLSResults(model, model_ridge.params, model.normalized_cov_params)
        return ridge_result, best_alpha
    elif type == 'lasso':
        best_alpha = alpha_search(X, Y, alpha=alpha, type='lasso')
        model_lasso = model.fit_regularized(L1_wt=1, alpha=best_alpha, start_params=results_fu.params)
        lasso_result = sm.regression.linear_model.OLSResults(model, model_lasso.params, model.normalized_cov_params)
        return lasso_result, best_alpha
    else:
        return results_fu, best_alpha



def backward_selection(df, y_label, type='lasso', alpha=[0.01, 0.05, 0.1, 0.5, 1], threshold=0.05):
    X, Y, scaler = get_data(df, y_label=y_label)

    # create linear model
    model, best_alpha = liner_model(X, Y, type=type, alpha=alpha)

    # backward selection model
    # .1 get feature coef result
    res = list(model.pvalues)
    max_p = max(res)

    # .2 find the biggest coef and correlated feature name
    while max_p > threshold:
        ind = res.index(max_p)  # the index of max p value
        col = X.columns[ind]  # find the column name

        # .3 remove the feature from X
        X = X.drop(col, axis=1)
        # .4 build a new model
        if len(X.columns) == 0:
            print('all features have been removed, return the last available model')
            return model, X, best_alpha, scaler
        model, best_alpha = liner_model(X, Y, type=type, alpha=alpha)
        res = list(model.pvalues)
        max_p = max(res)

    # return result
    return model, X, best_alpha, scaler


class backward_selection_model:
    def __init__(self, df, y_label, type='lasso', alpha=[0.01, 0.05, 0.1, 0.5, 1], threshold=0.05):
        self.original_df = df.copy()
        self.df = df_shift(df, y_label)
        self.y_label = y_label
        self.type = type
        self.alpha = alpha
        self.threshold = threshold
        model, X, best_alpha, scaler = backward_selection(self.df, y_label=self.y_label, type=self.type,
                                                          alpha=self.alpha, threshold=self.threshold)
        self.model = model
        self.X = X
        self.best_alpha = best_alpha
        self.scaler = scaler

    def get_model(self):
        return self.model

    def get_final_features(self):
        return self.X.columns

    def get_best_alpha(self):
        return self.best_alpha

    def get_scaler(self):
        return self.scaler

    def get_prediction(self):
        scaler = self.get_scaler()
        target_cols = list(self.get_final_features())
        model = self.get_model()

        target_df = self.df.copy()

        target_X = target_df.loc[:, target_df.columns != self.y_label]

        X_ = scaler.transform(target_X)
        target_X = pd.DataFrame(X_, columns=target_X.columns, index=target_X.index)

        target_X = sm.add_constant(target_X)

        target_X = target_X.loc[:, target_cols]

        return model.predict(target_X)

    def get_mse(self):
        prediction = self.get_prediction()

        target_df = self.df.copy()

        target_y = target_df.loc[:, target_df.columns == self.y_label].values.ravel()

        return mse(target_y, prediction)

    def get_coef_df(self):
        return pd.DataFrame({'coef': self.model.params, 'P-value': self.model.pvalues})


test_path = 'https://raw.githubusercontent.com/Carloszone/Cryptocurrency_Research_project/main/datasets/test.csv'
df = pd.read_csv(test_path, parse_dates=['Date']).set_index('Date')
test = backward_selection_model(df, y_label)

print('Model MSE: ', test.get_mse())

It works, and I got the result I need. But the process produced many ConvergenceWarning messages, so I added an ignore_warnings decorator to backward_selection to suppress them, like this:

@ignore_warnings(category=[ConvergenceWarning, UserWarning])
def backward_selection(df, y_label, type='lasso', alpha=[0.01, 0.05, 0.1, 0.5, 1], threshold=0.05):
    X, Y, scaler = get_data(df, y_label=y_label)

    # create linear model
    model, best_alpha = liner_model(X, Y, type=type, alpha=alpha)

    # backward selection model
    # .1 get feature coef result
    res = list(model.pvalues)
    max_p = max(res)

    # .2 find the biggest coef and correlated feature name
    while max_p > threshold:
        ind = res.index(max_p)  # the index of max p value
        col = X.columns[ind]  # find the column name

        # .3 remove the feature from X
        X = X.drop(col, axis=1)
        # .4 build a new model
        if len(X.columns) == 0:
            print('all features have been removed, return the last available model')
            return model, X, best_alpha, scaler
        model, best_alpha = liner_model(X, Y, type=type, alpha=alpha)
        res = list(model.pvalues)
        max_p = max(res)

    # return result
    return model, X, best_alpha, scaler

However, I got an error: TypeError: issubclass() arg 2 must be a class, a tuple of classes, or a union

Traceback (most recent call last):
  File "...\cryptoapp\model.py", line 202, in <module>
    test = backward_selection_model(df, y_label)
  File "...\cryptoapp\model.py", line 150, in __init__
    model, X, best_alpha, scaler = backward_selection(self.df, y_label=self.y_label, type=self.type,
  File "...\venv\lib\site-packages\sklearn\utils\_testing.py", line 313, in wrapper
    return fn(*args, **kwargs)
  File "...\cryptoapp\model.py", line 116, in backward_selection
    model, best_alpha = liner_model(X, Y, type=type, alpha=alpha)
  File "C:\Users\carlo\PycharmProjects\ETH_transaction_fee_Study\cryptoapp\model.py", line 103, in liner_model
    best_alpha = alpha_search(X, Y, alpha=alpha, type='lasso')
  File "...\cryptoapp\model.py", line 89, in alpha_search
    model_cv = lasso_cv.fit(x, y)
  File "...\venv\lib\site-packages\sklearn\linear_model\_coordinate_descent.py", line 1571, in fit
    y = column_or_1d(y, warn=True)
  File "...\venv\lib\site-packages\sklearn\utils\validation.py", line 1029, in column_or_1d
    warnings.warn(
TypeError: issubclass() arg 2 must be a class, a tuple of classes, or a union

It is clear that the wrapper caused the error, but I don't know how to fix it.



Solution 1:[1]

I think I found the solution, and it is simple: the category argument must be a warning class or a tuple of classes, not a list. Just replace [] with ():

@ignore_warnings(category=(ConvergenceWarning, UserWarning))
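
Why does a list fail only at run time? As far as I can tell, ignore_warnings just records the category in a warnings filter; that filter is not consulted until a warning is actually emitted (here, inside column_or_1d), and the check it performs is an issubclass() call, which accepts a tuple of classes but not a list. A minimal sketch that reproduces the mechanism, independent of the model code above:

import warnings
from sklearn.exceptions import ConvergenceWarning

# issubclass() accepts a class, a tuple of classes, or a union as its second
# argument -- a list raises the exact TypeError from the traceback.
print(issubclass(UserWarning, (ConvergenceWarning, UserWarning)))  # True
try:
    issubclass(UserWarning, [ConvergenceWarning, UserWarning])
except TypeError as exc:
    print(exc)  # issubclass() arg 2 must be a class, a tuple of classes, or a union

# The same check runs inside the warnings machinery when a warning is emitted,
# which is why the error surfaces deep inside sklearn rather than at the decorator.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", (ConvergenceWarning, UserWarning))  # tuple works
    warnings.warn("this one is silenced", UserWarning)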

Here is another finding: if you see a warning like "DataConversionWarning: A column-vector y was passed when a 1d array was expected." and your y is a one-column slice of a pandas DataFrame, flatten it first:

y.values.ravel()
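
In the code above, Y is selected as a one-column DataFrame, so that warning comes from the LassoCV/RidgeCV fit calls in alpha_search. A small sketch of where the flattening could go, as a variation of the original function rather than a required change:

from sklearn.linear_model import LassoCV, RidgeCV

def alpha_search(x, y, alpha=(0.01, 0.05, 0.1, 0.5, 1), type='lasso'):
    # Flatten a one-column DataFrame into the 1-D vector that scikit-learn
    # expects, so no DataConversionWarning is raised.
    if hasattr(y, 'values'):
        y = y.values.ravel()
    if type == 'ridge':
        return RidgeCV(alphas=alpha).fit(x, y).alpha_
    if type == 'lasso':
        return LassoCV(alphas=alpha).fit(x, y).alpha_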

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Carlos