AttributeError: Can't pickle local object 'train.<locals>.create_model'

I am trying to use my own ML models to create a training job in AWS SageMaker. When I start the training process everything goes well, but at the end it fails with "AttributeError: Can't pickle local object 'train.<locals>.create_model'". I am new to this kind of work. I did the same thing for MLP, KNN, CART, and SVR models and never ran into this issue. I know that an LSTM needs quite a few more pieces to build the model, but I cannot figure out how to solve this.

Here is my train.py file where I get the error:

from __future__ import print_function

import json
import os
import pickle
import sys
import traceback

import pandas as pd
import numpy as np
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# These are the paths to where SageMaker mounts interesting things in your container.

prefix = "/opt/ml/"

input_path = prefix + "input/data"
output_path = os.path.join(prefix, "output")
model_path = os.path.join(prefix, "model")

# This algorithm has a single channel of input data called 'training'. Since we run in
# File mode, the input files are copied to the directory specified here.
channel_name = "training"
training_path = os.path.join(input_path, channel_name)

# The function to execute training.
def train():
    print("Starting the training")
    
    print(training_path)
    
    try:
        # Take the set of files and read them all into a single pandas dataframe
        input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ]
        if len(input_files) == 0:
            raise ValueError(('There are no files in {}.\n' +
                              'This usually indicates that the channel ({}) was incorrectly specified,\n' +
                              'the data specification in S3 was incorrectly specified or the role specified\n' +
                              'does not have permission to access the data.').format(training_path, channel_name))
        raw_data = [ pd.read_csv(file, header=0, index_col=0) for file in input_files ]
        data = pd.concat(raw_data)
        
        print(data)

        # convert series to supervised learning
        def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
            n_vars = 1 if type(data) is list else data.shape[1]
            df = DataFrame(data)
            cols, names = list(), list()
            # input sequence (t-n, ... t-1)
            for i in range(n_in, 0, -1):
                cols.append(df.shift(i))
                names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
            # forecast sequence (t, t+1, ... t+n)
            for i in range(0, n_out):
                cols.append(df.shift(-i))
                if i == 0:
                    names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
                else:
                    names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
            # put it all together
            agg = concat(cols, axis=1)
            agg.columns = names
            # drop rows with NaN values
            if dropnan:
                agg.dropna(inplace=True)
            return agg


        values = data.values
        # ensure all data is float
        values = values.astype('float32')
        # normalize features
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(values)


        # specify the number of lag time steps and features
        n_timesteps = 3
        n_features = 4
        # frame as supervised learning
        reframed = series_to_supervised(scaled, n_timesteps, 1)
        print(reframed.shape)


        # drop columns we don't want to predict
        reframed.drop(reframed.columns[[4,9,14,15,16,17,18]], axis=1, inplace=True)
        print(reframed.head())


        # split into train and test sets
        values = reframed.values
        n_train_size = 403
        train = values[:n_train_size, :]
        test = values[n_train_size:, :]
        # split into input and outputs
        n_obs = n_timesteps * n_features
        train_X, train_y = train[:, :n_obs], train[:, -1]
        test_X, test_y = test[:, :n_obs], test[:, -1]
        print(train_X.shape, len(train_X), train_y.shape)
        # reshape input to be 3D [samples, timesteps, features]
        train_X = train_X.reshape((train_X.shape[0], n_timesteps, n_features))
        test_X = test_X.reshape((test_X.shape[0], n_timesteps, n_features))
        print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)


        # Function to create model
        def create_model():
            # create model
            model = Sequential()
            model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
            model.add(Dense(1))
            # Compile model
            # optimizer = SGD(lr=learn_rate, momentum=momentum)
            model.compile(loss='mae',optimizer='adam')
            return model


        from scikeras.wrappers import KerasRegressor
        # create model
        model = KerasRegressor(model=create_model, verbose=0)


        from sklearn.model_selection import GridSearchCV
        # define the grid search parameters
        batch_size = [2,4,8,16,32]
        epochs = [10, 50, 100]
        #learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
        #momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
        param_grid = dict(batch_size=batch_size, epochs=epochs)
        grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
        grid_result = grid.fit(train_X, train_y)
        # summarize results
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

        # save the model
        with open(os.path.join(model_path, "snop-lstm.pkl"), "wb") as out:
            pickle.dump(grid, out)
        print("Training complete.")
    except Exception as e:
        # Write out an error file. This will be returned as the failureReason in the
        # DescribeTrainingJob result.
        trc = traceback.format_exc()
        with open(os.path.join(output_path, "failure"), "w") as s:
            s.write("Exception during training: " + str(e) + "\n" + trc)
        # Printing this causes the exception to be in the training job logs, as well.
        print("Exception during training: " + str(e) + "\n" + trc, file=sys.stderr)
        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)
        
if __name__ == "__main__":
    train()
    
    # A zero exit code causes the job to be marked as Succeeded.
    sys.exit(0)

And this is the log:

2022-02-25T10:28:16.751+03:00
Exception during training: Can't pickle local object 'train.<locals>.create_model'

2022-02-25T10:28:16.751+03:00
Traceback (most recent call last):
  File "/opt/program/train", line 154, in train
    pickle.dump(grid, out)

2022-02-25T10:28:16.751+03:00
AttributeError: Can't pickle local object 'train.<locals>.create_model'


Solution 1:[1]

It seems that you are trying to pickle an object of class GridSearchCV instead of the model itself:

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
...
...
pickle.dump(grid, out)

I think what you want instead is to retrieve the best model (via best_estimator_, see here: https://github.com/scikit-learn/scikit-learn/blob/37ac6788c/sklearn/model_selection/_search.py#L1247) and then pickle that model.
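
As a minimal sketch of that approach (reusing grid, train_X, train_y, and model_path from the training script above), the end of train() could look like this. Note that the error message names train.<locals>.create_model, i.e. a function defined inside train(), so create_model likely also needs to be moved to module level for pickle to be able to reference it:

import os
import pickle

# Fit the grid search as before.
grid_result = grid.fit(train_X, train_y)

# Pickle only the best fitted estimator rather than the whole
# GridSearchCV object. (For this to work, create_model should be
# defined at module level, not inside train().)
best_model = grid_result.best_estimator_
with open(os.path.join(model_path, "snop-lstm.pkl"), "wb") as out:
    pickle.dump(best_model, out)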

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1: Heiko Hotz