Saving Patsy for sklearn inference
I am a big fan of your sklego project, especially the patsy implementation within sklearn.
However, there is one thing I still would like your opinion on - how do you use a pipeline containing PatsyTransformer only for inference?
As pickling is not yet supported on the patsy side, I came up with a workaround.
import seaborn as sns
from joblib import dump, load
from sklego.preprocessing import PatsyTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
# Fetch the example "tips" dataset through seaborn
data = sns.load_dataset("tips")

# Assemble the pipeline: a patsy formula step followed by a linear regression
formula_step = ("patsy", PatsyTransformer("tip + C(day)"))
regressor_step = ("model", LinearRegression())
pipe = Pipeline([formula_step, regressor_step])

# Display the dataset (notebook-style bare expression)
data

# Fit the whole pipeline, using the total_bill column as the target
pipe.fit(data, data['total_bill'])
from sklearn.base import BaseEstimator, TransformerMixin
# Class for inferencing with pre-trained model (fit only passes, no training happens)
class Model_Inferencer(BaseEstimator, TransformerMixin):
    """Pipeline step that applies an already-trained model.

    ``fit`` is a no-op, so placing this step in a pipeline never retrains
    the wrapped model; ``predict`` and ``transform`` both return the wrapped
    model's predictions.

    Parameters
    ----------
    pre_trained_model : object, default=None
        Model exposing a ``predict(X)`` method. It is stored as-is and only
        used when ``predict`` or ``transform`` is called.
    """

    def __init__(self, pre_trained_model=None):
        self.pre_trained_model = pre_trained_model

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``; the wrapped model is untouched."""
        return self

    def predict(self, X):
        """Return ``pre_trained_model.predict(X)``."""
        return self.pre_trained_model.predict(X)

    def transform(self, X):
        """Return the same predictions as ``predict`` for ``X``."""
        # Delegate so both entry points share a single code path.
        return self.predict(X)
# Peek at the first ten predictions of the trained pipeline
pipe.predict(data)[:10]

# Persist only the "model" step of the pipeline, then read it back from disk
model_path = 'model_github.joblib'
dump(pipe['model'], model_path)
loaded_model = load(model_path)

# Build the inference pipeline: a new patsy step plus the reloaded model
inference_steps = [
    ("patsy", PatsyTransformer("tip + C(day)")),
    ("inferencer", Model_Inferencer(loaded_model)),
]
pipe_inference = Pipeline(inference_steps)

# The inference pipeline still needs to be fitted before it can predict:
# pipe_inference.fit(data)

# Predict with the inference pipeline (works only when fitted)
pipe_inference.predict(data)
I also tried saving the info by hand:
import h5py
def save_patsy(patsy_step, filename):
    """Write the ``design_info_`` of a patsy step into an HDF5 file.

    Parameters
    ----------
    patsy_step : object
        Object exposing a ``design_info_`` attribute; its value is stored
        under the dataset name ``"design_info"``.
    filename : str
        Path of the ``.h5`` file. It is opened in write mode (``'w'``).
    """
    with h5py.File(filename, 'w') as hf:
        # NOTE: h5py only accepts data that maps to a native HDF5 type.
        # For the fitted PatsyTransformer this call is reported to fail with
        # "Object dtype dtype('O') has no native HDF5 equivalent".
        hf.create_dataset("design_info", data=patsy_step.design_info_)
def load_coefficients(patsy_step, filename):
    """Attach the design info stored in an HDF5 file to a patsy step.

    Reads the ``"design_info"`` dataset from ``filename`` and assigns its
    contents to ``patsy_step.design_info_``.

    Parameters
    ----------
    patsy_step : object
        Object whose ``design_info_`` attribute is overwritten.
    filename : str
        Path of the ``.h5`` file. It is opened in read mode (``'r'``).
    """
    with h5py.File(filename, 'r') as hf:
        # ``[:]`` reads the full dataset while the file is still open.
        design_info = hf['design_info'][:]
    patsy_step.design_info_ = design_info
# Try to store the design info of the pipeline's "patsy" step in clf.h5
save_patsy(pipe['patsy'], "clf.h5")
However, this fails with the following error:
Object dtype dtype('O') has no native HDF5 equivalent
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
