How do I optimise a process, given that the ML model has already been tested and fitted?

I've spot-checked and grid-searched several regression models in Python to predict a single continuous output from 5 controlled input variables, and it does well (95% accuracy). I have of course selected the best algorithm from the spot check and grid search/hyperparameter optimisation, so I now have a fully working model that can make predictions.

How can I use the model to calculate which set of controlled variables will give me the desired output?

In an optimisation sense: in which region of the design space should I operate my 5 controlled input variables (X1, X2, X3, X4, X5) so that I am likely to get a desired output (Y)?

In other words, I want to fix a target end point Y and use the model to calculate which set of X1, X2, X3, X4, X5 values I should use.

Any snippets?
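To make the goal concrete, here is the kind of thing I imagine: a rough, untested sketch that treats the fitted model from my code below (`Model_5`) as a black box and searches for inputs whose prediction matches a target. The target value, bounds, and starting point are made up for illustration:

```python
# rough sketch only -- search for inputs whose prediction matches a target
import numpy as np
from scipy.optimize import minimize

y_target = 50.0  # hypothetical desired output Y
# hypothetical operating limits for X1..X5
bounds = [(0, 100), (0, 5), (0, 1), (0, 5), (0, 10)]
x0 = np.array([50.0, 2.5, 0.5, 2.5, 5.0])  # hypothetical starting point

def objective(x):
    # squared error between the model's prediction and the desired Y
    return (Model_5.predict(x.reshape(1, -1))[0] - y_target) ** 2

# note: this seems fine for a smooth model like HuberRegressor; tree
# ensembles give a stepwise prediction surface, where random sampling
# of the design space may be more robust
result = minimize(objective, x0, bounds=bounds, method='L-BFGS-B')
print('Candidate inputs:', result.x)
print('Predicted output:', Model_5.predict(result.x.reshape(1, -1))[0])
```

This would only give me a single point, though, not the operating region I asked about.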

Here is my code:


```python
import warnings

from numpy import absolute, mean, std
from matplotlib import pyplot
from sklearn.ensemble import (BaggingRegressor, ExtraTreesRegressor,
                              RandomForestRegressor)
from sklearn.linear_model import (ElasticNet, HuberRegressor, LassoLars,
                                  LinearRegression, Ridge)
from sklearn.model_selection import cross_val_score
from sklearn.multioutput import RegressorChain
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor

# load the dataset, returns X and y elements
def load_dataset():
    return X, Y
# create a dict of standard models to evaluate {name:object}
def get_models(models=None):
    # avoid a mutable default argument; build a fresh dict per call
    if models is None:
        models = dict()
    # linear models
    models['Linear Regression'] = LinearRegression()
    models['Least Angle'] = LassoLars()
    models['Ridge'] = Ridge()
    models['Elastic Net'] = ElasticNet()
    models['HuberRegressor'] = HuberRegressor()
    models['Support Vector Machine'] = SVR() 
    
    # non-linear models
    n_neighbors = range(1, 20)
    for k in n_neighbors:
        models['K-neighbours'+ str(k)] = KNeighborsRegressor(n_neighbors=k)
    models['Decision Tree'] = DecisionTreeRegressor()
    models['Extra Tree'] = ExtraTreeRegressor()
    
    # Ensemble models
    models['Bagging'] = BaggingRegressor(n_estimators=100)
    models['Random Forest'] = RandomForestRegressor(n_estimators=100)
    
    # Wrapped Models
    tree = range(1, 1000, 500)  # n_estimators values to try: 1 and 501
    for i in tree:
        models['Wrapped Extra Tree' + str(i)] = RegressorChain(ExtraTreesRegressor(n_estimators=i))
        models['Wrapped Random Forest' + str(i)] = RegressorChain(RandomForestRegressor(n_estimators=i))
        models['Wrapped Bagging' + str(i)] = RegressorChain(BaggingRegressor(n_estimators=i))
    

    print('Defined %d models' % len(models))
    return models
 
# create a feature preparation pipeline for a model
def make_pipeline(model):
    steps = list()
    # standardization
    steps.append(('standardize', StandardScaler()))
    # normalization
    steps.append(('normalize', MinMaxScaler()))
    # the model
    steps.append(('model', model))
    # create pipeline
    pipeline = Pipeline(steps=steps)
    return pipeline
 
# evaluate a single model
def evaluate_model(X, Y, model, folds, metric):
    # create the pipeline
    pipeline = make_pipeline(model)
    # evaluate model
    scores = cross_val_score(pipeline, X, Y, scoring=metric, cv=folds, n_jobs=-1)
    return scores
 
# evaluate a model 
def robust_evaluate_model(X, Y, model, folds, metric):
    scores = None
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            scores = evaluate_model(X, Y, model, folds, metric)
    except Exception:
        scores = None
    return scores
 
# evaluate a dict of models {name:object}, returns {name:score}
def evaluate_models(X, Y, models, folds=5, metric='neg_mean_squared_error'):
    results = dict()
    for name, model in models.items():
        # evaluate the model
        scores = robust_evaluate_model(X, Y, model, folds, metric)
        # show process
        if scores is not None:
            # store a result
            results[name] = scores
            n_scores = absolute(scores)
            mean_score, std_score = mean(n_scores),std(n_scores)
            print('>%s: %.3f (+/-%.3f)' % (name, mean_score, std_score))
        else:
            print('>%s: error' % name)
    return results
 
# print and plot the top n results
def summarize_results(results, maximize=True, top_n=10):
    # check for no results
    if len(results) == 0:
        print('no results')
        return
    # determine how many results to summarize
    n = min(top_n, len(results))
    # create a list of (name, mean(scores)) tuples
    mean_scores = [(k,mean(v)) for k,v in results.items()]
    # sort tuples by mean score
    mean_scores = sorted(mean_scores, key=lambda x: x[1])
    # reverse for descending order (e.g. for accuracy)
    if maximize:
        mean_scores = list(reversed(mean_scores))
    # retrieve the top n for summarization
    names = [x[0] for x in mean_scores[:n]]
    scores = [results[x[0]] for x in mean_scores[:n]]
    # print the top n
    print()
    for i in range(n):
        name = names[i]
        mean_score, std_score = mean(results[name]), std(results[name])
        print('Rank=%d, Name=%s, Score=%.3f (+/- %.3f)' % (i+1, name, mean_score, std_score))
    # boxplot for the top n
    pyplot.figure(figsize=(20, 10))
    pyplot.boxplot(scores, labels=names)
    _, labels = pyplot.xticks()
    pyplot.setp(labels, rotation=70)
    pyplot.ylabel('Negative Mean Squared Error')
    pyplot.title('Machine Learning Model Selection')
    pyplot.savefig('spotcheck.png', bbox_inches='tight')
 
# load dataset
X, Y = load_dataset()
# get model list
models = get_models()
# evaluate models
results = evaluate_models(X, Y, models, metric='neg_mean_squared_error')
# summarize results
summarize_results(results)


# fit the chosen model on the whole dataset
Model_5 = HuberRegressor()
Model_5.fit(X, Y)

# make a single prediction with known controlled variables
row = [58.3447131, 1.25, 0.05, 1.3, 7.2]


yhat = Model_5.predict([row])
# summarize the prediction
print('Predicted: %s' % yhat[0])
```
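For the "design area" part of the question, the best I can think of is brute force: sample the design space at random, predict Y for every sample, and keep the inputs whose prediction lands near the target. Another rough, untested sketch, where the variable ranges, target, and tolerance are again made up:

```python
# rough sketch only -- map the operating region by random sampling
import numpy as np

rng = np.random.default_rng(42)
n_samples = 100_000
lows = np.array([0.0, 0.0, 0.0, 0.0, 0.0])      # hypothetical lower limits for X1..X5
highs = np.array([100.0, 5.0, 1.0, 5.0, 10.0])  # hypothetical upper limits for X1..X5

# draw candidate settings uniformly within the limits and predict Y for each
candidates = rng.uniform(lows, highs, size=(n_samples, 5))
preds = Model_5.predict(candidates)

y_target = 50.0  # hypothetical desired output
tolerance = 0.5  # hypothetical acceptable deviation from the target
feasible = candidates[np.abs(preds - y_target) <= tolerance]

print('%d of %d sampled settings predict Y within %.2f of the target'
      % (len(feasible), n_samples, tolerance))
if len(feasible) > 0:
    # per-variable min/max gives a bounding box around the operating region
    print('Lower corner:', feasible.min(axis=0))
    print('Upper corner:', feasible.max(axis=0))
```

The bounding box is only a summary; the feasible region can be irregular, so scatter-plotting pairs of the kept variables would show its actual shape.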

