'I am having problems with parameter estimation

I am interested in using pyomo for simulation and model fitting, especially using dynamic data. I have been trying some of the examples available and adapt them to the kind of problems to handle at last. One of the examples is the semibatch reactor which is available from various sources. First I implemented the code to simulate the dynamics and save the dynamics in a .csv file. No problems, here. Then, I been trying to use this data for parameter estimation, without success yet. Below, I include the model

import numpy as np
import pandas as pd
import sys
from pyomo.environ import ConcreteModel, Set, Param, Var, Constraint, ConstraintList, Expression, Objective, TransformationFactory, SolverFactory, exp, minimize
from pyomo.dae import ContinuousSet, DerivativeVar
import pyomo.contrib.parmest.parmest as parmest
from os.path import join, abspath, dirname
#
# introduce the model
def generate_model(data):

    # unpack and fix the data
    texp       = data['time']
    cameastemp = data['ca']
    cbmeastemp = data['cb']
    ccmeastemp = data['cc']
    trmeastemp = data['tr']

    # built dictionaries with data
    timeas={}
    cameas={}
    cbmeas={}
    ccmeas={}
    trmeas={}
    for i in cameastemp.keys():
        timeas[float(i)] = texp[i]
        cameas[float(i)] = cameastemp[i]
        cbmeas[float(i)] = cbmeastemp[i]
        ccmeas[float(i)] = ccmeastemp[i]
        trmeas[float(i)] = trmeastemp[i]

    # model specification    
    m = ConcreteModel()
    #
    # Measurement Data
    #
    m.measT = Set(initialize=sorted(cameas.keys()))
    m.time_meas = Param(m.measT, initialize=timeas)
    m.Ca_meas = Param(m.measT, initialize=cameas)
    m.Cb_meas = Param(m.measT, initialize=cbmeas)
    m.Cc_meas = Param(m.measT, initialize=ccmeas)
    m.Tr_meas = Param(m.measT, initialize=trmeas)
    #
    # Parameters for semi-batch reactor model
    #
    m.R = Param(initialize=8.314) # kJ/kmol/K
    m.Mwa = Param(initialize=50.0) # kg/kmol
    m.rhor = Param(initialize=1000.0) # kg/m^3
    m.cpr = Param(initialize=3.9) # kJ/kg/K
    m.Tf = Param(initialize=300) # K
    m.deltaH1 = Param(initialize=-40000.0) # kJ/kmol
    m.deltaH2 = Param(initialize=-50000.0) # kJ/kmol
    m.alphaj = Param(initialize=0.8) # kJ/s/m^2/K
    m.alphac = Param(initialize=0.7) # kJ/s/m^2/K
    m.Aj = Param(initialize=5.0) # m^2
    m.Ac = Param(initialize=3.0) # m^2
    m.Vj = Param(initialize=0.9) # m^3
    m.Vc = Param(initialize=0.07) # m^3
    m.rhow = Param(initialize=700.0) # kg/m^3
    m.cpw = Param(initialize=3.1) # kJ/kg/K
    m.Ca0 = Param(initialize=3.5) # kmol/m^3)
    m.Cb0 = Param(initialize=1.0) # kmol/m^3)
    m.Cc0 = Param(initialize=0.0) # kmol/m^3)
    m.Tr0 = Param(initialize=300.0) # K
    m.Vr0 = Param(initialize=1.0) # m^3
    #
    # set the time
    m.time = ContinuousSet(bounds=(0, 21600), initialize=m.measT)  # Time in seconds
    #
    # Control Inputs
    #
    def _initTc(m, t):
        if t < 10800:
            return 300
        else:
            return 305
    m.Tc = Param(m.time, initialize=_initTc, default=_initTc)  # bounds= (288,432) Cooling coil temp, control input

    def _initFa(m, t):
        if t < 10800:
            return 0.001
        else:
            return 0.0015
    m.Fa = Param(m.time, initialize=_initFa, default=_initFa)  # bounds=(0,0.05) Inlet flow rate, control input

    #
    # Parameters being estimated
    #
    m.k1 = Var(initialize=14, bounds=(2,2000))  # 1/s Actual: 15.01
    m.k2 = Var(initialize=90, bounds=(2,1500))  # 1/s Actual: 85.01
    m.E1 = Var(initialize=27000.0, bounds=(1500,40000))  # kJ/kmol Actual: 30000
    m.E2 = Var(initialize=45000.0, bounds=(1500,80000))  # kJ/kmol Actual: 40000
    # m.E1.fix(30000)
    # m.E2.fix(40000)
    #
    # Time dependent variables
    #
    m.Ca = Var(m.time, initialize=m.Ca0, bounds=(0,250))
    m.Cb = Var(m.time, initialize=m.Cb0, bounds=(0,250))
    m.Cc = Var(m.time, initialize=m.Cc0, bounds=(0,250))
    m.Vr = Var(m.time, initialize=m.Vr0, bounds=(1,3))
    m.Tr = Var(m.time, initialize=m.Tr0, bounds=(288,315))
    m.Tj = Var(m.time, initialize=310.0, bounds=(288,None)) # Cooling jacket temp, follows coil temp until failure

    #
    # Derivatives in the model
    #
    m.dCa = DerivativeVar(m.Ca)
    m.dCb = DerivativeVar(m.Cb)
    m.dCc = DerivativeVar(m.Cc)
    m.dVr = DerivativeVar(m.Vr)
    m.dTr = DerivativeVar(m.Tr)

    #
    # Differential Equations in the model
    #

    def _dCacon(m,t):
        if t == 0:
            return Constraint.Skip
        return m.dCa[t] == m.Fa[t]/m.Vr[t] - m.k1*exp(-m.E1/(m.R*m.Tr[t]))*m.Ca[t]
    m.dCacon = Constraint(m.time, rule=_dCacon)

    def _dCbcon(m,t):
        if t == 0:
            return Constraint.Skip
        return m.dCb[t] == m.k1*exp(-m.E1/(m.R*m.Tr[t]))*m.Ca[t] - \
                           m.k2*exp(-m.E2/(m.R*m.Tr[t]))*m.Cb[t]
    m.dCbcon = Constraint(m.time, rule=_dCbcon)

    def _dCccon(m,t):
        if t == 0:
            return Constraint.Skip
        return m.dCc[t] == m.k2*exp(-m.E2/(m.R*m.Tr[t]))*m.Cb[t]
    m.dCccon = Constraint(m.time, rule=_dCccon)

    def _dVrcon(m,t):
        if t == 0:
            return Constraint.Skip
        return m.dVr[t] == m.Fa[t]*m.Mwa/m.rhor
    m.dVrcon = Constraint(m.time, rule=_dVrcon)

    def _dTrcon(m,t):
        if t == 0:
            return Constraint.Skip
        return m.rhor*m.cpr*m.dTr[t] == \
               m.Fa[t]*m.Mwa*m.cpr/m.Vr[t]*(m.Tf-m.Tr[t]) - \
               m.k1*exp(-m.E1/(m.R*m.Tr[t]))*m.Ca[t]*m.deltaH1 - \
               m.k2*exp(-m.E2/(m.R*m.Tr[t]))*m.Cb[t]*m.deltaH2 + \
               m.alphaj*m.Aj/m.Vr0*(m.Tj[t]-m.Tr[t]) + \
               m.alphac*m.Ac/m.Vr0*(m.Tc[t]-m.Tr[t])
    m.dTrcon = Constraint(m.time, rule=_dTrcon)

    def _singlecooling(m,t):
        return m.Tc[t] == m.Tj[t]
    m.singlecooling = Constraint(m.time, rule=_singlecooling)

    # Initial Conditions
    def _initcon(m):
        yield m.Ca[m.time.first()] == m.Ca0
        yield m.Cb[m.time.first()] == m.Cb0
        yield m.Cc[m.time.first()] == m.Cc0
        yield m.Vr[m.time.first()] == m.Vr0
        yield m.Tr[m.time.first()] == m.Tr0
    m.initcon = ConstraintList(rule=_initcon)

    #
    # Stage-specific cost computations
    #
    def ComputeFirstStageCost_rule(model):
        return 0
    m.FirstStageCost = Expression(rule=ComputeFirstStageCost_rule)

    def AllMeasurements(m):
        return sum((m.Ca[t] - m.Ca_meas[t]) ** 2 + (m.Cb[t] - m.Cb_meas[t]) ** 2 
                   + (m.Cc[t] - m.Cc_meas[t]) ** 2
                   + 0.01 * (m.Tr[t] - m.Tr_meas[t]) ** 2 for t in m.measT)

    def MissingMeasurements(m):
            return sum((m.Tr[t] - m.Tr_meas[t]) ** 2 for t in m.measT)
    m.SecondStageCost = Expression(rule=MissingMeasurements)

    def total_cost_rule(model):
        return model.FirstStageCost + model.SecondStageCost
    m.Total_Cost_Objective = Objective(rule=total_cost_rule, sense=minimize)

    # Discretize model
    disc = TransformationFactory('dae.collocation')
    disc.apply_to(m, wrt= m.time, nfe=25, ncp=4)
    return m

Now I include the main procedure with the call to solver deactivated and the 4 lines to write the model into a file for checking out...

   # load data for fitting
    def main():
        ### read data
        file_dirname = dirname(abspath(str(__file__)))
        file_name = abspath(join(file_dirname, 'myresults1.csv'))
        data1 = pd.read_csv(file_name)  
        model = generate_model(data1)
        f = open('file6.txt', 'w')
        sys.stdout = f
        model.pprint()
        f.close()
        solver = SolverFactory('ipopt')
    #    solver.solve(model)
    #    print('k1 = ', model.k1())
    #    print('E1 = ', model.E1())
    #    print('k2 = ', model.k2())
    #    print('E2 = ', model.E2())
        
    if __name__ == '__main__':
        main()

When I run the code I got this message:

WARNING: More finite elements were found in ContinuousSet 'time' than the
    number of finite elements specified in apply. The larger number of finite
    elements will be used.

I am using 'daecollocotion' with nfe=25 and ncp=4, and the data I am using is formed by 101 records with the measurements available at equidistant points in time. Then, the optimal obtained with ipopt is very different from that expected.

I believe one of the problems can be related with Control inputs. In simulation they were handled using var_input:

Fai_profile = {0: 0.001, 10800: 0.0015}
Tci_profile = {0: 300, 10800: 305}
m.var_input = Suffix(direction=Suffix.LOCAL)
m.var_input[m.Fai] = Fai_profile
m.var_input[m.Tci] = Tci_profile 

Here, I am using a declaration within the model to represent them:

 #
    # Control Inputs
    #
    def _initTc(m, t):
        if t < 10800:
            return 300
        else:
            return 305
    m.Tc = Param(m.time, initialize=_initTc, default=_initTc)  # bounds= (288,432) Cooling coil temp, control input

    def _initFa(m, t):
        if t < 10800:
            return 0.001
        else:
            return 0.0015
    m.Fa = Param(m.time, initialize=_initFa, default=_initFa)  # bounds=(0,0.05) Inlet flow rate, control input

The reason to believe that one of the problems is here is coming from the model. Where for these variable m.Fa and m.Tc something like this occurs:

 Fa : Size=405, Index=time, Domain=Any, Default=(function), Mutable=False
        Key   : Value
          0.0 :  0.001
    ...
        100.0 :  0.001
        21600 : 0.0015

So it seems the keys for these variables are wrongly specified. The former 100 points (4*25) appear in a sequence of t and the last point is the upper bound of the time domain.

In case somebody has ideas of what can i do to overcome the problem let me know, please. I can provide the codes and data; they are adaptations of available examples.



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source