Adjusting shape of a data array to perform optimization in SciPy

I have some code that performs an optimization to infer a parameter:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
    
    
d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
    
def peak_infections(beta, df):
    
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/7 * 7 #rate should be in weeks now
    # A grid of time points 
    t = np.linspace(0, weeks[-1], weeks[-1] + 1)
    
    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ
    
    # Initial conditions are S0, I0, R0, J0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    
    return I/N
    
def residual(x, df):
    
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
    
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
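
In this weekly version the shapes line up by construction: t = np.linspace(0, weeks[-1], weeks[-1] + 1) has 12 points, so peak_infections returns 12 values and the [1:] slice leaves 11, matching the 11 incidence entries. A quick check of that counting, using the same numbers as above:

import numpy as np

weeks = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
t = np.linspace(0, weeks[-1], weeks[-1] + 1)  # 0, 1, ..., 11 -> 12 time points
print(len(t))       # 12 values returned by peak_infections
print(len(t) - 1)   # 11 after the [1:] slice, same length as incidence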

However, it is not giving the correct values, so instead of taking the weeks as 1, 2, 3, ... in the line d = {'Week': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 'incidence': [...]}, I'd like to use days instead so the solver has clearer information to work with. I'd like to slice the linspace of days at weekly intervals. However, I'm running into shape-alignment issues:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import root
from scipy.optimize import minimize
import pandas as pd
    
time = np.linspace(0, 77, 77 + 1)
d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
    
def peak_infections(beta, df):
    
    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()
    
    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/7 * 7 #rate should be in weeks now
    # A grid of time points 
    t = np.linspace(0, 77, 77 + 1)
    
    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ
    
    # Initial conditions are S0, I0, R0, J0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T
    
    return I/N
    
def residual(x, df):
    
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)
    
x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)

The approach I tried here was recreating the dataframe by slicing time, which covers 77 days, i.e. 11 weeks. It still raises the shape error (77 against 11 elements) inside my residual function, in the line return np.sum((peak_infections(x, df)[1:] - incidence) ** 2). Where is my approach going wrong?
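
To make the intent concrete, the alignment I am aiming for is roughly the sketch below: solve on the daily grid and keep only every seventh point, so the model output has the same 11 weekly values as the incidence data (this assumes peak_infections returns the 78 values of the daily grid t = 0, 1, ..., 77, as in the code above):

# hypothetical downsampling of the daily solution to weekly points
I_daily = peak_infections(0.5, df)   # 78 values, one per day 0..77
I_weekly = I_daily[7::7]             # days 7, 14, ..., 77 -> 11 values
print(I_weekly.shape)                # (11,), same shape as df.incidence.to_numpy()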

EDIT: updated code

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas as pd

t = np.arange(7,84,7)
d = {'Week': t, 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [time[7],time[14],time[21],time[28],time[35],time[42],time[49],time[56],time[63],time[70],time[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)

def peak_infections(beta, df):

    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()

    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/7 * 7 #rate should be in weeks now
    # A grid of time points 
    t = np.linspace(0, 77, 77 + 1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Initial conditions are S0, I0, R0, J0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T

    return I/N

def residual(x, df):

    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    return np.sum((peak_infections(x, df) - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)
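
Note that in this updated version the residual still compares the full 78-point daily solution against the 11 weekly incidence values, so the same mismatch remains. One indexing idea I am considering (hypothetical, and relying on df.Week holding the integer day numbers 7, 14, ..., 77 from np.arange(7, 84, 7)) would be:

def residual(x, df):
    # Total population, N.
    N = 100000
    incidence = df.incidence.to_numpy() / N
    weekly_days = df.Week.to_numpy()                     # 7, 14, ..., 77
    model_weekly = peak_infections(x, df)[weekly_days]   # pick the 11 weekly model values
    return np.sum((model_weekly - incidence) ** 2)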


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source