'Alternatives for df.iterrows() to speed up function

I have written a function, which calculates the days since the last game for each team ("Home Team" and "Away Team") based on the columns "Primary Key", "Home Team", "Away Team" and "Date". If no previous game can be calculated, the value will be set no -1.

I am using df.iterrows(), which seems super slow and the code smells in my opinion. Is there a faster and more elegant way to solve that? Here is the function:

def add_feat_Days_Since_Last_Game(df):
    col_home = "Home Days Since Last Game"
    col_away = "Away Days Since Last Game"

    df[col_home] = -1
    df[col_away] = -1

    for index, row in df.iterrows():
        key = row["Primary Key"]

        date = row["Date"]
        if not isinstance(date, dt.date):
            raise TypeError

        observed_team = row["Home Team"]
        helper = df.loc[
            ((df["Home Team"] == observed_team) | (df["Away Team"] == observed_team))
        ]
        shifted_game = helper.loc[(helper["Primary Key"].shift(-1) == key)]
        if not shifted_game.empty:
            days_since_last_game = date - shifted_game.iloc[0]["Date"]
            df.at[index, col_home] = days_since_last_game.days
        else:
            df.at[index, col_home] = -1

        observed_team = row["Away Team"]
        helper = df.loc[
            ((df["Home Team"] == observed_team) | (df["Away Team"] == observed_team))
        ]
        shifted_game = helper.loc[(helper["Primary Key"].shift(-1) == key)]
        if not shifted_game.empty:
            days_since_last_game = date - shifted_game.iloc[0]["Date"]
            df.at[index, col_away] = days_since_last_game.days
        else:
            df.at[index, col_away] = -1

    return df

I have also written a simple test:

def test_add_feat_Days_Since_Last_Game():
    data = {
        "Primary Key": ["1", "2", "3", "4"],
        "Home Team": ["Team A", "Team B", "Team C", "Team D"],
        "Away Team": ["Team B", "Team C", "Team A", "Team E"],
        "Date": [
            dt.datetime(2022, 1, 1),
            dt.datetime(2022, 1, 3),
            dt.datetime(2022, 1, 7),
            dt.datetime(2022, 1, 8),
        ],
    }
    df = pd.DataFrame(data=data)
    df = add_feat_Days_Since_Last_Game(df)

    col_home = "Home Days Since Last Game"
    col_away = "Away Days Since Last Game"

    game_key = "1"
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_home] == -1
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_away] == -1

    game_key = "2"
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_home] == 2
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_away] == -1

    game_key = "3"
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_home] == 4
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_away] == 6

    game_key = "4"
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_home] == -1
    assert df.loc[df["Primary Key"] == game_key].iloc[0][col_away] == -1


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source