'Why using statmodels.adfuller when using a for to iterate through a dataframe and selects the time series give me an error?

The problem here is to test if all stocks are integrated of order 1 , I(1), then search for cointegrated pairs. Until now, I'm just testing if they're I(1) using ADF test, but is not working correctly

import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib as ptl
import statsmodels.tsa.stattools as ts

#List of the 50 companies with most % in IBOVESPA at 05-02-2022
Stocks = ["VALE3","PETR4","ITUB4","BBDC4","PETR3","B3SA3","ABEV3","JBSS3","BBAS3","WEGE3","ITSA4","HAPV3","SUZB3","RENT3"
,"GGBR4","BPAC11","RDOR3","EQTL3","CSAN3","VBBR3","LREN3","BBDC3","RADL3","PRIO3","VIVT3","RAIL3","ENEV3","BBSE3","KLBN11","TOTS3"
,"CMIG4","NTCO3","HYPE3","SBSP3","BRFS3","ELET3","AMER3","UGPA3","MGLU3","CCRO3","CSNA3","ASAI3","ENGI11","SANB11","TIMS3","CPLE6"
,"EGIE3","BRKM5","EMBR3","ELET6"]

Stocks_SA = []
for tickers in Stocks:
    new_i = f'{tickers}.SA'
    Stocks_SA.append(new_i)

def download_data(List):
    data = pd.DataFrame()
    names = []
    for i in List:
        df = pd.DataFrame(yf.download(i,start = "2020-04-30", end = "2021-04-30"))
        df = df.dropna()
        df["Adj Close"] = np.log(df["Adj Close"])
        df2 = df.iloc[:,4]
        data = pd.concat([data,df2],axis =1)
        names.append(i)
    data.columns = names
    return data
s_data = download_data(Stocks_SA)

import statsmodels.tsa.stattools as ts
def Testing_ADF(data): #Test if all stocks are integrated in order one I(1)
    names = data.columns.values.tolist()
    n = data.shape[1]
    I_one = []
    keys = data.keys()
    for n in names:
        series = data[n]
        result_adf = ts.adfuller(series)
        if result_adf[1]> 0.05:
            I_one.append(n)
    return I_one

I_one_list = Testing_ADF(s_data)
I_one_list

When i run Testing_ADF(s_data) I get MissingDataError: exog contains inf or nansbut if I run just this code it works perfectly:

df = pd.DataFrame(yf.download("VALE3.SA",start = "2020-04-30", end = "2021-04-30"))
#df2 = ts.adfuller(df)
df["Adj Close"] = np.log(df["Adj Close"])
df2 = df.iloc[:,4]
df2.dropna()

adfuller = ts.adfuller(df2)
adfuller

So, why it works in one and not in the other one? and How can I fix it?

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

'Why using statmodels.adfuller when using a for to iterate through a dataframe and selects the time series give me an error?

Sources

Related Questions