'Python : Extract specific string pattern from a pandas columns and storing into new columns

I have a Dataframe in the below format:

Ticker
ABAN10OCTFUT
ABAN10SEP700PA
ABAN10SEP720PA
ABAN10SEP740PA
ABAN10SEP760PA

I am trying to extract instrument_name, year, month,strike_price and instrument_type from the ticker column and storing the same in the existing dataframe. I'm able to extract the values but I couldn't able to save the values in the existing dataframe.

The below code is used to extract the values from a dataframe.

for index,row in all_files_tickers.iterrows():
    ticker = row['ticker']
    print(index)
    print(ticker)
    try:
        #Grouped FUT tickers here# ABAN10OCTFUT AREVAT&D10SEPFUT MCDOWELL-N10NOVFUT TV-1810OCTFUT NFTYMCAP5011FEBFUT FTSE10012AUGFUT
        instrument_name,year,month,instrument_type = re.findall(r'([A-Z]{2,10}|[A-Z]{1,6}&[A-Z]{1,6}|[A-Z]{1,6}-[A-Z]{1,6}|[A-Z]{1,6}-[0-9]{1,5}|[A-Z]{1,9}[0-9]{1,2}|[A-Z]{1,6}&[A-Z]{1,6}[0-9]{1,3}|[A-Z]{1,6}[0-9]{1,3})(09|10|11|12|13|14|15|16|17|18|19|20|21)(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(FUT)',ticker)[0]
    except:
        try:
            #Grouped other tickers here# ABAN10SEP700PA ABAN10SEP780CA AREVAT&D10SEP300CA DISHTV10SEP52.5CA MCDOWELL-N10SEP1550CA TV-1810SEP90CA
            instrument_name,year,month,strike_price,instrument_type = re.findall(r'([A-Z]{2,10}|[A-Z]{1,6}&[A-Z]{1,6}|[A-Z]{1,6}-[A-Z]{1,6}|[A-Z]{1,6}-[0-9]{1,5}|[A-Z]{1,9}[0-9]{1,2}|[A-Z]{1,6}&[A-Z]{1,6}[0-9]{1,3}|[A-Z]{1,6}[0-9]{1,3})(09|10|11|12|13|14|15|16|17|18|19|20|21)(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)([0-9]{1,5}|[0-9]{1,5}.[0-9]{1,3})(PA|CA)',ticker)[0]
        except:
            try:
                #Grouped option tickers here# BANKNIFTY10OCT10500PE BANKNIFTY10OCT11000CE ISPATIND11JAN22.5CE NFTYMCAP5010DEC2600PE S&P50011SEP1100PE
                instrument_name,year,month,strike_price,instrument_type = re.findall(r'([A-Z]{2,10}|[A-Z]{1,6}&[A-Z]{1,6}|[A-Z]{1,6}-[A-Z]{1,6}|[A-Z]{1,6}-[0-9]{1,5}|[A-Z]{1,9}[0-9]{1,2}|[A-Z]{1,6}&[A-Z]{1,6}[0-9]{1,3}|[A-Z]{1,6}[0-9]{1,3})(09|10|11|12|13|14|15|16|17|18|19|20|21)(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)([0-9]{1,5}|[0-9]{1,5}.[0-9]{1,3})(PE|CE)',ticker)[0]
            except:
                try:
                    #ss
                    instrument_name = re.findall(r'([a-z]{1,3})',ticker)[0]
                except:
                    try:
                        #S&P5001SEPFUT
                        instrument_name,year,month,instrument_type = re.findall(r'([A-Z]{1,6}&[A-Z]{1,6}[0-9]{1,3})(1)(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(FUT)',ticker)[0]
                    except:
                        try:
                            #COALINDIA14130265CE
                            instrument_name,year,month,strike_price,instrument_type = re.findall(r'([A-Z]{2,10})(09|10|11|12|13|14|15|16|17|18|19|20|21)([0-9]{1,3})([0-9]{1,5})(PE|CE)',ticker)[0]
                        except:
                            try:
                                #FTSE100FEBFUT
                                instrument_name,month,instrument_type = re.findall(r'([A-Z]{1,6}[0-9]{1,3})(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)(FUT)',ticker)[0]
                            except:
                                #ADANIPOWER28FEB1927.5PE
                                instrument_name,date,month,strike_price,instrument_type = re.findall(r'([A-Z]{1,10})([0-9]{1,2})(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)([0-9]{1,5}.[0-9]{1,3})(CE|PE)',ticker)[0]

I have tried another method to extract values and save into the datafram but I didn't get the desired output.

def get_instrument_params(ticker):
    instrument_name= re.findall(r'([A-Z]{2,10}|[A-Z]{1,6}&[A-Z]{1,6}|[A-Z]{1,6}-[A-Z]{1,6}|[A-Z]{1,6}-[0-9]{1,5}|[A-Z]{1,9}[0-9]{1,2}|[A-Z]{1,6}&[A-Z]{1,6}[0-9]{1,3}|[A-Z]{1,6}[0-9]{1,3})',ticker)
    year= re.findall(r'(09|10|11|12|13|14|15|16|17|18|19|20|21)',ticker)
    month= re.findall(r'(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)',ticker)
    strike_price = re.findall(r'([0-9]{1,5}|[0-9]{1,5}.[0-9]{1,3})',ticker)
    instrument_type = re.findall(r'(CA|PA|PE|CE)',ticker)
    return " ".join(instrument_name,year,month,strike_price,instrument_type)
all_files_tickers['instrument_name','year','month','strike_price','instrument_type']=all_files_tickers['ticker'].apply(lambda x: get_instrument_params(x))
print(all_files_tickers)

I need the desired output like below:

Ticker instrument_name year month strike_price instrument_type
ABAN10OCTFUT ABAN 10 OCT - FUT
ABAN10SEP700PA ABAN 10 SEP 700 PA
ABAN10SEP720PA ABAN 10 SEP 720 PA
ABAN10SEP740PA ABAN 10 SEP 740 PA
ABAN10SEP760PA ABAN 10 SEP 760 PA

Thanks



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source