'How do I implement tree depth control on my current decision tree?

How do I implement max_depth on my current tree algorithm? I am currently working on a classifier for mushroom-related data "agaricus-lepiota.data". I want to set the stopping depth of my tree to 3 so it wouldn't further splitting. Here is my code:

import pandas as pd
import numpy as np

eps = np.finfo(float).eps
from numpy import log2 as log
df = pd.read_csv('agaricus-lepiota.data', header=None)
df.columns = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22"]
del df['11']

def find_entropy(df):
    Class = df.keys()[0]
    entropy = 0
    values = df[Class].unique()
    for value in values:
        fraction = df[Class].value_counts()[value]/len(df[Class])
        entropy += -fraction*np.log2(fraction)
    return entropy

def find_entropy_attribute(df,attribute):
    Class = df.keys()[0]   
    target_variables = df[Class].unique() 
    variables = df[attribute].unique() 
    entropy2 = 0
    for variable in variables:
        entropy = 0
        for target_variable in target_variables:
            num = len(df[attribute][df[attribute]==variable][df[Class] ==target_variable])
            den = len(df[attribute][df[attribute]==variable])
            fraction = num/(den+eps)
            entropy += -fraction*log(fraction+eps)
        fraction2 = den/len(df)
        entropy2 += -fraction2*entropy
    return abs(entropy2)

def find_winner(df):
    Entropy_att = []
    IG = []
    for key in df.keys()[1:]:
        IG.append(find_entropy(df)-find_entropy_attribute(df,key))
    return df.keys()[1:][np.argmax(IG)] 

def get_subtable(df, node, value):
    return df[df[node] == value].reset_index(drop=True)

def buildTree(df,tree=None): 
    Class = df.keys()[0]   
    node = find_winner(df)
    attValue = np.unique(df[node])  
    if tree is None:                    
        tree={}
        tree[node] = {}
    for value in attValue:
        subtable = get_subtable(df,node,value)
        clValue,counts = np.unique(subtable['0'],return_counts=True)                        
        if len(counts)==1:
            tree[node][value] = clValue[0]                                                    
        else:        
            tree[node][value] = buildTree(subtable) #Calling the function recursively               
    return tree

train_df = df.iloc[:6093,:]
test_df = df.iloc[6093:,:]

tree= buildTree(train_df)
import pprint
pprint.pprint(tree)

And here is my current decision tree. As you can see there, the depth of my tree is >3. How do I set the max depth of my tree to 3?

{'5': {'a': 'e',

       'c': 'p',
       'f': 'p',
       'l': 'e',
       'n': {'20': {'h': 'e',
                    'k': 'e',
                    'n': 'e',
                    'r': 'p',
                    'w': {'22': {'d': 'p',
                                 'g': 'e',
                                 'l': {'3': {'c': 'e', 'n': 'e', 'w': 'p'}},
                                 'w': 'e'}}}},
       'p': 'p',
       's': 'p',
       'y': 'p'}}

I could provide the data if you needed



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source