Decision Tree Warmup Activity¶
Enter your name in the cell below:
YOUR ANSWER HERE
Gini Impurity¶
We've discussed entropy as one possible measure of impurity for the decision tree construction algorithm. Another option is Gini impurity, which is defined as:
$$ \phi(\mathbf{p}) = \sum_i p_i(1-p_i) $$
Here $\mathbf{p} = (p_1, \ldots, p_n)$ and each $p_i$ is the fraction of elements in the node that belong to class $i$. This expresses the expected fraction of incorrect predictions in the node if the class of each element were predicted by randomly selecting a label according to the distribution of classes in the node. The value is 0 when all elements belong to the same class, and it increases as the class mix becomes more uniform. Because the $p_i$ must sum to one, this can be rewritten as:
$$ \phi(\mathbf{p}) = 1 - \sum_i p_i^2 $$
* Notation follows: Breiman, Leo. "Technical note: Some properties of splitting criteria." Machine Learning 24.1 (1996): 41-47.
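As a quick check of the formula (not one of the exercises below), a node containing two classes in equal proportion has $\mathbf{p} = (\tfrac{1}{2}, \tfrac{1}{2})$, so
$$ \phi(\mathbf{p}) = 1 - \left(\tfrac{1}{4} + \tfrac{1}{4}\right) = \tfrac{1}{2}, $$
which is the largest impurity possible with two classes.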
Exercise 1¶
What is the Gini impurity of a node containing 3 items from class A, 7 items from class B and 10 items from class C?
YOUR ANSWER HERE
Split Generator for the Decision Tree PA¶
I'm providing one potentially useful utility function that you can use in your decision tree construction algorithm. The split_generator function illustrated below allows you to iterate over all possible splits for a data set. Make sure you understand what's happening in this code.
import numpy as np
import decision_tree
# This is the same housing data we worked with in class.
# The three columns are "homeowner", "marital status" and "income".
# The label array (y) represents "defaulted borrower".
X = np.array([[1., 0., 120.],
              [0., 1., 100.],
              [1., 0., 70.],
              [0., 0., 150.],
              [1., 2., 85.],
              [0., 1., 80.],
              [0., 0., 75.]])
y = np.array([0, 0, 0, 1, 0, 1, 1])
# Instantiate a generator
split_gen = decision_tree.split_generator(X, y)
# Print the information associated with the first two splits:
print("FIRST SPLIT:")
print(next(split_gen))
print("\nSECOND SPLIT:")
print(next(split_gen))
# Now let's count to see if we get the expected number of splits:
counter = 0
for split in decision_tree.split_generator(X, y):
    counter += 1
print("\nThere are {} possible splits.".format(counter))
Exercise 2 - Implement Gini Impurity¶
Complete the two unfinished functions below. These will be useful in your decision tree implementation.
Note that np.fromiter can be used to convert any iterable of numbers into a numpy array:
a = [1.0, 2.0, 3.0]
array = np.fromiter(a, dtype=float)
print(array)
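For example, one possible way (just a sketch, not a required approach) to get label counts as a numpy array is to combine np.fromiter with collections.Counter, which the next cell also imports:
from collections import Counter
# Count how many times each label occurs, then convert the counts to an array.
labels = [0, 0, 1, 2, 2, 2]
counts = np.fromiter(Counter(labels).values(), dtype=float)
print(counts)  # [2. 1. 3.]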
from collections import Counter
def impurity(y, y_counts=None):
    """ Calculate Gini impurity for the class labels y.
    If y_counts is provided it will be the counts of the labels in y.
    """
    # YOUR CODE HERE
    raise NotImplementedError()
# TESTS FOR IMPURITY
np.testing.assert_allclose(impurity(y), 0.48979, atol=.001)
split_gen = decision_tree.split_generator(X, y)
split = next(split_gen)
np.testing.assert_allclose(impurity(split.y_left, split.counts_left), 0.375, atol=.001)
np.testing.assert_allclose(impurity(split.y_right, split.counts_right), 0, atol=.001)
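A weighted impurity combines the impurities of the two branches of a split. The test values below are consistent with the standard definition, which weights each branch by its share of the samples:
$$ \phi_{split} = \frac{n_{left}}{n}\,\phi(\mathbf{p}_{left}) + \frac{n_{right}}{n}\,\phi(\mathbf{p}_{right}) $$
where $n_{left}$ and $n_{right}$ are the number of elements sent to each side and $n = n_{left} + n_{right}$.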
def weighted_impurity(split):
    """ Weighted Gini impurity for a possible split. """
    # YOUR CODE HERE
    raise NotImplementedError()
# TESTS FOR WEIGHTED IMPURITY
split_gen = decision_tree.split_generator(X, y)
split = next(split_gen)
np.testing.assert_allclose(weighted_impurity(split), 0.214286, atol=.001)
split = next(split_gen)
np.testing.assert_allclose(weighted_impurity(split), 0.47619, atol=.001)