Decision Trees

Decision trees are a machine learning algorithm that models decisions through a tree-like structure. Each internal node tests a specific feature, branches lead to the possible outcomes of that test, and leaf nodes carry the final prediction. They are used for both classification and regression tasks.

Importing Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Dataset
X_train: for each example, contains 3 binary features:
- Ear Shape (1 if pointy, 0 otherwise)
- Face Shape (1 if round, 0 otherwise)
- Whiskers (1 if present, 0 otherwise)

y_train: whether the animal is a cat:
- 1 if the animal is a cat
- 0 otherwise
X_train = np.array([[1, 1, 1],
                    [0, 0, 1],
                    [0, 1, 0],
                    [1, 0, 1],
                    [1, 1, 1],
                    [1, 1, 0],
                    [0, 0, 0],
                    [1, 1, 0],
                    [0, 1, 0],
                    [0, 1, 0]])

y_train = np.array([1, 1, 0, 0, 1, 1, 0, 1, 0, 0])
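As a quick sanity check (these two lines are an addition, not part of the original cells), the dataset has 10 examples with 3 features each, 5 of which are cats:

print(X_train.shape)       # (10, 3): 10 animals, 3 binary features
print(int(y_train.sum()))  # 5 cats out of 10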
Entropy Function
\[H(p_1) = -p_1 \log_2(p_1) - (1 - p_1) \log_2(1 - p_1)\]
def entropy(p):
    # A pure node (p = 0 or p = 1) has zero entropy
    if p == 0 or p == 1:
        return 0
    else:
        return -p * np.log2(p) - (1 - p) * np.log2(1 - p)
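A few spot checks of the function, added here for intuition: a 50/50 node is maximally impure, while a pure node has no entropy at all.

print(entropy(0.5))  # 1.0: classes perfectly mixed
print(entropy(0.8))  # ~0.72
print(entropy(1.0))  # 0: pure node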
p_values = np.linspace(0.000, 1.000, 100)
entropy_values = [entropy(p) for p in p_values]

plt.figure(figsize=(8, 6))
plt.plot(p_values, entropy_values, label='Entropy', color='b')
plt.xlabel('p')
plt.ylabel('Entropy')
plt.title('Entropy vs. p')
plt.grid(True)
plt.legend()
plt.show()
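The resulting curve is 0 at p = 0 and p = 1 (pure nodes) and peaks at 1 bit when p = 0.5, where the two classes are perfectly mixed.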
Information Gain
\[ \text{Information Gain} = H(p^{\text{node}}) - \left( w^{\text{left}} H(p^{\text{left}}) + w^{\text{right}} H(p^{\text{right}}) \right) \]
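For intuition, here is the formula evaluated by hand for one concrete split (splitting the full 10-example root on Ear Shape, feature 0, using the data above; these numbers can be checked against the code that follows). The root holds 5 cats out of 10; the pointy-ear branch gets examples [0, 3, 4, 5, 7] with 4 cats out of 5, and the other branch gets [1, 2, 6, 8, 9] with 1 cat out of 5:

\[ H(0.5) = 1, \qquad w^{\text{left}} = w^{\text{right}} = \tfrac{5}{10}, \qquad H(0.8) = H(0.2) \approx 0.72 \]

\[ \text{Information Gain} \approx 1 - (0.5 \cdot 0.72 + 0.5 \cdot 0.72) \approx 0.28 \]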
def split_indices(X, node_indices, feature):
    # Examples with feature value 1 go left, 0 go right
    left_indices = []
    right_indices = []

    for i in node_indices:
        if X[i][feature] == 1:
            left_indices.append(i)
        else:
            right_indices.append(i)

    return left_indices, right_indices
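For example, splitting the full dataset on feature 0 (Ear Shape) sends the five pointy-eared animals left (these check lines are an addition, not part of the original cells):

left, right = split_indices(X_train, list(range(10)), 0)
print(left)   # [0, 3, 4, 5, 7]
print(right)  # [1, 2, 6, 8, 9]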
def weighted_entropy(X, y, node_indices, index_feature):
    left_indices, right_indices = split_indices(X, node_indices, index_feature)

    # Note: branch weights are fractions of the full training set (len(X)),
    # which matches the formula above exactly when the node is the root
    w_left = len(left_indices) / len(X)
    w_right = len(right_indices) / len(X)

    p_left = 0
    p_right = 0

    if len(left_indices) > 0:
        p_left = sum(y[left_indices]) / len(left_indices)

    if len(right_indices) > 0:
        p_right = sum(y[right_indices]) / len(right_indices)

    return w_left * entropy(p_left) + w_right * entropy(p_right)
def information_gain(X, y, node_indices, index_feature):
    X_node, y_node = X[node_indices], y[node_indices]

    # Fraction of cats at this node, and the node's entropy
    p_node = sum(y_node[y_node == 1]) / len(y_node)
    node_entropy = entropy(p_node)

    weighted_entropy_temp = weighted_entropy(X, y, node_indices, index_feature)
    return node_entropy - weighted_entropy_temp
Here we score each candidate split for the node containing examples [0, 3, 4, 5, 7] (the pointy-eared animals):

node_indices = [0, 3, 4, 5, 7]

for i, feature_name in enumerate(['Ear Shape', 'Face Shape', 'Whiskers']):
    i_gain = information_gain(X_train, y_train, node_indices, i)
    print(f"Feature: {feature_name}, information gain if we split this node using this feature: {i_gain:.2f}")
Feature: Ear Shape, information gain if we split this node using this feature: 0.36
Feature: Face Shape, information gain if we split this node using this feature: 0.72
Feature: Whiskers, information gain if we split this node using this feature: 0.45
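Face Shape yields the highest gain for this node, which matches the depth-1 split on feature 1 in the tree trace below.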
Making the Tree
def get_best_split(X, y, node_indices):
    num_features = X.shape[1]
    best_feature = -1
    max_info_gain = 0

    # Pick the feature with the highest information gain
    for feature in range(num_features):
        info_gain = information_gain(X, y, node_indices, feature)
        if info_gain > max_info_gain:
            max_info_gain = info_gain
            best_feature = feature

    return best_feature
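A quick check (again an addition): at the root, Ear Shape comes out on top, consistent with the Depth 0 split in the trace below.

print(get_best_split(X_train, y_train, list(range(10))))  # 0 -> Ear Shape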
def build_tree_recursive(X, y, node_indices, branch_name, max_depth, current_depth, tree):
    # Stop at the maximum depth and report a leaf
    if current_depth == max_depth:
        formatting = " " * current_depth + "-" * current_depth
        print(formatting, "%s leaf node with indices" % branch_name, node_indices)
        return

    best_feature = get_best_split(X, y, node_indices)

    formatting = "-" * current_depth
    print("%s Depth %d, %s: Split on feature: %d" % (formatting, current_depth, branch_name, best_feature))

    left_indices, right_indices = split_indices(X, node_indices, best_feature)
    tree.append((left_indices, right_indices, best_feature))

    build_tree_recursive(X, y, left_indices, "Left", max_depth, current_depth + 1, tree)
    build_tree_recursive(X, y, right_indices, "Right", max_depth, current_depth + 1, tree)
    return tree
tree = []
build_tree_recursive(X_train, y_train, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], "Root", max_depth=2, current_depth=0, tree=tree)
Depth 0, Root: Split on feature: 0
- Depth 1, Left: Split on feature: 1
-- Left leaf node with indices [0, 4, 5, 7]
-- Right leaf node with indices [3]
- Depth 1, Right: Split on feature: 2
-- Left leaf node with indices [1]
-- Right leaf node with indices [2, 6, 8, 9]
[([0, 3, 4, 5, 7], [1, 2, 6, 8, 9], 0),
([0, 4, 5, 7], [3], 1),
([1], [2, 6, 8, 9], 2)]
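The walkthrough stops once the tree is built, but to illustrate how the learned splits could be used, here is a minimal classifier that hard-codes the tree printed above, labeling each leaf with the majority class of its training examples from y_train. The predict_one helper is a hypothetical addition for illustration, not part of the original code.

def predict_one(x):
    # Hard-coded version of the learned tree above;
    # leaf labels are the majority class at each leaf
    if x[0] == 1:                     # root split: Ear Shape (feature 0)
        return 1 if x[1] == 1 else 0  # left splits on Face Shape: [0, 4, 5, 7] -> cat, [3] -> not
    else:
        return 1 if x[2] == 1 else 0  # right splits on Whiskers: [1] -> cat, [2, 6, 8, 9] -> not

print(predict_one([1, 1, 0]))  # pointy ears, round face -> 1 (cat)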