# Random forest from scratch in Python
import numpy as np
from collections import Counter
# Implement the decision tree
def entropy(y):
    """Shannon entropy of an integer label array, in bits."""
    counts = np.bincount(y)
    counts = counts[counts != 0]      # drop classes that do not occur
    probs = counts / len(y)
    return -np.sum(probs * np.log2(probs))
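# Illustrative sanity check (not part of the implementation): a perfectly
# balanced binary label set carries exactly one bit of entropy.
#   entropy(np.array([0, 0, 1, 1]))  # -> 1.0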
class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        self.feature = feature      # index of the feature this node splits on
        self.threshold = threshold  # split threshold for that feature
        self.left = left
        self.right = right
        self.value = value          # class label if this node is a leaf

    def is_leaf(self):
        return self.value is not None
class Tree:
    def __init__(self, min_samples_split=2, max_depth=100, n_feat=None):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feat = n_feat        # number of features to consider at each split
        self.root = None

    def fit(self, X, y):
        n_features = X.shape[1]
        # never consider more features than the data actually has
        self.n_feat = n_features if self.n_feat is None else min(n_features, self.n_feat)
        self.root = self._grow_tree(X, y)

    def predict(self, X):
        return np.array([self._traverse(x, self.root) for x in X])

    def _traverse(self, x, node):
        if node.is_leaf():
            return node.value
        if x[node.feature] <= node.threshold:
            return self._traverse(x, node.left)
        return self._traverse(x, node.right)
    def _grow_tree(self, X, y, depth=0):
        n_samples, n_features = X.shape
        n_classes = len(np.unique(y))
        # stopping criteria: max depth reached, too few samples to split, or pure node
        if (depth >= self.max_depth
                or n_samples < self.min_samples_split
                or n_classes == 1):
            return Node(value=self._most_common_label(y))
        # consider only a random subset of features at each split
        feat_idxs = np.random.choice(n_features, self.n_feat, replace=False)
        best_gain, best_thresh, best_feat = self._best_criteria(X, y, feat_idxs)
        if best_gain <= 0:
            # no candidate split reduces entropy -> stop growing
            return Node(value=self._most_common_label(y))
        left_idx, right_idx = self._split(X[:, best_feat], best_thresh)
        left = self._grow_tree(X[left_idx, :], y[left_idx], depth + 1)
        right = self._grow_tree(X[right_idx, :], y[right_idx], depth + 1)
        return Node(best_feat, best_thresh, left, right)
    def _best_criteria(self, X, y, feat_idxs):
        """Exhaustively search the given features for the highest-gain split."""
        best_gain = -1.0
        split_idx, split_threshold = None, None
        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            for threshold in np.unique(X_column):
                gain = self._info_gain(y, X_column, threshold)
                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_threshold = threshold
        return best_gain, split_threshold, split_idx
    def _info_gain(self, y, X_column, threshold):
        # information gain = parent entropy - weighted average of child entropies
        parent_entropy = entropy(y)
        left_idx, right_idx = self._split(X_column, threshold)
        if len(left_idx) == 0 or len(right_idx) == 0:
            return 0.0
        n = len(y)
        n_left, n_right = len(left_idx), len(right_idx)
        child_entropy = (n_left / n) * entropy(y[left_idx]) + (n_right / n) * entropy(y[right_idx])
        return parent_entropy - child_entropy
    def _split(self, X_column, threshold):
        # "<=" must match the comparison used in _traverse, otherwise a sample
        # equal to the threshold is routed differently at train and predict time
        left = np.argwhere(X_column <= threshold).flatten()
        right = np.argwhere(X_column > threshold).flatten()
        return left, right
    def _most_common_label(self, y):
        return Counter(y).most_common(1)[0][0]
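# Minimal smoke test for the tree alone (illustrative; the toy data below is
# made up for demonstration):
#   X_toy = np.array([[1.0], [2.0], [3.0], [10.0], [11.0], [12.0]])
#   y_toy = np.array([0, 0, 0, 1, 1, 1])
#   tree = Tree(max_depth=3)
#   tree.fit(X_toy, y_toy)
#   tree.predict(np.array([[2.5], [10.5]]))  # -> array([0, 1])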
# Implement the random forest
def bootstrap_sample(X, y):
    """Draw n rows with replacement (a bootstrap sample of the training set)."""
    n_samples = X.shape[0]
    idxs = np.random.choice(n_samples, n_samples, replace=True)
    return X[idxs], y[idxs]
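# Note: sampling n rows with replacement leaves each tree seeing roughly
# 63.2% of the unique rows on average (1 - 1/e), which, together with the
# random feature subsets, is what decorrelates the trees in the forest.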
def most_common_label(y):
    return Counter(y).most_common(1)[0][0]
class RandomForest:
    def __init__(self, n_trees=100, min_samples_split=2, max_depth=100, n_feat=None):
        self.n_trees = n_trees
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feat = n_feat

    def fit(self, X, y):
        self.trees = []
        for _ in range(self.n_trees):
            tree = Tree(self.min_samples_split, self.max_depth, self.n_feat)
            X_sample, y_sample = bootstrap_sample(X, y)
            tree.fit(X_sample, y_sample)
            self.trees.append(tree)
    def predict(self, X):
        # (n_trees, n_samples) -> (n_samples, n_trees), then majority vote per sample
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        tree_preds = np.swapaxes(tree_preds, 0, 1)
        return np.array([most_common_label(sample_preds) for sample_preds in tree_preds])
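# Quick end-to-end check. This assumes scikit-learn is installed; it is used
# only to fetch a toy dataset and split it, not by the model itself.
if __name__ == "__main__":
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    data = load_breast_cancer()
    X_train, X_test, y_train, y_test = train_test_split(
        data.data, data.target, test_size=0.2, random_state=42
    )

    forest = RandomForest(n_trees=10, max_depth=10)
    forest.fit(X_train, y_train)
    accuracy = np.mean(forest.predict(X_test) == y_test)
    print(f"test accuracy: {accuracy:.3f}")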