from collections import Counter
from math import log


def entropy(y):
    """Return the Shannon entropy of the labels in ``y`` (natural logarithm).

    Args:
        y: A sized iterable of hashable labels (anything ``Counter`` and
            ``len`` accept).

    Returns:
        The sum of ``-p * log(p)`` over every distinct label, where ``p`` is
        that label's relative frequency in ``y``. An empty ``y`` yields
        ``0.0`` because there are no labels to sum over.
    """
    total = len(y)  # loop-invariant, so compute it once
    # Counter maps each distinct label in y to the number of times it occurs.
    counter = Counter(y)
    res = 0.0
    for num in counter.values():
        p = num / total
        res += -p * log(p)
    return res
# Find the dimension d and threshold value that give the smallest entropy.
def try_split(X, y):
    """Exhaustively search for the split with the lowest entropy score.

    For every column ``d`` of ``X`` the rows are ordered by that column, and
    the midpoint between each pair of adjacent, distinct values is tried as a
    threshold ``v``. Each candidate is scored as
    ``entropy(y_l) + entropy(y_r)`` on the label subsets returned by
    ``split(X, y, d, v)``.

    Args:
        X: 2-D array indexed as ``X[row, column]``; must support ``.shape``,
            ``len`` and column slicing ``X[:, d]``.
        y: Labels passed through to ``split``.
            # NOTE(review): presumably one label per row of X -- confirm.

    Returns:
        A tuple ``(best_entropy, best_d, best_v)``. If no candidate threshold
        exists (e.g. every column is constant or X has fewer than two rows),
        the initial values ``(float('inf'), -1, -1)`` are returned.
    """
    best_entropy = float('inf')
    best_d, best_v = -1, -1
    for d in range(X.shape[1]):  # try every feature dimension
        sorted_index = np.argsort(X[:, d])
        # Candidate thresholds lie between consecutive samples in sorted order.
        for i in range(1, len(X)):
            lower = X[sorted_index[i - 1], d]
            upper = X[sorted_index[i], d]
            if lower != upper:  # equal neighbours give no usable threshold
                v = (lower + upper) / 2
                # `split` is defined elsewhere in the project; only the two
                # label subsets it returns are needed for scoring.
                _, _, y_l, y_r = split(X, y, d, v)
                # NOTE(review): the score is the plain (unweighted) sum of the
                # two subsets' entropies -- confirm this is intended rather
                # than a size-weighted average.
                e = entropy(y_l) + entropy(y_r)
                if e < best_entropy:
                    best_entropy, best_d, best_v = e, d, v
    return best_entropy, best_d, best_v