def multi(y):
    """Return the most frequent label in ``y`` (the majority class).

    Args:
        y: Any iterable of hashable labels, e.g. a list, a tuple or a 1-D
            NumPy array. It does not need to provide a ``count`` method.

    Returns:
        The label with the highest number of occurrences. When several
        labels are tied, the one that appears first in ``y`` is returned.

    Raises:
        ValueError: If ``y`` contains no labels.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    # One pass over the labels instead of one ``y.count`` scan per distinct
    # label; this also works for inputs without ``.count`` (NumPy arrays).
    counts = Counter(y)
    if not counts:
        raise ValueError("multi() requires at least one label")
    # For a single item, most_common returns the first-inserted maximum,
    # which makes tie-breaking deterministic.
    return counts.most_common(1)[0][0]
def ID3(X, y, epsilon):
    """Recursively build an ID3 decision tree from samples ``X`` and labels ``y``.

    Args:
        X: 2-D array indexed as ``X[:, feature]`` and ``X[mask]``
            (presumably a NumPy array of shape (n_samples, n_features) --
            confirm against the caller).
        y: Labels aligned with the rows of ``X``. Must support boolean-mask
            indexing (``y[mask]``) whenever a split is performed.
        epsilon: Threshold on the best feature score; below it the node
            becomes a leaf.

    Returns:
        Either a class label (leaf), or a dict with the key ``'feature'``
        holding the index of the splitting column plus one entry per
        distinct value of that column, mapping to the subtree built from
        the matching rows.

    Notes:
        * Features are scored with ``svm(X, y, feature)``, which is defined
          elsewhere; judging by the original comments it is expected to
          return the information gain of that feature -- TODO confirm.
        * The chosen column is not removed from the child subsets, so the
          ``'feature'`` indices always refer to columns of the ``X`` that
          was passed in at this level.
    """
    # (1) All instances in D belong to the same class C_k:
    #     use C_k as the label of this node.
    if len(set(y)) == 1:
        return y[0]
    # (2) The feature set A is empty: label the node with the majority class.
    #     ``list(y)`` lets ``multi`` work on array inputs that lack ``.count``.
    if X.shape[1] == 0:
        return multi(list(y))
    # (3) Score every feature in A against D and keep the best one (A_g).
    bestInfo = 0
    bestfeature = None
    for feature in range(X.shape[1]):
        # Score each feature once and reuse the value for the comparison.
        info = svm(X, y, feature)
        if info > bestInfo:
            bestInfo = info
            bestfeature = feature
    # (4) If no feature scores above zero, or the score of A_g is below the
    #     threshold epsilon, label the node with the majority class of D.
    #     The ``None`` check prevents an unbound ``bestfeature`` when every
    #     score is <= 0 and epsilon <= 0.
    if bestfeature is None or bestInfo < epsilon:
        return multi(list(y))
    ret = {'feature': bestfeature}
    # (5) For every possible value a_i of A_g, recurse on the matching subset.
    column = X[:, bestfeature]
    for ai in set(column):
        mask = column == ai  # computed once, shared by X and y
        ret[ai] = ID3(X[mask], y[mask], epsilon)
    return ret