# Value ranges of features and labels:


def H(y):
    """Return the empirical (Shannon) entropy of the labels *y*, in bits.

    Args:
        y: 1-D array-like of class labels.

    Returns:
        ``-sum(p_k * log2(p_k))`` over the distinct labels in *y*, where
        ``p_k`` is the relative frequency of label ``k``. An empty *y*
        has entropy 0.
    """
    y = np.asarray(y)
    total = y.shape[0]
    if total == 0:
        return 0.0
    # Every label returned by np.unique occurs at least once, so each
    # probability is strictly positive and log2 is always defined.
    _, counts = np.unique(y, return_counts=True)
    probs = counts / total
    # "0.0 - x" rather than "-x" so a pure label set yields 0.0, not -0.0.
    return 0.0 - np.sum(probs * np.log2(probs))


def svm(X, y, feature):
    """Return the information gain of one feature with respect to *y*.

    NOTE: despite its name, this function does not involve support vector
    machines; it computes ``g(D, A) = H(D) - H(D|A)``. The name is kept so
    existing callers continue to work.

    Args:
        X: 2-D array-like of samples, indexed as ``X[:, feature]``.
        y: 1-D array-like of class labels, one per row of *X*.
        feature: Column index of the feature to evaluate.

    Returns:
        The empirical entropy of *y* minus the empirical conditional
        entropy of *y* given the selected feature. Returns 0 when there
        are no samples.
    """
    y = np.asarray(y)
    total = y.shape[0]
    if total == 0:
        return 0.0
    column = np.asarray(X)[:, feature]
    # Empirical conditional entropy H(D|A): entropy of each partition
    # induced by a feature value, weighted by the partition's share.
    conditional_entropy = 0.0
    for value in np.unique(column):
        subset = y[column == value]
        conditional_entropy += subset.shape[0] / total * H(subset)
    return H(y) - conditional_entropy