from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so the test data is transformed with the training
# statistics rather than its own.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_standard = standardScaler.transform(X_train)
X_test_standard = standardScaler.transform(X_test)
StandardScaler + KNN + accuracy
from sklearn.neighbors import KNeighborsClassifier

# Train a 3-nearest-neighbours classifier on the standardized training data.
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train_standard, y_train)

# Score on the standardized test split. The value is not stored; it is only
# visible when this runs as the last expression of an interactive cell.
knn_clf.score(X_test_standard, y_test)
自己实现StandardScaler并封装成类
import numpy as np


class StandardScaler:
    """Standardize features column by column.

    Each column is shifted by the mean and divided by the standard
    deviation that ``fit`` computed for that column.

    Attributes:
        mean_: 1-D array of per-column means, or ``None`` before ``fit``.
        scale_: 1-D array of per-column standard deviations, or ``None``
            before ``fit``.
    """

    def __init__(self):
        # Both stay None until fit() runs; transform() checks for that.
        self.mean_ = None
        self.scale_ = None

    def fit(self, X):
        """Compute the per-column mean and standard deviation of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            This scaler, so calls can be chained.

        Raises:
            AssertionError: If ``X`` is not two-dimensional.
        """
        X = np.asarray(X)
        assert X.ndim == 2, "The dimension of X must be 2"

        # Reduce along axis 0 to get one statistic per column.
        self.mean_ = np.mean(X, axis=0)
        self.scale_ = np.std(X, axis=0)
        return self

    def transform(self, X):
        """Standardize ``X`` with the statistics computed by ``fit``.

        Args:
            X: 2-D array-like with the same number of columns as the data
                passed to ``fit``.

        Returns:
            A new float array with the same shape as ``X``. A column whose
            fitted standard deviation is 0 yields ``nan``/``inf`` values.

        Raises:
            AssertionError: If ``X`` is not two-dimensional, if ``fit`` has
                not been called, or if the column count differs from the
                fitted data.
        """
        X = np.asarray(X)
        assert X.ndim == 2, "The dimension of X must be 2"
        assert self.mean_ is not None and self.scale_ is not None, \
            "must fit before transform!"
        # Without this check a narrower X would be scaled silently with the
        # statistics of unrelated columns.
        assert X.shape[1] == len(self.mean_), \
            "the feature number of X must be equal to mean_ and scale_"

        # mean_ and scale_ broadcast across rows; the cast guarantees a
        # float result regardless of the input dtype.
        return np.asarray((X - self.mean_) / self.scale_, dtype=float)