import numpy as np
from sklearn.datasets import fetch_openml
# Fetch the MNIST data set from OpenML; the returned bundle exposes the
# feature table under 'data' and the labels under 'target'.
mnist_data = fetch_openml("mnist_784")
X = mnist_data['data']  # X.shape = (70000, 784)
y = mnist_data['target']

# The first 60000 rows are used for training and the remaining rows for
# testing. Features and labels alike are copied into float NumPy arrays.
X_train, X_test = np.array(X[:60000], dtype=float), np.array(X[60000:], dtype=float)
y_train, y_test = np.array(y[:60000], dtype=float), np.array(y[60000:], dtype=float)
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()
%time knn_clf.fit(X_train, y_train) # Wall time: 1min 3s
%time knn_clf.score(X_test, y_test) # Wall time = 15min 43, score = 0.9688
from sklearn.decomposition import PCA
pca = PCA(0.9)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train) # X_train_reductionduction.shape = (60000, 784)
knn_clf = KNeighborsClassifier()
%time knn_clf.fit(X_train_reduction, y_train) # Wall time: 15.6 s
X_test_reduction = pca.transform(X_test)
%time knn_clf.score(X_test_reduction, y_test) # Wall time = 2min 20s, score = 0.8728