import numpy as np
import matplotlib.pyplot as plt
np.random.seed(666)
x = 2 * np.random.random(size=100)
y = x * 3. + 4. + np.random.normal(size=100)
X = x.reshape(-1, 1)
plt.scatter(x, y)
plt.show()
def dJ(theta, X_b, y):
return X_b.T.dot(X_b.dot(theta)-y) * 2. / len(X_b)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=666)
lin_reg1 = LinearRegression()
lin_reg1.fit_normal(X_train, y_train) # 见5-4
lin_reg1.score(X_test, y_test)
lin_reg2 = LinearRegression()
lin_reg2.fit_gd(X_train, y_train)
lin_reg2.score(X_test, y_test)
真实数据 + 梯度下降法 + eta=0.000001
lin_reg2 = LinearRegression()
lin_reg2.fit_gd(X_train, y_train, eta = 0.000001)
lin_reg2.score(X_test, y_test)
真实数据 + 梯度下降法 + eta=0.000001 + n_iters=1e6
lin_reg2 = LinearRegression()
lin_reg2.fit_gd(X_train, y_train, eta = 0.000001, n_iters=1e6)
lin_reg2.score(X_test, y_test)
from sklearn.preprocessing import StandardScaler
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train_standard = standardScaler.transform(X_train)
X_test_standard = standardScaler.transform(X_test)
lin_reg3 = LinearRegression()
lin_reg3.fit_gd(X_train_standard, y_train)
lin_reg3.score(X_test_standard, y_test)