Linear Regression With One Variable
Libraries Required
import numpy as np
import matplotlib.pyplot as plt
import math
Some Plotting Fxn
def plt_house_x(X, y, f_wb=None, ax=None):
    # Plot the training points and, on the same axes, the model's prediction line.
    ax.scatter(X, y, marker='x', c='r', label="Actual Value")
    ax.set_ylabel("Y-data")
    ax.set_xlabel("X-data")
    ax.plot(X, f_wb, label="Our Prediction")
    ax.legend()
def mk_cost_lines(x, y, w, b, ax):
    # Draw a dotted vertical line from each data point to its prediction,
    # annotated with that point's squared-error contribution to the cost.
    label = 'Cost of point'
    for p in zip(x, y):
        f_wb_p = w*p[0] + b
        c_p = ((f_wb_p - p[1])**2)/2
        c_p_txt = c_p
        ax.vlines(p[0], p[1], f_wb_p, lw=3, ls='dotted', label=label)
        label = ''          # only label the first line in the legend
        cxy = [p[0], p[1] + (f_wb_p - p[1])/2]
        ax.annotate(f'{c_p_txt:0.0f}', xy=cxy, xycoords='data',
                    xytext=(5, 0), textcoords='offset points')
def plt_stationary(x_train, y_train, w0, b):
    # Left panel: data, prediction line and per-point cost lines.
    # Right panel: contour plot of Cost(w, b) with the current (w0, b) marked.
    fig, ax = plt.subplots(1, 2, figsize=(9, 8))
    fig.canvas.toolbar_position = 'top'

    w_range = np.array([200-300., 200+300])
    b_range = np.array([50-300., 50+300])
    b_space = np.linspace(*b_range, 100)
    w_space = np.linspace(*w_range, 100)

    tmp_b, tmp_w = np.meshgrid(b_space, w_space)
    z = np.zeros_like(tmp_b)
    for i in range(tmp_w.shape[0]):
        for j in range(tmp_w.shape[1]):
            z[i, j] = cost_fxn(x_train, y_train, tmp_w[i][j], tmp_b[i][j])
            if z[i, j] == 0:
                z[i, j] = 1e-6      # avoid log(0) in the contour plot

    f_wb = np.dot(x_train, w0) + b
    mk_cost_lines(x_train, y_train, w0, b, ax[0])
    plt_house_x(x_train, y_train, f_wb=f_wb, ax=ax[0])

    CS = ax[1].contour(tmp_w, tmp_b, np.log(z), levels=12,
                       linewidths=2, alpha=0.7)
    ax[1].set_title('Cost(w,b) [Contour Plot]')
    ax[1].set_xlabel('w', fontsize=10)
    ax[1].set_ylabel('b', fontsize=10)
    ax[1].set_xlim(w_range)
    ax[1].set_ylim(b_range)
    cscat = ax[1].scatter(w0, b, s=100, zorder=10)
    chline = ax[1].hlines(b, ax[1].get_xlim()[0], w0,
                          lw=4, ls='dotted')
    cvline = ax[1].vlines(w0, ax[1].get_ylim()[0], b,
                          lw=4, ls='dotted')

    fig.tight_layout()
    return fig, ax, [cscat, chline, cvline]
def soup_bowl(x_train, y_train, w0=200, b0=-100):
    # 3D surface of the cost J(w, b) with the current (w0, b0) marked in red.
    fig = plt.figure(figsize=(10, 10))

    ax = fig.add_subplot(111, projection='3d')
    ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
    ax.zaxis.set_rotate_label(False)
    ax.view_init(30, -30)

    w = np.linspace(-100, 300, 100)
    b = np.linspace(-200, 300, 100)

    z = np.zeros((len(w), len(b)))
    for i in range(len(w)):
        for j in range(len(b)):
            z[i, j] = cost_fxn(x_train, y_train, w[i], b[j])

    # 'ij' indexing so that W[i, j] = w[i] and B[i, j] = b[j], matching how z was filled.
    W, B = np.meshgrid(w, b, indexing='ij')

    ax.plot_surface(W, B, z, cmap="Spectral_r", alpha=0.7, antialiased=False)
    ax.plot_wireframe(W, B, z, color='k', alpha=0.1)
    ax.set_xlabel("$w$")
    ax.set_ylabel("$b$")
    ax.set_zlabel("$J(w,b)$", rotation=90)
    ax.set_title("$J(w,b)$", size=15)
    cscat = ax.scatter(w0, b0, s=100, color='red')

    plt.show()
Dataset
x_train = np.array([1.0, 1.7, 2.0, 2.5, 3.0, 3.2])
y_train = np.array([250, 300, 480, 430, 630, 730])
w = 200
b = -100
Finding Function f_wb
\[ f_{w,b}(x^{(i)}) = wx^{(i)} + b \]
def fxn(x, w, b):
    f_wb = w * x + b
    return f_wb
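As a quick check of fxn, the starting parameters defined above (w = 200, b = -100) can be pushed through the whole training set; the values in the comments below are hand-worked arithmetic, not captured notebook output.

# Sanity check: the first example x = 1.0 maps to 200 * 1.0 - 100 = 100,
# far below its target y = 250, so the starting line underestimates every point.
print(fxn(x_train, w, b))   # expected: [100. 240. 300. 400. 500. 540.]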
Finding Cost Function
\[J(w,b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})^2\]
def cost_fxn(x, y, w, b):
    m = x.shape[0]

    f_wb = fxn(x, w, b)
    cost = (f_wb - y) ** 2
    total_cost = (1 / (2 * m)) * np.sum(cost)

    return total_cost
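Evaluating the cost at the same starting parameters gives a baseline to compare against the gradient-descent run later; the figure quoted in the comment is hand-worked from the six squared errors, so treat it as approximate.

# Baseline cost at w = 200, b = -100 (hand-worked): the squared errors are
# 150^2 + 60^2 + 180^2 + 30^2 + 130^2 + 190^2 = 112400, and 112400 / (2*6) ≈ 9366.67.
print(cost_fxn(x_train, y_train, w, b))   # expected: about 9366.67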
Some Plots ->
Original w, b
fig, ax, dyn_items = plt_stationary(x_train, y_train, w, b)
soup_bowl(x_train, y_train, w, b)
Finding dJ/dw and dJ/db
\[ \frac{\partial J(w,b)}{\partial w} = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)})x^{(i)} \\ \frac{\partial J(w,b)}{\partial b} = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{w,b}(x^{(i)}) - y^{(i)}) \\ \]
def compute_gradient(x, y, w, b):
    m = x.shape[0]
    a = fxn(x, w, b) - y          # per-example prediction errors
    dj_dw = np.dot(a, x) / m
    dj_db = np.sum(a) / m
    return dj_dw, dj_db
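A finite-difference check is a common way to confirm the analytic gradients; the helper below is an addition (not in the original notebook) and the step size eps is an arbitrary choice.

def numerical_gradient(x, y, w, b, eps=1e-4):
    # Central differences on J(w, b); eps is an arbitrary small step.
    dj_dw_num = (cost_fxn(x, y, w + eps, b) - cost_fxn(x, y, w - eps, b)) / (2 * eps)
    dj_db_num = (cost_fxn(x, y, w, b + eps) - cost_fxn(x, y, w, b - eps)) / (2 * eps)
    return dj_dw_num, dj_db_num

# The analytic and numerical gradients at the starting point should agree
# to several decimal places.
print(compute_gradient(x_train, y_train, w, b))
print(numerical_gradient(x_train, y_train, w, b))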
Gradient Descent
\[\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline \; w &= w - \alpha \frac{\partial J(w,b)}{\partial w} \; \newline b &= b - \alpha \frac{\partial J(w,b)}{\partial b} \newline \rbrace \end{align*}\] where, parameters \(w\), \(b\) are updated simultaneously.
def gradient_descent(x, y, w, b, alpha, num_iters):
    J_history = []
    p_history = []

    for i in range(num_iters + 1):
        # Both gradients are computed before either parameter changes,
        # so w and b are updated simultaneously as the formula requires.
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w -= alpha * dj_dw
        b -= alpha * dj_db

        cost = cost_fxn(x, y, w, b)

        J_history.append(cost)
        p_history.append((w, b))
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4}: Cost {cost:.2e}, w: {w}, b: {b}")
    return w, b, J_history, p_history
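The rule above says "repeat until convergence" while gradient_descent runs a fixed number of iterations; a minimal sketch of a convergence-based variant is shown below (the tolerance tol, the max_iters cap, and the stopping rule are assumptions, not part of the original).

def gradient_descent_until_converged(x, y, w, b, alpha, tol=1e-8, max_iters=100000):
    # Stop once the per-step improvement in cost drops below tol
    # (tol and max_iters are arbitrary safeguards, not from the original).
    prev_cost = cost_fxn(x, y, w, b)
    for _ in range(max_iters):
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w -= alpha * dj_dw
        b -= alpha * dj_db
        cost = cost_fxn(x, y, w, b)
        if abs(prev_cost - cost) < tol:
            break
        prev_cost = cost
    return w, b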
iterations = 10000
tmp_alpha = 1.0e-2

w_final, b_final, J_hist, p_hist = gradient_descent(
    x_train, y_train, w, b, tmp_alpha, iterations)
print(f"(w,b) found by gradient descent: {w_final},{b_final}")

f_wb = fxn(x_train, w_final, b_final)
print("Cost is", cost_fxn(x_train, y_train, w_final, b_final))

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(1000 + np.arange(len(J_hist[1000:])), J_hist[1000:])
ax.set_title("Cost vs. iteration (end)")
ax.set_ylabel('Cost')
ax.set_xlabel('iteration step')
plt.show()
Iteration 0: Cost 8.46e+03, w: 202.80833333333334, b: -98.76666666666667
Iteration 1000: Cost 1.80e+03, w: 223.52137552092964, b: -32.28296188836681
Iteration 2000: Cost 1.75e+03, w: 215.18089273879156, b: -11.838434218472974
Iteration 3000: Cost 1.74e+03, w: 211.75363820423198, b: -3.4374097221896087
Iteration 4000: Cost 1.74e+03, w: 210.3453176127893, b: 0.014722492685807966
Iteration 5000: Cost 1.74e+03, w: 209.76661332681294, b: 1.4332657708600276
Iteration 6000: Cost 1.74e+03, w: 209.5288133168987, b: 2.0161707428604965
Iteration 7000: Cost 1.74e+03, w: 209.43109701154987, b: 2.255696890289678
Iteration 8000: Cost 1.74e+03, w: 209.39094362242898, b: 2.3541224966202314
Iteration 9000: Cost 1.74e+03, w: 209.37444387193037, b: 2.394567350284724
Iteration 10000: Cost 1.74e+03, w: 209.3676638273943, b: 2.4111868688115714
(w,b) found by gradient descent: 209.3676638273943,2.4111868688115714
Cost is 1735.8832116012204
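With the learned parameters, a prediction for a new input is just fxn again; the figure in the comment is plain arithmetic on the printed (w, b), and the input x = 1.2 is an arbitrary example value, not from the original notebook.

# Prediction at an arbitrary new input x = 1.2 using the learned parameters:
# roughly 209.37 * 1.2 + 2.41 ≈ 253.7.
print(fxn(1.2, w_final, b_final))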
Some Plots ->
Final w, b
fig, ax, dyn_items = plt_stationary(x_train, y_train, w_final, b_final)
soup_bowl(x_train, y_train, w_final, b_final)
Regularized Linear Regression
Finding Cost Fxn
\[J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 + \frac{\lambda}{2m} \sum_{j=0}^{n-1} w_j^2 \]
def cost_fxn_regularized(X, y, w, b, lambda_=1):
    m = X.shape[0]
    cost = cost_fxn(X, y, w, b)
    # Add the regularization term (lambda / 2m) * sum(w_j^2) from the formula above.
    cost += (lambda_ / (2 * m)) * np.sum(w ** 2)
    return cost
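As a quick check of the regularized cost, lambda_ = 0 should reproduce the plain cost exactly and larger values should only add to it; the loop below and its lambda values are an added illustration, not part of the original.

# lambda_ = 0 reproduces cost_fxn; larger (arbitrary) lambda values add the penalty.
for lam in (0.0, 1.0, 10.0):
    print(lam, cost_fxn_regularized(x_train, y_train, w_final, b_final, lambda_=lam))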
Finding dJ/dw and dJ/db
\[\begin{align*} \frac{\partial J(\mathbf{w},b)}{\partial w_j} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} + \frac{\lambda}{m} w_j \\ \frac{\partial J(\mathbf{w},b)}{\partial b} &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \end{align*}\]
def compute_gradient_regularized(X, y, w, b, lambda_):
    m = X.shape[0]
    dj_dw, dj_db = compute_gradient(X, y, w, b)

    # Only dj_dw picks up a regularization term; b is not regularized.
    dj_dw += (lambda_ / m) * w

    # Return in the same (dj_dw, dj_db) order as compute_gradient.
    return dj_dw, dj_db
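A single regularized descent step shows how these gradients slot into the same update rule as before; alpha and lambda_ below are arbitrary illustrative values, and w_reg / b_reg are hypothetical names introduced here.

# One regularized gradient-descent step starting from the learned parameters
# (alpha and lambda_ are arbitrary illustrative values).
alpha, lambda_ = 1.0e-2, 1.0
dj_dw, dj_db = compute_gradient_regularized(x_train, y_train, w_final, b_final, lambda_)
w_reg = w_final - alpha * dj_dw
b_reg = b_final - alpha * dj_db
print(w_reg, b_reg)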