Linear Regression With Multiple Variables
Libraries Required
import numpy as np
import matplotlib.pyplot as plt
import math
Dataset
x_train = np.array([[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]])
y_train = np.array([460, 232, 178])
b = -100
w = np.array([10, -10, 50, -20])
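Each row of x_train is one training example and each column is one feature (in this housing example the four columns are presumably size, bedrooms, floors, and age, with y_train holding the prices, though the notebook does not label them). A quick shape check, added here as an illustration, confirms the layout:
print(f"x_train shape: {x_train.shape}")   # (3, 4): m = 3 examples, n = 4 features
print(f"y_train shape: {y_train.shape}")   # (3,): one target per example
print(f"w shape: {w.shape}, b: {b}")       # one weight per feature plus a scalar bias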
Finding Function f_wb
\[ f_{\mathbf{w},b}(\mathbf{x}) = w_0x_0 + w_1x_1 +... + w_{n-1}x_{n-1} + b \] \[ f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b \]
def fxn(x, w, b):
    f_wb = np.dot(x, w) + b
    return f_wb
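To connect the dot-product form to the summation form of the model above, here is an equivalent loop over the n features; this is an added sketch for illustration, not part of the original notebook:
def fxn_loop(x, w, b):
    # f_wb(x) = w_0*x_0 + w_1*x_1 + ... + w_{n-1}*x_{n-1} + b, term by term
    n = x.shape[0]
    f_wb = b
    for j in range(n):
        f_wb = f_wb + w[j] * x[j]
    return f_wb

# Both versions give the same prediction for a single example, e.g. the first row
print(fxn(x_train[0], w, b), fxn_loop(x_train[0], w, b))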
Finding Cost Function
\[J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \]
def cost_fxn(x, y, w, b):
    m = x.shape[0]
    f_wb = fxn(x, w, b)                        # predictions for all m examples
    cost = (f_wb - y) ** 2                     # squared error per example
    total_cost = (1 / (2 * m)) * np.sum(cost)
    return total_cost
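The vectorized cost above squares and sums all m errors at once; the loop version below makes the correspondence with the summation in J(w, b) explicit (an added sketch, not the notebook's code):
def cost_fxn_loop(x, y, w, b):
    # Accumulate (f_wb(x^(i)) - y^(i))^2 one example at a time
    m = x.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb_i = np.dot(x[i], w) + b
        cost += (f_wb_i - y[i]) ** 2
    return cost / (2 * m)

# Both implementations return the same value at the initial parameters
print(cost_fxn(x_train, y_train, w, b), cost_fxn_loop(x_train, y_train, w, b))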
Finding dJ/dw and dJ/db
\[ \frac{\partial J(\mathbf{w},b)}{\partial w_j} = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \\ \frac{\partial J(\mathbf{w},b)}{\partial b} = \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \\ \]
def compute_gradient(x, y, w, b):
    m = x.shape[0]
    a = fxn(x, w, b) - y          # prediction error for every example
    dj_dw = np.dot(a, x) / m      # dJ/dw_j = (1/m) * sum_i error_i * x_j^(i)
    dj_db = np.sum(a) / m         # dJ/db   = (1/m) * sum_i error_i
    return dj_dw, dj_db
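The np.dot(a, x) line computes all n partial derivatives in one step; the explicit double loop below shows the per-feature formula for dJ/dw_j from the equations above (again an added sketch for illustration only):
def compute_gradient_loop(x, y, w, b):
    # Per-example, per-feature accumulation of the gradient
    m, n = x.shape
    dj_dw = np.zeros(n)
    dj_db = 0.0
    for i in range(m):
        err = np.dot(x[i], w) + b - y[i]   # f_wb(x^(i)) - y^(i)
        for j in range(n):
            dj_dw[j] += err * x[i, j]
        dj_db += err
    return dj_dw / m, dj_db / m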
Gradient Descent
\[\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\; & w_j = w_j - \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \; & \text{for j = 0..n-1}\newline &b\ \ = b - \alpha \frac{\partial J(\mathbf{w},b)}{\partial b} \newline \rbrace \end{align*}\]
def gradient_descent(x, y, w, b, alpha, num_iters):
    J_history = []
    p_history = []
    for i in range(num_iters + 1):
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w = w - alpha * dj_dw      # simultaneous update of all w_j
        b = b - alpha * dj_db
        cost = cost_fxn(x, y, w, b)
        J_history.append(cost)
        p_history.append((w, b))
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4}: Cost {cost:.2e}, w: {w}, b: {b}")
    return w, b, J_history, p_history
iterations = 100000
tmp_alpha = 5.0e-7
w_final, b_final, J_hist, p_hist = gradient_descent(
    x_train, y_train, w, b, tmp_alpha, iterations)
print(f"(w,b) found by gradient descent: {w_final},{b_final}")
f_wb = fxn(x_train, w_final, b_final)
print("Cost is", cost_fxn(x_train, y_train, w_final, b_final))
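Once w_final and b_final are available, they can be used to predict the target for a new example with the same column layout as x_train; the feature values below are hypothetical and only illustrate the call:
x_new = np.array([1200, 3, 1, 40])   # hypothetical example: same 4 features as x_train
print(f"Prediction for x_new: {fxn(x_new, w_final, b_final):0.2f}")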
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(1000 + np.arange(len(J_hist[1000:])), J_hist[1000:])
ax.set_title("Cost vs. iteration (end)")
ax.set_ylabel('Cost')
ax.set_xlabel('iteration step')
plt.show()
Iteration 0: Cost 3.85e+06, w: [ -1.03240533 -10.02538967 49.99110867 -20.27834 ], b: -100.00670833333334
Iteration 10000: Cost 8.71e+03, w: [ 0.58117979 -10.02160299 50.21100127 -13.2564183 ], b: -99.75760289212182
Iteration 20000: Cost 4.63e+03, w: [ 0.467721 -10.00429679 50.3020335 -8.78158635], b: -99.59764948351305
Iteration 30000: Cost 2.95e+03, w: [ 0.39492336 -9.97570356 50.311448 -5.91048217], b: -99.49414563632797
Iteration 40000: Cost 2.26e+03, w: [ 0.34819099 -9.93989568 50.26860068 -4.06742547], b: -99.42682903727315
Iteration 50000: Cost 1.97e+03, w: [ 0.31816743 -9.89948366 50.19230962 -2.88338545], b: -99.38271084836177
Iteration 60000: Cost 1.85e+03, w: [ 0.29885486 -9.85614089 50.09463778 -2.12180075], b: -99.35346477833464
Iteration 70000: Cost 1.80e+03, w: [ 0.28640849 -9.81094006 49.98331786 -1.63102743], b: -99.33375338803842
Iteration 80000: Cost 1.77e+03, w: [ 0.27836366 -9.76456876 49.86330674 -1.3138565 ], b: -99.32015519486049
Iteration 90000: Cost 1.76e+03, w: [ 0.27314044 -9.71746776 49.73778185 -1.10797371], b: -99.31047690994058
Iteration 100000: Cost 1.75e+03, w: [ 0.26972607 -9.66991956 49.60877999 -0.97343345], b: -99.3033125586677
(w,b) found by gradient descent: [ 0.26972607 -9.66991956 49.60877999 -0.97343345],-99.3033125586677
Cost is 1753.0909002940225