from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
Principal Component Analysis (PCA)
PCA reduces the dimensionality of data by projecting it onto a small set of orthogonal axes (the principal components) chosen along the directions of greatest variance, simplifying the data while retaining most of its information.
Import Libraries
Data
# Six 2-D sample points; the last three are the negatives of the first three,
# so the data is already centred on the origin.
X = np.array([[1, 1], [2, 1], [3, 2], [-1, -1], [-2, -1], [-3, -2]])

# Plot the raw points as red circles.
plt.plot(X[:, 0], X[:, 1], 'ro')
plt.show()
PCA (1 component)
# Fit a PCA model that keeps only one principal component.
pca = PCA(n_components=1)
pca.fit(X)
PCA(n_components=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA(n_components=1)
# Total share of the variance captured by the retained component(s).
np.sum(pca.explained_variance_ratio_)
0.9924428900898052
# Project the data onto the fitted principal component(s) and show the
# resulting coordinates.
X_transformed = pca.transform(X)
print(X_transformed)
[[ 1.38340578]
[ 2.22189802]
[ 3.6053038 ]
[-1.38340578]
[-2.22189802]
[-3.6053038 ]]
# Map the projected coordinates back into the original feature space.
X_reduced = pca.inverse_transform(X_transformed)
print(X_reduced)

# Plot the reconstructed points as red circles.
plt.plot(X_reduced[:, 0], X_reduced[:, 1], 'ro')
plt.show()
[[ 1.15997501 0.75383654]
[ 1.86304424 1.21074232]
[ 3.02301925 1.96457886]
[-1.15997501 -0.75383654]
[-1.86304424 -1.21074232]
[-3.02301925 -1.96457886]]
PCA (2 components)
# Fit a PCA model that keeps two principal components.
pca = PCA(n_components=2)
pca.fit(X)
PCA(n_components=2)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
PCA(n_components=2)
# Total share of the variance captured by the retained component(s).
np.sum(pca.explained_variance_ratio_)
1.0
# Project the data onto the fitted principal component(s) and show the
# resulting coordinates.
X_transformed = pca.transform(X)
print(X_transformed)
[[ 1.38340578 0.2935787 ]
[ 2.22189802 -0.25133484]
[ 3.6053038 0.04224385]
[-1.38340578 -0.2935787 ]
[-2.22189802 0.25133484]
[-3.6053038 -0.04224385]]
My PCA
def My_PCA(X, k):
    """Project ``X`` onto its ``k`` leading principal components.

    The columns of ``X`` are mean-centred, the covariance matrix of the
    columns is eigendecomposed, and the centred data is projected onto the
    eigenvectors belonging to the ``k`` largest eigenvalues.

    Args:
        X: Array-like of shape (n_samples, n_features).
        k: Number of leading components to keep.

    Returns:
        ``numpy.ndarray`` of shape (n_samples, k) with the projected data.
        The sign of each column is whatever sign ``np.linalg.eig`` assigns
        to the corresponding eigenvector.
    """
    X = np.asarray(X, dtype=float)

    # Only the mean is removed; features are not scaled to unit variance.
    X_centered = X - np.mean(X, axis=0)

    # np.cov squeezes a single-feature result down to a 0-d array, which
    # np.linalg.eig rejects, so restore the 2-D shape.
    cov_mat = np.atleast_2d(np.cov(X_centered.T))

    eig_vals, eig_vecs = np.linalg.eig(cov_mat)

    # Eigenvectors are the columns of eig_vecs; take them in order of
    # descending eigenvalue and keep the first k.
    order = np.argsort(eig_vals)[::-1]
    components = eig_vecs[:, order[:k]]

    return np.dot(X_centered, components)
# Show the sample data projected onto its first principal component.
print(My_PCA(X, k=1))
[[ 1.38340578]
[ 2.22189802]
[ 3.6053038 ]
[-1.38340578]
[-2.22189802]
[-3.6053038 ]]