Correlation and Covariance Analysis
Linear Algebra Summary
Linear map
Linearity
$${\displaystyle \begin{aligned} \text{Additivity:}&& f(x+y) &= f(x) + f(y)\\ \text{Homogeneity:}&& f(\alpha x)&= \alpha f(x)\\ \end{aligned} }$$
Independence
$${\displaystyle \text{Multiplicativity:}\qquad f(xy) = f(x)f(y) }$$
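A quick numerical check of the properties above, as a minimal sketch: the 3x3 matrix map and the cubing function are arbitrary illustrations, not part of the original note.
import numpy as np
A = np.random.normal(size=(3, 3))               # f(x) = A @ x is a linear map
x, y, alpha = np.random.normal(size=3), np.random.normal(size=3), 2.5
np.allclose(A @ (x + y), A @ x + A @ y)         # additivity
np.allclose(A @ (alpha * x), alpha * (A @ x))   # homogeneity
g = lambda t: t**3                              # multiplicative, g(xy) = g(x)g(y), but not linear
np.allclose(g(2.0 * 3.0), g(2.0) * g(3.0))      # multiplicativity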
Matrix Operation
import numpy as np
import torch
import tensorflow as tf
np.einsum('ij,ij->ij', np.eye(3), np.eye(3))          # element-wise product of two 3x3 identity matrices
torch.einsum('ij,ij->ij', torch.eye(3), torch.eye(3)) # operands passed as separate arguments
tf.einsum('ij,ij->ij', tf.eye(3), tf.eye(3))
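The same einsum notation covers other common contractions; a small NumPy-only sketch (the shapes are arbitrary):
import numpy as np
A, B = np.random.normal(size=(3, 4)), np.random.normal(size=(4, 5))
np.einsum('ij,jk->ik', A, B)   # matrix product, equivalent to A @ B
np.einsum('ii->', np.eye(3))   # trace
np.einsum('ij->ji', A)         # transpose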
Matrix Decomposition
Matrix Types: Invertible / Definite / Symmetric / Normal / Orthogonal / Triangular / Diagonal / Identity
Decompositions: LU & Cholesky, QR / SVD, Eigendecomposition
Cholesky decomposition
import numpy as np
u = np.random.normal(size=(3,3))
sigma_u = u.T@u # positive definite
P = np.linalg.cholesky(sigma_u)
np.dot(P, P.T.conj()) # sigma_u = PP*
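A common use of the Cholesky factor is to map uncorrelated samples onto a target covariance; a minimal standalone sketch mirroring the cell above (the sample size is arbitrary):
import numpy as np
u = np.random.normal(size=(3,3))
sigma_u = u.T@u                          # target covariance (positive definite)
P = np.linalg.cholesky(sigma_u)          # lower-triangular factor
z = np.random.normal(size=(10000, 3))    # uncorrelated standard-normal draws
x = z @ P.T                              # Cov(Pz) = P I P* = sigma_u
np.cov(x, rowvar=False)                  # ~ sigma_u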
Eigen decomposition
$${\displaystyle \begin{aligned} T(\mathbf {v} ) &=\lambda \mathbf {v}, \quad \mathbf{v}_{i}:\text{ eigen vector}, \quad \lambda_{i} :\text{ eigen value} \\ \mathbf{A}\mathbf{x} &= \lambda \mathbf{x} \\ \frac{d}{dx} e^{\lambda x} &=\lambda e^{\lambda x} \\ \end{aligned} }$$
$${\displaystyle \begin{aligned} \mathbf{A} &= \mathbf{Q} \boldsymbol{\Lambda} \mathbf{Q}^{-1}, \quad \mathbf{Q}={\begin{bmatrix}\mathbf {v} _{1}&\mathbf {v} _{2}&\cdots &\mathbf {v} _{n}\end{bmatrix}} \\ \operatorname {tr} (\mathbf{A}) &=\sum _{i=1}^{n}a_{ii}=\sum _{i=1}^{n}\lambda _{i}= \lambda _{1} +\lambda _{2}+\cdots +\lambda _{n} \\ \det(\mathbf{A}) &=\prod _{i=1}^{n}\lambda _{i}=\lambda _{1}\lambda _{2}\cdots \lambda _{n} \\ \end{aligned} }$$
import numpy as np
u = np.random.normal(size=(3,3))
sigma_u = u.T@u # positive definite
eigen_values, eigen_vectors = np.linalg.eig(sigma_u) # eigen_values[i], eigen_vectors[:, i]
Q = eigen_vectors
L = np.diag(eigen_values)
R = np.linalg.inv(Q)
# reconstruction
Q@L@R # ~ sigma_u = QLR
eigen_values.sum() # np.diag(sigma_u).sum()
eigen_values.prod() # np.linalg.det(sigma_u)
Singular value decomposition
$${\displaystyle \begin{aligned} T(\mathbf {V} _{i})=\sigma _{i}\mathbf {U} _{i},\qquad i&=1,\ldots ,\min(m,n), \\ T &\colon \left\{ {\begin{aligned} K^{n}&\to K^{m}\\ x&\mapsto \mathbf {M} x \end{aligned}} \right. \\ \end{aligned} }$$
$${\displaystyle \begin{aligned} \mathbf{M} &= \mathbf{U}\boldsymbol{\Sigma}\mathbf{V}^{\dagger}, \quad \mathbf{U}\mathbf{U}^{\dagger}= \mathbf{I}_{m} \; \And \; \mathbf{V}\mathbf{V}^{\dagger}= \mathbf{I}_{n} \\ \mathbf{M}^{\dagger}\mathbf{M} &= \mathbf{V}\boldsymbol{\Sigma}^{\dagger}\mathbf{U}^{\dagger}\mathbf{U}\boldsymbol{\Sigma}\mathbf{V}^{\dagger} = \mathbf{V}(\boldsymbol{\Sigma}^{\dagger}\boldsymbol{\Sigma})\mathbf{V}^{\dagger} \\ \mathbf{M}\mathbf{M}^{\dagger} &= \mathbf{U}\boldsymbol{\Sigma}\mathbf{V}^{\dagger}\mathbf{V}\boldsymbol{\Sigma}^{\dagger}\mathbf{U}^{\dagger} = \mathbf{U}(\boldsymbol{\Sigma}\boldsymbol{\Sigma}^{\dagger})\mathbf{U}^{\dagger} \\ \end{aligned} }$$
import numpy as np
u = np.random.normal(size=(3,3))
sigma_u = u.T@u # positive definite
U, s, Vh = np.linalg.svd(sigma_u, full_matrices=False)
S = np.diag(s) # for a symmetric positive-definite sigma_u, the singular values equal its eigenvalues
U@S@Vh # ~ sigma_u = U S V*; rows of Vh are the eigen vectors: Vh[i, :]
Covariance
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(30, 10)); gs = mpl.gridspec.GridSpec(1, 4)
# Cross
mean_vector = [0,0,0]
cov_matrix = [[1,0,0], [0,2,0], [0,0,3]]
y = np.random.multivariate_normal(mean_vector, cov_matrix, size=1000)
sns.heatmap(pd.DataFrame(y).cov(), cmap="RdBu", cbar_kws={"shrink": 0.5}, square=True, center=0, linewidth=.5, ax=fig.add_subplot(gs[0,0])).set_title('Cross-Covariance')
sns.heatmap(pd.DataFrame(y).corr(), cmap="RdBu", cbar_kws={"shrink": 0.5}, square=True, center=0, linewidth=.5, ax=fig.add_subplot(gs[0,1])).set_title('Cross-Correlation')
# Auto
y = np.random.normal(0, 1, size=1000)
sns.heatmap(pd.DataFrame(np.c_[[np.roll(y, shift=i)[30:] for i in range(30)]].T).cov(), cmap="RdBu", cbar_kws={"shrink": 0.5}, square=True, center=0, linewidth=.5, ax=fig.add_subplot(gs[0,2])).set_title('Auto-Covariance')
sns.heatmap(pd.DataFrame(np.c_[[np.roll(y, shift=i)[30:] for i in range(30)]].T).corr(), cmap="RdBu", cbar_kws={"shrink": 0.5}, square=True, center=0, linewidth=.5, ax=fig.add_subplot(gs[0,3])).set_title('Auto-Correlation')
Covariance
$${\displaystyle \begin{aligned} \operatorname {cov} (X,Y) & \equiv \operatorname {E} {{\big [}(X-\operatorname {E} [X])(Y-\operatorname {E} [Y]){\big ]}} \\ \; &=\sum _{i=1}^{n}p_{i}(x_{i}-E(X))(y_{i}-E(Y)) \\ \end{aligned} }$$
$${\displaystyle {\begin{aligned} \operatorname {cov} (X,Y)&=\operatorname {E} \left[\left(X-\operatorname {E} \left[X\right]\right)\left(Y-\operatorname {E} \left[Y\right]\right)\right]\\&=\operatorname {E} \left[XY-X\operatorname {E} \left[Y\right]-\operatorname {E} \left[X\right]Y+\operatorname {E} \left[X\right]\operatorname {E} \left[Y\right]\right]\\&=\operatorname {E} \left[XY\right]-\operatorname {E} \left[X\right]\operatorname {E} \left[Y\right]-\operatorname {E} \left[X\right]\operatorname {E} \left[Y\right]+\operatorname {E} \left[X\right]\operatorname {E} \left[Y\right]\\&=\operatorname {E} \left[XY\right]-\operatorname {E} \left[X\right]\operatorname {E} \left[Y\right] \end{aligned}} }$$
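A quick numerical confirmation of the shortcut cov(X,Y) = E[XY] - E[X]E[Y]; a minimal sketch in which the sample size and the noisy linear relation are arbitrary:
import numpy as np
X = np.random.normal(size=100000)
Y = 2*X + np.random.normal(size=100000)
np.mean((X - X.mean())*(Y - Y.mean()))   # definition: E[(X - E[X])(Y - E[Y])]
np.mean(X*Y) - np.mean(X)*np.mean(Y)     # shortcut:   E[XY] - E[X]E[Y]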
Covariance Matrix
$${\displaystyle \begin{aligned} \text{auto-covariance matrix} \quad&& \operatorname {K} _{\mathbf {X} \mathbf {X} } &=\operatorname {cov} (\mathbf {X} ,\mathbf {X} ) \\ \; && \; &{\stackrel {\mathrm {def} }{=}}\ \operatorname {E} [(\mathbf {X} -\operatorname {E} [\mathbf {X} ])(\mathbf {X} -\operatorname {E} [\mathbf {X} ])^{\rm {T}}] \\ \; && \; &= \underbrace{\operatorname {E}[\mathbf {X} \mathbf {X}^{T}] }_{\operatorname {R} _{\mathbf {X} \mathbf {X} } \text{ :autocorrelation matrix}} -\operatorname {E} [\mathbf {X} ]\operatorname {E} [\mathbf {X} ]^{\rm {T}} \\ \text{cross-covariance matrix} \quad&& \operatorname {K} _{\mathbf {X} \mathbf {Y} }&=\operatorname {cov} (\mathbf {X} ,\mathbf {Y} ) \\ \; && \; &{\stackrel {\mathrm {def} }{=}}\ \operatorname {E} [(\mathbf {X} -\mathbf {\mu _{X}} )(\mathbf {Y} -\mathbf {\mu _{Y}} )^{\rm {T}}] \\ \; && \; &= \underbrace{\operatorname {E}[\mathbf {X} \mathbf {Y}^{T}] }_{\operatorname {R} _{\mathbf {X} \mathbf {Y} } \text{ :crosscorrelation matrix}} -\operatorname {E} [\mathbf {X} ]\operatorname {E} [\mathbf {Y} ]^{\rm {T}} \\ \text{block matrices} \quad&& \mathbf {\mu } &={\begin{bmatrix}\mathbf {\mu _{X}} \\\mathbf {\mu _{Y}} \end{bmatrix}},\qquad \mathbf {\Sigma } ={\begin{bmatrix}\operatorname {K} _{\mathbf {XX} }&\operatorname {K} _{\mathbf {XY} }\\\operatorname {K} _{\mathbf {YX} }&\operatorname {K} _{\mathbf {YY} }\end{bmatrix}}: \text{joint-covariance matrix} \\ \end{aligned} }$$
Correlation Matrix
$${\displaystyle \begin{aligned} \text{correlation matrix} \quad \operatorname {corr} (\mathbf {X} ) &={\big (}\operatorname {diag} (\operatorname {K} _{\mathbf {X} \mathbf {X} }){\big )}^{-{\frac {1}{2}}}\,\operatorname {K} _{\mathbf {X} \mathbf {X} }\,{\big (}\operatorname {diag} (\operatorname {K} _{\mathbf {X} \mathbf {X} }){\big )}^{-{\frac {1}{2}}} \\ \; &= {\begin{bmatrix}1&{\frac {\operatorname {E} [(X_{1}-\mu _{1})(X_{2}-\mu _{2})]}{\sigma (X_{1})\sigma (X_{2})}}&\cdots &{\frac {\operatorname {E} [(X_{1}-\mu _{1})(X_{n}-\mu _{n})]}{\sigma (X_{1})\sigma (X_{n})}}\\\\{\frac {\operatorname {E} [(X_{2}-\mu _{2})(X_{1}-\mu _{1})]}{\sigma (X_{2})\sigma (X_{1})}}&1&\cdots &{\frac {\operatorname {E} [(X_{2}-\mu _{2})(X_{n}-\mu _{n})]}{\sigma (X_{2})\sigma (X_{n})}}\\\\\vdots &\vdots &\ddots &\vdots \\\\{\frac {\operatorname {E} [(X_{n}-\mu _{n})(X_{1}-\mu _{1})]}{\sigma (X_{n})\sigma (X_{1})}}&{\frac {\operatorname {E} [(X_{n}-\mu _{n})(X_{2}-\mu _{2})]}{\sigma (X_{n})\sigma (X_{2})}}&\cdots &1\end{bmatrix}} \end{aligned} }$$
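The first line above translates directly into code: rescale the covariance matrix by the inverse standard deviations on both sides. A minimal sketch; the covariance matrix K is an arbitrary example:
import numpy as np
K = np.array([[4.0, 1.2, 0.5],
              [1.2, 9.0, 2.0],
              [0.5, 2.0, 1.0]])            # an example covariance matrix
D_inv_sqrt = np.diag(1/np.sqrt(np.diag(K)))
D_inv_sqrt @ K @ D_inv_sqrt                # corr(X) = diag(K)^(-1/2) K diag(K)^(-1/2)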
Inverse Covariance Matrix
$${\displaystyle \begin{aligned} \operatorname {K} _{\mathbf {X} \mathbf {X} } &={\begin{bmatrix}\sigma _{x_{1}}&&&0\\&\sigma _{x_{2}}\\&&\ddots \\0&&&\sigma _{x_{n}}\end{bmatrix}}{\begin{bmatrix}1&\rho _{x_{1},x_{2}}&\cdots &\rho _{x_{1},x_{n}}\\\rho _{x_{2},x_{1}}&1&\cdots &\rho _{x_{2},x_{n}}\\\vdots &\vdots &\ddots &\vdots \\\rho _{x_{n},x_{1}}&\rho _{x_{n},x_{2}}&\cdots &1\\\end{bmatrix}}{\begin{bmatrix}\sigma _{x_{1}}&&&0\\&\sigma _{x_{2}}\\&&\ddots \\0&&&\sigma _{x_{n}}\end{bmatrix}} \\ \operatorname {K} _{\mathbf {X} \mathbf {X} }^{-1} &={\begin{bmatrix}{\frac {1}{\sigma _{x_{1}|x_{2}...}}}&&&0\\&{\frac {1}{\sigma _{x_{2}|x_{1},x_{3}...}}}\\&&\ddots \\0&&&{\frac {1}{\sigma _{x_{n}|x_{1}...x_{n-1}}}}\end{bmatrix}} \\ \; & \quad\; {\begin{bmatrix}1&-\rho _{x_{1},x_{2}\mid x_{3}...}&\cdots &-\rho _{x_{1},x_{n}\mid x_{2}...x_{n-1}}\\-\rho _{x_{2},x_{1}\mid x_{3}...}&1&\cdots &-\rho _{x_{2},x_{n}\mid x_{1},x_{3}...x_{n-1}}\\\vdots &\vdots &\ddots &\vdots \\-\rho _{x_{n},x_{1}\mid x_{2}...x_{n-1}}&-\rho _{x_{n},x_{2}\mid x_{1},x_{3}...x_{n-1}}&\cdots &1\\\end{bmatrix}} \\ \; & \quad\; {\begin{bmatrix}{\frac {1}{\sigma _{x_{1}|x_{2}...}}}&&&0\\&{\frac {1}{\sigma _{x_{2}|x_{1},x_{3}...}}}\\&&\ddots \\0&&&{\frac {1}{\sigma _{x_{n}|x_{1}...x_{n-1}}}}\end{bmatrix}} \\ \end{aligned} }$$
Multi-variate Gaussian Distribution
$${\displaystyle \begin{aligned} \mathbf {X} \ &\sim \ {\mathcal {N}}(\mathbf {\mu } ,{\boldsymbol {\Sigma }})\quad \iff \quad {\text{there exist }}\mathbf {\mu } \in \mathbb {R} ^{k},{\boldsymbol {A}}\in \mathbb {R} ^{k\times \ell }{\text{ such that }}\mathbf {X} ={\boldsymbol {A}}\mathbf {Z} +\mathbf {\mu } {\text{ and }}\forall n=1,\ldots ,l:Z_{n}\sim \ {\mathcal {N}}(0,1),{\text{i.i.d.}} \\ \end{aligned} }$$ $${\displaystyle \text{non-degenerate probability density function for positive definite covariance matrix} }$$ $${\displaystyle \begin{aligned} f_{\mathbf {X} }(x_{1},\ldots ,x_{k})&={\frac {\exp \left(-{\frac {1}{2}}\left({\mathbf {x} }-{\boldsymbol {\mu }}\right)^{\mathrm {T} }{\boldsymbol {\Sigma }}^{-1}\left({\mathbf {x} }-{\boldsymbol {\mu }}\right)\right)}{\sqrt {(2\pi )^{k}|{\boldsymbol {\Sigma }}|}}} \\ \end{aligned} }$$ $${\displaystyle \begin{cases} \mathbf {X} &=(X_{1},\ldots ,X_{k})^{\mathrm {T} } \\ {\boldsymbol {\mu }} &=\operatorname {E} [\mathbf {X} ]=(\operatorname {E} [X_{1}],\operatorname {E} [X_{2}],\ldots ,\operatorname {E} [X_{k}])^{\textbf {T}} \\ \Sigma _{i,j}&=\operatorname {E} [(X_{i}-\mu _{i})(X_{j}-\mu _{j})]=\operatorname {Cov} [X_{i},X_{j}] \\ \end{cases} }$$$${\displaystyle \text{Conditional distribution}}$$ $${\displaystyle \mathbf{X}_{1} \mid (\mathbf{X}_{2}=\mathbf{a} ) \sim \mathcal{N} (\bar{\boldsymbol\mu}, {\overline {\boldsymbol {\Sigma }}}) \\ }$$ $${\displaystyle \begin{aligned} \bar{\boldsymbol\mu} &= \boldsymbol\mu_1 + \boldsymbol\Sigma_{12} \boldsymbol\Sigma_{22}^{-1} \left( \mathbf{a} - \boldsymbol\mu_2 \right),\quad \text{for}\; \mathbf{X}_{1}\; \text{conditional on}\; \mathbf{X}_{2} =\mathbf{a} \\ {\overline {\boldsymbol {\Sigma }}} &={\boldsymbol {\Sigma }}_{11}-{\boldsymbol {\Sigma }}_{12}{\boldsymbol {\Sigma }}_{22}^{-1}{\boldsymbol {\Sigma }}_{21} \\ \end{aligned} }$$ $${\displaystyle \begin{aligned} \mathbf{X}&= {\begin{bmatrix} {\mathbf{x}}_{1} \\ {\mathbf{x}}_{2} \\ \end{bmatrix}}, &&{\text{ with sizes }} \begin{bmatrix} q\times 1 \\ (N-q)\times 1 \end{bmatrix} \\ \boldsymbol\mu &= {\begin{bmatrix} \boldsymbol\mu_1 \\ \boldsymbol\mu_2 \\ \end{bmatrix}}, &&\text{ with sizes } \begin{bmatrix} q \times 1 \\ (N-q) \times 1 \end{bmatrix} \\ \boldsymbol{\Sigma}&= {\begin{bmatrix} \boldsymbol\Sigma_{11} & \boldsymbol\Sigma_{12} \\ \boldsymbol\Sigma_{21} & \boldsymbol\Sigma_{22} \end{bmatrix}}, &&\text{ with sizes }\begin{bmatrix} q \times q & q \times (N-q) \\ (N-q) \times q & (N-q) \times (N-q) \end{bmatrix} \\ \end{aligned} }$$
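To make the conditional-distribution formulas concrete, a minimal bivariate sketch; the mean vector, covariance matrix, and conditioning value a are arbitrary choices:
import numpy as np
mu = np.array([1.0, 2.0])
Sigma = np.array([[4.0, 1.5],
                  [1.5, 1.0]])
a = 3.0                                   # condition on X2 = a
mu1, mu2 = mu
S11, S12, S21, S22 = Sigma[0,0], Sigma[0,1], Sigma[1,0], Sigma[1,1]
mu_bar = mu1 + S12/S22*(a - mu2)          # conditional mean of X1 | X2 = a
sigma_bar = S11 - S12/S22*S21             # conditional variance (Schur complement)
mu_bar, sigma_bar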
import numpy as np
import pandas as pd
import seaborn as sns
mean_vector = np.r_[0,0]
cov_matrix = np.c_[[3**2,0], [0,3**2]]
X = pd.DataFrame(np.random.multivariate_normal(mean_vector, cov_matrix, size=3000))
sns.pairplot(X)
X.mean(axis=0), X.std(axis=0, ddof=1) # X.cov(ddof=1)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
mean_vector = np.r_[0,0]
x1_var = 10**2
x2_var = 5**2
covariance = np.sqrt(x1_var*x2_var) # maximal covariance allowed by Cauchy-Schwarz, i.e. correlation = 1
cov_matrix = np.c_[[x1_var,covariance], [covariance,x2_var]] # singular (rank-1) covariance: samples fall on a line
X = pd.DataFrame(np.random.multivariate_normal(mean_vector, cov_matrix, size=3000))
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.scatter(X[0], X[1])
Sample covariance estimation
import numpy as np
import pandas as pd
mean_vector = [0,0,0]
cov_matrix = [[1,0,0], [0,1,0], [0,0,1]]
X = np.random.multivariate_normal(mean_vector, cov_matrix, size=3000)
Z = (X - X.mean(axis=0)) # centering
estimated_cov = Z.T@Z / (Z.shape[0] - 1)
estimated_cov
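As a quick cross-check (continuing with X and estimated_cov from the cell above), NumPy's built-in estimator uses the same N-1 normalization:
np.allclose(estimated_cov, np.cov(X, rowvar=False)) # True; pd.DataFrame(X).cov() matches as well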
import numpy as np
from sklearn import covariance
X = np.random.multivariate_normal(mean=np.zeros((2,)), cov=np.eye(2), size=500)
# each call below overwrites cov; in practice pick one estimator for the data at hand
cov = covariance.EmpiricalCovariance().fit(X)
cov = covariance.EllipticEnvelope().fit(X)
cov = covariance.GraphicalLasso().fit(X)
cov = covariance.GraphicalLassoCV().fit(X)
cov = covariance.LedoitWolf().fit(X)
cov = covariance.MinCovDet().fit(X)
cov = covariance.OAS().fit(X)
cov = covariance.ShrunkCovariance().fit(X)
cov.location_
cov.covariance_
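Shrinkage estimators trade a little bias for lower variance; a minimal comparison sketch, continuing with X from the cell above:
emp = covariance.EmpiricalCovariance().fit(X)
lw = covariance.LedoitWolf().fit(X)
lw.shrinkage_                                  # estimated shrinkage intensity toward a scaled identity
np.abs(lw.covariance_ - emp.covariance_).max() # how far shrinkage moved the estimate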
Correlation
import numpy as np
X = np.random.normal(size=1000)
Y = X + np.random.normal(size=1000)
np.cov(X, Y)
np.corrcoef(X, Y)
import numpy as np
import pandas as pd
relation_uncertainties = list()
for std in range(1, 10):
    X = np.random.normal(0, 1, size=10000)
    Y = X + np.random.normal(0, std, size=10000)
    covariance_matrix = np.cov(X, Y)
    var_X = covariance_matrix[0,0]
    var_Y = covariance_matrix[1,1]
    cov_XY = covariance_matrix[0,1]
    relation_uncertainty = pd.Series(
        data=[cov_XY/np.sqrt(var_X*var_Y), cov_XY**2, var_X*var_Y, cov_XY, var_X, var_Y],
        index=[r'$Cov[X,Y] / \sqrt{Var[X]Var[Y]}$', r'$Cov[X,Y]^{2}$', r'$Var[X] \cdot Var[Y]$', r'$Cov[X,Y]$', r'$Var[X]$', r'$Var[Y]$'],
        name=f'{std}'
    )
    relation_uncertainties.append(relation_uncertainty)
relation_uncertainties = pd.concat(relation_uncertainties, axis=1).T
relation_uncertainties.index.name = r'$\sigma_{noise}$'
relation_uncertainties
Chaotic correlation
import numpy as np
import pandas as pd
import seaborn as sns
#x = np.linspace(1, 100, 500)
x = np.random.normal(0, 1, size=500)
data = pd.DataFrame(
    data=np.c_[
        np.sin(1/x),
        1/np.sin(x),
        x
    ]
)
display(data.corr())
data.plot()
sns.pairplot(data)
import numpy as np
import pandas as pd
import seaborn as sns
#y = np.linspace(1, 100, 500)
y = np.random.normal(0, 1, size=500)
data = pd.DataFrame(
    data=np.c_[
        1/np.arcsin(y), # inverse: y=np.sin(1/x),
        np.arcsin(1/y), # inverse: y=1/np.sin(x),
        y
    ]
)
display(data.corr())
data.plot()
sns.pairplot(data)
Linearization
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures, SplineTransformer, Binarizer, KBinsDiscretizer, OrdinalEncoder, LabelBinarizer, LabelEncoder, OneHotEncoder
from sklearn.preprocessing import PowerTransformer, QuantileTransformer, StandardScaler, Normalizer, RobustScaler, MinMaxScaler, MaxAbsScaler
#y = np.linspace(1, 100, 500)
y = np.random.normal(0, 1, size=1000)
data = pd.DataFrame(
    data=np.c_[
        PolynomialFeatures(degree=2, interaction_only=False, include_bias=True, order=['C', 'F'][0]).fit_transform(y[:,np.newaxis]).squeeze(),
        SplineTransformer(degree=3, n_knots=5, knots=['uniform', 'quantile'][0], extrapolation=['error', 'constant', 'linear', 'continue', 'periodic'][1], include_bias=True, order=['C', 'F'][0]).fit_transform(y[:,np.newaxis]).squeeze(),
        QuantileTransformer(output_distribution='uniform').fit_transform(y[:,np.newaxis]).squeeze(),
        QuantileTransformer(output_distribution='normal').fit_transform(y[:,np.newaxis]).squeeze(),
        y
    ]
)
display(data.corr())
data.plot()
sns.pairplot(data)
Visualization
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import make_regression
X, y, coef = make_regression(n_samples=300, n_features=5, n_informative=2, n_targets=1, bias=100, effective_rank=None, tail_strength=0.5, noise=50.0, shuffle=True, coef=True, random_state=np.random.randint(1000))
df = pd.DataFrame(data=np.c_[X, y], columns=list(map(lambda x: 'X'+str(x), range(1,6))) + ['Y'])
with pd.option_context('display.float_format', '{:0.2f}'.format):
    display(df.cov().style.background_gradient().set_properties(**{'font-size': '10pt'})) # np.cov(df.T.values)
    display(df.corr().style.background_gradient().set_properties(**{'font-size': '10pt'})) # np.corrcoef(df.T.values)
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=FutureWarning)
    sns.pairplot(df)
Propagation of uncertainty
Cauchy–Schwarz inequality
$${\displaystyle \begin{aligned} \left|\langle \mathbf {u} ,\mathbf {v} \rangle \right|^{2} &\leq \langle \mathbf {u} ,\mathbf {u} \rangle \cdot \langle \mathbf {v} ,\mathbf {v} \rangle \\ \left|\sum _{i=1}^{n}u_{i} \bar{v}_{i}\right|^{2} &\leq \left(\sum _{i=1}^{n}u_{i} \bar{u}_{i}\right)\left(\sum _{i=1}^{n}v_{i}\bar{v}_{i}\right) \\ \left|\int _{\mathbb {R} ^{n}}f(x){\overline {g(x)}}\,dx\right|^{2} &\leq \int _{\mathbb {R} ^{n}}|f(x)|^{2}\,dx\int _{\mathbb {R} ^{n}}|g(x)|^{2}\,dx \\ \operatorname {Cov} (X,Y)^{2} &\leq \operatorname {Var} (X) \operatorname {Var} (Y) \\ \end{aligned} }$$
Correlation
$${\displaystyle \begin{aligned} \operatorname {Cov} (X,Y)^{2} &\leq \operatorname {Var} (X) \operatorname {Var} (Y) \\ \frac{\operatorname {Cov} (X,Y)^{2}} {\operatorname {Var} (X) \operatorname {Var} (Y)} &\leq 1 \\ -1 \leq \frac{\operatorname {Cov} (X,Y)} {\sqrt{\operatorname{Var}(X)} \sqrt{\operatorname {Var} (Y)}} &\leq 1, \qquad \qquad \rho_{X,Y} \equiv \frac{\operatorname {Cov} (X,Y)} {\sqrt{\operatorname{Var}(X)} \sqrt{\operatorname {Var} (Y)}} = \frac{\sigma_{X,Y}} { \sigma _{X}\sigma _{Y}} \\ \end{aligned} }$$ $${\displaystyle \begin{aligned} \therefore \quad -1 \leq \rho_{X,Y} &\leq 1 \\ - \sigma_{X}\sigma_{Y} \le \sigma_{X,Y} & \le \sigma_{X}\sigma_{Y}, \qquad \sigma_{X,Y} = \rho_{X,Y} \sigma_{X}\sigma_{Y} \\ \end{aligned} }$$$${\displaystyle \begin{aligned} \rho _{X,Y} &= \frac{\sigma_{X,Y}} { \sigma _{X}\sigma _{Y}} \\ \; &={\operatorname {E} [(X-\mu _{X})(Y-\mu _{Y})] \over \sigma _{X}\sigma _{Y}},\quad {\text{if}}\ \sigma _{X}\sigma _{Y}>0 \\ \; &= {\operatorname {E} (XY)-\operatorname {E} (X)\operatorname {E} (Y) \over {\sqrt {\operatorname {E} (X^{2})-\operatorname {E} (X)^{2}}}\cdot {\sqrt {\operatorname {E} (Y^{2})-\operatorname {E} (Y)^{2}}}} \\ \end{aligned} }$$ $${\displaystyle r_{xy}\quad \equiv \quad {\frac {\sum \limits _{i=1}^{n}(x_{i}-{\bar {x}})(y_{i}-{\bar {y}})}{(n-1)s_{x}s_{y}}}={\frac {\sum \limits _{i=1}^{n}(x_{i}-{\bar {x}})(y_{i}-{\bar {y}})}{\sqrt {\sum \limits _{i=1}^{n}(x_{i}-{\bar {x}})^{2}\sum \limits _{i=1}^{n}(y_{i}-{\bar {y}})^{2}}}}}$$
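The sample correlation r_xy above can be computed directly and compared with the library routine; a minimal sketch with arbitrary noisy linear data:
import numpy as np
x = np.random.normal(size=1000)
y = 0.5*x + np.random.normal(size=1000)
r_manual = ((x - x.mean())*(y - y.mean())).sum() / np.sqrt(((x - x.mean())**2).sum() * ((y - y.mean())**2).sum())
r_manual, np.corrcoef(x, y)[0, 1]       # both lie in [-1, 1] by the Cauchy-Schwarz bound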
Endogeneity and Instrumental Variable
#
Covariance Analysis Applications
Feature Engineering
$${\displaystyle \begin{align} \text{Mean Centering} \qquad &: \operatorname {cov} (X-{\bar{X}},Y-{\bar{Y}}) &&= \; \operatorname {cov} (X,Y) \\ \text{Scaling} \qquad &: \operatorname {cov} (C_{x}X, C_{y}Y) &&= \;C_{x}C_{y}\operatorname {cov} (X, Y) \\ \text{Standardizing} \qquad &: \operatorname {cov} \left( \frac{X-{\bar{X}}}{\sigma_{x}}, \frac{Y-{\bar{Y}}}{\sigma_{y}} \right) && = \; \frac{\operatorname {cov} (X, Y)}{\sigma_{x}\sigma_{y}} \\ \end{align} }$$
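A quick numerical check of the three identities above (Mean Centering, Scaling, Standardizing); a minimal sketch in which the samples and the constants C_x, C_y are arbitrary:
import numpy as np
X = np.random.normal(2, 3, size=100000)
Y = 0.7*X + np.random.normal(0, 1, size=100000)
Cx, Cy = 2.0, 5.0
np.cov(X, Y)[0, 1]                                          # baseline cov(X, Y)
np.cov(X - X.mean(), Y - Y.mean())[0, 1]                    # mean centering: unchanged
np.cov(Cx*X, Cy*Y)[0, 1] / (Cx*Cy)                          # scaling: Cx*Cy*cov(X, Y)
np.cov((X - X.mean())/X.std(ddof=1), (Y - Y.mean())/Y.std(ddof=1))[0, 1]  # standardizing
np.corrcoef(X, Y)[0, 1]                                     # equals the standardized covariance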
Mean Centering
#
Scaling
#
Standardizing
#
Eigen decomposition
import numpy as np
import pandas as pd
from scipy import stats
size = 100
data = pd.DataFrame(np.c_[
    # stats.bernoulli.rvs(p=2/3, size=size),
    # stats.binom.rvs(n=5, p=2/3, size=size),
    # stats.poisson.rvs(mu=5, size=size),
    # stats.multinomial.rvs(n=5, p=[.3, .3, .3], size=size),
    stats.multivariate_normal.rvs(mean=[0,0,0], cov=[[9,0,0], [0,4,0], [0,0,1]], size=(size,))])
Z = (data - data.mean(axis=0)).values
COV_Z = Z.T@Z / (Z.shape[0] - 1)
eigen_values, eigen_vectors = np.linalg.eig(COV_Z)
# eigen_values[i], eigen_vectors[:, i]
Q = eigen_vectors
L = np.diag(eigen_values)
R = np.linalg.inv(Q)
Q@L@R # ~ COV_Z
Singular value decomposition (SVD)
import numpy as np
import pandas as pd
from scipy import stats
size = 100
data = pd.DataFrame(np.c_[
    # stats.bernoulli.rvs(p=2/3, size=size),
    # stats.binom.rvs(n=5, p=2/3, size=size),
    # stats.poisson.rvs(mu=5, size=size),
    # stats.multinomial.rvs(n=5, p=[.3, .3, .3], size=size),
    stats.multivariate_normal.rvs(mean=[0,0,0], cov=[[9,0,0], [0,4,0], [0,0,1]], size=(size,))])
Z = (data - data.mean()).values
COV_Z = Z.T@Z / (Z.shape[0] - 1)
# s: scaled eigen values
# s**2 / (Z.shape[0] - 1): eigen values
# vh: eigen vectors; vh[i, :]
u, s, vh = np.linalg.svd(Z, full_matrices=False)
u@np.diag(s)@vh # ~ Z
(s**2).sum() # ~ np.diag(Z.T@Z).sum(): total sum of squares
Z@vh.T # ~ PC scores
(vh**2).sum(axis=1) # each eigen vector (row of vh) has unit norm
# orthogonality
vh[0]@vh[1]
vh[1]@vh[2]
vh[2]@vh[0]
Principal Component Analysis (PCA)
$${\displaystyle \begin{aligned} \mathbf{T} = \mathbf{X} \mathbf{W} \end{aligned} }$$
$${\displaystyle \begin{aligned} \mathbf {Q} &\propto \mathbf {X} ^{\mathsf {T}}\mathbf {X} =\mathbf {W} \mathbf {\Lambda } \mathbf {W} ^{\mathsf {T}} \\ \mathbf {W} ^{\mathsf {T}}\mathbf {Q} \mathbf {W} &\propto \mathbf {W} ^{\mathsf {T}}\mathbf {W} \,\mathbf {\Lambda } \,\mathbf {W} ^{\mathsf {T}}\mathbf {W} =\mathbf {\Lambda } \\ \end{aligned} }$$
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from statsmodels.multivariate.pca import PCA
size = 100
data = pd.DataFrame(np.c_[
    # stats.bernoulli.rvs(p=2/3, size=size),
    # stats.binom.rvs(n=5, p=2/3, size=size),
    # stats.poisson.rvs(mu=5, size=size),
    # stats.multinomial.rvs(n=5, p=[.3, .3, .3, .3], size=size),
    stats.multivariate_normal.rvs(mean=[0,0,0,0], cov=[[16,0,0,0], [0,9,0,0], [0,0,4,0], [0,0,0,1]], size=(size,))])
Z = (data - data.mean()).values
COV_Z = Z.T@Z / (Z.shape[0] - 1)
pca = PCA(data=Z, ncomp=4, standardize=False, normalize=False, method='svd')
eigen_4d = np.round(pca.eigenvecs, 0)
loadings_4d = pca.loadings
pca = PCA(data=Z, ncomp=3, standardize=False, normalize=False, method='svd')
eigen_3d = np.round(pca.eigenvecs, 0)
loadings_3d = pca.loadings
print(eigen_4d) # sns.heatmap(pd.DataFrame(data=np.round(loadings_4d, 0), index=data.columns, columns=map(lambda x: 'Factor'+str(x), range(4))), cmap="Blues", annot=True, fmt='.2f')
print(eigen_3d) # sns.heatmap(pd.DataFrame(data=np.round(loadings_3d, 0), index=data.columns, columns=map(lambda x: 'Factor'+str(x), range(3))), cmap="Blues", annot=True, fmt='.2f')
new_Z = pca.factors # Z@pca.eigenvecs, similarity between data and factors when standardize=False, normalize=False
print(pca.eigenvals) # eigen values: pca.eigenvals/(Z.shape[0] - 1)
print(pca.eigenvecs) # eigen vector: pca.eigenvecs[:, i]
print(pca.loadings) # loadings: pca.loadings[:, i]
(pca.eigenvecs**2).sum(axis=0) # eigen norm
(pca.eigenvecs**2).sum(axis=1) # standardized
(new_Z**2).sum(axis=1).sum() # normalized
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from statsmodels.multivariate.pca import PCA
size = 100
data = pd.DataFrame(np.c_[
    # stats.bernoulli.rvs(p=2/3, size=size),
    # stats.binom.rvs(n=5, p=2/3, size=size),
    stats.poisson.rvs(mu=5, size=size),
    stats.multinomial.rvs(n=5, p=[.25, .25, .25, .25], size=size), # probabilities must sum to 1
    # stats.multivariate_normal.rvs(mean=[0,0,0,0], cov=[[16,0,0,0], [0,9,0,0], [0,0,4,0], [0,0,0,1]], size=(size,))
])
Z = (data - data.mean()).values
COV_Z = Z.T@Z / (Z.shape[0] - 1)
n_factors = 5
pca = PCA(data=Z, ncomp=n_factors, standardize=False, normalize=False, method='svd')
new_Z = pca.factors # Z @ pca.eigenvecs # similarity between data and factors when standardize=False, normalize=False
display(new_Z[:10])
sns.heatmap(pd.DataFrame(data=np.round(pca.loadings, 1), index=data.columns, columns=map(lambda x: 'Factor'+str(x), range(n_factors))), cmap="Blues", annot=True, fmt='.2f')
Factor Analysis
$${\displaystyle \begin{aligned} x_{i,m}-\mu _{i} &=l_{i,1}f_{1,m}+\dots +l_{i,k}f_{k,m}+\varepsilon _{i,m} \\ \mathbf{X} - \mathbf{M} &= \mathbf{L}\mathbf{F} + \boldsymbol{\varepsilon}, \qquad \qquad \qquad \operatorname{E}[ \mathbf{F} ] = 0 \; \And \; \operatorname{cov}[ \mathbf{F} ] = \mathbf{I} \\ \operatorname{cov} [ \mathbf{X}-\mathbf{M} ] &= \operatorname{cov}[ \mathbf{L} \mathbf{F} + \boldsymbol{\varepsilon} ], \qquad \qquad \boldsymbol{\Sigma} \equiv \operatorname{cov} [ \mathbf{X}-\mathbf{M} ] \\ \boldsymbol{\Sigma} &= \mathbf{L} \operatorname{cov}[ \mathbf{F} ] \mathbf{L}^{T} + \operatorname{cov}[ \boldsymbol{\varepsilon} ], \qquad \qquad \boldsymbol{\Psi} \equiv \operatorname{cov}[ \boldsymbol{\varepsilon} ] \\ \; &= \mathbf{L}\mathbf{L}^{T}+ \boldsymbol{\Psi} \\ \; &= \mathbf{L^{\prime}}\mathbf{L^{\prime}}^{T}+ \boldsymbol{\Psi}, \qquad \mathbf{L^{\prime}} \equiv \mathbf{L}\mathbf{Q} \; \And \; \mathbf{F^{\prime}} \equiv \mathbf{Q}^{T}\mathbf{F} , \quad \mathbf{Q} \text{ is orthogonal matrix} \\ \end{aligned} }$$
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from factor_analyzer import FactorAnalyzer
size = 100
data = pd.DataFrame(np.c_[
    stats.bernoulli.rvs(p=2/3, size=size),
    stats.binom.rvs(n=5, p=2/3, size=size),
    stats.poisson.rvs(mu=5, size=size),
    stats.multinomial.rvs(n=5, p=[.25, .25, .25, .25], size=size), # probabilities must sum to 1
    stats.multivariate_normal.rvs(mean=[0,0,0,0], cov=[[16,0,0,0], [0,9,0,0], [0,0,4,0], [0,0,0,1]], size=(size,))
])
# dependency
data.loc[data.index[data[0] == 1].to_series().sample(frac=.8).index] = 10
data.loc[data.index[data[1] == 4].to_series().sample(frac=.8).index] = 10
#data.loc[data.index[data[0] == 1].to_series().sample(frac=.8).index, [2,3,4]] = 10
Z = (data - data.mean()).values
COV_Z = Z.T@Z / (Z.shape[0] - 1)
n_factors = 5
fa = FactorAnalyzer(n_factors=n_factors, method="ml", rotation="varimax")
fa.fit(Z)
sns.heatmap(pd.DataFrame(data=fa.loadings_, index=data.columns, columns=map(lambda x: 'Factor'+str(x), range(n_factors))), cmap="Blues", annot=True, fmt='.2f')
Reference
- https://en.wikipedia.org/wiki/Multivariate_normal_distribution
- https://en.wikipedia.org/wiki/Definite_matrix
- https://en.wikipedia.org/wiki/Covariance_matrix
- https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices
- https://en.wikipedia.org/wiki/WKB_approximation
- https://en.wikipedia.org/wiki/Taylor_expansions_for_the_moments_of_functions_of_random_variables
- https://en.wikipedia.org/wiki/Propagation_of_uncertainty
- https://en.wikipedia.org/wiki/Uncertainty
- https://en.wikipedia.org/wiki/Uncertainty_quantification
- https://en.wikipedia.org/wiki/Quantification_of_margins_and_uncertainties
- https://en.wikipedia.org/wiki/Measurement_uncertainty
- https://en.wikipedia.org/wiki/Sensitivity_analysis
- https://en.wikipedia.org/wiki/Variance
- https://en.wikipedia.org/wiki/Law_of_total_variance
- https://en.wikipedia.org/wiki/Law_of_total_covariance
- https://en.wikipedia.org/wiki/Law_of_total_cumulance
- https://en.wikipedia.org/wiki/Delta_method
- https://en.wikipedia.org/wiki/Algebra_of_random_variables
- https://en.wikipedia.org/wiki/Relationships_among_probability_distributions
- https://en.wikipedia.org/wiki/Compound_probability_distribution