Our first linear model was motivated by the probability model $y_i = x_i \cdot \beta + \epsilon_i$, with $\epsilon_i \sim \text{Normal}(\text{mean}=0, \text{sd}=\sigma)$, which led us to choose $\beta$ by minimizing the sum of squared errors, $\sum_i (y_i - x_i \cdot \beta)^2$.
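To see the connection explicitly: under the Normal model the log-likelihood of the data is
$$\log \prod_{i=1}^n \frac{1}{\sqrt{2\pi}\,\sigma} \exp\!\left(-\frac{(y_i - x_i \cdot \beta)^2}{2\sigma^2}\right) = -\frac{1}{2\sigma^2} \sum_{i=1}^n (y_i - x_i \cdot \beta)^2 - n \log\left(\sqrt{2\pi}\,\sigma\right),$$
so for any fixed $\sigma$, maximizing the likelihood over $\beta$ is exactly minimizing the sum of squared errors.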
A natural question is: how much do the results depend on the "distributional assumptions", i.e., the "Normal( )"?
The Cauchy distribution is another probability distribution: it can take any value (like the Normal), its histogram looks like a hump (like the Normal), and it has both "location" and "scale" parameters (like the Normal). Let's have a look at the model $y_i = x_i \cdot \beta + \epsilon_i$, with $\epsilon_i \sim \text{Cauchy}(\text{loc}=0, \text{scale}=\sigma)$.
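One place the two distributions differ a lot is in the tails: Cauchy draws occasionally land very far from the center. Here's a quick illustrative sketch (the seed is chosen arbitrarily; this isn't part of the analysis below) comparing the largest values seen in samples from the two:

```python
import numpy as np

rng = np.random.default_rng(123)  # arbitrary seed, just for illustration
normal_draws = rng.standard_normal(10_000)
cauchy_draws = rng.standard_cauchy(10_000)

# the Normal sample stays within a few units of zero,
# while the Cauchy sample typically contains some enormous values
print("largest |Normal draw|:", np.abs(normal_draws).max())
print("largest |Cauchy draw|:", np.abs(cauchy_draws).max())
```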
First we'll simulate from the model and see how well the usual method works.
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng()

n = 200
k = 2
x = rng.uniform(size=(n, k))                   # n observations of k predictors
beta = [10, -2]                                # true coefficients
y = x.dot(beta) + rng.standard_cauchy(size=n)  # linear signal plus Cauchy noise
from sklearn import linear_model
line_reg = linear_model.LinearRegression(fit_intercept=True)
line_reg.fit(x, y)
line_reg.coef_
array([ 0.04686807, -9.46323921])
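Those estimates are nowhere near the true coefficients of 10 and -2. And this isn't just one unlucky draw: here's a quick illustrative check, re-simulating the noise a few times and refitting; with Cauchy errors the least-squares coefficients typically jump around from run to run.

```python
# illustrative check: refit least squares on fresh Cauchy noise several times;
# the estimated coefficients typically vary wildly between runs
for _ in range(5):
    y_sim = x.dot(beta) + rng.standard_cauchy(size=n)
    print(linear_model.LinearRegression(fit_intercept=True).fit(x, y_sim).coef_)
```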
y_hat = line_reg.predict(x)
fig, ax = plt.subplots()
ax.scatter(y, y_hat)
ax.set_xlim(-20, 20)
ax.set_xlabel("observed value")
ax.set_ylabel("predicted value")
ax.set_title(f"MSE = {np.median((y - y_hat)**2):.2f}")
ax.axline((0,0), slope=1);
What to do? Well, the Cauchy density with scale $\sigma$ is $f(u) = \frac{1}{\pi \sigma \left(1 + (u/\sigma)^2\right)}$.
So, the likelihood of the data is $\prod_{i=1}^n f(y_i - \hat y_i)$, and the log-likelihood, with $\hat y = X\beta$, is $-\sum_{i=1}^n \log\left(1 + \left((y_i - x_i \cdot \beta)/\sigma\right)^2\right) - n \log(\pi \sigma)$. So, we can estimate $\beta$ and $\sigma$ by minimizing the negative of this:
from scipy.optimize import minimize
def logl(u):
    # negative log-likelihood of the Cauchy model; u = (beta_1, ..., beta_k, sigma)
    beta = u[:-1]
    sigma = u[-1]
    yhat = x.dot(beta)
    return np.sum(np.log(1 + ((y - yhat) / sigma)**2)) + n * np.log(np.pi * sigma)

res = minimize(logl, [1, 1, 1])
est_beta = res['x'][:-1]
est_sigma = res['x'][-1]
new_yhat = x.dot(est_beta)
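Before plotting, let's compare the two sets of estimates side by side (recall the true coefficients are 10 and -2):

```python
print("least squares estimate:", line_reg.coef_)
print("Cauchy MLE estimate:   ", est_beta)
print("estimated scale sigma: ", est_sigma)
```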
fig, ax = plt.subplots()
ax.scatter(y, new_yhat)
ax.set_xlim(-20, 20)
ax.set_xlabel("observed value")
ax.set_ylabel("predicted value")
ax.set_title(f"MSE = {np.median((y - new_yhat)**2):.2f}")
ax.axline((0,0), slope=1);
We adjusted the loss function: a more general way of describing what we're doing is that we're trying to minimize the loss $\sum_i L(y_i - \hat y_i)$, where $L(\cdot)$ is a function that quantifies how "bad" an "error" of a given size is.
The standard linear model used $L(u) = u^2$; the Cauchy model led us to use $L(u) = \log(1 + u^2)$.
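To make that general description concrete, here's a small sketch (the function name and structure are mine, not from any particular library) that fits $\beta$ by numerically minimizing $\sum_i L(y_i - \hat y_i)$ for whatever loss function you hand it:

```python
from scipy.optimize import minimize

def fit_with_loss(x, y, loss):
    """Fit coefficients beta by minimizing sum_i loss(y_i - x_i . beta)."""
    def total_loss(beta):
        return np.sum(loss(y - x.dot(beta)))
    k = x.shape[1]
    return minimize(total_loss, np.zeros(k))['x']

# the two losses discussed above
squared_loss = lambda u: u**2
cauchy_loss = lambda u: np.log(1 + u**2)

fit_with_loss(x, y, squared_loss)  # ~ the least-squares fit (no intercept)
fit_with_loss(x, y, cauchy_loss)   # ~ the Cauchy fit, with sigma fixed at 1
```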
What's the difference? Well, here are the PDFs of the two:
from scipy.stats import norm, cauchy
fig, ax = plt.subplots()
xvals = np.linspace(-5, 5, 101)
ax.plot(xvals, norm.pdf(xvals), label='Normal')
ax.plot(xvals, cauchy.pdf(xvals), label='Cauchy')
ax.legend();
Those don't look too different. But have a look at the (negative) log PDF (which is what we use as a loss function):
from scipy.stats import norm, cauchy
fig, ax = plt.subplots()
xvals = np.linspace(-5, 5, 101)
ax.plot(xvals, -1 * norm.logpdf(xvals), label='Normal')
ax.plot(xvals, -1 * cauchy.logpdf(xvals), label='Cauchy')
ax.legend();
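The difference is in how fast they grow: for large errors the Normal loss grows like $u^2/2$, while the Cauchy loss grows only like $2\log|u|$. Under squared-error loss a single huge residual can dominate the whole fit, which is exactly what the Cauchy-generated outliers did to the least-squares estimates above; the log loss penalizes a huge error only a little more than a moderate one, so the fit isn't dragged around by it.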