Suppose we've got a time series of noisy observations like the following, and we'd like to infer the underlying signal:
plt.scatter(df['t'], df['y'])
plt.xlabel("time (t)"); plt.ylabel("response (y)");
Does the right answer look like this?
plt.scatter(df['t'], df['y'])
plt.plot(df['t'], df['y'])
plt.xlabel("time (t)"); plt.ylabel("response (y)");
Or like this?
x = df['t'].values  # the time points, as a plain array
# a piecewise candidate for the underlying signal
ey = 10 * (1.2 * (x < np.pi) + 2 * np.logical_and(x > np.pi, x < 8) + (10 - x) * (x > 8)) - 12
plt.plot(df['t'], ey)
plt.scatter(df['t'], df['y'])
plt.xlabel("time (t)"); plt.ylabel("response (y)");
Or what?
# an 11-point moving average of that piecewise curve
plt.plot(df['t'], np.convolve(ey, np.ones(11)/11)[5:-5])
plt.scatter(df['t'], df['y'])
plt.xlabel("time (t)"); plt.ylabel("response (y)");
What we'd like to do is to fit a model like $$ y_i = \beta_1 f_1(t_i) + \cdots + \beta_k f_k(t_i) + \epsilon_i , $$ where $f_1(t), \ldots, f_k(t)$ are "nice smooth functions".
A good way to get a bunch of "nice smooth functions" is from a "spline basis", like this one:
# stack several B-spline bases of increasing size; the 10/k factor down-weights
# the finer (wigglier) bases, so the plain ridge penalty used later acts a bit
# like a wiggliness penalty
s = np.hstack([
    (10/k) * patsy.dmatrix(f"bs(t, df={k}, degree=3, include_intercept=True) - 1", df)
    for k in (4, 8, 12, 16, 24, 32, 48, 64)
])
plt.plot(df['t'], s);
By taking linear combinations of these functions we can make a pretty wide range of curves. Here are a few randomly chosen ones:
# four random sets of coefficients (the very first basis function gets zero weight)
w = rng.normal(scale=(np.arange(s.shape[1])[:, None] > 0), size=(s.shape[1], 4))
plt.plot(df['t'], s.dot(w));
Well, we know how to fit that model! Least squares, here we come!
# ordinary least squares fit of y (the observed responses) on the spline basis
sfit, _, _, _ = np.linalg.lstsq(s, y, rcond=None)
plt.scatter(df['t'], df['y']);
plt.plot(df['t'], s.dot(sfit));
Hm - that looks a bit too... jagged? Overfit, a bit?
One problem is that there are too many degrees of freedom: we're trying to estimate a coefficient for every one of those basis functions, which is a lot of parameters for only 100 data points.
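We can check just how many that is:
# how many basis-function coefficients versus how many observations?
print("basis matrix shape:", s.shape, "  number of observations:", df.shape[0])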
But, all those knobs make our inferred curves nice and flexible! Which ones do we need?
One solution would be to reduce the number of knobs, i.e., to use fewer basis functions.
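For instance, here's a quick sketch of that first option, refitting by least squares with a single small spline basis (the df=8 here is just an illustrative choice):
# refit using only one modest-sized spline basis, no penalty
s_small = patsy.dmatrix("bs(t, df=8, degree=3, include_intercept=True) - 1", df)
small_fit, _, _, _ = np.linalg.lstsq(s_small, y, rcond=None)
plt.scatter(df['t'], df['y'])
plt.plot(df['t'], np.asarray(s_small).dot(small_fit));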
Another solution is to adjust our expectations: never mind, we don't actually want the best possible solution, we just want a pretty good one, please? And, we'd like it to be reasonable?
Recall we're fitting this model: $$ y_i = \beta_1 f_1(t_i) + \cdots + \beta_k f_k(t_i) + \epsilon_i , $$ which suggests finding $\beta$ to minimize the loss function $$ \sum_i \left( y_i - \sum_{j=1}^k \beta_j f_j(t_i) \right)^2 . $$
To "encourage smoothness" we might add to this a penalty that depends on the wiggliness of the functions, say $$
+ \alpha \sum_j \beta_j^2 \int f''(t) dt .
$$
We don't have second-derivative information easily available here, so I've cheated a little in how I set up the functions, and we can just add a "ridge" penalty to do roughly the same thing: $$ + \alpha \sum_j \beta_j^2 . $$
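Writing $S$ for the matrix of basis-function values, this penalized least-squares problem has the closed-form solution $\hat\beta = (S^\top S + \alpha I)^{-1} S^\top y$. Here's a quick sketch of that, ignoring the intercept for simplicity (so it won't exactly match scikit-learn's Ridge below):
# closed-form ridge estimate: minimizes ||y - S b||^2 + alpha * ||b||^2
alpha = 1.0  # an arbitrary example penalty strength
ridge_beta = np.linalg.solve(s.T.dot(s) + alpha * np.eye(s.shape[1]), s.T.dot(y))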
Here are results from using the "ridge" regularization, at different strengths. Which looks the best?
from sklearn.linear_model import Ridge
plt.scatter(df['t'], df['y'])
plt.plot(df['t'], s.dot(sfit), label='unpenalized')
for a in [0.1, 2, 50, 1000]:
    rfit = Ridge(alpha=a).fit(s, y)
    rpred = s.dot(rfit.coef_) + rfit.intercept_
    plt.plot(df['t'], rpred, label=f'ridge (alpha={a})')
plt.legend();
How do we find a good strength of regularization? Cross-validation!
def do_xval(alpha, test):
    # fit on the training folds, then compute RMSE on the held-out fold
    rfit = Ridge(alpha=alpha).fit(s[~test, :], y[~test])
    rpred = rfit.predict(s[test, :])
    return np.sqrt(np.mean((y[test] - rpred)**2))

def xval(alpha, folds):
    # average the held-out RMSE across all folds
    return np.mean([do_xval(alpha, folds == j) for j in np.unique(folds)])
folds = rng.choice(10, size=df.shape[0], replace=True)
rng.shuffle(folds)
avals = np.linspace(0.1, 0.5, 31)
mse = np.array([xval(a, folds) for a in avals])
a_min = avals[np.argmin(mse)]
plt.plot(avals, mse);
plt.scatter(a_min, mse[np.argmin(mse)])
plt.xlabel("alpha"); plt.ylabel("root mean squared testing error");
The winner:
plt.scatter(df['t'], df['y'])
plt.plot(df['t'], s.dot(sfit), label='unpenalized')
rfit = Ridge(alpha=a_min).fit(s, y)
rpred = s.dot(rfit.coef_) + rfit.intercept_
plt.plot(df['t'], rpred, label=f'ridge (alpha={a_min:.4})');
plt.legend();
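One way to see what the penalty actually did is to compare the two sets of fitted coefficients; the ridge coefficients should be visibly shrunk toward zero:
# unpenalized versus ridge coefficients, one per basis function
plt.plot(sfit, label='unpenalized')
plt.plot(rfit.coef_, label=f'ridge (alpha={a_min:.4})')
plt.xlabel("basis function index"); plt.ylabel("coefficient")
plt.legend();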
We wanted a flexible model, where we didn't have to commit in advance to a specific, simple form for the answer.
But, "flexible" meant there were lots of good solutions, and the best solutions were too close (suffered from overfitting). (The inference problem is ill-posed.)
So, we had to be clever about how to choose a reasonable solution, out of the many good ones.
This same tension between flexibility and overfitting is common to many methods in statistics and machine learning.