Homepage › Solution manuals › Yaser Abu-Mostafa › Learning from Data › Exercise 9.18
Exercise 9.18
Answers
def calc_Eout_ex_918(Qf, train_xs, train_ys, test_xs, test_ys, lambda_t = 0): #Fit with models of degree Qf and compute the E_{out} Z = data.polynomial_transform(Qf, train_xs) w = np.matmul(np.linalg.inv(np.matmul(Z.transpose(), Z) + lambda_t), np.matmul(Z.transpose(), train_ys)) #print(f"w.shape: {w.shape}") Ein = data.calc_Eout(w, train_xs, train_ys) Eout = data.calc_Eout(w, test_xs, test_ys) return Ein, Eout def calc_permutation_estimate(d_eff, train_ys, Ein): #Compute the Permutation Estimate for Linear Regression, equation (9.6) N = train_ys.shape[0] sigma_hat2 = N * np.var(train_ys)/ (N-1) est = 2 * sigma_hat2 * d_eff/N est += Ein return est def calc_fpe_estimate(N, d_eff, Ein): #Final prediction error, section 9.5.4 p = N/d_eff est = (p+1)*Ein/(p-1) return est def calc_vc_estimate(N, d_eff, Ein): p = N/d_eff r1 = np.sqrt(p)*Ein r2 = np.sqrt(p) - np.sqrt(1 + np.log(p)+ np.log(N)/2/d_eff) est = r1/r2 return est def compute_leave_one_out_estimate(H, y_hat, y): N = y_hat.shape[0] est = 0 for n in np.arange(N): yhat_n = y_hat[n] yn = y[n] Hnn = H[n,n] r = (yhat_n - yn)/(1-Hnn) est += r**2 est = est / N return est def run_one_experiment_ex_918(N, test_N, max_target_degree, max_hypo_degree, lambda_t): sigma_square = data.generate_random_numbers01(1, 1) Qf = np.random.choice(max_target_degree+1, 1) normalized_aqs = data.generate_target_coefficients(Qf) train_xs, train_ys = data.generate_data_set(N, normalized_aqs, sigma_square) test_xs, test_ys = data.generate_data_set(test_N, normalized_aqs, sigma_square) orders = np.arange(0,max_hypo_degree+1) Eins, Eouts = [], [] Est_vcs, Est_loocvs, Est_perms, Est_fpes = [], [], [], [] for order in orders: Ein, Eout = calc_Eout_ex_918(order, train_xs, train_ys, test_xs, test_ys) Z = data.polynomial_transform(order, train_xs) H = lm.compute_H(Z, lambda_t) #full H, Need use transformed Z to compute y_hat = lm.compute_y_hat(H, train_ys) eff_dof = lm.calc_effective_dof(H) #effective degree of freedom #print(f"{eff_dof}") if eff_dof <= 1.0e-18: eff_dof = 1.0e-18 est_vc = calc_vc_estimate(N, eff_dof, Ein) est_loocv = compute_leave_one_out_estimate(H, y_hat, train_ys) est_perm = calc_permutation_estimate(eff_dof, train_ys, Ein) est_fpe = calc_fpe_estimate(N, eff_dof, Ein) Eins.append(Ein) Eouts.append(Eout) Est_vcs.append(est_vc) Est_loocvs.append(est_loocv) Est_perms.append(est_perm) Est_fpes.append(est_fpe) return Eouts, Est_vcs, Est_loocvs, Est_perms, Est_fpes, Eins
N = 100 test_N = 100 max_target_degree = 30 max_hypo_degree = 20 lambda_t = 0 max_exps = 10000 Eins, Eouts = [], [] Est_vcs, Est_loocvs, Est_perms, Est_fpes = [], [], [], [] best_outs, best_ins, best_vcs, best_cvs, best_perms, best_fpes = [], [], [], [], [], [] reg_ins, reg_vcs, reg_cvs, reg_perms, reg_fpes = [], [], [], [], [] for e in np.arange(max_exps): if e %1000 == 1: print(f"--- Current iteration : {e}") Eouts1, Est_vcs1, Est_loocvs1, Est_perms1, Est_fpes1, Eins1 = run_one_experiment_ex_918(N, test_N, max_target_degree, max_hypo_degree, lambda_t) best_out = np.argmin(np.array(Eouts1)) optimal = np.min(np.array(Eouts1)) best_in = np.argmin(np.array(Eins1)) best_vc = np.argmin(np.array(Est_vcs1)) best_cv = np.argmin(np.array(Est_loocvs1)) best_perm = np.argmin(np.array(Est_perms1)) best_fpe = np.argmin(np.array(Est_fpes1)) best_outs.append(best_out) best_ins.append(best_in) best_vcs.append(best_vc) best_cvs.append(best_cv) best_perms.append(best_perm) best_fpes.append(best_fpe) regret_out = 0 regret_in = (Eouts1[best_in] - optimal)/optimal regret_vc = (Eouts1[best_vc] - optimal)/optimal regret_cv = (Eouts1[best_cv] - optimal)/optimal regret_perm = (Eouts1[best_perm] - optimal)/optimal regret_fpe = (Eouts1[best_fpe] - optimal)/optimal reg_ins.append(regret_in) reg_vcs.append(regret_vc) reg_cvs.append(regret_cv) reg_perms.append(regret_perm) reg_fpes.append(regret_fpe) Eouts.append(Eouts1) Est_vcs.append(Est_vcs1) Est_loocvs.append(Est_loocvs1) Est_perms.append(Est_perms1) Est_fpes.append(Est_fpes1) Eins.append(Eins1) Eouts = np.mean(np.array(Eouts), axis=0) Est_vcs = np.mean(np.array(Est_vcs), axis=0) Est_loocvs = np.mean(np.array(Est_loocvs), axis=0) Est_perms = np.mean(np.array(Est_perms), axis=0) Est_fpes = np.mean(np.array(Est_fpes), axis=0) b_out = np.mean(np.array(best_outs)) b_in = np.mean(np.array(best_ins)) b_vc = np.mean(np.array(best_vcs)) b_cv = np.mean(np.array(best_cvs)) b_perm = np.mean(np.array(best_perms)) b_fpe = np.mean(np.array(best_fpes)) reg_in = np.mean(np.array(reg_ins)) reg_vc = np.mean(np.array(reg_vcs)) reg_cv = np.mean(np.array(reg_cvs)) reg_perm = np.mean(np.array(reg_perms)) reg_fpe = np.mean(np.array(reg_fpes))
--- Current iteration : 1 --- Current iteration : 1001 --- Current iteration : 2001 --- Current iteration : 3001 --- Current iteration : 4001 --- Current iteration : 5001 --- Current iteration : 6001 --- Current iteration : 7001 --- Current iteration : 8001 --- Current iteration : 9001
orders = np.arange(max_hypo_degree+1) yub = 1.5 Est_loocvs_f = np.clip(Est_loocvs, 0, yub) Eouts_f = np.clip(Eouts, 0, yub) xxs = [orders, orders, orders, orders, orders] yys = [Eouts_f, Est_vcs, Est_loocvs_f, Est_perms, Est_fpes] myplot.plt_plot(xxs, yys, 'plot', ['k', 'g', 'r', 'b', 'y'], ['o', 'x', '.', ',', 's'], ['Out-of-Sample Error', 'VC Penalty', 'LOO-CV', 'Permutation Estimate', 'FPE'], title = 'Exercise 9.18 (a) E_out/Validation Estimates vs. Model Order', yscale = None, ylb = 0, yub = 1.2, xlb = None, xub = None, xlabel = 'Model Order', ylabel = 'E_out/Validation Estimates', legends = ['']*6, legendx = None, legendy = None, marker_sizes=[15]*5)
#### Exercise 9.18 (d) Order Selection names = ['Eout', 'Ein', 'Ecv', 'Eperm', 'Efpe', 'Evc'] regs = [0, reg_in, reg_cv, reg_perm, reg_fpe, reg_vc] orders = [b_out, b_in, b_cv, b_perm, b_fpe, b_vc] df = pd.DataFrame({'name':names, 'Regret': regs, 'Avg. Order': orders}) df
#### Exercise 9.18 (b) def run_one_experiment_918_b(N, test_N, max_target_degree, order, lambdas): sigma_square = data.generate_random_numbers01(1, 1) Qf = np.random.choice(max_target_degree+1, 1) normalized_aqs = data.generate_target_coefficients(Qf) train_xs, train_ys = data.generate_data_set(N, normalized_aqs, sigma_square) test_xs, test_ys = data.generate_data_set(test_N, normalized_aqs, sigma_square) #print(f"sigma_squre = {sigma_square}, Qf={Qf}, order = {order}") Eins, Eouts = [], [] Est_vcs, Est_loocvs, Est_perms, Est_fpes = [], [], [], [] for lambda_t in lambdas: Ein, Eout = calc_Eout_ex_918(order, train_xs, train_ys, test_xs, test_ys, lambda_t) Z = data.polynomial_transform(order, train_xs) H = lm.compute_H(Z, lambda_t) #full H, Need use transformed Z to compute y_hat = lm.compute_y_hat(H, train_ys) #print(f"lambda = {lambda_t}, Ein = {Ein}, Eout = {Eout}") eff_dof = lm.calc_effective_dof(H) #effective degree of freedom #print(f"effective dof: {eff_dof}") if eff_dof <= 1.0e-18: eff_dof = 1.0e-18 est_vc = calc_vc_estimate(N, eff_dof, Ein) est_loocv = compute_leave_one_out_estimate(H, y_hat, train_ys) est_perm = calc_permutation_estimate(eff_dof, train_ys, Ein) est_fpe = calc_fpe_estimate(N, eff_dof, Ein) Eins.append(Ein) Eouts.append(Eout) Est_vcs.append(est_vc) Est_loocvs.append(est_loocv) Est_perms.append(est_perm) Est_fpes.append(est_fpe) return Eouts, Est_vcs, Est_loocvs, Est_perms, Est_fpes, Eins
N = 100 test_N = 100 max_target_degree = 10 hypo_degree = 5 max_exps = 10000 Eins, Eouts = [], [] Est_vcs, Est_loocvs, Est_perms, Est_fpes = [], [], [], [] best_outs, best_ins, best_vcs, best_cvs, best_perms, best_fpes = [], [], [], [], [], [] reg_ins, reg_vcs, reg_cvs, reg_perms, reg_fpes = [], [], [], [], [] lambdas = np.hstack([np.arange(0, 0.01, 0.001), np.arange(0.01, 0.1, 0.01), np.arange(0.1, 1, 0.1), np.arange(1, 15, 1), np.arange(10, 310, 10)]) for e in np.arange(max_exps): if e %1000 == 1: print(f"--- Current iteration : {e}") Eouts1, Est_vcs1, Est_loocvs1, Est_perms1, Est_fpes1, Eins1 = run_one_experiment_918_b(N, test_N, max_target_degree, hypo_degree, lambdas) best_out = np.argmin(np.array(Eouts1)) optimal = np.min(np.array(Eouts1)) best_in = np.argmin(np.array(Eins1)) best_vc = np.argmin(np.array(Est_vcs1)) best_cv = np.argmin(np.array(Est_loocvs1)) best_perm = np.argmin(np.array(Est_perms1)) best_fpe = np.argmin(np.array(Est_fpes1)) best_outs.append(lambdas[best_out]) best_ins.append(lambdas[best_in]) best_vcs.append(lambdas[best_vc]) best_cvs.append(lambdas[best_cv]) best_perms.append(lambdas[best_perm]) best_fpes.append(lambdas[best_fpe]) regret_out = 0 regret_in = (Eouts1[best_in] - optimal)/optimal regret_vc = (Eouts1[best_vc] - optimal)/optimal regret_cv = (Eouts1[best_cv] - optimal)/optimal regret_perm = (Eouts1[best_perm] - optimal)/optimal regret_fpe = (Eouts1[best_fpe] - optimal)/optimal reg_ins.append(regret_in) reg_vcs.append(regret_vc) reg_cvs.append(regret_cv) reg_perms.append(regret_perm) reg_fpes.append(regret_fpe) Eouts.append(Eouts1) Est_vcs.append(Est_vcs1) Est_loocvs.append(Est_loocvs1) Est_perms.append(Est_perms1) Est_fpes.append(Est_fpes1) Eouts = np.mean(np.array(Eouts), axis=0) Est_vcs = np.mean(np.array(Est_vcs), axis=0) Est_loocvs = np.mean(np.array(Est_loocvs), axis=0) Est_perms = np.mean(np.array(Est_perms), axis=0) Est_fpes = np.mean(np.array(Est_fpes), axis=0) b_out = np.mean(np.array(best_outs)) b_in = np.mean(np.array(best_ins)) b_vc = np.mean(np.array(best_vcs)) b_cv = np.mean(np.array(best_cvs)) b_perm = np.mean(np.array(best_perms)) b_fpe = np.mean(np.array(best_fpes)) reg_in = np.mean(np.array(reg_ins)) reg_vc = np.mean(np.array(reg_vcs)) reg_cv = np.mean(np.array(reg_cvs)) reg_perm = np.mean(np.array(reg_perms)) reg_fpe = np.mean(np.array(reg_fpes))
--- Current iteration : 1 --- Current iteration : 1001 --- Current iteration : 2001 --- Current iteration : 3001 --- Current iteration : 4001 --- Current iteration : 5001 --- Current iteration : 6001 --- Current iteration : 7001 --- Current iteration : 8001 --- Current iteration : 9001
yub = 1.5 Eouts_f = np.clip(Eouts, 0, yub) Est_loocvs_f = np.clip(Est_loocvs, 0, yub) xxs = [lambdas, lambdas, lambdas, lambdas, lambdas] yys = [Eouts_f, Est_vcs, Est_loocvs_f, Est_perms, Est_fpes] myplot.plt_plot(xxs, yys, 'plot', ['k', 'g', 'r', 'b', 'y'], ['o', 'x', '.', ',', 's'], ['Out-of-Sample Error', 'VC Penalty', 'LOO-CV', 'Permutation Estimate', 'FPE'], title = 'Exercise 9.18 (b) E_out/Validation Estimates vs. Regularization', yscale = None, ylb = 0, yub = yub, xlb = None, xub = None, xlabel = 'Regularization Parameter lambda', ylabel = 'E_out/Validation Estimates', legends = ['']*6, legendx = None, legendy = None, marker_sizes=[1]*5)
#### Exercise 9.18 (d) Lambda Selection names = ['Eout', 'Ein', 'Ecv', 'Eperm', 'Efpe', 'Evc'] regs = [0, reg_in, reg_cv, reg_perm, reg_fpe, reg_vc] avg_lambdas = [b_out, b_in, b_cv, b_perm, b_fpe, b_vc] df = pd.DataFrame({'name':names, 'Regret': regs, 'Avg. Lambda': avg_lambdas}) df
(b)
Somehow I can’t get the same graph as in Figure 9.8(b), not sure where I did wrong. :(
(c)
The VC estimate is weaker but sufficient.