From f1f1bde2fbe6965710ae2935cd4657fb25d54f0d Mon Sep 17 00:00:00 2001 From: iliya <iliya.saroukhanian@etu.hesge.ch> Date: Sun, 5 May 2024 19:40:38 +0200 Subject: [PATCH] fix: momentum and nesterov working --- gd.py | 55 +++++++++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/gd.py b/gd.py index ae836bf..1f1b4fb 100644 --- a/gd.py +++ b/gd.py @@ -32,8 +32,9 @@ def base_gd(f: Function, init_pt: list[float], lr: float) -> \ f_call = callable_func(f) - # while np.linalg.norm(grad) > 1e-6: while iter < 1e4: + if np.linalg.norm(grad) < 1e-5: + break grad = np.array([partialx(x, y), partialy(x, y)]) df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)] @@ -59,20 +60,20 @@ def momentum_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\ f_call = callable_func(f) - prev_step = np.array([0, 0]) - curr_step = np.array([0, 0]) + step = np.array([0, 0]) - # while np.linalg.norm(grad) > 1e-6: while iter < 1e4: + if np.linalg.norm(grad) < 1e-5: + break + grad = np.array([partialx(x, y), partialy(x, y)]) df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)] - curr_step = -momentum * prev_step - lr * grad + step = momentum * step + lr * grad - x += curr_step[0] - y += curr_step[1] + x -= step[0] + y -= step[1] - prev_step = curr_step iter += 1 return df @@ -90,24 +91,22 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\ f_call = callable_func(f) - prev_step = np.array([0, 0]) - curr_step = np.array([0, 0]) + step = np.array([0, 0]) - # while np.linalg.norm(grad) > 1e-6: while iter < 1e4: grad = np.array([partialx(x, y), partialy(x, y)]) - df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)] + if np.linalg.norm(grad) < 1e-5: + break - momentum_prev = -momentum * prev_step - offset_grad = np.array([partialx(x - momentum_prev[0], y - momentum_prev[1]), - partialy(x - momentum_prev[0], y - momentum_prev[1])]) + grad_with_prev_step = np.array([partialx(x - step[0], y - step[1]), + partialy(x - step[0], y - step[1])]) + df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)] - curr_step = momentum_prev - lr * offset_grad + step = momentum * step + lr * grad_with_prev_step - x += curr_step[0] - y += curr_step[1] + x -= step[0] + y -= step[1] - prev_step = curr_step iter += 1 return df @@ -116,7 +115,7 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\ if __name__ == "__main__": x, y = symbols('x y') - # f: Function = x**2 + 5 * y**2 + f: Function = x**2 + 6 * y**2 # f: Function = 1 - exp(-10 * x**2 - y**2) # f: Function = x**2 * y - 2 * x * y**3 + 3 * x * y + 4 @@ -131,21 +130,21 @@ if __name__ == "__main__": # f: Function = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2 # Ackley(x, y) - f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \ - exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20 + # f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \ + # exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20 f_call = callable_func(f) - LR = 1e-1 - MOMENTUM = 1e-1 + LR = 1e-2 + MOMENTUM = 0.9 - plot_range = (10, 10) + plot_range = (30, 30) # init_pt = [9, -8] - # init_pt = [1, 1] + init_pt = [20, 30] - init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1), - np.random.randint(-plot_range[1], plot_range[1] + 1)]) + # init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1), + # np.random.randint(-plot_range[1], plot_range[1] + 1)]) base = base_gd(f, init_pt, LR) momentum = momentum_gd(f, init_pt, LR, MOMENTUM) -- GitLab