fix: make momentum and Nesterov gradient descent work

f1f1bde2 · iliya.saroukha · 185c8f14 · f1f1bde2
Verified Commit f1f1bde2 authored 1 year ago by iliya.saroukha
--- a/gd.py
+++ b/gd.py
@@ -32,8 +32,9 @@ def base_gd(f: Function, init_pt: list[float], lr: float) -> \
    f_call = callable_func(f)
-    # while np.linalg.norm(grad) > 1e-6:
    while iter < 1e4:
+        if np.linalg.norm(grad) < 1e-5:
+            break
        grad = np.array([partialx(x, y), partialy(x, y)])
        df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
@@ -59,20 +60,20 @@ def momentum_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
    f_call = callable_func(f)
-    prev_step = np.array([0, 0])
+    step = np.array([0, 0])
-    curr_step = np.array([0, 0])
-    # while np.linalg.norm(grad) > 1e-6:
    while iter < 1e4:
+        if np.linalg.norm(grad) < 1e-5:
+            break
        grad = np.array([partialx(x, y), partialy(x, y)])
        df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
-        curr_step = -momentum * prev_step - lr * grad
+        step = momentum * step + lr * grad
-        x += curr_step[0]
+        x -= step[0]
-        y += curr_step[1]
+        y -= step[1]
-        prev_step = curr_step
        iter += 1
    return df
@@ -90,24 +91,22 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
    f_call = callable_func(f)
-    prev_step = np.array([0, 0])
+    step = np.array([0, 0])
-    curr_step = np.array([0, 0])
-    # while np.linalg.norm(grad) > 1e-6:
    while iter < 1e4:
        grad = np.array([partialx(x, y), partialy(x, y)])
-        df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
+        if np.linalg.norm(grad) < 1e-5:
+            break
-        momentum_prev = -momentum * prev_step
+        grad_with_prev_step = np.array([partialx(x - step[0], y - step[1]),
-        offset_grad = np.array([partialx(x - momentum_prev[0], y - momentum_prev[1]),
+                                        partialy(x - step[0], y - step[1])])
-                               partialy(x - momentum_prev[0], y - momentum_prev[1])])
+        df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
-        curr_step = momentum_prev - lr * offset_grad
+        step = momentum * step + lr * grad_with_prev_step
-        x += curr_step[0]
+        x -= step[0]
-        y += curr_step[1]
+        y -= step[1]
-        prev_step = curr_step
        iter += 1
    return df
@@ -116,7 +115,7 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
 if __name__ == "__main__":
    x, y = symbols('x y')
-    # f: Function = x**2 + 5 * y**2
+    f: Function = x**2 + 6 * y**2
    # f: Function = 1 - exp(-10 * x**2 - y**2)
    # f: Function = x**2 * y - 2 * x * y**3 + 3 * x * y + 4
@@ -131,21 +130,21 @@ if __name__ == "__main__":
    # f: Function = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2
    # Ackley(x, y)
-    f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \
+    # f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \
-        exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20
+    #     exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20
    f_call = callable_func(f)
-    LR = 1e-1
+    LR = 1e-2
-    MOMENTUM = 1e-1
+    MOMENTUM = 0.9
-    plot_range = (10, 10)
+    plot_range = (30, 30)
    # init_pt = [9, -8]
-    # init_pt = [1, 1]
+    init_pt = [20, 30]
-    init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1),
+    # init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1),
-                        np.random.randint(-plot_range[1], plot_range[1] + 1)])
+    #                     np.random.randint(-plot_range[1], plot_range[1] + 1)])
    base = base_gd(f, init_pt, LR)
    momentum = momentum_gd(f, init_pt, LR, MOMENTUM)