From f1f1bde2fbe6965710ae2935cd4657fb25d54f0d Mon Sep 17 00:00:00 2001
From: iliya <iliya.saroukhanian@etu.hesge.ch>
Date: Sun, 5 May 2024 19:40:38 +0200
Subject: [PATCH] fix: make momentum and Nesterov gradient descent work

---
 gd.py | 55 +++++++++++++++++++++++++++----------------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/gd.py b/gd.py
index ae836bf..1f1b4fb 100644
--- a/gd.py
+++ b/gd.py
@@ -32,8 +32,9 @@ def base_gd(f: Function, init_pt: list[float], lr: float) -> \
 
     f_call = callable_func(f)
 
-    # while np.linalg.norm(grad) > 1e-6:
     while iter < 1e4:
+        if np.linalg.norm(grad) < 1e-5:
+            break
         grad = np.array([partialx(x, y), partialy(x, y)])
         df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
 
@@ -59,20 +60,20 @@ def momentum_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
 
     f_call = callable_func(f)
 
-    prev_step = np.array([0, 0])
-    curr_step = np.array([0, 0])
+    step = np.array([0, 0])
 
-    # while np.linalg.norm(grad) > 1e-6:
     while iter < 1e4:
+        if np.linalg.norm(grad) < 1e-5:
+            break
+
         grad = np.array([partialx(x, y), partialy(x, y)])
         df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
 
-        curr_step = -momentum * prev_step - lr * grad
+        step = momentum * step + lr * grad
 
-        x += curr_step[0]
-        y += curr_step[1]
+        x -= step[0]
+        y -= step[1]
 
-        prev_step = curr_step
         iter += 1
 
     return df
@@ -90,24 +91,22 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
 
     f_call = callable_func(f)
 
-    prev_step = np.array([0, 0])
-    curr_step = np.array([0, 0])
+    step = np.array([0, 0])
 
-    # while np.linalg.norm(grad) > 1e-6:
     while iter < 1e4:
         grad = np.array([partialx(x, y), partialy(x, y)])
-        df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
+        if np.linalg.norm(grad) < 1e-5:
+            break
 
-        momentum_prev = -momentum * prev_step
-        offset_grad = np.array([partialx(x - momentum_prev[0], y - momentum_prev[1]),
-                               partialy(x - momentum_prev[0], y - momentum_prev[1])])
+        grad_with_prev_step = np.array([partialx(x - step[0], y - step[1]),
+                                        partialy(x - step[0], y - step[1])])
+        df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
 
-        curr_step = momentum_prev - lr * offset_grad
+        step = momentum * step + lr * grad_with_prev_step
 
-        x += curr_step[0]
-        y += curr_step[1]
+        x -= step[0]
+        y -= step[1]
 
-        prev_step = curr_step
         iter += 1
 
     return df
@@ -116,7 +115,7 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
 if __name__ == "__main__":
     x, y = symbols('x y')
 
-    # f: Function = x**2 + 5 * y**2
+    f: Function = x**2 + 6 * y**2
     # f: Function = 1 - exp(-10 * x**2 - y**2)
     # f: Function = x**2 * y - 2 * x * y**3 + 3 * x * y + 4
 
@@ -131,21 +130,21 @@ if __name__ == "__main__":
     # f: Function = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2
 
     # Ackley(x, y)
-    f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \
-        exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20
+    # f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \
+    #     exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20
 
     f_call = callable_func(f)
 
-    LR = 1e-1
-    MOMENTUM = 1e-1
+    LR = 1e-2
+    MOMENTUM = 0.9
 
-    plot_range = (10, 10)
+    plot_range = (30, 30)
 
     # init_pt = [9, -8]
-    # init_pt = [1, 1]
+    init_pt = [20, 30]
 
-    init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1),
-                        np.random.randint(-plot_range[1], plot_range[1] + 1)])
+    # init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1),
+    #                     np.random.randint(-plot_range[1], plot_range[1] + 1)])
 
     base = base_gd(f, init_pt, LR)
     momentum = momentum_gd(f, init_pt, LR, MOMENTUM)
-- 
GitLab