Skip to content
Snippets Groups Projects
Verified Commit f1f1bde2 authored by iliya.saroukha's avatar iliya.saroukha :first_quarter_moon:
Browse files

fix: momentum and nesterov working

parent 185c8f14
Branches
No related tags found
No related merge requests found
......@@ -32,8 +32,9 @@ def base_gd(f: Function, init_pt: list[float], lr: float) -> \
f_call = callable_func(f)
# while np.linalg.norm(grad) > 1e-6:
while iter < 1e4:
if np.linalg.norm(grad) < 1e-5:
break
grad = np.array([partialx(x, y), partialy(x, y)])
df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
......@@ -59,20 +60,20 @@ def momentum_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
f_call = callable_func(f)
prev_step = np.array([0, 0])
curr_step = np.array([0, 0])
step = np.array([0, 0])
# while np.linalg.norm(grad) > 1e-6:
while iter < 1e4:
if np.linalg.norm(grad) < 1e-5:
break
grad = np.array([partialx(x, y), partialy(x, y)])
df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
curr_step = -momentum * prev_step - lr * grad
step = momentum * step + lr * grad
x += curr_step[0]
y += curr_step[1]
x -= step[0]
y -= step[1]
prev_step = curr_step
iter += 1
return df
......@@ -90,24 +91,22 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
f_call = callable_func(f)
prev_step = np.array([0, 0])
curr_step = np.array([0, 0])
step = np.array([0, 0])
# while np.linalg.norm(grad) > 1e-6:
while iter < 1e4:
grad = np.array([partialx(x, y), partialy(x, y)])
df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
if np.linalg.norm(grad) < 1e-5:
break
momentum_prev = -momentum * prev_step
offset_grad = np.array([partialx(x - momentum_prev[0], y - momentum_prev[1]),
partialy(x - momentum_prev[0], y - momentum_prev[1])])
grad_with_prev_step = np.array([partialx(x - step[0], y - step[1]),
partialy(x - step[0], y - step[1])])
df.loc[iter] = [x, y, f_call(x, y), np.linalg.norm(grad)]
curr_step = momentum_prev - lr * offset_grad
step = momentum * step + lr * grad_with_prev_step
x += curr_step[0]
y += curr_step[1]
x -= step[0]
y -= step[1]
prev_step = curr_step
iter += 1
return df
......@@ -116,7 +115,7 @@ def nesterov_gd(f: Function, init_pt: list[float], lr: float, momentum: float)\
if __name__ == "__main__":
x, y = symbols('x y')
# f: Function = x**2 + 5 * y**2
f: Function = x**2 + 6 * y**2
# f: Function = 1 - exp(-10 * x**2 - y**2)
# f: Function = x**2 * y - 2 * x * y**3 + 3 * x * y + 4
......@@ -131,21 +130,21 @@ if __name__ == "__main__":
# f: Function = (x + 2 * y - 7)**2 + (2 * x + y - 5)**2
# Ackley(x, y)
f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \
exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20
# f: Function = -20.0 * exp(-0.2 * sqrt(0.5 * (x**2 + y**2))) - \
# exp(0.5 * (cos(2 * pi * x) + cos(2 * pi * y))) + exp(1) + 20
f_call = callable_func(f)
LR = 1e-1
MOMENTUM = 1e-1
LR = 1e-2
MOMENTUM = 0.9
plot_range = (10, 10)
plot_range = (30, 30)
# init_pt = [9, -8]
# init_pt = [1, 1]
init_pt = [20, 30]
init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1),
np.random.randint(-plot_range[1], plot_range[1] + 1)])
# init_pt = np.array([np.random.randint(-plot_range[0], plot_range[0] + 1),
# np.random.randint(-plot_range[1], plot_range[1] + 1)])
base = base_gd(f, init_pt, LR)
momentum = momentum_gd(f, init_pt, LR, MOMENTUM)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment