From 2e555b5b79df100a3731eaa14b8f4cadc7b81c19 Mon Sep 17 00:00:00 2001
From: "iliya.saroukha" <iliya.saroukhanian@etu.hesge.ch>
Date: Fri, 18 Apr 2025 17:27:24 +0200
Subject: [PATCH] fix: cleaned up impl

---
 dyn_huffman.py | 194 +++++++++++++++++++++++++++++--------------------
 1 file changed, 117 insertions(+), 77 deletions(-)

diff --git a/dyn_huffman.py b/dyn_huffman.py
index e7e737b..84e383a 100644
--- a/dyn_huffman.py
+++ b/dyn_huffman.py
@@ -7,16 +7,43 @@ import argparse
 import string
 
 LIST_ALPHABET = list(string.ascii_lowercase) + list(string.digits)
-# LIST_ALPHABET = list(string.ascii_lowercase)
 DICT_ALPHABET = {item: index + 1 for index, item in enumerate(LIST_ALPHABET)}
 
+
+def compute_vitter_params(m: int) -> tuple[int, int]:
+    e = math.floor(np.log2(m))
+    r = m - (2 ** e)
+    return (e, r)
+
+
+def compute_k(char: str) -> int:
+    return DICT_ALPHABET[char]
+
+
+def compute_code(char: str):
+    e, r = compute_vitter_params(len(LIST_ALPHABET))
+    k_char = compute_k(char)
+
+    if k_char <= 2*r:
+        bin_val = k_char - 1
+        n_bits = e + 1
+        return bin(bin_val)[2:].zfill(n_bits)
+    else:
+        bin_val = k_char - r - 1
+        return bin(bin_val)[2:].zfill(e)
+
+
+FIXED_CODES = {compute_code(item): item
+               for item in LIST_ALPHABET}
+
 swap_count = 0
 
+
 @dataclass
 class Node:
     weight: int = 0
     nyt_code: str | None = None
-    value: str | None = "NYT"
+    value: str = "NYT"
     left: Node | None = None
     right: Node | None = None
 
@@ -25,45 +52,27 @@ def swap(tree: Node):
     global swap_count
     if tree.left and tree.right:
         if tree.left.weight > tree.right.weight:
-            # print(f'({tree.left.weight}, {tree.right.weight})')
             swap_count += 1
             tmp = tree.right
             tree.right = tree.left
             tree.left = tmp
 
 
-# def compute_prefix(tree: Node, char: str) -> str:
-#     prefix = ''
-#
-#     if tree:
-#         if tree.value == char:
-#             return ''
-#
-#         if tree.left:
-#             prefix = '0' + compute_prefix(tree.left, char)
-#
-#         if tree.right:
-#             prefix = '1' + compute_prefix(tree.right, char)
-#
-#     return prefix
-
 def compute_prefix(tree: Node, char: str) -> str:
     if tree.value == char:
         return ''
-    
+
     if tree.left:
         left_result = compute_prefix(tree.left, char)
         if left_result is not None:
             return '0' + left_result
-    
+
     if tree.right:
         right_result = compute_prefix(tree.right, char)
         if right_result is not None:
             return '1' + right_result
 
-    return None  # char not found in this subtree
-
-
+    return None
 
 
 def find_char(tree: Node, char: str) -> Node | None:
@@ -82,21 +91,25 @@ def find_char(tree: Node, char: str) -> Node | None:
                 return n
 
 
-
-def insert_char(tree: Node, char: str, nyt_code = ''):
+def insert_char(tree: Node, char: str, nyt_code=''):
     if tree.left:
         if tree.left.value == char:
             tree.left.weight += 1
             tree.weight += 1
             return
 
-    # Leaf case
-    if tree.left == None and tree.right == None:
+    if tree.right:
+        if tree.right.value == char:
+            tree.right.weight += 1
+            tree.weight += 1
+            return
+
+    if tree.left is None and tree.right is None:
         tree.left = Node()
         tree.right = Node(value=char, weight=1, nyt_code=nyt_code)
         tree.weight += 1
         return
-    
+
     if tree.left:
         if tree.left.value == 'NYT':
             insert_char(tree.left, char, nyt_code + '0')
@@ -108,35 +121,18 @@ def insert_char(tree: Node, char: str, nyt_code = ''):
     swap(tree)
 
 
-def compute_vitter_params(m: int) -> tuple[int, int]:
-    e = math.floor(np.log2(m))
-    r = m - (2 ** e)
-    return (e, r)
-
-
-def compute_k(char: str) -> int:
-    return DICT_ALPHABET[char]
-
-
-def compute_code(char: str):
+def compute_char(fixed: str) -> int:
     e, r = compute_vitter_params(len(LIST_ALPHABET))
-    k_char = compute_k(char)
 
-    if k_char <= 2*r:
-        bin_val = k_char - 1
-        n_bits = e + 1
-        return bin(bin_val)[2:].zfill(n_bits)
-    else:
-        bin_val = k_char - r - 1
-        return bin(bin_val)[2:].zfill(e)
+    return int(fixed, 2)
 
 
-def encode(string: str) -> str:
+def encode(input: str) -> str:
     tree = Node()
     encoded: list[str] = []
     seen: set[str] = set()
 
-    for c in string:
+    for c in input:
         insert_char(tree, c)
         prefix = find_char(tree, c)
 
@@ -145,45 +141,89 @@ def encode(string: str) -> str:
             encoded.append(prefix.nyt_code)
             encoded.append(compute_code(c))
         else:
-            print(f'{c} already seen')
             encoded.append(compute_prefix(tree, c))
-
+        pprint.pp(tree)
 
     pprint.pp(tree)
+
     print(f'Nombre de swaps: {swap_count}')
 
     return ' '.join(encoded)[1:]
 
 
-def main(string: str) -> None:
-    msg = encode(string)
-    print(f'Code final: {msg}')
-
-# def main(string: str) -> None:
-#     tree = Node()
-#     fixed_code = []
-#
-#     for c in string:
-#         fixed_code.append(compute_code(c))
-#
-#     pprint.pp(fixed_code)
-#
-#     for c in string:
-#         pprint.pp(tree)
-#         print(f'\n====== Inserting \'{c}\' ======\n')
-#         insert_char(tree, c)
-#
-#     pprint.pp(tree)
-#
-#     n = find_char(tree, 'o')
-#
-#     pprint.pp(n)
+def decode(encoded: str) -> tuple[str, str]:
+    bits = encoded.replace(' ', '')
+    tree = Node()
+    result = ''
+    seen = set()
+    i = 0
+
+    message_with_nyt: list[str] = []
+    message_without_nyt: list[str] = []
+
+    while i < len(bits):
+        node = tree
+
+        while node.left or node.right:
+            if bits[i] == '0':
+                node = node.left
+            else:
+                node = node.right
+            i += 1
+
+        if node.value == 'NYT':
+            e, r = compute_vitter_params(len(LIST_ALPHABET))
+            prefix_bits = bits[i:i+e]
+            val = int(prefix_bits, 2)
+
+            if val < r:
+                code_bits = bits[i:i+e+1]
+                k = int(code_bits, 2) + 1
+                i += e + 1
+            else:
+                k = val + r + 1
+                i += e
+
+            char = LIST_ALPHABET[k - 1]
+            result += char
+            seen.add(char)
+            insert_char(tree, char)
+
+            message_with_nyt.append('NYT')
+            message_with_nyt.append(char)
+            message_without_nyt.append(char)
 
+        else:
+            char = node.value
+            result += char
+            insert_char(tree, char)
 
-if __name__ == "__main__":
+            message_with_nyt.append(char)
+            message_without_nyt.append(char)
+        pprint.pp(tree)
+
+    pprint.pp(tree)
+
+    return ' '.join(message_with_nyt[1:]), ''.join(message_without_nyt)
+
+
+def main() -> None:
     parser = argparse.ArgumentParser(prog='dyn_huffman.py')
-    parser.add_argument('string')
+    parser.add_argument('input')
 
     args = parser.parse_args()
 
-    main(args.string)
+    print('=============== Encoding ===============')
+    encoded_input = encode(args.input)
+    print(f'Code final: {encoded_input}')
+
+    print()
+
+    print('=============== Decoding ===============')
+    with_nyt, without_nyt = decode(encoded_input)
+    print(f'Message initial avec NYT : {with_nyt}')
+    print(f'Message initial sans NYT : {without_nyt}')
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab