From 2e555b5b79df100a3731eaa14b8f4cadc7b81c19 Mon Sep 17 00:00:00 2001 From: "iliya.saroukha" <iliya.saroukhanian@etu.hesge.ch> Date: Fri, 18 Apr 2025 17:27:24 +0200 Subject: [PATCH] fix: cleaned up impl --- dyn_huffman.py | 194 +++++++++++++++++++++++++++++-------------------- 1 file changed, 117 insertions(+), 77 deletions(-) diff --git a/dyn_huffman.py b/dyn_huffman.py index e7e737b..84e383a 100644 --- a/dyn_huffman.py +++ b/dyn_huffman.py @@ -7,16 +7,43 @@ import argparse import string LIST_ALPHABET = list(string.ascii_lowercase) + list(string.digits) -# LIST_ALPHABET = list(string.ascii_lowercase) DICT_ALPHABET = {item: index + 1 for index, item in enumerate(LIST_ALPHABET)} + +def compute_vitter_params(m: int) -> tuple[int, int]: + e = math.floor(np.log2(m)) + r = m - (2 ** e) + return (e, r) + + +def compute_k(char: str) -> int: + return DICT_ALPHABET[char] + + +def compute_code(char: str): + e, r = compute_vitter_params(len(LIST_ALPHABET)) + k_char = compute_k(char) + + if k_char <= 2*r: + bin_val = k_char - 1 + n_bits = e + 1 + return bin(bin_val)[2:].zfill(n_bits) + else: + bin_val = k_char - r - 1 + return bin(bin_val)[2:].zfill(e) + + +FIXED_CODES = {compute_code(item): item for index, + item in enumerate(LIST_ALPHABET)} + swap_count = 0 + @dataclass class Node: weight: int = 0 nyt_code: str | None = None - value: str | None = "NYT" + value: str = "NYT" left: Node | None = None right: Node | None = None @@ -25,45 +52,27 @@ def swap(tree: Node): global swap_count if tree.left and tree.right: if tree.left.weight > tree.right.weight: - # print(f'({tree.left.weight}, {tree.right.weight})') swap_count += 1 tmp = tree.right tree.right = tree.left tree.left = tmp -# def compute_prefix(tree: Node, char: str) -> str: -# prefix = '' -# -# if tree: -# if tree.value == char: -# return '' -# -# if tree.left: -# prefix = '0' + compute_prefix(tree.left, char) -# -# if tree.right: -# prefix = '1' + compute_prefix(tree.right, char) -# -# return prefix - def compute_prefix(tree: Node, char: str) -> str: if tree.value == char: return '' - + if tree.left: left_result = compute_prefix(tree.left, char) if left_result is not None: return '0' + left_result - + if tree.right: right_result = compute_prefix(tree.right, char) if right_result is not None: return '1' + right_result - return None # char not found in this subtree - - + return None def find_char(tree: Node, char: str) -> Node | None: @@ -82,21 +91,25 @@ def find_char(tree: Node, char: str) -> Node | None: return n - -def insert_char(tree: Node, char: str, nyt_code = ''): +def insert_char(tree: Node, char: str, nyt_code=''): if tree.left: if tree.left.value == char: tree.left.weight += 1 tree.weight += 1 return - # Leaf case - if tree.left == None and tree.right == None: + if tree.right: + if tree.right.value == char: + tree.right.weight += 1 + tree.weight += 1 + return + + if tree.left is None and tree.right is None: tree.left = Node() tree.right = Node(value=char, weight=1, nyt_code=nyt_code) tree.weight += 1 return - + if tree.left: if tree.left.value == 'NYT': insert_char(tree.left, char, nyt_code + '0') @@ -108,35 +121,18 @@ def insert_char(tree: Node, char: str, nyt_code = ''): swap(tree) -def compute_vitter_params(m: int) -> tuple[int, int]: - e = math.floor(np.log2(m)) - r = m - (2 ** e) - return (e, r) - - -def compute_k(char: str) -> int: - return DICT_ALPHABET[char] - - -def compute_code(char: str): +def compute_char(fixed: str) -> str: e, r = compute_vitter_params(len(LIST_ALPHABET)) - k_char = compute_k(char) - if k_char <= 2*r: - bin_val = k_char - 1 - n_bits = e + 1 - return bin(bin_val)[2:].zfill(n_bits) - else: - bin_val = k_char - r - 1 - return bin(bin_val)[2:].zfill(e) + return int(fixed, 2) -def encode(string: str) -> str: +def encode(input: str) -> str: tree = Node() encoded: list[str] = [] seen: set[str] = set() - for c in string: + for c in input: insert_char(tree, c) prefix = find_char(tree, c) @@ -145,45 +141,89 @@ def encode(string: str) -> str: encoded.append(prefix.nyt_code) encoded.append(compute_code(c)) else: - print(f'{c} already seen') encoded.append(compute_prefix(tree, c)) - + pprint.pp(tree) pprint.pp(tree) + print(f'Nombre de swaps: {swap_count}') return ' '.join(encoded)[1:] -def main(string: str) -> None: - msg = encode(string) - print(f'Code final: {msg}') - -# def main(string: str) -> None: -# tree = Node() -# fixed_code = [] -# -# for c in string: -# fixed_code.append(compute_code(c)) -# -# pprint.pp(fixed_code) -# -# for c in string: -# pprint.pp(tree) -# print(f'\n====== Inserting \'{c}\' ======\n') -# insert_char(tree, c) -# -# pprint.pp(tree) -# -# n = find_char(tree, 'o') -# -# pprint.pp(n) +def decode(encoded: str) -> (str, str): + bits = encoded.replace(' ', '') + tree = Node() + result = '' + seen = set() + i = 0 + + message_with_nyt: list[str] = [] + message_without_nyt: list[str] = [] + + while i < len(bits): + node = tree + + while node.left or node.right: + if bits[i] == '0': + node = node.left + else: + node = node.right + i += 1 + + if node.value == 'NYT': + e, r = compute_vitter_params(len(LIST_ALPHABET)) + prefix_bits = bits[i:i+e] + val = int(prefix_bits, 2) + + if val < r: + code_bits = bits[i:i+e+1] + k = int(code_bits, 2) + 1 + i += e + 1 + else: + k = val + r + 1 + i += e + + char = LIST_ALPHABET[k - 1] + result += char + seen.add(char) + insert_char(tree, char) + + message_with_nyt.append('NYT') + message_with_nyt.append(char) + message_without_nyt.append(char) + else: + char = node.value + result += char + insert_char(tree, char) -if __name__ == "__main__": + message_with_nyt.append(char) + message_without_nyt.append(char) + pprint.pp(tree) + + pprint.pp(tree) + + return ' '.join(message_with_nyt[1:]), ''.join(message_without_nyt) + + +def main() -> None: parser = argparse.ArgumentParser(prog='dyn_huffman.py') - parser.add_argument('string') + parser.add_argument('input') args = parser.parse_args() - main(args.string) + print('=============== Encoding ===============') + encoded_input = encode(args.input) + print(f'Code final: {encoded_input}') + + print() + + print('=============== Decoding ===============') + with_nyt, without_nyt = decode(encoded_input) + print(f'Message initial avec NYT : {with_nyt}') + print(f'Message initial sans NYT : {without_nyt}') + + +if __name__ == "__main__": + main() -- GitLab