Skip to content
Snippets Groups Projects
Verified Commit 2e555b5b authored by iliya.saroukha's avatar iliya.saroukha :first_quarter_moon:
Browse files

fix: cleaned up impl

parent fcad2681
No related branches found
No related tags found
No related merge requests found
...@@ -7,16 +7,43 @@ import argparse ...@@ -7,16 +7,43 @@ import argparse
import string import string
# Ordered source alphabet: the lowercase ASCII letters followed by the digits.
LIST_ALPHABET = list(string.ascii_lowercase + string.digits)

# 1-based rank of every symbol of LIST_ALPHABET.
DICT_ALPHABET = {item: rank for rank, item in enumerate(LIST_ALPHABET, start=1)}
def compute_vitter_params(m: int) -> tuple[int, int]:
    """Split an alphabet size into the parameters ``(e, r)``.

    ``e`` is the largest integer with ``2**e <= m`` and ``r`` is the
    remainder, so that ``m == 2**e + r`` and ``0 <= r < 2**e``.

    Args:
        m: Number of symbols in the alphabet; must be at least 1.

    Returns:
        The tuple ``(e, r)``.

    Raises:
        ValueError: If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError(f'alphabet size must be at least 1, got {m}')

    # bit_length() is exact integer arithmetic; a floating-point log2 can
    # round up to the next integer for values just below a power of two.
    e = m.bit_length() - 1
    r = m - (1 << e)

    return (e, r)
def compute_k(char: str) -> int:
    """Return the 1-based rank of ``char`` as stored in ``DICT_ALPHABET``.

    Raises:
        KeyError: If ``char`` is not a key of ``DICT_ALPHABET``.
    """
    return DICT_ALPHABET[char]
def compute_code(char: str) -> str:
    """Return the fixed binary code of ``char`` as a string of '0'/'1'.

    With ``(e, r) = compute_vitter_params(len(LIST_ALPHABET))`` and ``k`` the
    1-based rank of ``char``:

    * if ``k <= 2 * r`` the code is ``k - 1`` written on ``e + 1`` bits;
    * otherwise the code is ``k - r - 1`` written on ``e`` bits.

    Raises:
        KeyError: If ``char`` is not part of the alphabet (from ``compute_k``).
    """
    e, r = compute_vitter_params(len(LIST_ALPHABET))
    k_char = compute_k(char)

    if k_char <= 2 * r:
        # Low ranks get the longer, (e + 1)-bit code.
        return bin(k_char - 1)[2:].zfill(e + 1)

    # Remaining ranks get the shorter, e-bit code.
    return bin(k_char - r - 1)[2:].zfill(e)
# Reverse lookup table: fixed binary code -> symbol of LIST_ALPHABET.
FIXED_CODES = {compute_code(item): item for item in LIST_ALPHABET}
# Module-wide counter incremented by swap() every time it exchanges children.
swap_count = 0
@dataclass
class Node:
    """Node of the binary coding tree.

    A freshly constructed node has weight 0, no children and the value
    ``"NYT"``.
    """

    # Occurrence count carried by the node.
    weight: int = 0
    # Bit path recorded by insert_char() when the leaf is created.
    nyt_code: str | None = None
    # Symbol stored in the node; "NYT" is the default placeholder value.
    value: str = "NYT"
    # Child selected by a '0' bit.
    left: Node | None = None
    # Child selected by a '1' bit.
    right: Node | None = None
...@@ -25,45 +52,27 @@ def swap(tree: Node): ...@@ -25,45 +52,27 @@ def swap(tree: Node):
global swap_count global swap_count
if tree.left and tree.right: if tree.left and tree.right:
if tree.left.weight > tree.right.weight: if tree.left.weight > tree.right.weight:
# print(f'({tree.left.weight}, {tree.right.weight})')
swap_count += 1 swap_count += 1
tmp = tree.right tmp = tree.right
tree.right = tree.left tree.right = tree.left
tree.left = tmp tree.left = tmp
# def compute_prefix(tree: Node, char: str) -> str:
# prefix = ''
#
# if tree:
# if tree.value == char:
# return ''
#
# if tree.left:
# prefix = '0' + compute_prefix(tree.left, char)
#
# if tree.right:
# prefix = '1' + compute_prefix(tree.right, char)
#
# return prefix
def compute_prefix(tree: Node, char: str) -> str | None:
    """Return the bit path from ``tree`` to the first node holding ``char``.

    The search is depth-first, left subtree before right subtree. Each step
    to a left child contributes '0' and each step to a right child
    contributes '1'.

    Args:
        tree: Root of the (sub)tree to search.
        char: Value to look for in the ``value`` field of the nodes.

    Returns:
        The path as a string of '0'/'1' (empty when ``tree`` itself holds
        ``char``), or ``None`` when ``char`` is not in this subtree.
    """
    if tree.value == char:
        return ''

    if tree.left:
        left_result = compute_prefix(tree.left, char)
        if left_result is not None:
            return '0' + left_result

    if tree.right:
        right_result = compute_prefix(tree.right, char)
        if right_result is not None:
            return '1' + right_result

    # char was not found anywhere in this subtree.
    return None
def find_char(tree: Node, char: str) -> Node | None: def find_char(tree: Node, char: str) -> Node | None:
...@@ -82,21 +91,25 @@ def find_char(tree: Node, char: str) -> Node | None: ...@@ -82,21 +91,25 @@ def find_char(tree: Node, char: str) -> Node | None:
return n return n
def insert_char(tree: Node, char: str, nyt_code=''):
def insert_char(tree: Node, char: str, nyt_code = ''):
if tree.left: if tree.left:
if tree.left.value == char: if tree.left.value == char:
tree.left.weight += 1 tree.left.weight += 1
tree.weight += 1 tree.weight += 1
return return
# Leaf case if tree.right:
if tree.left == None and tree.right == None: if tree.right.value == char:
tree.right.weight += 1
tree.weight += 1
return
if tree.left is None and tree.right is None:
tree.left = Node() tree.left = Node()
tree.right = Node(value=char, weight=1, nyt_code=nyt_code) tree.right = Node(value=char, weight=1, nyt_code=nyt_code)
tree.weight += 1 tree.weight += 1
return return
if tree.left: if tree.left:
if tree.left.value == 'NYT': if tree.left.value == 'NYT':
insert_char(tree.left, char, nyt_code + '0') insert_char(tree.left, char, nyt_code + '0')
...@@ -108,35 +121,18 @@ def insert_char(tree: Node, char: str, nyt_code = ''): ...@@ -108,35 +121,18 @@ def insert_char(tree: Node, char: str, nyt_code = ''):
swap(tree) swap(tree)
def compute_char(fixed: str) -> str:
    """Return the alphabet symbol whose fixed code is ``fixed``.

    This is the inverse of ``compute_code``: an ``(e + 1)``-bit code holds
    ``k - 1`` and an ``e``-bit code holds ``k - r - 1``, where ``k`` is the
    1-based rank of the symbol in ``LIST_ALPHABET``.

    Args:
        fixed: Code made of '0'/'1' characters, ``e`` or ``e + 1`` bits long.

    Returns:
        The decoded symbol.

    Raises:
        ValueError: If ``fixed`` has an unexpected length, is not a binary
            string, or does not designate a symbol of the alphabet.
    """
    e, r = compute_vitter_params(len(LIST_ALPHABET))

    if len(fixed) == e + 1:
        k = int(fixed, 2) + 1
    elif len(fixed) == e:
        k = int(fixed, 2) + r + 1
    else:
        raise ValueError(
            f'fixed code must be {e} or {e + 1} bits long, got {fixed!r}')

    if not 1 <= k <= len(LIST_ALPHABET):
        raise ValueError(f'{fixed!r} does not encode a symbol of the alphabet')

    return LIST_ALPHABET[k - 1]
def encode(string: str) -> str: def encode(input: str) -> str:
tree = Node() tree = Node()
encoded: list[str] = [] encoded: list[str] = []
seen: set[str] = set() seen: set[str] = set()
for c in string: for c in input:
insert_char(tree, c) insert_char(tree, c)
prefix = find_char(tree, c) prefix = find_char(tree, c)
...@@ -145,45 +141,89 @@ def encode(string: str) -> str: ...@@ -145,45 +141,89 @@ def encode(string: str) -> str:
encoded.append(prefix.nyt_code) encoded.append(prefix.nyt_code)
encoded.append(compute_code(c)) encoded.append(compute_code(c))
else: else:
print(f'{c} already seen')
encoded.append(compute_prefix(tree, c)) encoded.append(compute_prefix(tree, c))
pprint.pp(tree)
pprint.pp(tree) pprint.pp(tree)
print(f'Nombre de swaps: {swap_count}') print(f'Nombre de swaps: {swap_count}')
return ' '.join(encoded)[1:] return ' '.join(encoded)[1:]
def decode(encoded: str) -> tuple[str, str]:
    """Decode a bit string produced by ``encode``.

    Spaces in ``encoded`` are ignored. The coding tree is rebuilt
    incrementally with ``insert_char`` while the bits are consumed, and the
    tree is pretty-printed after every decoded symbol and once at the end.

    Args:
        encoded: String of '0'/'1' characters, optionally separated by spaces.

    Returns:
        A pair ``(with_nyt, without_nyt)``: the decoded symbols joined by
        spaces with an ``NYT`` marker before every first occurrence except
        the very first symbol, and the plain decoded message.

    Raises:
        ValueError: If the input ends in the middle of a code word.
    """
    bits = encoded.replace(' ', '')
    tree = Node()
    # The alphabet never changes, so the parameters are computed only once.
    e, r = compute_vitter_params(len(LIST_ALPHABET))
    i = 0

    message_with_nyt: list[str] = []
    message_without_nyt: list[str] = []

    while i < len(bits):
        node = tree

        # Walk down the tree, one bit per level, until a leaf is reached.
        while node.left or node.right:
            if i >= len(bits):
                raise ValueError('truncated input: incomplete tree path')
            node = node.left if bits[i] == '0' else node.right
            i += 1

        if node.value == 'NYT':
            # New symbol: a fixed code of e or e + 1 bits follows.
            if len(bits) - i < e:
                raise ValueError('truncated input: incomplete fixed code')
            val = int(bits[i:i + e], 2)

            if val < r:
                # The first e bits announce an (e + 1)-bit code.
                if len(bits) - i < e + 1:
                    raise ValueError('truncated input: incomplete fixed code')
                k = int(bits[i:i + e + 1], 2) + 1
                i += e + 1
            else:
                k = val + r + 1
                i += e

            char = LIST_ALPHABET[k - 1]
            insert_char(tree, char)

            message_with_nyt.append('NYT')
            message_with_nyt.append(char)
            message_without_nyt.append(char)
        else:
            # Symbol already present in the tree: the path alone identifies it.
            char = node.value
            insert_char(tree, char)

            message_with_nyt.append(char)
            message_without_nyt.append(char)

        pprint.pp(tree)

    pprint.pp(tree)

    # The leading 'NYT' marker of the very first symbol is dropped.
    return ' '.join(message_with_nyt[1:]), ''.join(message_without_nyt)
def main() -> None:
    """Encode the command-line argument, decode it again and print both."""
    parser = argparse.ArgumentParser(prog='dyn_huffman.py')
    parser.add_argument('input')
    args = parser.parse_args()

    print('=============== Encoding ===============')
    encoded_input = encode(args.input)
    print(f'Code final: {encoded_input}')

    print()

    print('=============== Decoding ===============')
    with_nyt, without_nyt = decode(encoded_input)
    print(f'Message initial avec NYT : {with_nyt}')
    print(f'Message initial sans NYT : {without_nyt}')


if __name__ == "__main__":
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment