Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
dyn_huffman
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ISC3
compression
dyn_huffman
Commits
2e555b5b
Verified
Commit
2e555b5b
authored
1 month ago
by
iliya.saroukha
Browse files
Options
Downloads
Patches
Plain Diff
fix: cleaned up impl
parent
fcad2681
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
dyn_huffman.py
+117
-77
117 additions, 77 deletions
dyn_huffman.py
with
117 additions
and
77 deletions
dyn_huffman.py
+
117
−
77
View file @
2e555b5b
...
@@ -7,16 +7,43 @@ import argparse
...
@@ -7,16 +7,43 @@ import argparse
import
string
import
string
LIST_ALPHABET
=
list
(
string
.
ascii_lowercase
)
+
list
(
string
.
digits
)
LIST_ALPHABET
=
list
(
string
.
ascii_lowercase
)
+
list
(
string
.
digits
)
# LIST_ALPHABET = list(string.ascii_lowercase)
DICT_ALPHABET
=
{
item
:
index
+
1
for
index
,
item
in
enumerate
(
LIST_ALPHABET
)}
DICT_ALPHABET
=
{
item
:
index
+
1
for
index
,
item
in
enumerate
(
LIST_ALPHABET
)}
def compute_vitter_params(m: int) -> tuple[int, int]:
    """Return the Vitter parameters ``(e, r)`` for an alphabet of size ``m``.

    ``e`` is the largest integer such that ``2**e <= m`` and ``r`` is the
    remainder ``m - 2**e``, so that ``m == 2**e + r`` with ``0 <= r < 2**e``.

    Args:
        m: Number of symbols in the alphabet; must be at least 1.

    Returns:
        The tuple ``(e, r)``.

    Raises:
        ValueError: If ``m`` is smaller than 1.
    """
    if m < 1:
        raise ValueError(f"alphabet size must be a positive integer, got {m}")

    # bit_length() - 1 is floor(log2(m)) computed exactly on integers. A float
    # logarithm rounds up for large m just below a power of two, which would
    # make r negative.
    e = int(m).bit_length() - 1
    r = m - (1 << e)
    return (e, r)
def compute_k(char: str) -> int:
    """Look up the 1-based rank of ``char`` in the module alphabet table.

    Raises ``KeyError`` when ``char`` is not a key of ``DICT_ALPHABET``.
    """
    rank = DICT_ALPHABET[char]
    return rank
def compute_code(char: str):
    """Return the fixed binary code of ``char`` as a string of '0'/'1'.

    With ``(e, r)`` obtained from ``compute_vitter_params`` for the alphabet
    size and ``k`` the 1-based rank of ``char``: ranks up to ``2 * r`` are
    written as ``k - 1`` on ``e + 1`` bits, all other ranks as ``k - r - 1``
    on ``e`` bits.
    """
    e, r = compute_vitter_params(len(LIST_ALPHABET))
    rank = compute_k(char)

    # Select the number to write and the width it is zero-padded to.
    if rank > 2 * r:
        value, width = rank - r - 1, e
    else:
        value, width = rank - 1, e + 1

    return bin(value)[2:].zfill(width)
# Reverse lookup table: fixed binary code -> alphabet symbol.
FIXED_CODES = {compute_code(item): item for item in LIST_ALPHABET}
swap_count
=
0
swap_count
=
0
@dataclass
@dataclass
class
Node
:
class
Node
:
weight
:
int
=
0
weight
:
int
=
0
nyt_code
:
str
|
None
=
None
nyt_code
:
str
|
None
=
None
value
:
str
|
None
=
"
NYT
"
value
:
str
=
"
NYT
"
left
:
Node
|
None
=
None
left
:
Node
|
None
=
None
right
:
Node
|
None
=
None
right
:
Node
|
None
=
None
...
@@ -25,45 +52,27 @@ def swap(tree: Node):
...
@@ -25,45 +52,27 @@ def swap(tree: Node):
global
swap_count
global
swap_count
if
tree
.
left
and
tree
.
right
:
if
tree
.
left
and
tree
.
right
:
if
tree
.
left
.
weight
>
tree
.
right
.
weight
:
if
tree
.
left
.
weight
>
tree
.
right
.
weight
:
# print(f'({tree.left.weight}, {tree.right.weight})')
swap_count
+=
1
swap_count
+=
1
tmp
=
tree
.
right
tmp
=
tree
.
right
tree
.
right
=
tree
.
left
tree
.
right
=
tree
.
left
tree
.
left
=
tmp
tree
.
left
=
tmp
# def compute_prefix(tree: Node, char: str) -> str:
# prefix = ''
#
# if tree:
# if tree.value == char:
# return ''
#
# if tree.left:
# prefix = '0' + compute_prefix(tree.left, char)
#
# if tree.right:
# prefix = '1' + compute_prefix(tree.right, char)
#
# return prefix
def compute_prefix(tree: Node, char: str) -> str | None:
    """Return the bit path from ``tree`` down to the node whose value is ``char``.

    Each step to a left child contributes '0' and each step to a right child
    contributes '1'. The left subtree is searched before the right one.

    Args:
        tree: Root of the (sub)tree to search.
        char: Node value to look for.

    Returns:
        The path as a string of '0'/'1' (empty when ``tree`` itself holds
        ``char``), or ``None`` when ``char`` is not present in this subtree.
    """
    if tree.value == char:
        return ''

    # Try the left branch first; a hit there wins over the right branch.
    for bit, child in (('0', tree.left), ('1', tree.right)):
        if child:
            suffix = compute_prefix(child, char)
            if suffix is not None:
                return bit + suffix

    # char not found in this subtree
    return None
def
find_char
(
tree
:
Node
,
char
:
str
)
->
Node
|
None
:
def
find_char
(
tree
:
Node
,
char
:
str
)
->
Node
|
None
:
...
@@ -82,21 +91,25 @@ def find_char(tree: Node, char: str) -> Node | None:
...
@@ -82,21 +91,25 @@ def find_char(tree: Node, char: str) -> Node | None:
return
n
return
n
def
insert_char
(
tree
:
Node
,
char
:
str
,
nyt_code
=
''
):
def
insert_char
(
tree
:
Node
,
char
:
str
,
nyt_code
=
''
):
if
tree
.
left
:
if
tree
.
left
:
if
tree
.
left
.
value
==
char
:
if
tree
.
left
.
value
==
char
:
tree
.
left
.
weight
+=
1
tree
.
left
.
weight
+=
1
tree
.
weight
+=
1
tree
.
weight
+=
1
return
return
# Leaf case
if
tree
.
right
:
if
tree
.
left
==
None
and
tree
.
right
==
None
:
if
tree
.
right
.
value
==
char
:
tree
.
right
.
weight
+=
1
tree
.
weight
+=
1
return
if
tree
.
left
is
None
and
tree
.
right
is
None
:
tree
.
left
=
Node
()
tree
.
left
=
Node
()
tree
.
right
=
Node
(
value
=
char
,
weight
=
1
,
nyt_code
=
nyt_code
)
tree
.
right
=
Node
(
value
=
char
,
weight
=
1
,
nyt_code
=
nyt_code
)
tree
.
weight
+=
1
tree
.
weight
+=
1
return
return
if
tree
.
left
:
if
tree
.
left
:
if
tree
.
left
.
value
==
'
NYT
'
:
if
tree
.
left
.
value
==
'
NYT
'
:
insert_char
(
tree
.
left
,
char
,
nyt_code
+
'
0
'
)
insert_char
(
tree
.
left
,
char
,
nyt_code
+
'
0
'
)
...
@@ -108,35 +121,18 @@ def insert_char(tree: Node, char: str, nyt_code = ''):
...
@@ -108,35 +121,18 @@ def insert_char(tree: Node, char: str, nyt_code = ''):
swap
(
tree
)
swap
(
tree
)
def
compute_vitter_params
(
m
:
int
)
->
tuple
[
int
,
int
]:
def
compute_char
(
fixed
:
str
)
->
str
:
e
=
math
.
floor
(
np
.
log2
(
m
))
r
=
m
-
(
2
**
e
)
return
(
e
,
r
)
def
compute_k
(
char
:
str
)
->
int
:
return
DICT_ALPHABET
[
char
]
def
compute_code
(
char
:
str
):
e
,
r
=
compute_vitter_params
(
len
(
LIST_ALPHABET
))
e
,
r
=
compute_vitter_params
(
len
(
LIST_ALPHABET
))
k_char
=
compute_k
(
char
)
if
k_char
<=
2
*
r
:
return
int
(
fixed
,
2
)
bin_val
=
k_char
-
1
n_bits
=
e
+
1
return
bin
(
bin_val
)[
2
:].
zfill
(
n_bits
)
else
:
bin_val
=
k_char
-
r
-
1
return
bin
(
bin_val
)[
2
:].
zfill
(
e
)
def
encode
(
string
:
str
)
->
str
:
def
encode
(
input
:
str
)
->
str
:
tree
=
Node
()
tree
=
Node
()
encoded
:
list
[
str
]
=
[]
encoded
:
list
[
str
]
=
[]
seen
:
set
[
str
]
=
set
()
seen
:
set
[
str
]
=
set
()
for
c
in
string
:
for
c
in
input
:
insert_char
(
tree
,
c
)
insert_char
(
tree
,
c
)
prefix
=
find_char
(
tree
,
c
)
prefix
=
find_char
(
tree
,
c
)
...
@@ -145,45 +141,89 @@ def encode(string: str) -> str:
...
@@ -145,45 +141,89 @@ def encode(string: str) -> str:
encoded
.
append
(
prefix
.
nyt_code
)
encoded
.
append
(
prefix
.
nyt_code
)
encoded
.
append
(
compute_code
(
c
))
encoded
.
append
(
compute_code
(
c
))
else
:
else
:
print
(
f
'
{
c
}
already seen
'
)
encoded
.
append
(
compute_prefix
(
tree
,
c
))
encoded
.
append
(
compute_prefix
(
tree
,
c
))
pprint
.
pp
(
tree
)
pprint
.
pp
(
tree
)
pprint
.
pp
(
tree
)
print
(
f
'
Nombre de swaps:
{
swap_count
}
'
)
print
(
f
'
Nombre de swaps:
{
swap_count
}
'
)
return
'
'
.
join
(
encoded
)[
1
:]
return
'
'
.
join
(
encoded
)[
1
:]
def
main
(
string
:
str
)
->
None
:
def
decode
(
encoded
:
str
)
->
(
str
,
str
):
msg
=
encode
(
string
)
bits
=
encoded
.
replace
(
'
'
,
''
)
print
(
f
'
Code final:
{
msg
}
'
)
tree
=
Node
()
result
=
''
# def main(string: str) -> None:
seen
=
set
()
# tree = Node()
i
=
0
# fixed_code = []
#
message_with_nyt
:
list
[
str
]
=
[]
# for c in string:
message_without_nyt
:
list
[
str
]
=
[]
# fixed_code.append(compute_code(c))
#
while
i
<
len
(
bits
):
# pprint.pp(fixed_code)
node
=
tree
#
# for c in string:
while
node
.
left
or
node
.
right
:
# pprint.pp(tree)
if
bits
[
i
]
==
'
0
'
:
# print(f'\n====== Inserting \'{c}\' ======\n')
node
=
node
.
left
# insert_char(tree, c)
else
:
#
node
=
node
.
right
# pprint.pp(tree)
i
+=
1
#
# n = find_char(tree, 'o')
if
node
.
value
==
'
NYT
'
:
#
e
,
r
=
compute_vitter_params
(
len
(
LIST_ALPHABET
))
# pprint.pp(n)
prefix_bits
=
bits
[
i
:
i
+
e
]
val
=
int
(
prefix_bits
,
2
)
if
val
<
r
:
code_bits
=
bits
[
i
:
i
+
e
+
1
]
k
=
int
(
code_bits
,
2
)
+
1
i
+=
e
+
1
else
:
k
=
val
+
r
+
1
i
+=
e
char
=
LIST_ALPHABET
[
k
-
1
]
result
+=
char
seen
.
add
(
char
)
insert_char
(
tree
,
char
)
message_with_nyt
.
append
(
'
NYT
'
)
message_with_nyt
.
append
(
char
)
message_without_nyt
.
append
(
char
)
else
:
char
=
node
.
value
result
+=
char
insert_char
(
tree
,
char
)
if
__name__
==
"
__main__
"
:
message_with_nyt
.
append
(
char
)
message_without_nyt
.
append
(
char
)
pprint
.
pp
(
tree
)
pprint
.
pp
(
tree
)
return
'
'
.
join
(
message_with_nyt
[
1
:]),
''
.
join
(
message_without_nyt
)
def
main
()
->
None
:
parser
=
argparse
.
ArgumentParser
(
prog
=
'
dyn_huffman.py
'
)
parser
=
argparse
.
ArgumentParser
(
prog
=
'
dyn_huffman.py
'
)
parser
.
add_argument
(
'
string
'
)
parser
.
add_argument
(
'
input
'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
main
(
args
.
string
)
print
(
'
=============== Encoding ===============
'
)
encoded_input
=
encode
(
args
.
input
)
print
(
f
'
Code final:
{
encoded_input
}
'
)
print
()
print
(
'
=============== Decoding ===============
'
)
with_nyt
,
without_nyt
=
decode
(
encoded_input
)
print
(
f
'
Message initial avec NYT :
{
with_nyt
}
'
)
print
(
f
'
Message initial sans NYT :
{
without_nyt
}
'
)
if
__name__
==
"
__main__
"
:
main
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment