Skip to content
Snippets Groups Projects
Commit a51e72da authored by Florian Burgener's avatar Florian Burgener
Browse files

Remove duplicates support

parent 94870ad9
Branches
No related tags found
No related merge requests found
import random

# t = minimum degree
# minimum of t-1 keys for every node except root
# 2t - 1 keys at most for every node
class Node: class Node:
...@@ -11,10 +9,6 @@ class Node: ...@@ -11,10 +9,6 @@ class Node:
self.children = [] self.children = []
def node_is_full(node):
    """Return True when *node* already holds ``2 * node.order`` keys."""
    return len(node.keys) == 2 * node.order
# lower_bound # lower_bound
def array_binary_search(array, value): def array_binary_search(array, value):
low = 0 low = 0
...@@ -22,12 +16,14 @@ def array_binary_search(array, value): ...@@ -22,12 +16,14 @@ def array_binary_search(array, value):
while low <= high: while low <= high:
m = (low + high) // 2 m = (low + high) // 2
if array[m] < value: if array[m] < value:
low = m + 1 low = m + 1
elif array[m] > value: elif array[m] > value:
high = m - 1 high = m - 1
else: else:
return m return m
return low return low
...@@ -40,13 +36,19 @@ def array_insert_sorted(array, value): ...@@ -40,13 +36,19 @@ def array_insert_sorted(array, value):
def find_leaf(root, key):
    """Walk down from *root* to the leaf where *key* belongs.

    Returns a ``(parents, leaf)`` tuple: ``parents`` lists every internal
    node visited on the way down (root first), and ``leaf`` is the node at
    which the descent stopped.
    """
    parents = []
    current = root

    while not current.is_leaf:
        parents.append(current)
        # The position of the key among the separators selects the child.
        children_index = array_binary_search(current.keys, key)
        current = current.children[children_index]

    return parents, current
def node_is_full(node):
    """Return True when *node* already holds ``2 * node.order`` keys."""
    return len(node.keys) == 2 * node.order
def insert(root, key): def insert(root, key):
parents, leaf = find_leaf(root, key) parents, leaf = find_leaf(root, key)
...@@ -56,41 +58,48 @@ def insert(root, key): ...@@ -56,41 +58,48 @@ def insert(root, key):
insert_non_full(leaf, key, None) insert_non_full(leaf, key, None)
def redistribute_items(left_node, right_node, left_index, right_index):
    """Move the upper part of ``left_node.keys`` into ``right_node``.

    ``right_node.keys`` becomes ``left_node.keys[right_index:]`` and
    ``left_node.keys`` is truncated to ``left_node.keys[:left_index]``.
    Keys located in ``[left_index, right_index)`` end up in neither node.
    """
    # Slice the right part first: it must be read before the left is truncated.
    right_node.keys = left_node.keys[right_index:]
    left_node.keys = left_node.keys[:left_index]
def split_leaf(node, key):
    """Split the full leaf *node* in two while inserting *key*.

    *node* keeps the lower keys and a newly created right sibling receives
    the upper keys; *key* is inserted into whichever half it belongs to.

    Returns ``(median_value, right_node)``, where ``median_value`` is the
    separator to push up to the parent and ``right_node`` is the new sibling.

    NOTE(review): the former special case for a key equal to the current
    median (used for duplicate keys) is no longer applied here.
    """
    # Where the key would land if the leaf had room for it.
    virtual_insertion_index = array_binary_search(node.keys, key)
    median_index = len(node.keys) // 2

    right_node = Node(node.order)
    right_node.is_leaf = node.is_leaf

    if virtual_insertion_index < median_index:
        # The key goes left, so split one position earlier to keep balance.
        median_value = node.keys[median_index - 1]
        redistribute_items(node, right_node, median_index - 1, median_index - 1)
        array_insert_sorted(node.keys, key)
    else:
        if virtual_insertion_index > median_index:
            median_value = node.keys[median_index]
        else:
            # The key lands exactly on the split point: it is the median.
            median_value = key
        redistribute_items(node, right_node, median_index, median_index)
        array_insert_sorted(right_node.keys, key)

    return median_value, right_node
def split_internal(node, key, right_child_node): def split_internal(node, key, right_child_node):
index = array_binary_search(node.keys, key) virtual_insertion_index = array_binary_search(node.keys, key)
split_index = len(node.keys) // 2 median_index = len(node.keys) // 2
left_index = split_index left_index = median_index
right_index = split_index right_index = median_index
if index < split_index: if virtual_insertion_index < median_index:
abc = node.keys[split_index - 1] abc = node.keys[median_index - 1]
left_index -= 1 left_index -= 1
elif index > split_index: elif virtual_insertion_index > median_index:
abc = node.keys[split_index] abc = node.keys[median_index]
right_index += 1 right_index += 1
else: else:
abc = key abc = key
...@@ -105,59 +114,54 @@ def split_internal(node, key, right_child_node): ...@@ -105,59 +114,54 @@ def split_internal(node, key, right_child_node):
elif key > abc: elif key > abc:
inserted_at_index = array_insert_sorted(split_right.keys, key) inserted_at_index = array_insert_sorted(split_right.keys, key)
if index < split_index: if virtual_insertion_index < median_index:
split_right.children = node.children[split_index:] split_right.children = node.children[median_index:]
node.children = node.children[:split_index] node.children = node.children[:median_index]
node.children.insert(inserted_at_index + 1, right_child_node) node.children.insert(inserted_at_index + 1, right_child_node)
elif index > split_index: elif virtual_insertion_index > median_index:
split_right.children = node.children[split_index + 1 :] split_right.children = node.children[median_index + 1 :]
node.children = node.children[: split_index + 1] node.children = node.children[: median_index + 1]
split_right.children.insert(inserted_at_index + 1, right_child_node) split_right.children.insert(inserted_at_index + 1, right_child_node)
else: else:
split_right.children = node.children[split_index + 1 :] split_right.children = node.children[median_index + 1 :]
node.children = node.children[: split_index + 1] node.children = node.children[: median_index + 1]
split_right.children.insert(0, right_child_node) split_right.children.insert(0, right_child_node)
return abc, split_right return abc, split_right
def tree_grow(root, median_value, split_right_node):
    """Add one level to the tree after the root itself has been split.

    The root object is modified in place: its current keys and children are
    moved into a new left child, and the root then holds only
    *median_value*, with the new left child and *split_right_node* as its
    two children.
    """
    left_node = Node(root.order)
    # Both halves of the old root sit on the same level, so they share the
    # same leaf status.
    left_node.is_leaf = split_right_node.is_leaf
    left_node.keys = root.keys
    left_node.children = root.children

    root.is_leaf = False
    root.keys = [median_value]
    root.children = [left_node, split_right_node]
def insert_full(root, parents, node, key, previous_split_right_node):
    """Insert *key* into the full *node*, splitting upward as needed.

    *parents* is the stack of ancestors of *node* (root first); it is
    consumed with ``pop()`` as splits propagate. *previous_split_right_node*
    is the right half produced by the split one level below, or ``None``
    when *node* is a leaf.
    """
    if node.is_leaf:
        median_value, split_right_node = split_leaf(node, key)
    else:
        median_value, split_right_node = split_internal(
            node, key, previous_split_right_node
        )

    if node == root:
        # Nothing above to receive the median: the tree gains a level.
        tree_grow(root, median_value, split_right_node)
    else:
        parent = parents.pop()

        if node_is_full(parent):
            insert_full(root, parents, parent, median_value, split_right_node)
        else:
            insert_non_full(parent, median_value, split_right_node)
def insert_non_full(node, key, previous_split_right_node):
    """Insert *key* into *node*, which is known to have room for it.

    When *previous_split_right_node* is not ``None`` it is attached as the
    child immediately to the right of the newly inserted key.
    """
    # array_insert_sorted presumably returns the index at which the key was
    # placed; that index is used to position the child.
    inserted_at_index = array_insert_sorted(node.keys, key)

    if previous_split_right_node is not None:
        node.children.insert(inserted_at_index + 1, previous_split_right_node)
def tree_print(root, depth=0): def tree_print(root, depth=0):
...@@ -168,23 +172,30 @@ def tree_print(root, depth=0): ...@@ -168,23 +172,30 @@ def tree_print(root, depth=0):
tree_print(child, depth + 1) tree_print(child, depth + 1)
def generate_random_keys(length, min_key, max_key):
    """Return *length* distinct random integers drawn from [min_key, max_key].

    Keys are returned in the order they were drawn. Values are drawn with
    ``random.randint`` and redrawn on collision, so the result for a given
    seed is reproducible.

    Raises:
        ValueError: if *length* exceeds the number of distinct integers in
            the range (the request could otherwise never be satisfied).
    """
    available = max(0, max_key - min_key + 1)
    if length > available:
        raise ValueError(
            f"cannot draw {length} distinct keys from a range of {available} values"
        )

    keys = []
    seen = set()  # O(1) membership test instead of scanning the list
    while len(keys) < length:
        random_key = random.randint(min_key, max_key)
        if random_key not in seen:
            seen.add(random_key)
            keys.append(random_key)
    return keys
def main():
    """Build a tree of order 2 from 30 distinct random keys and print it."""
    # Fixed seed so every run produces the same key sequence.
    random.seed(0)

    order = 2
    root = Node(order)
    keys = generate_random_keys(30, 1, 99)
    print(keys)

    for key in keys:
        insert(root, key)

    tree_print(root)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment