Skip to content
Snippets Groups Projects
Commit a51e72da authored by Florian Burgener's avatar Florian Burgener
Browse files

Remove duplicates support

parent 94870ad9
No related branches found
No related tags found
No related merge requests found
# t = minimum degree
# minimum of t-1 keys for every node except root
# 2t - 1 keys at most for every node
import random
class Node:
......@@ -11,10 +9,6 @@ class Node:
self.children = []
def node_is_full(node):
    """Return True when `node` holds exactly ``2 * node.order`` keys.

    Callers use this to choose between the plain insertion path and the
    splitting insertion path.
    """
    return len(node.keys) == 2 * node.order
# lower_bound
def array_binary_search(array, value):
low = 0
......@@ -22,12 +16,14 @@ def array_binary_search(array, value):
while low <= high:
m = (low + high) // 2
if array[m] < value:
low = m + 1
elif array[m] > value:
high = m - 1
else:
return m
return low
......@@ -40,13 +36,19 @@ def array_insert_sorted(array, value):
def find_leaf(root, key):
    """Descend from `root` to the leaf that `key` belongs in.

    Args:
        root: node to start the descent from.
        key: key being looked up / inserted.

    Returns:
        A ``(parents, leaf)`` tuple: `parents` lists every non-leaf node
        visited, in root-to-leaf order (callers pop from its end to walk back
        up), and `leaf` is the first node reached whose ``is_leaf`` is true.
    """
    parents = []
    current = root

    while not current.is_leaf:
        parents.append(current)
        # The position returned by the search over this node's keys is used
        # directly as the index of the child to descend into.
        children_index = array_binary_search(current.keys, key)
        current = current.children[children_index]

    return parents, current
def node_is_full(node):
    """Return True when `node` holds exactly ``2 * node.order`` keys.

    Callers use this to choose between the plain insertion path and the
    splitting insertion path.
    """
    return len(node.keys) == 2 * node.order
def insert(root, key):
parents, leaf = find_leaf(root, key)
......@@ -56,41 +58,48 @@ def insert(root, key):
insert_non_full(leaf, key, None)
def insert_non_full(node, key, right_child_node):
inserted_at_index = array_insert_sorted(node.keys, key)
def redistribute_items(left_node, right_node, left_index, right_index):
    """Split the keys of `left_node` between `left_node` and `right_node`.

    After the call `right_node.keys` is ``left_node.keys[right_index:]`` and
    `left_node.keys` is truncated to ``left_node.keys[:left_index]``. Any keys
    in ``[left_index, right_index)`` are dropped from both nodes.

    Args:
        left_node: node whose keys are being split; modified in place.
        right_node: node receiving the upper part; its keys are overwritten.
        left_index: exclusive end of the part kept in `left_node`.
        right_index: inclusive start of the part moved to `right_node`.
    """
    # The right slice must be taken first: it reads the still-complete key
    # list of the left node, which the next statement truncates.
    right_node.keys = left_node.keys[right_index:]
    left_node.keys = left_node.keys[:left_index]
if right_child_node is not None:
node.children.insert(inserted_at_index + 1, right_child_node)
def split_leaf(node, key):
virtual_insertion_index = array_binary_search(node.keys, key)
median_index = len(node.keys) // 2
def insert_full(root, parents, node, key, right_child_node):
if node.is_leaf:
abc, split_right = split_leaf(node, key)
else:
abc, split_right = split_internal(node, key, right_child_node)
right_node = Node(node.order)
right_node.is_leaf = node.is_leaf
if node == root:
increase_height(root, abc, split_right)
if virtual_insertion_index < median_index:
median_value = node.keys[median_index - 1]
redistribute_items(node, right_node, median_index - 1, median_index - 1)
array_insert_sorted(node.keys, key)
else:
parent = parents.pop()
if node_is_full(parent):
insert_full(root, parents, parent, abc, split_right)
if virtual_insertion_index > median_index:
median_value = node.keys[median_index]
else:
insert_non_full(parent, abc, split_right)
median_value = key
redistribute_items(node, right_node, median_index, median_index)
array_insert_sorted(right_node.keys, key)
# if key == node.keys[len(node.keys) // 2]:
# # I don't like it but without it the duplicates can't work.
# split_index = len(node.keys) // 2
return median_value, right_node
def split_internal(node, key, right_child_node):
index = array_binary_search(node.keys, key)
split_index = len(node.keys) // 2
left_index = split_index
right_index = split_index
virtual_insertion_index = array_binary_search(node.keys, key)
median_index = len(node.keys) // 2
left_index = median_index
right_index = median_index
if index < split_index:
abc = node.keys[split_index - 1]
if virtual_insertion_index < median_index:
abc = node.keys[median_index - 1]
left_index -= 1
elif index > split_index:
abc = node.keys[split_index]
elif virtual_insertion_index > median_index:
abc = node.keys[median_index]
right_index += 1
else:
abc = key
......@@ -105,59 +114,54 @@ def split_internal(node, key, right_child_node):
elif key > abc:
inserted_at_index = array_insert_sorted(split_right.keys, key)
if index < split_index:
split_right.children = node.children[split_index:]
node.children = node.children[:split_index]
if virtual_insertion_index < median_index:
split_right.children = node.children[median_index:]
node.children = node.children[:median_index]
node.children.insert(inserted_at_index + 1, right_child_node)
elif index > split_index:
split_right.children = node.children[split_index + 1 :]
node.children = node.children[: split_index + 1]
elif virtual_insertion_index > median_index:
split_right.children = node.children[median_index + 1 :]
node.children = node.children[: median_index + 1]
split_right.children.insert(inserted_at_index + 1, right_child_node)
else:
split_right.children = node.children[split_index + 1 :]
node.children = node.children[: split_index + 1]
split_right.children = node.children[median_index + 1 :]
node.children = node.children[: median_index + 1]
split_right.children.insert(0, right_child_node)
return abc, split_right
def split_leaf(node, key):
index = array_binary_search(node.keys, key)
split_index = len(node.keys) // 2
if index < split_index:
split_index -= 1
abc = node.keys[split_index]
elif index > split_index:
abc = node.keys[split_index]
else:
abc = key
def tree_grow(root, median_value, split_right_node):
    """Add one level to the tree after the root itself has been split.

    The `root` object is kept as the root (so references held by callers stay
    valid): its current keys and children are moved into a newly created left
    node, and `root` is rewritten as an internal node with the single key
    `median_value` and the two children ``[left_node, split_right_node]``.

    Args:
        root: current root node; modified in place.
        median_value: key that separates the two halves.
        split_right_node: right half produced by splitting the root.
    """
    # The new left sibling takes over the old root's content and sits at the
    # same level as the right half, hence the copied leaf flag.
    left_node = Node(root.order)
    left_node.is_leaf = split_right_node.is_leaf
    left_node.keys = root.keys
    left_node.children = root.children

    root.is_leaf = False
    root.keys = [median_value]
    root.children = [left_node, split_right_node]
if key == node.keys[len(node.keys) // 2]:
# I don't like it but without it the duplicates can't work.
split_index = len(node.keys) // 2
split_right = Node(node.order)
split_right.is_leaf = node.is_leaf
split_right.keys = node.keys[split_index:]
node.keys = node.keys[:split_index]
def insert_full(root, parents, node, key, previous_split_right_node):
if node.is_leaf:
median_value, split_right_node = split_leaf(node, key)
else:
median_value, split_right_node = split_internal(node, key, previous_split_right_node)
if key < abc:
array_insert_sorted(node.keys, key)
if node == root:
tree_grow(root, median_value, split_right_node)
else:
array_insert_sorted(split_right.keys, key)
parent = parents.pop()
return abc, split_right
if node_is_full(parent):
insert_full(root, parents, parent, median_value, split_right_node)
else:
insert_non_full(parent, median_value, split_right_node)
def increase_height(root, key, right_child_node):
    """Add one level to the tree after the root itself has been split.

    The `root` object is kept as the root: its current keys and children are
    moved into a newly created left child, and `root` is rewritten as an
    internal node with the single key `key` and the two children
    ``[left_child_node, right_child_node]``.

    Args:
        root: current root node; modified in place.
        key: key that separates the two halves.
        right_child_node: right half produced by splitting the root.
    """
    # The new left child takes over the old root's content and sits at the
    # same level as the right half, hence the copied leaf flag.
    left_child_node = Node(root.order)
    left_child_node.is_leaf = right_child_node.is_leaf
    left_child_node.keys = root.keys
    left_child_node.children = root.children

    root.is_leaf = False
    root.keys = [key]
    root.children = [left_child_node, right_child_node]
def insert_non_full(node, key, previous_split_right_node):
    """Insert `key` into `node`, which the caller has checked is not full.

    Args:
        node: node receiving the key; modified in place.
        key: key to insert into ``node.keys``.
        previous_split_right_node: right half of a child split that `key`
            was promoted from, or None when there is no child to attach.
    """
    # NOTE(review): array_insert_sorted is assumed to return the index at
    # which the key was placed - confirm against its definition.
    inserted_at_index = array_insert_sorted(node.keys, key)

    if previous_split_right_node is not None:
        # The new child goes immediately to the right of the inserted key.
        node.children.insert(inserted_at_index + 1, previous_split_right_node)
def tree_print(root, depth=0):
......@@ -168,23 +172,30 @@ def tree_print(root, depth=0):
tree_print(child, depth + 1)
def generate_random_keys(length, min_key, max_key):
    """Return `length` distinct random integers from ``[min_key, max_key]``.

    Keys are drawn one at a time with ``random.randint`` and redrawn when the
    value was already chosen, so the result for a given seed is the same as
    plain rejection sampling. The returned list is in draw order.

    Args:
        length: number of distinct keys to generate.
        min_key: smallest allowed key (inclusive).
        max_key: largest allowed key (inclusive).

    Raises:
        ValueError: if `length` exceeds the number of distinct values in the
            range; without this check the redraw loop would never terminate.
    """
    available = max_key - min_key + 1
    if length > available:
        raise ValueError(
            "cannot draw {} distinct keys from a range of {} values".format(
                length, max(available, 0)
            )
        )

    keys = []
    seen = set()  # O(1) duplicate check; `keys` preserves the draw order
    for _ in range(length):
        while True:
            random_key = random.randint(min_key, max_key)
            if random_key not in seen:
                break
        seen.add(random_key)
        keys.append(random_key)

    return keys
def main():
random.seed(0)
order = 2
root = Node(order)
# keys = [10, 20, 50, 70, 1, 11, 40, 30, 90, 60, 110, 80, 15, 54, 42, 41, 12, 14, 16, 19, 20, 17, 18]
keys = [10, 20, 50, 70, 1, 11, 40, 30, 90, 60, 110]
keys = generate_random_keys(30, 1, 99)
print(keys)
for key in keys:
insert(root, key)
insert(root, 80)
# insert(root, 42)
# insert(root, 42)
# insert(root, 42)
insert(root, 20)
insert(root, 20)
insert(root, 20)
# insert(root, 20)
tree_print(root)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment