"""Insertion into a B-tree style index whose leaves hold every key.

A node may hold up to ``2 * order`` keys.  Inserting into a full node
splits it in two and pushes a separator key into the parent; when the
root itself splits, the tree grows by one level.

NOTE(review): this module was recovered from a unified diff.  Lines the
diff hunks did not show are reconstructed and flagged with
``NOTE(review)`` below -- confirm them against the complete file.
"""

import random


class Node:
    """A tree node: sorted ``keys`` plus, for internal nodes, ``children``."""

    # NOTE(review): only the ``self.children`` line is visible in the diff;
    # the other attributes are reconstructed from how the module uses them.
    def __init__(self, order):
        self.order = order  # a node is full at 2 * order keys
        self.is_leaf = True  # a fresh node starts out as a leaf
        self.keys = []
        self.children = []


def array_binary_search(array, value):
    """Return the index of ``value`` in sorted ``array`` or its insertion point.

    When ``value`` is present the index of *a* matching element is returned
    (not necessarily the first one); otherwise the index at which ``value``
    would have to be inserted to keep ``array`` sorted.
    """
    low = 0
    # NOTE(review): this initialiser falls between two diff hunks; reconstructed.
    high = len(array) - 1

    while low <= high:
        m = (low + high) // 2

        if array[m] < value:
            low = m + 1
        elif array[m] > value:
            high = m - 1
        else:
            return m

    return low


def array_insert_sorted(array, value):
    """Insert ``value`` into sorted ``array`` in place; return its index."""
    # NOTE(review): body not visible in the diff; reconstructed from the
    # callers, which use the return value as the inserted position.
    index = array_binary_search(array, value)
    array.insert(index, value)
    return index


def find_leaf(root, key):
    """Descend from ``root`` to the leaf where ``key`` belongs.

    Returns ``(parents, leaf)`` where ``parents`` lists the internal nodes
    visited, root first, so that splits can be propagated upwards.
    """
    parents = []
    current = root

    while not current.is_leaf:
        parents.append(current)
        children_index = array_binary_search(current.keys, key)
        current = current.children[children_index]

    return parents, current


def node_is_full(node):
    """Return True when ``node`` holds the maximum of ``2 * order`` keys."""
    return len(node.keys) == 2 * node.order


def insert(root, key):
    """Insert ``key`` into the tree rooted at ``root`` (modified in place)."""
    parents, leaf = find_leaf(root, key)

    # NOTE(review): this branch falls between two diff hunks; reconstructed.
    if node_is_full(leaf):
        insert_full(root, parents, leaf, key, None)
    else:
        insert_non_full(leaf, key, None)


def redistribute_items(left_node, right_node, left_index, right_index):
    """Move ``left_node.keys[right_index:]`` to ``right_node``.

    ``left_node`` keeps ``keys[:left_index]``; keys between the two indices
    (if any) are dropped from both nodes.
    """
    right_node.keys = left_node.keys[right_index:]
    left_node.keys = left_node.keys[:left_index]


def split_leaf(node, key):
    """Split the full leaf ``node`` while inserting ``key``.

    Returns ``(median_value, right_node)``.  ``node`` keeps the lower half,
    ``right_node`` receives the upper half, and ``median_value`` -- the
    separator to push into the parent -- is also the first key of
    ``right_node``, so every key stays stored in a leaf.
    """
    virtual_insertion_index = array_binary_search(node.keys, key)
    median_index = len(node.keys) // 2

    right_node = Node(node.order)
    right_node.is_leaf = node.is_leaf

    if virtual_insertion_index < median_index:
        # The key lands in the left half: shift the split point one to the
        # left so both halves stay balanced after the insertion.
        median_value = node.keys[median_index - 1]
        redistribute_items(node, right_node, median_index - 1, median_index - 1)
        array_insert_sorted(node.keys, key)
    else:
        if virtual_insertion_index > median_index:
            median_value = node.keys[median_index]
        else:
            # The new key itself sits exactly at the split point.
            median_value = key

        redistribute_items(node, right_node, median_index, median_index)
        array_insert_sorted(right_node.keys, key)

    return median_value, right_node


def split_internal(node, key, right_child_node):
    """Split the full internal ``node`` while inserting ``key``.

    ``right_child_node`` is the child that belongs immediately to the right
    of ``key``.  Returns ``(median_value, split_right)``; unlike a leaf
    split, the separator is removed from both halves.

    Every decision is driven by the insertion index, so the index used to
    place ``right_child_node`` is always defined, even when ``key`` equals
    the chosen separator.
    """
    # NOTE(review): the lines that create ``split_right`` and insert into the
    # left half fall between two diff hunks; reconstructed.
    virtual_insertion_index = array_binary_search(node.keys, key)
    median_index = len(node.keys) // 2

    split_right = Node(node.order)
    split_right.is_leaf = node.is_leaf

    if virtual_insertion_index < median_index:
        median_value = node.keys[median_index - 1]
        split_right.keys = node.keys[median_index:]
        node.keys = node.keys[: median_index - 1]
        inserted_at_index = array_insert_sorted(node.keys, key)

        split_right.children = node.children[median_index:]
        node.children = node.children[:median_index]
        node.children.insert(inserted_at_index + 1, right_child_node)
    elif virtual_insertion_index > median_index:
        median_value = node.keys[median_index]
        split_right.keys = node.keys[median_index + 1 :]
        node.keys = node.keys[:median_index]
        inserted_at_index = array_insert_sorted(split_right.keys, key)

        split_right.children = node.children[median_index + 1 :]
        node.children = node.children[: median_index + 1]
        split_right.children.insert(inserted_at_index + 1, right_child_node)
    else:
        # The new key becomes the separator; its right child becomes the
        # leftmost child of the new right half.
        median_value = key
        split_right.keys = node.keys[median_index:]
        node.keys = node.keys[:median_index]

        split_right.children = node.children[median_index + 1 :]
        node.children = node.children[: median_index + 1]
        split_right.children.insert(0, right_child_node)

    return median_value, split_right


def tree_grow(root, median_value, split_right_node):
    """Add a level on top of a root that has just been split.

    The ``root`` object is kept (callers hold a reference to it): its
    current contents move into a new left child and it becomes an internal
    node with the single key ``median_value``.
    """
    left_node = Node(root.order)
    left_node.is_leaf = split_right_node.is_leaf
    left_node.keys = root.keys
    left_node.children = root.children

    root.is_leaf = False
    root.keys = [median_value]
    root.children = [left_node, split_right_node]


def insert_full(root, parents, node, key, previous_split_right_node):
    """Insert ``key`` into the full ``node``, splitting upwards as needed.

    ``parents`` is the remaining path above ``node`` (root first) and is
    consumed from the end.  ``previous_split_right_node`` is the right half
    produced by splitting a child, or ``None`` when ``node`` is a leaf.
    """
    if node.is_leaf:
        median_value, split_right_node = split_leaf(node, key)
    else:
        median_value, split_right_node = split_internal(
            node, key, previous_split_right_node
        )

    if node is root:
        tree_grow(root, median_value, split_right_node)
    else:
        parent = parents.pop()

        if node_is_full(parent):
            insert_full(root, parents, parent, median_value, split_right_node)
        else:
            insert_non_full(parent, median_value, split_right_node)


def insert_non_full(node, key, previous_split_right_node):
    """Insert ``key`` into ``node``, which is known to have room.

    When ``previous_split_right_node`` is given it is linked as the child
    immediately to the right of ``key``.
    """
    inserted_at_index = array_insert_sorted(node.keys, key)

    if previous_split_right_node is not None:
        node.children.insert(inserted_at_index + 1, previous_split_right_node)


def tree_print(root, depth=0):
    """Print the keys of every node, one node per line, depth first."""
    # NOTE(review): only the recursive call is visible in the diff; the
    # output format and the loop are reconstructed -- confirm.
    print("    " * depth + str(root.keys))

    for child in root.children:
        tree_print(child, depth + 1)


def generate_random_keys(length, min_key, max_key):
    """Return ``length`` distinct random integers in ``[min_key, max_key]``.

    Values are drawn with ``random.randint`` and rejected when already
    seen, so results are reproducible for a given ``random.seed``.

    Raises:
        ValueError: if the range holds fewer than ``length`` distinct values
            (rejection sampling could otherwise never finish).
    """
    if length > max_key - min_key + 1:
        raise ValueError(
            "cannot draw %d distinct keys from [%d, %d]" % (length, min_key, max_key)
        )

    keys = []
    seen = set()  # O(1) membership test; ``keys`` preserves draw order

    while len(keys) < length:
        random_key = random.randint(min_key, max_key)

        if random_key not in seen:
            seen.add(random_key)
            keys.append(random_key)

    return keys


def main():
    """Build a tree from reproducible random keys and print it."""
    random.seed(0)

    order = 2
    root = Node(order)

    keys = generate_random_keys(30, 1, 99)
    print(keys)

    for key in keys:
        insert(root, key)

    tree_print(root)


if __name__ == "__main__":
    main()