You are on page 1 of 3

# based on the idea of invariant heap

import tempfile
import sys
from typing import List

# class InvariantHeapNode:
# def __init__(self, element, i, j):
# self.element = element
# self.i = i
# self.j = j

class InvariantHeapNode:
    """One heap entry: a value together with the file it was read from.

    Attributes:
        element: the value the heap orders nodes by.
        file: the file object the value came from; stored as given and
            never touched by this class.
    """

    def __init__(self, element, file):
        self.element = element
        self.file = file

    def __repr__(self):
        # Debugging aid only: makes heap contents readable when printed.
        return (f'{type(self).__name__}'
                f'(element={self.element!r}, file={self.file!r})')

class ExternalSort():
    """Split a text file of integers into sorted temporary run files.

    Attributes:
        sorted_temp_files: open temporary files created by ``make_runs``.
            Each holds one sorted run, one integer per line, and is rewound
            to its start so it can be read immediately.
    """

    def __init__(self):
        self.sorted_temp_files = []

    def make_runs(self, filename, files_size):
        """Read ``filename`` and write it out as sorted runs.

        The input is consumed line by line; every ``files_size`` integers
        are sorted and written to their own temporary file. A final, shorter
        chunk is written as well, so no input is lost. Blank lines are
        skipped.

        Args:
            filename: path of a text file with one integer per line.
            files_size: maximum number of integers per run; must be >= 1.

        Returns:
            ``self.sorted_temp_files``, which now also contains the runs
            created by this call.

        Raises:
            ValueError: if ``files_size`` is smaller than 1, or a non-blank
                line is not a valid integer.
        """
        if files_size < 1:
            raise ValueError('files_size must be a positive integer')

        chunk = []
        with open(filename) as source:
            for line in source:
                text = line.strip()
                if not text:
                    continue
                chunk.append(int(text))
                if len(chunk) == files_size:
                    self._write_run(chunk)
                    chunk = []
        # The last chunk is usually shorter than files_size; keep it too.
        if chunk:
            self._write_run(chunk)
        return self.sorted_temp_files

    def _write_run(self, numbers):
        """Sort ``numbers`` and store them as one rewound temporary file."""
        # 'w+' so the run can be read back later. The file is deliberately
        # NOT used as a context manager: closing a NamedTemporaryFile
        # deletes it, and the merge step still has to read it.
        run_file = tempfile.NamedTemporaryFile('w+')
        run_file.writelines(f'{number}\n' for number in sorted(numbers))
        run_file.seek(0)
        self.sorted_temp_files.append(run_file)

    def close(self):
        """Close (and thereby delete) every run file created so far."""
        while self.sorted_temp_files:
            self.sorted_temp_files.pop().close()

class InvariantHeap():
    """Array-backed binary min-heap of nodes ordered by ``node.element``.

    The heap is built in place on the list handed to the constructor; that
    same list object is kept as ``self.my_heap``.
    """

    def __init__(self, my_heap: "List[InvariantHeapNode]"):
        """Turn ``my_heap`` into a min-heap.

        Args:
            my_heap: list of objects exposing a comparable ``element``
                attribute. The list is reordered in place.
        """
        super().__init__()
        self.my_heap = my_heap
        # Sift down every index that may have a child, from the last such
        # index back to the root. Integer division keeps the index exact
        # for any list length.
        for index in range((len(my_heap) - 1) // 2, -1, -1):
            self.heapify(index)

    def heapify(self, i):
        """Sift the node at index ``i`` down until neither child is smaller.

        Args:
            i: index of the node whose subtree should be repaired.
        """
        heap = self.my_heap
        size = len(heap)
        while True:
            left = 2 * i + 1
            right = 2 * i + 2
            smallest = i
            if left < size and heap[left].element < heap[smallest].element:
                smallest = left
            if right < size and heap[right].element < heap[smallest].element:
                smallest = right
            if smallest == i:
                return
            heap[i], heap[smallest] = heap[smallest], heap[i]
            # Continue with the child that received the larger node.
            i = smallest

    def get_min(self):
        """Return the root node, i.e. the one with the smallest element.

        Returns:
            The node at index 0. For an empty heap the string
            ``'Underflow None'`` is returned instead of a node; this is
            kept unchanged for existing callers.
        """
        if len(self.my_heap) < 1:
            return f'Underflow {None}'
        return self.my_heap[0]  # first element is min in invariant heap

    def replace_min(self, root):
        """Overwrite the root with ``root`` and restore the heap order.

        Args:
            root: node to place at index 0 before sifting it down.

        Raises:
            IndexError: if the heap is empty.
        """
        self.my_heap[0] = root
        self.heapify(0)

def merge_files(sorted_temp_files=None):
    """Merge sorted run files into a single ascending list of integers.

    Each run file must be open for reading, positioned at its first line,
    and contain one integer per line in ascending order. The current
    smallest unread value of every run is kept in an ``InvariantHeap``; the
    minimum is taken repeatedly and replaced by the next value from the
    same file.

    Args:
        sorted_temp_files: iterable of open run files. When omitted, the
            (empty) run list of a fresh ``ExternalSort`` is used, so a call
            without arguments returns an empty list.

    Returns:
        A list with every integer from all runs, in ascending order.

    Raises:
        ValueError: if a non-blank line in a run is not a valid integer.
    """
    if sorted_temp_files is None:
        sorted_temp_files = ExternalSort().sorted_temp_files

    # Marks a run with nothing left to read. Infinity compares greater than
    # every int, so it cannot be confused with real data.
    exhausted = float('inf')

    def next_element(run_file):
        # An empty line means end of file (or a blank line): the run is done.
        line = run_file.readline().strip()
        return int(line) if line else exhausted

    nodes = [InvariantHeapNode(next_element(run_file), run_file)
             for run_file in sorted_temp_files]
    if not nodes:
        return []

    inv_heap = InvariantHeap(nodes)
    result = []
    while True:
        root = inv_heap.get_min()
        # When the smallest remaining value is the marker, all runs are
        # exhausted.
        if root.element == exhausted:
            break
        result.append(root.element)
        root.element = next_element(root.file)
        inv_heap.replace_min(root)
    return result

# def merge_k_runs(runs: Matrix, k: int):


# array = []
# result_size = 0
# for i in range(len(runs)):
# node = InvariantHeapNode(runs[i][0], i, 1)
# array.append(node)
# result_size += len(runs[i])
#
# inv_heap = InvariantHeap(array, k)
# result = [0]*result_size
# for i in range(result_size):
# root = inv_heap.get_min()
# result[i] = root.element
# if root.j < len(runs[root.i]):
# root.element = runs[root.i][root.j]
# root.j += 1
# else:
# root.element = sys.maxsize
# inv_heap.replace_min(root)
# return result

def main(filename='/home/max/ext_sort.txt', files_size=5):
    """Sort the integers stored in ``filename`` via runs and a heap merge.

    Args:
        filename: path of the input file, one integer per line.
        files_size: maximum number of integers per run.

    Returns:
        The list produced by ``merge_files`` for the runs of ``filename``.
    """
    # runs_num = 10
    # runs_size = 1000
    #
    # with open("/home/max/input.txt") as f:
    # num_array = [int(x) for x in f.writelines().split()]

    # runs = [[3, 2, 1], [9, 8, 6], [5, 2, 1]]


    # runs = [
    # [2, 6, 12, 34],
    # [1, 9, 20, 1000],
    # [23, 34, 90, 2000]
    # ]
    # a = merge_k_runs(runs, len(runs))
    # print(a)

    obj = ExternalSort()
    obj.make_runs(filename, files_size)
    try:
        # Hand over the runs of THIS sorter; merge_files cannot find them
        # on its own.
        return merge_files(obj.sorted_temp_files)
    finally:
        # Release the run files once the merge is finished.
        for run_file in obj.sorted_temp_files:
            run_file.close()

if __name__ == '__main__':
    main()

You might also like