"""
算法面試:10億個數中取TOP-1000個數
小頂堆的性質:每一個節點都不大於它的左右子節點,
先取前N個數,構成小頂堆,即在內存中維護一個1000數的小頂堆
然後從文件中逐個讀取數據,和堆頂比較:
if 不比堆頂大(小於或等於堆頂),則丟棄
if 比堆頂大,替換根節點,並且調整堆,保持小頂堆的性質
所有數據處理完,得到的即是Top-N
"""
class TopN:
    """Select the N largest values of a list using a fixed-size min-heap.

    The first ``n`` slots of the list hold a min-heap in array form (the
    children of index ``k`` live at ``2k + 1`` and ``2k + 2``).  Every later
    element is compared with the heap root, which is the smallest of the
    current candidates, and is swapped in when it is larger.  After one pass
    the first ``n`` slots contain the ``n`` largest values, in heap order
    (not sorted).
    """

    def parent(self, n):
        """Return the index of the parent of node ``n`` (0 for the root)."""
        # Floor division stays exact for arbitrarily large indices, whereas
        # float division followed by int() silently loses precision.
        return (n - 1) // 2 if n > 0 else 0

    def left(self, n):
        """Return the index of the left child of node ``n``."""
        return 2 * n + 1

    def right(self, n):
        """Return the index of the right child of node ``n``."""
        return 2 * n + 2

    def buildHeap(self, n, data):
        """Rearrange ``data[:n]`` in place into a min-heap.

        Each element is appended to the heap in turn and sifted up until its
        parent is no larger than it.  ``n`` is clamped to ``len(data)`` so a
        request larger than the list does not index past its end.
        """
        n = min(n, len(data))
        for i in range(1, n):
            t = i
            while t != 0:
                p = self.parent(t)
                if not data[t] < data[p]:
                    break
                data[t], data[p] = data[p], data[t]
                t = p

    def adjust(self, i, n, data):
        """Offer ``data[i]`` to the min-heap stored in ``data[:n]``.

        When ``data[i]`` is greater than the heap root the two values are
        swapped and the new root is sifted down to restore the heap;
        otherwise nothing changes.  An empty heap (``n <= 0``) is a no-op.
        """
        if n <= 0 or data[i] <= data[0]:
            return
        data[i], data[0] = data[0], data[i]
        t = 0
        while True:
            smallest = t
            l = self.left(t)
            r = self.right(t)
            # Strict comparisons make the left child win ties, so equal
            # children are handled the same way on every pass.
            if l < n and data[l] < data[smallest]:
                smallest = l
            if r < n and data[r] < data[smallest]:
                smallest = r
            if smallest == t:
                break
            data[t], data[smallest] = data[smallest], data[t]
            t = smallest

    def findTopN(self, n, data):
        """Move the ``n`` largest values of ``data`` into ``data[:n]``.

        The list is rearranged in place and the same list object is
        returned.  The first ``n`` slots end up in min-heap order; the
        remaining slots hold the values that were displaced.

        If ``n`` exceeds ``len(data)`` every element is kept (``n`` is
        clamped); if ``n`` is zero or negative the list is returned
        untouched.
        """
        n = min(n, len(data))
        if n <= 0:
            return data
        self.buildHeap(n, data)
        for i in range(n, len(data)):
            self.adjust(i, n, data)
        return data
# Demo on a fixed sample: print the list, run the Top-5 selection on it
# (the list is rearranged in place), then print the rearranged list.
arr1 = [58, 26, 45, 18, 22, 39, 96, 75, 80, 65, 63, 28]
print("原數組:", arr1, sep="")
topn = TopN()
result = topn.findTopN(5, arr1)
print("數組進行Top-N調整:", result, sep="")
import random
# Randomised check: draw 20 integers in [0, 1000], select the top N with the
# heap, and print the result next to a plain descending sort for comparison.
N = 10
tempList = []
for i in range(20):
    tempList.append(random.randint(0, 1000))
print("原數組:", tempList, sep="")
topn = TopN()
result = topn.findTopN(N, tempList)
# The first N slots are in heap order, so sort a copy descending for display.
temp = sorted(result[:N], reverse=True)
print("數組進行Top-{0}調整:{1}".format(N, temp))
# Reference answer: sort the whole list in place and take its first N values.
tempList.sort(reverse=True)
print("數組進行Top-{0}排序:{1}".format(N, tempList[:N]))
# 示例輸出:
# 原數組:[58, 26, 45, 18, 22, 39, 96, 75, 80, 65, 63, 28]
# 數組進行Top-N調整:[63, 65, 80, 75, 96, 18, 22, 26, 39, 45, 58, 28]
# 原數組:[676, 931, 685, 823, 889, 428, 681, 445, 497, 940, 113, 102, 299, 224, 811, 157, 889, 41, 959, 250]
# 數組進行Top-10調整:[959, 940, 931, 889, 889, 823, 811, 685, 681, 676]
# 數組進行Top-10排序:[959, 940, 931, 889, 889, 823, 811, 685, 681, 676]