# 哈希樹的簡單實現 (A simple implementation of Hash Tree)

# coding=utf-8

"""
A simple implementation of Hash Tree
哈希樹的簡單實現
"""
from functools import reduce


class HashTreeNode(object):
    """A node of a prefix tree that stores sorted candidate bags of items.

    Attributes:
        val: counter incremented by ``count`` each time a transaction
            reaches this node as the last item of a candidate bag.
        name: the item this node stands for (``''`` by default).
        level: number of items still below this node; ``0`` marks a leaf.
            It is overwritten with ``len(bag)`` on every ``addBag`` call, so
            all bags added to one tree are expected to have the same length.
        children: dict mapping an item to the child node for that item.
    """

    def __init__(self, name=''):
        self.val = 0
        self.name = name
        self.level = 0
        self.children = {}

    def addBag(self, bag):
        """Insert ``bag`` (a sorted sequence of items) below this node.

        An empty bag is ignored. Note that bag must be sorted.
        """
        if not bag:
            return
        head = bag[0]
        node = self.children.get(head)
        if node is None:
            # Only build a new node when the prefix is not in the tree yet.
            node = HashTreeNode(name=head)
            self.children[head] = node
        node.addBag(bag[1:])
        self.level = len(bag)

    def count(self, transaction):
        """Count the stored bags contained in ``transaction``.

        The current node is assumed to be matched already; ``transaction``
        holds the (sorted) items that are still available for matching the
        remaining ``level`` items of each bag below this node.
        """
        if self.level == 0:
            self.val += 1
        elif self.level == 1:
            # Children are leaves: bump their counters directly.
            for item in transaction:
                child = self.children.get(item)
                if child is not None:
                    child.val += 1
        else:
            for i, item in enumerate(transaction):
                child = self.children.get(item)
                if child is not None:
                    # The matched item is consumed; only later items may
                    # match the rest of the bag.
                    child.count(transaction[i + 1:])

    def get(self, theta):
        """Return the names along every path found by ``getNodes(theta)``.

        Each result is a list of node names starting with this node's own
        name. An empty list is returned when no path qualifies.
        """
        return [[node.name for node in path] for path in self.getNodes(theta)]

    def getNodes(self, theta):
        """Return every path from this node to a leaf with ``val >= theta``.

        Each path is a list of nodes starting with ``self``; children are
        visited in sorted key order. An empty list is returned when no leaf
        qualifies.
        """
        if self.level == 0:
            return [[self]] if self.val >= theta else []
        paths = []
        for key in sorted(self.children):
            for tail in self.children[key].getNodes(theta):
                paths.append([self] + tail)
        return paths

    def __str__(self):
        inner = '; '.join(str(child) for child in self.children.values())
        return '(%s : %s)' % (self.name, inner)


def sameNode(node1, node2):
    """Tell whether two nodes carry the same ``name``."""
    first_name, second_name = node1.name, node2.name
    return first_name == second_name


def sameNodes(nodes1, nodes2):
    """Tell whether two node sequences have the same names in the same order."""
    names1 = [node.name for node in nodes1]
    names2 = [node.name for node in nodes2]
    return names1 == names2


class HashTree(object):
    """Hash tree built from candidate bags, used to count their support.

    Note that all bags must be sorted. Empty bags are skipped.
    """

    def __init__(self, bags):
        # A fresh HashTreeNode already starts with val == 0.
        self.root = HashTreeNode()
        for bag in bags:
            if bag:
                self.root.addBag(bag)

    def count(self, transactions):
        """Feed every transaction through the tree to update the counters."""
        for transaction in transactions:
            self.root.count(transaction)

    def get(self, theta):
        """Return the bags counted at least ``theta`` times, as item lists.

        Returns an empty list when no bag qualifies, including the case of
        a tree that holds no bags.
        """
        return [[node.name for node in path] for path in self.getNodes(theta)]

    def getNodes(self, theta):
        """Return the bags counted at least ``theta`` times, as node lists.

        Each result is the path of nodes below the root. Returns an empty
        list when no bag qualifies, including the case of a tree that holds
        no bags.
        """
        # The root yields None (not a list) when it is itself a leaf below
        # the threshold, i.e. when the tree holds no bags; treat as "nothing".
        paths = self.root.getNodes(theta) or []
        res = [path[1:] for path in paths]
        # A lone empty path means only the bare root qualified.
        return [] if res == [[]] else res

    def __str__(self):
        return str(self.root)


if __name__ == '__main__':
    # Small demo: count how often each candidate pair occurs in the
    # transactions, then print the pairs reaching each support threshold.
    candidates = [[1, 2], [2, 4], [1, 3], [1, 5], [3, 4], [2, 7], [6, 8]]
    transactions = [[1, 2, 3], [1, 2, 4], [2, 4, 6, 8], [1, 3, 5, 7]]

    tree = HashTree(candidates)
    tree.count(transactions)

    print('Frequency with transactions', transactions)
    for theta in (2, 1):
        print(tree.get(theta))

 

# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章