Whoosh初次使用

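1.Python代碼

(注:以下代碼片段省略了導入語句,運行前需先加入:import time、import pprint、from whoosh.fields import Schema, TEXT, ID、from whoosh.index import create_in, open_dir、from whoosh.qparser import QueryParser。代碼以 Python 2 編寫,例如使用了 raw_input。)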

1.1 構建索引

def build_index(file_path):
        """Index every line of a UTF-8 text file into the Whoosh index "indexdir".

        A new index is created in the "indexdir" directory (created first if it
        does not exist yet) and each line of the file is added as one document:
        ``title`` is "<n> line" with a 1-based line number, ``path`` is
        ``file_path`` and ``content`` is the text of the line.

        Args:
            file_path: Path of the text file to index. A byte string is
                decoded as UTF-8 before being stored in the ``path`` field.

        Returns:
            A status string ending with the elapsed wall-clock seconds.

        Raises:
            IOError/OSError: If the file cannot be opened or "indexdir" cannot
                be created.
            UnicodeDecodeError: If the file (or a byte-string path) is not
                valid UTF-8.
        """
        import io
        import os
        from timeit import default_timer

        # default_timer is a wall-clock timer on every platform; time.clock()
        # reported CPU time on Unix and no longer exists in Python 3.8+.
        start_time = default_timer()
        schema = Schema(title=TEXT(stored=True), path=ID(stored=False), content=TEXT(stored=True))

        # Whoosh stores unicode text, so normalise a byte-string path once.
        if isinstance(file_path, bytes):
                path_text = file_path.decode("utf-8")
        else:
                path_text = file_path

        # Open the input first: an unreadable path must fail before the
        # existing index is overwritten and before the write lock is taken.
        with io.open(file_path, 'r', encoding="utf-8") as filereader:
                # create_in() needs the target directory to exist already.
                if not os.path.isdir("indexdir"):
                        os.mkdir("indexdir")
                ix = create_in("indexdir", schema)
                # As a context manager the writer commits on success and
                # cancels (releasing the index lock) if any line fails.
                with ix.writer() as writer:
                        for num, row in enumerate(filereader, 1):
                                writer.add_document(title=u"%d line" % num, path=path_text, content=row)

        elapsed = default_timer() - start_time
        return "Step One:  索引已經構建完成--------------------- " + str(elapsed)

1.2  檢索

def search_word(search):
        """Search the "content" field of the index in "indexdir" and print all hits.

        Prints a status line with the elapsed wall-clock seconds, then
        pretty-prints every matching hit (no result limit).

        Args:
            search: Query string in Whoosh's default query syntax. A byte
                string is decoded as UTF-8 before parsing.

        Returns:
            The string "Finished.".
        """
        from timeit import default_timer

        # default_timer is a wall-clock timer on every platform; time.clock()
        # reported CPU time on Unix and no longer exists in Python 3.8+.
        start_time = default_timer()

        # The indexed content was decoded as UTF-8, so decode a byte-string
        # query (e.g. from Python 2 raw_input) the same way; otherwise
        # non-ASCII terms may not match.
        # NOTE(review): how Whoosh treats byte input is version-dependent - confirm.
        if isinstance(search, bytes):
                search = search.decode("utf-8")

        ix = open_dir("indexdir")
        with ix.searcher() as searcher:
                query = QueryParser("content", ix.schema).parse(search)
                results = searcher.search(query, limit=None)
                elapsed = default_timer() - start_time
                # Single-argument print(...) gives the same output on Python 2
                # and is also valid on Python 3.
                print("Step Two: 搜索已經完成--------------------- " + str(elapsed))
                # Hits must be read while the searcher is still open.
                pprint.pprint(results[:])
        return "Finished."

1.3 測試

if __name__ == "__main__":
        # Step one: ask for a text file and index it. The failure message is
        # assigned up front so the finally clause always has something to
        # print; if build_index raises, the message is shown and the
        # exception then propagates.
        file_name = raw_input("Please input the path of file you want to build index: ")
        step_result = "Something went wrong......."
        try:
                step_result = build_index(file_name)
        finally:
                print(step_result)

        # Step two: ask for a query and run it against the index just built,
        # reporting the outcome the same way as step one.
        search = raw_input("Please input the word you want to search: ")
        step_result = "Something went wrong......"
        try:
                step_result = search_word(search)
        finally:
                print(step_result)

2.參考資料

1.https://whoosh.readthedocs.io/en/latest/releases/index.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。
相關文章