原创 elmo調試練習

import tensorflow_hub as hub import tensorflow as tf import re import numpy as np import pickle import pandas as pd fr

原创 ELMO

最近重溫了下 ELMo 模型,主要有幾點:   1 - 相比於 word2vec 這些多了上下文的理解。   2 - 基本單元是一個兩層的基於字符卷積的網絡。   3 - 內部狀態的組合構成新的詞彙向量表示。   4 - ELMo 採用了雙向 bi-LSTM…

原创 gensim fasttext

from nltk import word_tokenize,WordNetLemmatizer import pandas as pd from nltk.corpus import stopwords import re from

原创 python快速排序

def quick_sort(list): list_3 = [] list_1 = [] list_2 = [] if len(list) <=1: return list el

原创 Bert文本分類 run_classifier內容

# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors. # # Licensed under the Apache License, Version 2.

原创 PDF轉換txt

# -*- coding: utf-8 -*- import sys #reload(sys) #sys.setdefaultencoding('utf-8') from pdfminer.pdfparser import PDF

原创 python選擇排序

def select_sort(list): len_list = len(list) for i in range(len_list): min = i for j in range(i

原创 python堆排序

import heapq import random def heapsort(li): h = [] for v in li: heapq.heappush(h,v) return [heapq

原创 Ner

import codecs import random import numpy as np from gensim import corpora from keras.layers import Dense,GRU,Bidirecti

原创 word處理

#讀取docx中的文本代碼示例 import docx from win32com import client as wc import re import os import os.path def getListFiles(path

原创 python動態規劃之揹包問題

import numpy as np def bag(weight,values,weight_cont): num = len(weight) weight.insert(0,0) values.insert(

原创 excel轉換txt

import xlrd import os import sys def getListFiles(path): ret = [] for root, dirs, files in os.walk(path):

原创 gensim中的word2vec與fasttext

from nltk import word_tokenize,WordNetLemmatizer import pandas as pd from nltk.corpus import stopwords import re from

原创 python歸併排序

def merge_sort(list): left_p = 0 right_p = 0 result = [] len_list = len(list) if len_list <= 1:

原创 python冒泡排序

def bubble_sort(list): len_list = len(list) for i in range(len_list): for j in range(i+1,len_list):