原创 elmo調試練習
import tensorflow_hub as hub import tensorflow as tf import re import numpy as np import pickle import pandas as pd fr
原创 ELMO
最近重溫了下elmo模型,主要有幾點: 1 - 相比於word2vec這些,多了對上下文的理解。 2 - 基本單元是一個兩層的基於字符卷積的網絡。 3 - 內部狀態的組合構成新的詞彙向量表示。 4 - elmo採用了雙向bi-lst
原创 gensim fasttext
from nltk import word_tokenize,WordNetLemmatizer import pandas as pd from nltk.corpus import stopwords import re from
原创 python快速排序
def quick_sort(list): list_3 = [] list_1 = [] list_2 = [] if len(list) <=1: return list el
原创 Bert文本分類 run_classifier內容
# coding=utf-8 # Copyright 2018 The Google AI Language Team Authors. # # Licensed under the Apache License, Version 2.
原创 PDF轉換txt
# -*- coding: utf-8 -*- import sys #reload(sys) #sys.setdefaultencoding('utf-8') from pdfminer.pdfparser import PDF
原创 python選擇排序
def select_sort(list): len_list = len(list) for i in range(len_list): min = i for j in range(i
原创 python堆排序
import heapq import random def heapsort(li): h = [] for v in li: heapq.heappush(h,v) return [heapq
原创 Ner
import codecs import random import numpy as np from gensim import corpora from keras.layers import Dense,GRU,Bidirecti
原创 word處理
#讀取docx中的文本代碼示例 import docx from win32com import client as wc import re import os import os.path def getListFiles(path
原创 python動態規劃之背包問題
import numpy as np def bag(weight,values,weight_cont): num = len(weight) weight.insert(0,0) values.insert(
原创 excel轉換txt
import xlrd import os import sys def getListFiles(path): ret = [] for root, dirs, files in os.walk(path):
原创 gensim中的word2vec與fasttext
from nltk import word_tokenize,WordNetLemmatizer import pandas as pd from nltk.corpus import stopwords import re from
原创 python歸併排序
def merge_sort(list): left_p = 0 right_p = 0 result = [] len_list = len(list) if len_list <= 1:
原创 python冒泡排序
def bubble_sort(list): len_list = len(list) for i in range(len_list): for j in range(i+1,len_list):