生成激活碼
#!/usr/bin/env python
#encoding:utf-8
#Author:sean
# Generate random activation codes made of dash-separated character groups.

import string
import random

# Alphabet the codes are drawn from: ASCII letters plus decimal digits.
# string.ascii_letters is locale-independent and available on Python 2 and 3,
# unlike string.letters (locale-dependent on Python 2, removed in Python 3).
field = string.ascii_letters + string.digits

# Activation codes act as secrets, so draw from the operating system's
# entropy source instead of the default, seedable generator.
_rng = random.SystemRandom()


def getRandom():
    """Return one group of 4 characters picked from ``field``.

    ``sample`` draws without replacement, so a character never repeats
    inside a single group.
    """
    return ''.join(_rng.sample(field, 4))


def concatenate(group):
    """Return one activation code made of ``group`` dash-separated groups.

    Args:
        group: Number of 4-character groups in the code.
    """
    return '-'.join(getRandom() for _ in range(group))


def generate(n):
    """Return a list of ``n`` activation codes, each made of 4 groups.

    Args:
        n: Number of codes to produce; ``0`` yields an empty list.
    """
    return [concatenate(4) for _ in range(n)]


if __name__ == '__main__':
    # A parenthesised single-argument print works on Python 2 and Python 3.
    print(generate(10))
統計單詞
#!/usr/bin/env python
#encoding:utf-8
# Count how often each word occurs in an English plain-text file.

import re
from collections import Counter

# Default input file used when the module is run as a script.
FileSource = './media/abc.txt'


def getMostCommonWord(articlefilesource):
    """Count the tokens of an English plain-text file.

    A token is either a run of ASCII letters or a run of digits that may be
    prefixed with ``$`` and/or suffixed with ``%`` (for example ``$5`` or
    ``10%``). Matching is case-sensitive, so ``The`` and ``the`` are counted
    separately.

    Args:
        articlefilesource: Path of the text file to read.

    Returns:
        A list of ``(token, count)`` tuples ordered from the most to the
        least frequent token; empty when the file contains no tokens.

    Raises:
        IOError: If the file cannot be opened or read (``OSError`` on
            Python 3).
    """
    # The numeric alternative deliberately has no trailing ``$`` anchor:
    # with the anchor, numbers were only matched at the very end of the
    # text while words were matched everywhere.
    pattern = r'[A-Za-z]+|\$?\d+%?'
    with open(articlefilesource) as f:
        text = f.read()
    return Counter(re.findall(pattern, text)).most_common()


if __name__ == '__main__':
    # A parenthesised single-argument print works on Python 2 and Python 3.
    print(getMostCommonWord(FileSource))
提取網頁正文
#!/usr/bin/env python
#encoding:utf-8
# Extract the main body text of a web page with the goose library.

from goose import Goose
from goose.text import StopWordsChinese
import sys

# URL of the page to analyse.
# TODO: the original URL literal is missing from this copy of the script
# (the string was left unterminated); fill in the target page here or pass
# the URL on the command line.
url = ''


def extract(url):
    """Return the cleaned body text of the page at ``url``.

    Builds a ``Goose`` instance configured with the Chinese stop-word class,
    runs its ``extract`` on the URL and returns the resulting article's
    ``cleaned_text`` attribute.

    Args:
        url: Address of the web page to process.
    """
    g = Goose({'stopwords_class': StopWordsChinese})
    article = g.extract(url=url)
    return article.cleaned_text


if __name__ == '__main__':
    # A URL given on the command line takes precedence over the module-level
    # default, which is empty until someone fills it in.
    target = sys.argv[1] if len(sys.argv) > 1 else url
    if not target:
        sys.exit('usage: python <script> <url>')
    # A parenthesised single-argument print works on Python 2 and Python 3.
    print(extract(target))