# 這個是《Python核心編程》裏面的一個例子，修改了一下，改用文件來存取數據
# -*- coding: utf8 -*-
from random import randint,choice
from string import lowercase
from sys import maxint
from time import ctime
from os import path
from re import match,search
def createdata(path):
    """Append 5 to 10 randomly generated records to the file at *path*.

    Each record is a single line of the form::

        <ctime string>::<local>@<domain>.<tld>::<timestamp>-<len(local)>-<len(domain)>

    where ``local`` is 4-7 lowercase ASCII letters, ``domain`` is between
    ``len(local)`` and 12 lowercase ASCII letters, and ``tld`` is one of
    com/edu/net/org/gov.

    The file is opened in append mode, so repeated calls accumulate
    records.  The target path is echoed to stdout before writing.

    Note: the parameter name shadows the module-level ``os.path`` import
    inside this function; it is kept for interface compatibility.
    """
    # Imported locally: ``string.lowercase`` is locale-dependent (and only
    # exists on Python 2), so it may contain non-ASCII letters that do not
    # belong in the generated addresses.  ``ascii_lowercase`` is always a-z.
    from string import ascii_lowercase

    doms = ('com', 'edu', 'net', 'org', 'gov')
    # Keep timestamps inside the signed 32-bit range.  On 64-bit Unix builds
    # sys.maxint is 2**63 - 1, far beyond what ctime() is able to convert.
    max_timestamp = 2 ** 31 - 1

    print(path)
    # The context manager guarantees the file is flushed and closed.
    with open(path, 'a') as dataFile:
        # Write a random number (5 to 10) of records.
        for _ in range(randint(5, 10)):
            dtint = randint(0, max_timestamp - 1)
            # Convert the timestamp to its human-readable string form.
            dtstr = ctime(dtint)
            # Part before the '@'.
            shorter = randint(4, 7)
            em = ''.join(choice(ascii_lowercase) for _ in range(shorter))
            # Part after the '@'; never shorter than the local part.
            longer = randint(shorter, 12)
            dn = ''.join(choice(ascii_lowercase) for _ in range(longer))
            dataFile.write('%s::%s@%s.%s::%d-%d-%d\n' % (
                dtstr, em, dn, choice(doms), dtint, shorter, longer))
if __name__ =='__main__':
datapath = 'c://Users//xiaoyizong//Desktop//re_data_test'
if(path.exists(datapath)):
print ' now read data!!!!'
with open('c://Users//xiaoyizong//Desktop//re_data_test') as re_data_test:
datastr = re_data_test.readlines()
#找出data裏面的所有郵箱,輸出
patt = '[a-zA-Z](([a-zA-Z0-9]*\.[a-zA-Z0-9]*)|[a-zA-Z0-9]*)[a-zA-Z]@([a-z0-9A-Z]+\.)+[a-zA-Z]{2,}'
for str in datastr:
#print patt, str
print search(patt,str).group()
else:
print ' data not exist and now create data'
createdata(datapath)
# 生成的數據樣式：
# 運行結果：找出源數據裏面的所有郵箱並輸出：