Python 3.0 糗事百科

import urllib.request
import re

def getSentence(data):
    """Extract the contents of every ``<span>...</span>`` pair found in *data*.

    The match is non-greedy and ``.`` does not cross newlines, so only spans
    whose opening and closing tags sit on the same line are returned. Spans
    with attributes (``<span class=...>``) are not matched as openers.

    Args:
        data: HTML text to search (a ``str``).

    Returns:
        A list with the inner text of each matched span, in document order;
        an empty list when nothing matches.
    """
    span_pattern = re.compile(r'<span>(.*?)</span>')
    return span_pattern.findall(data)

# Page to scrape and the browser identity presented to the server.
url = "http://www.qiushibaike.com/"
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'

# NOTE(review): 'If-None-Match' turns this into a conditional GET with a
# fixed ETag. If the server ever answers 304 Not Modified, urlopen raises
# HTTPError and nothing is written -- confirm this header is really needed.
headers = {
    'User-Agent': user_agent,
    'If-None-Match': "6e0f605c36f86beeee986d350bf78be89d606ee5",
}

# Fetch the page, pull out the <span> contents and save the textual ones to
# qsbk.txt, separated by blank lines.
try:
    req = urllib.request.Request(url, headers=headers)
    # Context manager closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    strlist = getSentence(result.decode())
    # Explicit UTF-8 so the scraped text can be written regardless of the
    # platform's default locale encoding; `with` guarantees the file is
    # flushed and closed.
    with open("qsbk.txt", "w", encoding="utf-8") as out_file:
        for sentence in strlist:
            # Skip spans that start with an <img> tag; everything else is kept.
            if not sentence.startswith('<img'):
                out_file.write(sentence)
                out_file.write("\n\n")
except urllib.request.URLError as e:
    # Print whichever diagnostic attributes the raised error actually has.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)


	
參照希望姐的筆記寫的，自己改動了一下：
http://cuiqingcai.com/990.html


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章