Python 簡單爬蟲:使用 BeautifulSoup4 解析,爬取直播吧部分熱門新聞及鏈接

import urllib.request
from bs4 import BeautifulSoup

# Zhibo8 news page that the crawler fetches.
url = "http://news.zhibo8.cc/zuqiu/"

def __getPage(url):
	"""Download *url* and return the raw response body.

	Args:
		url: Address to fetch; passed unchanged to ``urllib.request.urlopen``.

	Returns:
		The response body as ``bytes`` when the status code is 200,
		otherwise ``None``.

	Raises:
		Whatever ``urllib.request.urlopen`` raises (for example
		``urllib.error.URLError`` / ``HTTPError``); nothing is caught here.
	"""
	print("now crawler coming")
	# The context manager closes the response (and its socket) on every
	# path, including the early return below and a failing read().
	with urllib.request.urlopen(url) as page:
		if page.getcode() != 200:
			return None
		return page.read()

def __writeToFile(data, path="G://page1.txt"):
	"""Dump a downloaded page to disk.

	Args:
		data: Page content. ``bytes``-like payloads (what ``read()`` on an
			HTTP response yields) are written verbatim in binary mode;
			``str`` payloads are written as UTF-8 text.
		path: Destination file. Defaults to the location that used to be
			hard-coded in this function.

	Returns:
		None.
	"""
	# A text-mode file rejects bytes with TypeError, so pick the mode from
	# the payload type instead of always opening with "w".
	if isinstance(data, (bytes, bytearray, memoryview)):
		with open(path, "wb") as f:
			f.write(data)
	else:
		# Explicit encoding keeps the output independent of the OS locale.
		with open(path, "w", encoding="utf-8") as f:
			f.write(data)

def __htmlParser(data):
	"""Extract the link tags from the page's ``topleftbox`` container.

	Args:
		data: HTML markup to parse, or ``None`` when no page was obtained.

	Returns:
		The result of ``find_all("a")`` on the first
		``<div class="topleftbox">`` element, or ``None`` when *data* is
		``None`` or the page has no such element.
	"""
	if data is None:
		return None
	soup = BeautifulSoup(data, "html.parser")
	box = soup.find("div", attrs={'class': 'topleftbox'})
	# find() yields None when nothing matches; report that as None rather
	# than failing with AttributeError on the chained find_all().
	if box is None:
		return None
	return box.find_all("a")
def __showHtml(title, path="G://dongqiudi.html"):
	"""Render the scraped items into a minimal standalone HTML page.

	Args:
		title: Iterable of items to list. Each one is formatted with ``%s``
			and inserted as-is (no escaping), so tag objects keep their
			markup. ``None`` means there is nothing to render.
		path: Output file. Defaults to the location that used to be
			hard-coded in this function.

	Returns:
		The string ``'Error'`` when *title* is ``None`` (no file is
		touched in that case), otherwise ``None``.
	"""
	if title is None:
		return 'Error'
	# Build the whole document first so a failure while formatting an item
	# cannot leave a half-written file behind.
	parts = ["<html>", '<head><meta charset="utf-8"></head>', "<body>", "<div>"]
	# "<br>" is the valid line-break element; "</br>" is a stray end tag.
	parts.extend("<br>%s" % item for item in title)
	parts.extend(["</div>", "</body>", "</html>"])
	# The encoding is fixed and declared in the page, so non-ASCII text
	# does not depend on the OS locale.
	with open(path, "w", encoding="utf-8") as html:
		html.write("".join(parts))

if __name__ == '__main__':
	# Script entry point: fetch the page, extract its links, then write the
	# HTML report. Each stage feeds the next directly.
	__showHtml(__htmlParser(__getPage(url)))




最後的輸出頁面:


發佈了67 篇原創文章 · 獲贊 63 · 訪問量 34萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章