import urllib.request
from bs4 import BeautifulSoup
# Crawl news from Zhibo8 (news.zhibo8.cc).
# Address of the page to download; the main guard passes it to __getPage.
url = "http://news.zhibo8.cc/zuqiu/"
def __getPage(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address to fetch; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The body as ``bytes`` when the response reports status 200,
        otherwise ``None``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails. HTTP error statuses such as 404 surface as its subclass
            ``HTTPError`` instead of reaching the status check below.
    """
    print("now crawler coming")
    # The context manager closes the response on every exit path, including
    # the early return and any exception raised while reading.
    with urllib.request.urlopen(url) as page:
        if page.getcode() != 200:
            return None
        return page.read()
def __writeToFile(data, path="G://page1.txt"):
    """Save *data* to *path*, replacing any existing file.

    Args:
        data: Content to store. ``bytes``/``bytearray`` (what
            ``urlopen(...).read()`` yields) are written verbatim in binary
            mode; anything else is written as text encoded in UTF-8.
        path: Destination file. Defaults to the location that used to be
            hard-coded, so existing one-argument calls are unaffected.
    """
    # Binary payloads cannot be written to a text-mode file (TypeError), so
    # pick the mode from the payload type.
    if isinstance(data, (bytes, bytearray)):
        with open(path, "wb") as f:
            f.write(data)
    else:
        # An explicit encoding keeps the dump independent of the platform's
        # locale code page, which may not cover every character on the page.
        with open(path, "w", encoding="utf-8") as f:
            f.write(data)
def __htmlParser(data):
    """Extract the anchor tags inside the ``topleftbox`` div of a page.

    Args:
        data: HTML markup (``str`` or ``bytes``) to parse, or ``None`` when
            no page is available.

    Returns:
        The result of ``find_all("a")`` on the first
        ``<div class="topleftbox">``, or ``None`` when *data* is ``None`` or
        the page contains no such div.
    """
    # Nothing to parse; hand the "no result" marker on to the caller.
    if data is None:
        return None
    soup = BeautifulSoup(data, "html.parser")
    box = soup.find("div", attrs={'class': 'topleftbox'})
    # find() yields None when nothing matches; calling find_all on that
    # would raise AttributeError.
    if box is None:
        return None
    return box.find_all("a")
def __showHtml(title, path="G://dongqiudi.html"):
    """Render the items of *title* into a minimal HTML page on disk.

    Args:
        title: Iterable of items to list (for example the tags returned by
            the parser); each is converted with ``str()`` and placed on its
            own line. ``None`` means there is nothing to render.
        path: Output file. Defaults to the location that used to be
            hard-coded, so existing one-argument calls are unaffected.

    Returns:
        ``'Error'`` when *title* is ``None`` (no file is written), otherwise
        ``None``.
    """
    if title is None:
        return 'Error'
    # "<br/>" is the valid line-break tag; the previous "</br>" is a stray
    # end tag that is not valid HTML.
    rows = "".join("<br/>" + str(item) for item in title)
    document = "".join([
        "<html>",
        # Declare the encoding so browsers decode non-ASCII headlines the
        # same way the file is written below.
        "<head><meta charset=\"utf-8\"></head>",
        "<body>",
        "<div>",
        rows,
        "</div>",
        "</body>",
        "</html>",
    ])
    # The with-block closes the file even if the write fails.
    with open(path, "w", encoding="utf-8") as html:
        html.write(document)
if __name__ == '__main__':
    # Script entry point: download the page, extract the headline links and
    # render them to the local HTML file.
    pageHtml = __getPage(url)
    if pageHtml is None:
        # __getPage returns None when the status is not 200; there is
        # nothing to parse in that case.
        print("failed to download %s" % url)
    else:
        title = __htmlParser(pageHtml)
        # __showHtml reports a missing link list with the string 'Error'
        # instead of raising, so surface it rather than dropping it.
        if __showHtml(title) == 'Error':
            print("could not extract headline links from %s" % url)
# The final output page: