from urllib.request import urlopen
from urllib.error import HTTPError
from urllib.error import URLError
from bs4 import BeautifulSoup
def getHTML(url):
    """Fetch *url* and return its content parsed as a BeautifulSoup object.

    Args:
        url: Address of the page to download, passed straight to ``urlopen``.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``None`` when the page
        could not be opened or parsed. The error is printed in that case.
    """
    try:
        response = urlopen(url)
    except URLError as e:
        # HTTPError (e.g. 404) is a subclass of URLError, so this single
        # handler covers both the "bad address" and "bad status" cases.
        print(e)
        return None

    try:
        # The context manager closes the connection even if read/parse fails.
        with response:
            # Name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            bsObj = BeautifulSoup(response.read(), "html.parser")
    except AttributeError as e:
        print(e)
        return None

    print("Html Information Get!")
    return bsObj
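# Notes:
# `try` works a bit like an interrupt: as soon as an error occurs, execution
# jumps immediately into the matching `except` block.
# Fetching a web page can fail in two ways:
# 1. The address itself is wrong and cannot be resolved or reached (URLError).
# 2. The server responds, but the page does not exist, e.g. 404 (HTTPError).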