from urllib.request import urlopen
from urllib.error import HTTPError
from urllib.error import URLError
from bs4 import BeautifulSoup
def getHTML(url):
    """Fetch *url* and return its content parsed as a BeautifulSoup tree.

    Failures are reported by printing the exception and returning ``None``
    rather than by raising, so callers must check the result for ``None``.

    Args:
        url: Address of the page to download; passed straight to ``urlopen``.

    Returns:
        A ``BeautifulSoup`` object for the downloaded document, or ``None``
        when the page could not be opened (``HTTPError`` / ``URLError``) or
        an ``AttributeError`` occurred while reading/parsing it.

    Note:
        Only the exceptions listed above are handled here; anything else
        raised by ``urlopen`` (for example ``ValueError`` for a string that
        is not a URL at all) still propagates to the caller.
    """
    try:
        response = urlopen(url)
    except (HTTPError, URLError) as e:
        # HTTPError: the server answered with an error status (e.g. 404).
        # URLError: the request could not be completed at all.
        print(e)
        return None

    try:
        # Close the response as soon as the body has been consumed so the
        # underlying connection/file handle is not leaked.
        with response:
            # Name the parser explicitly: without it bs4 picks whichever
            # parser happens to be installed and emits a warning.
            bsObj = BeautifulSoup(response.read(), "html.parser")
    except AttributeError as e:
        # Guard kept from the original code: report and signal failure.
        print(e)
        return None

    print("Html Information Get!")
    return bsObj
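# Notes:
# `try` behaves a bit like an interrupt: as soon as a statement inside it
# raises, execution jumps straight to the matching `except` clause.
# Fetching a web page can fail in two ways:
# 1. The address itself is bad or cannot be resolved/reached -> URLError.
# 2. The server is reached but the page does not exist (e.g. 404) -> HTTPError.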