圖片處理 - 使用 BeautifulSoup 模塊
標籤(空格分隔): python
使用 BeautifulSoup 模塊
使用 BeautifulSoup 對 HTML 內容解析之後,HTML 內容就變成了結構化數據,可以輕易地對其中的 DOM 元素進行操作,比如獲取、修改、刪除數據等。
import contextlib
import urllib
import urllib2

from bs4 import BeautifulSoup
def getContext(url):
    """Fetch the raw HTML of a page.

    Args:
        url: Address of the page to download.

    Returns:
        The response body exactly as read from the server; no decoding
        is applied.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            URL cannot be opened.
    """
    # Python 2 urllib2 responses are not context managers, so wrap the
    # handle in closing() to release the connection even if read() fails.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return response.read()
def getImages(html):
    """Collect the source URLs of the matching images in a page.

    Only ``<img>`` tags carrying the CSS class ``BDE_Image`` are
    considered (presumably the class the target forum puts on in-post
    pictures -- confirm against the page markup). Each collected URL is
    also printed to standard output.

    Args:
        html: HTML markup to parse.

    Returns:
        A list with the ``src`` value of every matching image. Tags that
        have no ``src`` attribute (or an empty one) are skipped.
    """
    # Name the parser explicitly: letting bs4 pick whichever parser happens
    # to be installed makes results machine-dependent and emits a warning.
    soup = BeautifulSoup(html, 'html.parser')

    sources = []
    for picture in soup.find_all('img', class_='BDE_Image'):
        # Tag.get() yields None for a missing attribute instead of raising
        # KeyError, so one malformed tag does not abort the whole scan.
        src = picture.get('src')
        if not src:
            continue
        # A single parenthesized argument prints identically under the
        # Python 2 print statement.
        print(src)
        sources.append(src)
    return sources
def downLoadPicture(urlList):
    """Download every URL in ``urlList`` to a numbered ``.jpg`` file.

    Files are written relative to the current working directory and are
    named after the 1-based position of the URL in the list: ``1.jpg``,
    ``2.jpg``, and so on. The ``.jpg`` suffix is used regardless of the
    actual content type.

    Args:
        urlList: Iterable of URLs to retrieve.
    """
    # enumerate(..., 1) replaces the hand-maintained counter.
    for index, pictureUrl in enumerate(urlList, 1):
        urllib.urlretrieve(pictureUrl, '%s.jpg' % index)
# Page whose images are downloaded when the file is run as a script.
url = "http://tieba.baidu.com/p/3932177087"

if __name__ == "__main__":
    # Pipeline: fetch the page, extract the image URLs, save each image.
    html = getContext(url)
    urls = getImages(html)
    downLoadPicture(urls)