爬取圖片
- 基於 requests 模塊的 get 請求
- 爬取指定 url 的圖片
import requests
if __name__ == '__main__':
    # Download one image from a fixed URL and save it as ./a.jpg.
    url = "https://pic.qiushibaike.com/system/pictures/12296/122960119/medium/8L45TQR77BQYY1C6.jpg"
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of saving an error page as a .jpg file.
    response.raise_for_status()
    # .content is the raw response body as bytes, which is what an image needs.
    img_data = response.content
    with open('./a.jpg', 'wb') as fp:
        fp.write(img_data)
    print('爬取數據結束!')
糗事百科
- 爬取 糗事百科 指定頁面的 糗圖
- 爬取鏈接:https://www.qiushibaike.com/imgrank/
import requests
import os
import re
if __name__ == '__main__':
    # Crawl pages 1-2 of the image ranking and save every thumbnail image
    # found on them into ./qiutu/.
    from urllib.parse import urljoin

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs('./qiutu', exist_ok=True)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/78.0.3904.108 Safari/537.36 '
    }
    url = "https://www.qiushibaike.com/imgrank/page/%d/"
    # Captures the src attribute of the <img> inside each <div class="thumb">.
    # The pattern is loop-invariant, so it is built once; re.S (passed below)
    # lets '.' span the newlines between the tags.
    ex = r'<div class="thumb">.*?<img src="(.*?)" alt.*?</div>'
    for pageNum in range(1, 3):
        new_url = url % pageNum
        response = requests.get(new_url, headers=headers, timeout=10)
        # A failed listing page means there is nothing to parse; stop loudly.
        response.raise_for_status()
        page_text = response.text
        img_src_list = re.findall(ex, page_text, re.S)
        for src in img_src_list:
            # urljoin resolves protocol-relative ("//host/path"), absolute and
            # relative src values against the page URL, instead of blindly
            # prefixing "https:".
            src = urljoin(new_url, src)
            img_name = src.split('/')[-1]
            try:
                img_response = requests.get(url=src, headers=headers, timeout=10)
                img_response.raise_for_status()
            except requests.RequestException:
                # One broken image should not abort the whole crawl.
                print(img_name + '下載失敗!')
                continue
            imgPath = './qiutu/' + img_name
            with open(imgPath, 'wb') as fp:
                fp.write(img_response.content)
            print(img_name + '下載成功!')
    print('爬取數據結束!')
來源:爬蟲開發入門丨老男孩IT教育