一、requests庫的簡介
requests庫是一個優雅簡潔的Python HTTP庫,專爲人類而構建。
requests的使用方式非常簡單、直接、人性化,讓程序員的精力從庫的使用中解放出來。
requests庫的作者是Kenneth Reitz。
特性:風格簡單、直接、優雅。無論是請求方式,還是響應結果的處理,還是Cookies、url參數、post提交數據,都體現出這種風格。
例如:
>>>import requests
>>>res = requests.get('https://www.baidu.com')
>>>res.status_code
200
>>>res.headers['content-type']
'application/json;charset=utf-8'
>>>res.encoding
'utf-8'
>>>res.text
'{"type":"User"...}'
二、requests總結
# Summary of the most common `requests` call patterns.
# The library is imported as `requests` (plural); the import lives here so the
# snippet can be run on its own.
import requests

# Target URL
url = "http://httpbin.org/get"
# Form fields submitted in a POST body
data = {"Lu": "子寧", "python": "study"}
# Query-string parameters
params = {"name": "jay zhou"}
# Request headers
headers = {"sex": "boy"}
# Cookies
cookies = {"sessionid": "jayzhou", "sex": "boy"}
# JSON body: pass a JSON-serializable object (not a pre-formatted string);
# requests serializes it itself.
json = {"copy": "cut"}
# Proxy used for plain-HTTP requests
proxy1 = {"http": "http://172.16.210.240:3838"}

# GET request
res = requests.get(
    url,
    params=params,
    headers=headers,
    cookies=cookies,
    timeout=2,
    proxies=proxy1,
)

# POST request
url = "http://httpbin.org/post"
# `data=` and `json=` are alternative ways to supply the body: when `data` is
# given, `json` is ignored, so each is demonstrated in its own request.
res = requests.post(url, data=data)   # form-encoded body
res = requests.post(url, json=json)   # JSON body

# allow_redirects=False returns the redirect response itself instead of
# following it.
res = requests.get("http://github.com/", allow_redirects=False)

# Skip TLS certificate verification. This only has an effect on https:// URLs.
# disable_warnings() silences the warning urllib3 emits for unverified requests.
requests.packages.urllib3.disable_warnings()
res = requests.get("https://image.baidu.com/search/index?tn=baiduimage", verify=False)
三、實例,爬取baidu圖片妹子圖3頁
1.尋找翻頁的規律
import requests
import re
import os
# 1. Fetch the search-result page whose source embeds the thumbnail URLs.
# The query string carries `&copyright=`; the original text had it corrupted
# to "©right=" by HTML-entity decoding.
first_url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1584591556060_R&pv=&ic=0&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&sid=&word=%E9%AB%98%E6%B8%85%E6%B8%85%E7%BA%AF%E5%A6%B9%E5%AD%90'
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(first_url, timeout=10)
# Fail loudly on an HTTP error instead of regex-scanning an error page.
res.raise_for_status()
data = res.text

# 2. Extract every thumbnail URL from the page source.
img_urls = re.findall(r'thumbURL":"(.*?)"', data)
print(img_urls)

# 3. Download the images one by one.
# 3.1 Keep them together in a dedicated directory; exist_ok avoids the
#     check-then-create race of os.path.exists() + os.mkdir().
os.makedirs("faceshow", exist_ok=True)
for index, img_url in enumerate(img_urls):
    try:
        res = requests.get(img_url, timeout=10)
        res.raise_for_status()
    except requests.RequestException as err:
        # One broken thumbnail should not abort the whole download, and an
        # error body must not be saved as a .jpg.
        print("skip {}: {}".format(img_url, err))
        continue
    img_data = res.content
    # The enumerate index keeps the files in page order.
    imgname = "faceshow/img" + str(index) + ".jpg"
    with open(imgname, "wb") as f:
        f.write(img_data)
# URL template for the follow-up result pages; `{}` is the `pn` value.
# The query string carries `&copyright=`; the original text had it corrupted
# to "©right=" by HTML-entity decoding.
img_url2 = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E9%AB%98%E6%B8%85%E6%B8%85%E7%BA%AF%E5%A6%B9%E5%AD%90&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word=%E9%AB%98%E6%B8%85%E6%B8%85%E7%BA%AF%E5%A6%B9%E5%AD%90&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn={}&rn=30&gsm=3c&1584625416427="
# Two later requests captured from the browser used the same URL with
#   ...&pn=60&rn=30&gsm=3c&1584933095233=
#   ...&pn=90&rn=30&gsm=5a&1584933097579=
# Comparing them shows that the `pn` parameter (30, 60, 90, ...) selects the
# page, with `rn=30` on each request.

# Walk the pages using the pattern found above.
for num in range(1, 4):
    pn = num * 30
    # Plays the same role as first_url for the first page: a listing that
    # embeds the thumbnail URLs of one page.
    first_url = img_url2.format(pn)
    # A timeout keeps the script from hanging forever on an unresponsive server.
    res = requests.get(first_url, timeout=10)
    # Fail loudly on an HTTP error instead of regex-scanning an error page.
    res.raise_for_status()
    data = res.text

    # 2. Extract every thumbnail URL from the response.
    img_urls = re.findall(r'thumbURL":"(.*?)"', data)
    print(img_urls)

    # 3. Download the images one by one.
    # 3.1 One directory per page, named after the page number; exist_ok
    #     avoids the check-then-create race of os.path.exists() + os.mkdir().
    facename = "妹子" + str(num)
    os.makedirs(facename, exist_ok=True)

    # The enumerate index keeps the saved files in page order.
    for index, img_url in enumerate(img_urls):
        try:
            res = requests.get(img_url, timeout=10)
            res.raise_for_status()
        except requests.RequestException as err:
            # One broken thumbnail should not abort the whole page, and an
            # error body must not be saved as a .jpg.
            print("skip {}: {}".format(img_url, err))
            continue
        img_data = res.content
        imgname = facename + "/img" + str(index) + ".jpg"
        with open(imgname, "wb") as f:
            f.write(img_data)
四、結果展示