requests應用step1

爬取說明

爬取的是小黃人的圖片並保存到本地

使用模塊主要作用說明

import requests
from urllib.request import urlretrieve
import re
import os

requests:發送HTTP請求,獲取網頁內容和圖片數據
urlretrieve:保存下載的圖片
os:判斷文件目錄是否存在和文件目錄的創建
re:正則模塊,查找需要的內容

代碼解釋

設置了請求頭:

# Listing page to scrape; the same address is sent back as the Referer.
url = "http://www.ivsky.com/tupian/xiaohuangren_t21343/"

# Request headers, filled in one key at a time: a Safari-on-Windows
# User-Agent string, the listing page as Referer, and a keep-alive connection.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'
headers['Referer'] = url
headers['Connection'] = 'Keep-alive'

提交請求:

# Fetch the listing page. The timeout keeps the script from hanging forever
# on a stalled connection, and raise_for_status() aborts on an HTTP 4xx/5xx
# response instead of letting an error page be parsed as if it were the
# listing.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
# `s` holds the decoded HTML text of the page.
s = response.text

使用re匹配需要的數據:

# Capture the image URL inside every <div class="il_img"> entry.
# The dot before "jpg" is escaped so that only a literal ".jpg" extension
# matches; an unescaped dot would accept any character there (e.g. "xjpg").
pattern = r'<div class="il_img".*?<img src="(.*?\.jpg)" width'
pa = re.compile(pattern)
# uls: list of the captured image URLs, in page order.
uls = pa.findall(s)

使用urlretrieve保存圖片:

# Raw string: "\/" and "\." are invalid escape sequences in a normal string
# literal. The pattern is compiled once because it is reused for every URL.
path_re = re.compile(r"/[0-9]{2}(/.*?\.jpg)")

for item in uls:
    # e.g. http://img.ivsky.com/img/tupian/t/201411/01/xiaohuangren-004.jpg
    # -> "/xiaohuangren-004.jpg" (the part after the two-digit directory)
    found = path_re.search(item)
    if found is None:
        # Unexpected URL shape: skip it instead of crashing with IndexError.
        print('skip (unexpected URL): %s' % item)
        continue
    path = '/root/python/python/taobao%s' % found.group(1)
    # makedirs also creates missing parent directories, and exist_ok avoids
    # the separate "does it exist?" check.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    print(path)
    urlretrieve(item, path)

使用文件流保存圖片:

# Raw string: "\/" and "\." are invalid escape sequences in a normal string
# literal. The pattern is compiled once because it is reused for every URL.
path_re = re.compile(r"/[0-9]{2}(/.*?\.jpg)")

for item in uls:
    found = path_re.search(item)
    if found is None:
        # Unexpected URL shape: skip it instead of crashing with IndexError.
        print('skip (unexpected URL): %s' % item)
        continue
    path = '/root/python/python/taobao%s' % found.group(1)
    # open() does not create directories, so make sure the target exists.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # The timeout prevents hanging on a stalled download; raise_for_status()
    # keeps an HTTP error page from being written to disk as a ".jpg".
    response = requests.get(item, timeout=10)
    response.raise_for_status()
    imgedata = response.content
    print(path)
    with open(path, "wb") as f:
        f.write(imgedata)

總結:兩種保存方式,文件流比urlretrieve快

完整代碼

#coding:utf-8
import requests
from urllib.request import urlretrieve
import re
import os


# Listing page to scrape; it is also sent back as the Referer header.
url = "http://www.ivsky.com/tupian/xiaohuangren_t21343/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Referer': url,
    'Connection': 'Keep-alive',
}

# Directory prefix the downloaded images are written under.
SAVE_DIR = '/root/python/python/taobao'
# Seconds to wait on a request before giving up instead of hanging forever.
TIMEOUT = 10

# Image URL inside each <div class="il_img"> entry. The dot before "jpg" is
# escaped so only a literal ".jpg" extension matches.
IMG_SRC_RE = re.compile(r'<div class="il_img".*?<img src="(.*?\.jpg)" width')
# Trailing "/<name>.jpg" that follows a two-digit directory in the image URL.
# Raw string: "\/" and "\." are invalid escapes in a normal string literal.
IMG_PATH_RE = re.compile(r"/[0-9]{2}(/.*?\.jpg)")


def find_image_urls(html):
    """Return the list of image URLs captured from the listing-page HTML."""
    return IMG_SRC_RE.findall(html)


def image_save_path(img_url):
    """Return the local file path for ``img_url``.

    Example: ".../201411/01/xiaohuangren-004.jpg" maps to
    SAVE_DIR + "/xiaohuangren-004.jpg". Returns None when the URL does not
    contain the expected "/NN/<name>.jpg" tail, so the caller can skip it
    instead of crashing with IndexError.
    """
    found = IMG_PATH_RE.search(img_url)
    if found is None:
        return None
    return SAVE_DIR + found.group(1)


def save_with_urlretrieve(img_url, path):
    """Download ``img_url`` to ``path`` using urllib's urlretrieve."""
    urlretrieve(img_url, path)


def save_with_stream(img_url, path):
    """Download ``img_url`` with requests and write the bytes to ``path``.

    Raises requests.HTTPError on a 4xx/5xx response so an error page is
    never written to disk as a ".jpg".
    """
    response = requests.get(img_url, timeout=TIMEOUT)
    response.raise_for_status()
    with open(path, "wb") as f:
        f.write(response.content)


def main(save=save_with_stream):
    """Fetch the listing page and store every image it references.

    ``save`` is the download strategy, called as ``save(img_url, path)``;
    pass ``save_with_urlretrieve`` to use the urlretrieve variant.
    """
    response = requests.get(url, headers=headers, timeout=TIMEOUT)
    response.raise_for_status()

    for item in find_image_urls(response.text):
        path = image_save_path(item)
        if path is None:
            print('skip (unexpected URL): %s' % item)
            continue
        # Neither open() nor urlretrieve creates directories; makedirs also
        # creates missing parents and tolerates an existing directory.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        print(path)
        save(item, path)


if __name__ == "__main__":
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章