urllib使用及代理池

 

import urllib.request
import re
import time
import os

# Request headers sent with every HTTP call; the Referer is set to the
# listing section of the site the images are fetched from.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Referer': 'http://www.mm131.com/xiaohua/'
}

# Listing pages to crawl (1-based).
LISTING_PAGES = range(1, 7)
# Directory that receives the downloaded images.
IMAGE_DIR = '校花圖'
# Seconds to wait for a server before giving up on a request.
REQUEST_TIMEOUT = 15
# Pause (seconds) after every image and after every listing page.
REQUEST_DELAY = 2

# Matches one thumbnail entry of a listing page, e.g.
#   <a target="_blank" href="http://www.mm131.com/xiaohua/634.html">
#   <img src="http://img1.mm131.me/pic/634/m634.jpg" alt="..." width="120"
#   height="160">...</a>
# Group 1 is the image URL, group 2 the alt text. Compiled once instead of
# once per page.
THUMBNAIL_PATTERN = re.compile(
    r'<a target="_blank" href=".*?"><img src="(.*?)" alt="(.*?)" width=".*?" height=".*?">.*?</a>',
    re.S)

# Characters that are path separators or are rejected in file names on common
# file systems; they are replaced before the alt text is used as a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def listing_url(page):
    """Return the URL of listing page number *page* (page 1 is the index)."""
    if page == 1:
        return 'http://www.mm131.com/xiaohua/'
    return 'http://www.mm131.com/xiaohua/list_2_{}.html'.format(page)


def extract_thumbnails(html):
    """Return a list of ``(image_url, alt_text)`` tuples found in *html*."""
    return THUMBNAIL_PATTERN.findall(html)


def image_filename(title, image_url):
    """Build a file name from the image's alt text and the URL's extension.

    The extension is whatever follows the last ``.`` in *image_url*.
    Characters that cannot appear in a file name (or that would act as path
    separators) are replaced with ``_`` so the result always stays inside the
    target directory.
    """
    extension = image_url.split('.')[-1]
    return _UNSAFE_FILENAME_CHARS.sub('_', title + '.' + extension)


def fetch(url):
    """Download *url* with the shared headers and return the body as bytes.

    The response is closed before returning. Raises ``OSError`` (which
    includes ``urllib.error.URLError`` and timeouts) on network failure.
    """
    request = urllib.request.Request(url=url, headers=headers)
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return response.read()


def crawl_listing_pages():
    """Download every thumbnail on the configured listing pages.

    A page or image that fails to download is reported and skipped, so one
    bad request no longer aborts the whole crawl.
    """
    os.makedirs(IMAGE_DIR, exist_ok=True)
    for page in LISTING_PAGES:
        print('這是第%s頁' % page)
        url = listing_url(page)
        try:
            # The site serves GBK; stray undecodable bytes are replaced
            # rather than aborting the page.
            content = fetch(url).decode('gbk', errors='replace')
        except OSError as exc:
            print('頁面請求失敗,跳過: %s (%s)' % (url, exc))
            time.sleep(REQUEST_DELAY)
            continue

        for image_url, title in extract_thumbnails(content):
            print(image_url)
            filepath = os.path.join(IMAGE_DIR, image_filename(title, image_url))
            try:
                # Fetch first, so a failed download leaves no empty file.
                data = fetch(image_url)
                with open(filepath, 'wb') as fp:
                    fp.write(data)
            except OSError as exc:
                print('下載失敗,跳過: %s (%s)' % (image_url, exc))
            time.sleep(REQUEST_DELAY)
        time.sleep(REQUEST_DELAY)


if __name__ == '__main__':
    crawl_listing_pages()

代理池案例(注意:以下示例程式碼實際上並未設定任何代理,只是按圖集編號直接批次下載圖片):

import urllib.request
import os

# Root directory for downloads; one sub-directory is created per gallery id.
GALLERY_ROOT = 'tupian'
# Gallery ids to download.
GALLERY_IDS = range(4200, 4461)
# Image indices tried per gallery, in order, until the first failure.
# NOTE(review): indices start at 0 as in the original script; if the server
# numbers images from 1, the first request fails and the gallery is skipped.
# Confirm against the site.
GALLERY_IMAGE_INDICES = range(60)
# Seconds to wait for the server before giving up on a request.
GALLERY_TIMEOUT = 15
# Headers sent with every image request; built once instead of per image.
GALLERY_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    'Referer': 'https://www.sogou.com/link?url=DSOYnZeCC_o7btUgpK402wmc9YOcsOr4cOOT57O29F8'
}


def gallery_image_url(gallery_id, index):
    """Return the URL of image number *index* in gallery *gallery_id*."""
    return 'http://img1.mm131.me/pic/' + str(gallery_id) + '/' + str(index) + '.jpg'


def gallery_image_path(gallery_id, index):
    """Return the local path where image *index* of *gallery_id* is stored."""
    return os.path.join(GALLERY_ROOT, str(gallery_id), str(index) + '.jpg')


def fetch_gallery_image(url):
    """Download *url* and return its bytes; the response is always closed.

    Raises ``OSError`` (which includes ``urllib.error.URLError`` and
    timeouts) on network failure.
    """
    request = urllib.request.Request(url=url, headers=GALLERY_HEADERS)
    with urllib.request.urlopen(request, timeout=GALLERY_TIMEOUT) as response:
        return response.read()


def download_galleries():
    """Download the images of every configured gallery.

    For each gallery the image indices are tried in order; the first failed
    download ends that gallery and the loop moves on to the next one.
    """
    for gallery_id in GALLERY_IDS:
        # makedirs also creates the root directory and tolerates re-runs,
        # where a bare mkdir raised FileNotFoundError / FileExistsError.
        os.makedirs(os.path.join(GALLERY_ROOT, str(gallery_id)), exist_ok=True)
        for index in GALLERY_IMAGE_INDICES:
            url = gallery_image_url(gallery_id, index)
            print(url)
            try:
                data = fetch_gallery_image(url)
                with open(gallery_image_path(gallery_id, index), 'wb') as fp:
                    fp.write(data)
            except OSError as exc:
                # Report why the download failed instead of hiding it, then
                # move on to the next gallery.
                print('下載失敗,下載下一條', exc)
                break


if __name__ == '__main__':
    download_galleries()

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章