import urllib.request
import re
import time
import os
# Headers sent with every listing-page and image request of this script.
# The Referer is the first listing page of the section being crawled.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Referer': 'http://www.mm131.com/xiaohua/',
}

# Matches one thumbnail link on a listing page, e.g.
#   <a target="_blank" href="http://www.mm131.com/xiaohua/634.html">
#   <img src="http://img1.mm131.me/pic/634/m634.jpg" alt="..." width="120" height="160">...</a>
# Group 1 is the image URL, group 2 is the alt text.  Compiled once here
# instead of once per listing page.
GALLERY_IMAGE_PATTERN = re.compile(
    r'<a target="_blank" href=".*?"><img src="(.*?)" alt="(.*?)" width=".*?" height=".*?">.*?</a>',
    re.S,
)

# Path separators, characters Windows rejects in file names, and control
# characters; each one is replaced by "_" when building a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def gallery_page_url(page):
    """Return the listing URL for page number *page* (page 1 is the index)."""
    if page == 1:
        return 'http://www.mm131.com/xiaohua/'
    return 'http://www.mm131.com/xiaohua/list_2_{}.html'.format(page)


def extract_gallery_images(html):
    """Return a list of ``(image_url, alt_text)`` pairs found in *html*.

    Surrounding whitespace is stripped from both values because the pattern
    is matched with ``re.S`` and may therefore capture line breaks.
    """
    return [(src.strip(), alt.strip())
            for src, alt in GALLERY_IMAGE_PATTERN.findall(html)]


def gallery_image_filename(image_url, title):
    """Build the local file name for an image: ``<title>.<url extension>``.

    The extension is whatever follows the last "." of *image_url*.  Any
    character that cannot be part of a file name is replaced by "_", so an
    alt text containing e.g. "/" no longer makes the save fail.
    """
    raw_name = title + '.' + image_url.split('.')[-1]
    return _UNSAFE_FILENAME_CHARS.sub('_', raw_name)


def save_gallery_image(image_url, filepath, timeout=30):
    """Download *image_url* (sending ``headers``) and write it to *filepath*.

    The body is read completely before the file is opened, so a failed
    download does not leave an empty file behind.

    Raises:
        OSError: on network or file-system errors (``urllib.error.URLError``
            is a subclass of ``OSError``).
    """
    request = urllib.request.Request(url=image_url, headers=headers)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        data = response.read()
    with open(filepath, 'wb') as fp:
        fp.write(data)


def crawl_gallery_pages(first_page=1, last_page=6, dirname='校花圖', delay=2, timeout=30):
    """Download every thumbnail linked from the listing pages.

    Args:
        first_page: First listing page to fetch (inclusive).
        last_page: Last listing page to fetch (inclusive).
        dirname: Directory the images are saved into; created if missing.
        delay: Seconds to sleep after each image and after each page.
        timeout: Socket timeout in seconds for every HTTP request.

    Returns:
        The number of images that were saved successfully.

    A failure while fetching a listing page propagates to the caller; a
    failure on a single image is reported and the crawl continues.
    """
    import http.client  # local import: only needed for the except clause

    saved = 0
    for page in range(first_page, last_page + 1):
        print('這是第%s頁' % page)

        # Fetch the listing page; the context manager closes the connection.
        request = urllib.request.Request(url=gallery_page_url(page), headers=headers)
        with urllib.request.urlopen(request, timeout=timeout) as response:
            # The page is decoded as GBK; undecodable bytes are replaced
            # rather than aborting the whole crawl.
            content = response.read().decode('gbk', errors='replace')

        os.makedirs(dirname, exist_ok=True)
        for image_url, title in extract_gallery_images(content):
            print(image_url)
            filepath = os.path.join(dirname, gallery_image_filename(image_url, title))
            try:
                save_gallery_image(image_url, filepath, timeout=timeout)
            except (OSError, http.client.HTTPException) as exc:
                # One broken image must not stop the remaining downloads.
                print('下載失敗,跳過 %s: %s' % (image_url, exc))
            else:
                saved += 1
            time.sleep(delay)
        time.sleep(delay)
    return saved


if __name__ == '__main__':
    crawl_gallery_pages()
# 代理池案例 (proxy-pool example):
import urllib.request
import os
# Headers sent with every image request of this script.  Built once here
# instead of once per image.
ALBUM_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36',
    'Referer': 'https://www.sogou.com/link?url=DSOYnZeCC_o7btUgpK402wmc9YOcsOr4cOOT57O29F8',
}


def album_image_url(album, index):
    """Return the remote URL of image number *index* in album *album*."""
    return 'http://img1.mm131.me/pic/' + str(album) + '/' + str(index) + '.jpg'


def album_image_path(root, album, index):
    """Return the local path ``<root>/<album>/<index>.jpg``."""
    return os.path.join(root, str(album), str(index) + '.jpg')


def download_numbered_albums(first_album=4200, last_album=4460, images_per_album=60,
                             root='tupian', timeout=30):
    """Download sequentially numbered images album by album.

    For every album id in ``first_album..last_album`` (inclusive) the images
    ``0.jpg``, ``1.jpg``, ... up to ``images_per_album - 1`` are requested in
    order.  The first failure inside an album is reported and the loop moves
    on to the next album.

    Args:
        first_album: First album id (inclusive).
        last_album: Last album id (inclusive).
        images_per_album: Upper bound on images tried per album.
        root: Directory under which one sub-directory per album is created.
        timeout: Socket timeout in seconds for every HTTP request.

    Returns:
        The number of images that were saved successfully.
    """
    import http.client  # local import: only needed for the except clause

    saved = 0
    for album in range(first_album, last_album + 1):
        # makedirs creates a missing root directory as well and does not
        # fail when the album directory is left over from an earlier run.
        os.makedirs(os.path.join(root, str(album)), exist_ok=True)

        # NOTE(review): numbering starts at 0 as in the original script; if
        # the server has no 0.jpg, every album stops at its first request.
        # Confirm against the site's actual numbering.
        for index in range(images_per_album):
            url = album_image_url(album, index)
            print(url)
            try:
                request = urllib.request.Request(url=url, headers=ALBUM_REQUEST_HEADERS)
                with urllib.request.urlopen(request, timeout=timeout) as response:
                    # Read everything first so a failed download does not
                    # leave an empty file on disk.
                    data = response.read()
                with open(album_image_path(root, album, index), 'wb') as fp:
                    fp.write(data)
            except (OSError, http.client.HTTPException):
                # Network, HTTP and file-system errors all end this album.
                print('下載失敗,下載下一條')
                break
            saved += 1
    return saved


if __name__ == '__main__':
    download_numbered_albums()