05_運行百度爬取圖片

原創

kinbridge

2020-06-19 22:45

測試代碼：

import os
import re
from urllib.parse import quote

import requests  # third-party HTTP client used for every page and image fetch

#Default configuration; beginSearch overwrites MaxSearchPage, NeedSave and DefaultPath

MaxSearchPage = 20 # page limit that spidler compares CurrentPage against

CurrentPage = 0 # index of the result page currently being crawled

DefaultPath = "pictures" # default location where downloaded images are stored

NeedSave = 0 # whether images should be saved (falsy = only print the URLs)

#圖片鏈接正則和下一頁的鏈接正則

def imageFiler(content):
    """Return the image URLs embedded in the source of a result page.

    Args:
        content: Text of the page to scan.

    Returns:
        A list with the value of every ``"objURL":"..."`` entry found in
        *content*, in order of appearance; empty when there is none.
    """
    # Non-greedy capture stops at the first closing quote of each entry.
    return re.findall(r'"objURL":"(.*?)"', content, re.S)

def nextSource(content):
    """Return the relative link of the next result page, or "" if none exists.

    Args:
        content: Text of the current result page.

    Returns:
        The ``href`` captured from the pager markup, or an empty string when
        the page contains no such link. Returning a falsy value instead of
        raising lets callers simply test the result for truthiness.
    """
    # The greedy ``.*`` is kept on purpose: when the pager holds several
    # ``class="n"`` anchors, the match settles on the last one.
    found = re.search('<div id="page">.*<a href="(.*?)" class="n">', content, re.S)
    if found is None:
        return ""

    next_path = found.group(1)
    print("---------" + "http://image.baidu.com" + next_path)
    return next_path

#爬蟲主體

def _save_picture(image_url, data):
    """Write image bytes to a file under DefaultPath named after *image_url*."""
    # Drop every character that is not allowed in a file name on common
    # file systems (the original only removed '/', ':' and '?').
    file_name = re.sub(r'[\\/:*?"<>|]', '', image_url)
    # os.path.join works whether or not DefaultPath ends with a separator.
    picture_path = os.path.join(DefaultPath, file_name)
    try:
        if DefaultPath:
            os.makedirs(DefaultPath, exist_ok=True)
        with open(picture_path, 'wb') as fp:
            fp.write(data)
    except OSError:
        # One unwritable file (e.g. name too long) must not abort the crawl.
        print("Save image error! path:" + picture_path)


def spidler(source):
    """Crawl result pages starting at *source*, printing and optionally saving images.

    For each page the image URLs are printed; when the module-level
    ``NeedSave`` flag is truthy each image is also downloaded and stored
    under ``DefaultPath``. Crawling follows the "next page" link until
    ``MaxSearchPage`` pages have been processed or no further link exists.
    ``CurrentPage`` is advanced as pages are followed.

    Args:
        source: Absolute URL of the first result page to fetch.

    Raises:
        requests.RequestException: If a result page itself cannot be fetched.
            Failures of individual image downloads are reported and skipped.
    """
    global CurrentPage

    # Iterating instead of recursing keeps the call stack flat however many
    # pages are requested.
    while True:
        # A timeout keeps a stalled connection from hanging the crawl forever.
        content = requests.get(source, timeout=10).text
        image_urls = imageFiler(content)
        print("Current page:" + str(CurrentPage) + "**********************************")

        for imageUrl in image_urls:
            print(imageUrl)
            if not NeedSave:
                continue
            try:
                # Bounded wait: give up on dead image hosts.
                picture = requests.get(imageUrl, timeout=10)
                # Do not store an HTTP error page as if it were an image.
                picture.raise_for_status()
            except requests.RequestException:
                print("Download image error! errorUrl:" + imageUrl)
                continue
            _save_picture(imageUrl, picture.content)

        # CurrentPage is zero-based, so page number CurrentPage + 1 has just
        # been processed; stop once the requested count is reached.
        if CurrentPage + 1 >= MaxSearchPage:
            return

        # Look the link up once; tolerate a lookup that raises on a page
        # without a pager as well as one that returns an empty value.
        try:
            next_path = nextSource(content)
        except IndexError:
            next_path = ""
        if not next_path:
            return

        CurrentPage += 1
        source = "http://image.baidu.com" + next_path

#爬蟲的開啓方法

def beginSearch(page=1,save=0,savePath=" D:/pictures/"):
    """Prompt for a keyword and start crawling image search results for it.

    Args:
        page: Page limit, stored in ``MaxSearchPage`` for ``spidler``.
        save: Whether images are saved (0 = no, 1 = yes); stored in
            ``NeedSave``.
        savePath: Directory for saved images; stored in ``DefaultPath`` with
            surrounding whitespace removed.
    """
    global MaxSearchPage, NeedSave, DefaultPath, CurrentPage

    MaxSearchPage = page
    NeedSave = save
    # The default value carries a stray leading space; strip it so the path
    # is usable while the signature stays unchanged.
    DefaultPath = savePath.strip()
    # Restart page counting so a second search does not continue the old count.
    CurrentPage = 0

    key = input("Please input you want search：")
    # Percent-encode the keyword (UTF-8, matching ie=utf-8) so characters such
    # as '&', '#' or spaces cannot corrupt the query string.
    StartSource = (
        "http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word="
        + quote(key, safe="")
        + "&ct=201326592&v=flip"
    )
    spidler(StartSource)

#Calling the entry point starts an image search for the keyword typed at the prompt

beginSearch(page=5,save=1) # page=5: intended to download the first 5 pages; save=1: save the images

運行報錯

解決辦法鏈接：

https://mp.csdn.net/postedit/100067179

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

05_運行百度爬取圖片

sm4加密工具類

05_運行百度爬取圖片

06_Python遊戲編程(Pygame)

04_判斷循環語句

自定義代碼生成工具

java正則表達式詳解與Spring.split()使用

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結