【Python】從堆糖爬取圖片Demo

利用Python從堆糖爬取圖片的Demo，從圖片詳細查看頁進行操作。

圖片詳細查看頁:
（此處原為圖片詳細查看頁的截圖）

完整code:

import requests
import os
import traceback
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

def download(url, filename):
    """Download ``url`` to ``filename`` unless the file already exists.

    The response body is streamed to disk in 1024-byte chunks. If the
    download fails or is interrupted, any partially written file is removed
    so a later run does not mistake it for a finished download.

    Args:
        url: Address of the resource to fetch.
        filename: Destination path on disk.

    Returns:
        ``filename`` on success; ``None`` when the file already existed or
        the download failed (the traceback is printed in that case).

    Raises:
        KeyboardInterrupt: Re-raised after the partial file is cleaned up.
    """
    if os.path.exists(filename):
        print('file exists!')
        return None
    try:
        # Using the response as a context manager releases the streamed
        # connection even if raise_for_status() or a write fails part-way.
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive chunks
                        f.write(chunk)
        return filename
    except KeyboardInterrupt:
        if os.path.exists(filename):
            os.remove(filename)
        # Bare raise keeps the original exception and its traceback.
        raise
    except Exception:
        # Best-effort download: report the failure and let the caller go on.
        traceback.print_exc()
        if os.path.exists(filename):
            os.remove(filename)
        return None

# Create the output directory; exist_ok avoids a separate existence check.
os.makedirs('yugui', exist_ok=True)

# Open the browser.
browser = webdriver.Chrome()
try:
    # Load the image detail page.
    url = 'https://www.duitang.com/blog/?id=1005406113'
    browser.get(url)

    # Range of pages to walk through (inclusive on both ends).
    start = 1
    end = 133
    for i in range(start, end + 1):
        # Locate the image on the current page. The find_element(s)_by_*
        # helpers were removed in Selenium 4.3, so use the By locators.
        img = browser.find_elements(By.XPATH, "//img[@id='mbpho-img']")
        for ele in img:
            target_url = ele.get_attribute("src")
            print(target_url)
            if not target_url:
                # No src attribute on the element: nothing to download.
                continue
            img_name = target_url.split('/')[-1]
            # Only the last 25 characters of the URL's final path segment
            # are used as the local file name.
            filename = os.path.join('yugui', img_name[-25:])
            download(target_url, filename)
        # Show progress.
        print('%d / %d' % (i, end))
        # Go to the next page, unless this was the last one.
        if i == end:
            break
        browser.find_element(By.CLASS_NAME, "shownext").click()
        # Fixed pause; presumably gives the next image time to load.
        time.sleep(3)
finally:
    # Close the browser even if a step above raised.
    browser.quit()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章