Scraping images with Python (girl pics, the leg-baring kind), source code included

Scrape tens of thousands of girl pics of every type with Python. Old drivers, hop aboard, quick!

To make sure the train reaches its final stop safely, please have the following environment ready:

Python 3, with the requests and bs4 modules installed (pip install requests beautifulsoup4)

This is a simple little crawler, meant as a learning reference for beginners. It only implements the image scraping itself; there is no timeout or exception handling, so change it to fit your own needs. Please credit this page when reposting, thanks.
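If you do want to bolt on the missing timeout and error handling later, the natural place is the get() helper defined in the source below. Here is a minimal sketch of a hardened variant (the name get_safe and the 10-second timeout are my own choices, not part of the original script); callers would then have to check for a None return:

# Hedged sketch: a variant of the get() helper with a timeout and
# basic error handling; returns None when a page cannot be fetched
def get_safe(url):
    try:
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()  # raise on HTTP 4xx/5xx status codes
        res.encoding = 'GBK'
        return BeautifulSoup(res.text, 'html.parser')
    except requests.RequestException as e:
        print("Failed to fetch %s: %s" % (url, e))
        return None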

If you haven't boarded yet, hurry up, the train is about to leave...

Site address: https://www.keke234.com

 

Source code:


# HTTP library for fetching pages
import requests
# File-system helpers for creating folders and checking files
import os
# Regular expressions, used to parse the gallery page count
import re
from bs4 import BeautifulSoup


# Request header that mimics a Chrome browser
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3352.181 Safari/537.36'}
# Root URL of the site to scrape
globalUrl = 'https://www.keke234.com/'
# Root folder where the images are stored (raw string so the backslash stays literal)
path = r'D:\pyPictures'


# Create a folder if needed, then make it the working directory
# so that downloads can use bare file names
def createFile(file_path):
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    else:
        print("Folder already exists")
    # Switch into the folder created above
    os.chdir(file_path)

# Fetch a page and return it as a BeautifulSoup object;
# the site serves GBK-encoded HTML
def get(url):
    res = requests.get(url, headers=headers)
    res.encoding = 'GBK'
    return BeautifulSoup(res.text, 'html.parser')

# Main routine
def main():
    createFile(path)
    soup = get(globalUrl)
    # Collect all category links from the site menu
    allCategory = soup.find('ul', id='menu').find_all('a')
    # Skip the first and last menu entries, which are not category pages
    for i in range(1, len(allCategory) - 1):
        filePath = path + "/" + allCategory[i].attrs['title']
        createFile(filePath)
        # Open the category's landing page
        soup1 = get(allCategory[i].attrs['href'])
        # Total number of list pages in this category
        pars1 = soup1.find('span', class_='pageinfo').find('strong').text
        allPage = int(pars1)
        # Visit every list page of the category
        for j in range(allPage):
            url2 = ''
            if 'gaoqing/cn' in allCategory[i].attrs['href']:
                url2 = allCategory[i].attrs['href'].replace("index.html", "list_1_" + str(j + 1) + ".html")
            if 'gaoqing/rihan' in allCategory[i].attrs['href']:
                url2 = allCategory[i].attrs['href'].replace("index.html", "list_2_" + str(j + 1) + ".html")
            if 'gaoqing/oumei' in allCategory[i].attrs['href']:
                url2 = allCategory[i].attrs['href'].replace("index.html", "list_3_" + str(j + 1) + ".html")
            if url2 == '':
                # Unrecognized category URL pattern; skip it
                continue
            soup2 = get(url2)
            pars2 = soup2.find_all(class_='heart nologin')
            # Follow each gallery's detail link
            for k1 in range(1, len(pars2)):
                url3 = pars2[k1].attrs['href']
                path1 = url3.split('/')[-1][0:-5]
                createFile(filePath + '/' + path1)
                soup3 = get(url3)
                page_div = soup3.find('div', class_='page')
                if page_div is not None and page_div.text != '':
                    # The pager's first link text contains the total page
                    # count; extract the digits
                    pars3 = page_div.find('a').text
                    match = re.search(r'\d+', pars3)
                    total = int(match.group()) if match else 1
                    # Crawl every page of the gallery
                    for t in range(1, total + 1):
                        if t == 1:
                            url4 = url3
                        else:
                            url4 = url3[0:-5] + "_" + str(t) + ".html"
                        # Scrape the images on this page
                        soup4 = get(url4)
                        pars4 = soup4.find('div', class_='content').find_all('img')
                        for t1 in range(len(pars4)):
                            imgUrl = pars4[t1].attrs['src']
                            file_name = imgUrl.split('/')[-1]
                            print(file_name)
                            # Skip images that were already downloaded
                            if not os.path.exists(file_name):
                                img = requests.get(imgUrl, headers=headers)
                                with open(file_name, 'wb') as f:
                                    f.write(img.content)


if __name__ == '__main__':
    main()
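Before letting the crawler loose on a full run, a quick smoke test of the get() helper is worth doing, since the site's layout or encoding may have changed since this was written. A minimal check, assuming the homepage is reachable from your network:

# Sanity check: fetch the homepage and print its <title>
soup = get(globalUrl)
print(soup.title.string if soup.title else "no <title> found")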

Scraping results:
