python項目之 爬蟲爬取煎蛋jandan的妹子圖-下

python項目之 爬蟲爬取煎蛋jandan的妹子圖-下

函數如下

  1. 讀取全部單個txt組合成一個TXT文件,並把網址保存在all_imag_urls中
    read_write_txt_to_main()
  2. 讀取單個TXT文件的網址
    get_url()
  3. 每一個圖片保存在本地
    get_imags(all_imag_urls)

最終結果如下

效果圖

源碼如下

# coding:utf-8
####################################################
# coding by 劉雲飛
####################################################

import requests
import os
import time
import random
from bs4 import BeautifulSoup
import threading

# Shared module state used by the functions below.
ips = []            # proxy addresses in "http://host:port" form, loaded from ip2.txt
all_imag_urls = []  # accumulated image URLs (filled by read_write_txt_to_main / get_url)

# Load the proxy pool at import time: one "host:port" per line in ip2.txt.
# NOTE(review): the script crashes here if ip2.txt is missing — confirm it
# always exists before the script runs.
with open('ip2.txt', 'r') as f:
    lines = f.readlines()
    for line in lines:
        ip_one = "http://" + line.strip()
        ips.append(ip_one)

# Browser-like request headers; get_imags() sets a per-URL 'Host' entry.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/42.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Referer': 'http://jandan.net/ooxx/',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
}


def read_write_txt_to_main():
    """Merge the per-page URL files 1520.txt .. 1880.txt into all_imgs.txt.

    Every existing ``<page>.txt`` is read line by line; each URL is appended
    to the module-level list ``all_imag_urls`` and written to all_imgs.txt,
    one URL per line.
    """
    for page in range(1520, 1881):
        filename = str(page) + ".txt"
        if not os.path.exists(filename):
            continue
        print(filename + "OK")
        with open(filename, 'r') as f:
            for url in f:
                # Strip here so the list holds clean URLs. The original kept
                # the "\n" and wrote `url + ""`, which silently merged two
                # URLs whenever a source file lacked a trailing newline.
                url = url.strip()
                if url:
                    all_imag_urls.append(url)
    with open("all_imgs.txt", 'w') as fw:
        for url in all_imag_urls:
            fw.write(url + "\n")
    print("write file ok!!!!!")


def get_url():
    """Load every image URL from all_imgs.txt into ``all_imag_urls``.

    Trailing newlines are removed; the URLs are appended to the
    module-level list in file order.
    """
    with open("all_imgs.txt", 'r') as source:
        all_imag_urls.extend(line.strip("\n") for line in source)


def get_imags(urls):
    """Download each image URL in *urls* into the current directory.

    Files that already exist locally are skipped. Every request goes out
    through a random proxy from the module-level ``ips`` pool, using a
    per-request copy of the module-level ``headers`` with 'Host' set.

    Args:
        urls: iterable of image URL strings, possibly with trailing newlines.
    """
    for raw_url in urls:
        url = raw_url.strip("\n")
        # Take the last path segment as the file name instead of the original
        # fixed offset url[28:], which breaks on URLs with a different prefix
        # length (and would embed "/" in the name for deeper paths).
        filename = url.rsplit("/", 1)[-1]
        if os.path.exists(filename):
            continue
        # "http://host/..." -> host. The original slice url[7:21] only worked
        # for exactly 14-character host names.
        request_headers = dict(headers)  # copy: don't mutate the shared dict
        request_headers['Host'] = url.split("/")[2]
        proxies = {'http': random.choice(ips)}
        try:
            # Bug fix: the original passed the raw line (with its trailing
            # "\n") to requests.get instead of the stripped URL.
            res = requests.get(url, headers=request_headers, proxies=proxies, stream=True)
            print(res.status_code)
            if res.status_code == 200:
                with open(filename, 'wb') as jpg:
                    jpg.write(res.content)
                print(filename + "  OK")
            else:
                print(filename + "  not ok")
        except requests.RequestException:
            # Narrowed from a bare `except:`; network failures are expected
            # here, but programming errors are no longer silently swallowed.
            print(filename + "  not ok")


# --- Script body: runs at import time (the original has no __main__ guard) ---

''' 讀取全部單個txt組合成一個TXT文件,並把網址保存在all_imag_urls中。 '''
# Step 1 (disabled): merge the per-page txt files into all_imgs.txt.
# read_write_txt_to_main()

''' 讀取單個TXT件的網址 '''
# Step 2: load the URLs from all_imgs.txt into all_imag_urls.
get_url()

''' 每一個圖片保存在本地 '''
# Step 3: download every image to the current directory.
get_imags(all_imag_urls)

# Final status message ("All images saved, enjoy!").
print("所有的妹子圖保存完畢,請盡情享受!!")
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章