Python 爬取照片

原創

2020-06-21 09:07

import requests
import urllib.request
from bs4 import BeautifulSoup
import os
import time


# 獲得每種品牌的汽車
def get_car_brand_url(base_url, timeout=30):
    """Collect a site-rooted URL for every link found on the brand menu page.

    Args:
        base_url: URL of the page whose ``<a>`` tags point at the brands.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of strings, one per ``<a>`` tag that has a non-empty ``href``,
        each built by prefixing the site root to that ``href``.
    """
    car_brand = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(base_url, headers=headers, timeout=timeout)
    # Pass the decoded text straight to BeautifulSoup; re-encoding it to bytes
    # would only make the parser guess the encoding a second time.
    html = response.content.decode("gb2312", "ignore")
    soup = BeautifulSoup(html, 'html.parser')
    car_brand_list = []
    for item in soup.find_all('a'):
        href = item.get('href')
        # Anchors without an href return None, which cannot be concatenated.
        if href:
            car_brand_list.append(car_brand + href)
    return car_brand_list


# 獲取每個品牌的所有不同類的汽車
def get_car_brand_class_url(car_url_list, timeout=30):
    """Collect the links listed inside the picture-list box of each page.

    Args:
        car_url_list: Iterable of page URLs to fetch.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of strings built by prefixing the site root to every non-empty
        ``href`` found inside the ``uibox-con carpic-list02`` div of each
        page. Pages that do not contain that div contribute nothing.
    """
    car_class_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    car_class_list = []
    for item in car_url_list:
        response = requests.get(item, headers=headers, timeout=timeout)
        # Parse the decoded text directly instead of re-encoding it to bytes.
        html = response.content.decode("gb2312", "ignore")
        soup = BeautifulSoup(html, 'html.parser')
        container = soup.find('div', {'class': 'uibox-con carpic-list02'})
        if container is None:
            # find() returns None when the div is absent; skip the page
            # rather than abort the whole crawl with AttributeError.
            continue
        for a in container.find_all('a'):
            href = a.get('href')
            if href:
                car_class_list.append(car_class_base + href)
    return car_class_list


def get_brand_class_image_url(car_class_list, timeout=30):
    """Find, for each page, the first link inside its picture-sort list.

    Args:
        car_class_list: Iterable of page URLs to fetch.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of strings, one per page on which the
        ``search-pic-sortul`` list holds an ``<a>`` with a non-empty
        ``href``; each is that ``href`` prefixed with the site root.
    """
    car_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    car_image_url = []
    for item in car_class_list:
        response = requests.get(item, headers=headers, timeout=timeout)
        # Parse the decoded text directly instead of re-encoding it to bytes.
        html = response.content.decode("gb2312", "ignore")
        soup = BeautifulSoup(html, 'html.parser')
        # Each lookup may return None, so resolve the chain step by step
        # and skip the page as soon as one element is missing.
        sort_list = soup.find('ul', {'class': 'search-pic-sortul'})
        if sort_list is None:
            continue
        first_link = sort_list.find('a')
        if first_link is None:
            continue
        href = first_link.get('href')
        if href:
            car_image_url.append(car_base + href)
    return car_image_url


def download_image(car_image_url, folder_path, timeout=30):
    """Download every image listed on the given pages into ``folder_path``.

    Files are named ``0.jpg``, ``1.jpg``, ... using one counter shared by all
    pages, so images from a later page never overwrite those of an earlier
    one.

    Args:
        car_image_url: Iterable of page URLs whose
            ``uibox-con carpic-list03 border-b-solid`` div holds the images.
        folder_path: Directory to write the files to; created if missing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. A progress line is printed after each file is written.
    """
    os.makedirs(folder_path, exist_ok=True)
    headers = {'User-Agent': 'Mozilla/5.0'}
    # The counter lives outside the page loop: resetting it per page made
    # every page reuse the same file names and clobber earlier downloads.
    index = 0
    for item in car_image_url:
        response = requests.get(item, headers=headers, timeout=timeout)
        # Parse the decoded text directly instead of re-encoding it to bytes.
        html = response.content.decode("gb2312", "ignore")
        soup = BeautifulSoup(html, 'html.parser')
        container = soup.find('div', {'class': 'uibox-con carpic-list03 border-b-solid'})
        if container is None:
            # Page has no image box; nothing to download from it.
            continue
        for img in container.find_all('img'):
            src = img.get('src')
            if not src:
                continue
            # Only add a scheme when the src does not already carry one.
            if src.startswith(('http://', 'https://')):
                image_url = src
            else:
                image_url = 'http:' + src
            img_name = os.path.join(folder_path, '{}.jpg'.format(index))
            image = requests.get(image_url, headers=headers, timeout=timeout)
            with open(img_name, 'wb') as file:  # write the raw image bytes
                file.write(image.content)
            print('第%d張圖片下載完成' % index)
            index += 1


# Starting page handed to get_car_brand_url, and the directory that
# download_image writes into.
base_url = 'http://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx?typeId=2%20&brandId=0%20&fctId=0%20&seriesId=0'
folder_path = r'./car_images'

# Each stage feeds its list of URLs to the next one; the last stage saves
# the image files.
image_url_list = get_car_brand_url(base_url)
car_class_list = get_car_brand_class_url(image_url_list)
car_image_url = get_brand_class_image_url(car_class_list)
download_image(car_image_url, folder_path)

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

python爬取照片

Spring Cloud 部署時如何使用 Kubernetes 作爲註冊中心和配置中心

KubeKey 部署 K8s v1.28.8 實戰

cuda做卷積和均值池化

數據庫中的EXISTS語句

matplotlib顯示照片

tensorflow持久化數據格式（2）

今天正式學習計算機視覺

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結