import requests
import urllib.request
from bs4 import BeautifulSoup
import os
import time
def get_car_brand_url(base_url):
    """Collect the URL of every car brand linked from the menu page.

    Downloads ``base_url``, decodes the body as GB2312 (bytes that cannot be
    decoded are dropped) and prefixes each anchor's ``href`` with the site
    root.

    Args:
        base_url: Address of the page whose anchors point at brand pages.

    Returns:
        A list of URLs, one per ``<a>`` tag that carries an ``href``, in
        document order.
    """
    car_brand = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(base_url, headers=headers)
    html = response.content.decode("gb2312", "ignore").encode("utf-8")
    soup = BeautifulSoup(html, 'html.parser')

    car_brand_list = []
    for item in soup.find_all('a'):
        href = item.get('href')
        # An anchor without an href yields None, which cannot be
        # concatenated onto the site root.
        if href is None:
            continue
        car_brand_list.append(car_brand + href)
    return car_brand_list
def get_car_brand_class_url(car_url_list):
    """Collect the URLs of all the different car classes of every brand.

    Each URL in ``car_url_list`` is downloaded and decoded as GB2312. Every
    anchor inside the ``div`` with class ``uibox-con carpic-list02`` has its
    ``href`` prefixed with the site root and added to the result.

    Args:
        car_url_list: Iterable of brand page URLs.

    Returns:
        A flat list of URLs gathered from all pages, in visit order. Pages
        without the expected container and anchors without an ``href``
        contribute nothing.
    """
    car_class_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    car_class_list = []
    for item in car_url_list:
        response = requests.get(item, headers=headers)
        html = response.content.decode("gb2312", "ignore").encode("utf-8")
        soup = BeautifulSoup(html, 'html.parser')

        container = soup.find('div', {'class': 'uibox-con carpic-list02'})
        # find() returns None when the page has no such container; skip the
        # page instead of aborting the whole crawl with an AttributeError.
        if container is None:
            continue

        for a in container.find_all('a'):
            href = a.get('href')
            if href is not None:
                car_class_list.append(car_class_base + href)
    return car_class_list
def get_brand_class_image_url(car_class_list):
    """Find, for each car class page, the URL of its first sort-list link.

    Each URL in ``car_class_list`` is downloaded and decoded as GB2312. The
    first anchor inside the ``ul`` with class ``search-pic-sortul`` is taken
    and its ``href`` is prefixed with the site root.

    Args:
        car_class_list: Iterable of car class page URLs.

    Returns:
        A list with at most one URL per input page, in visit order. Pages
        lacking the list, the anchor, or the ``href`` are skipped.
    """
    car_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    car_image_url = []
    for item in car_class_list:
        response = requests.get(item, headers=headers)
        html = response.content.decode("gb2312", "ignore").encode("utf-8")
        soup = BeautifulSoup(html, 'html.parser')

        # Each lookup may return None; guard every step so that one page
        # with a different layout does not stop the crawl.
        sort_list = soup.find('ul', {'class': 'search-pic-sortul'})
        if sort_list is None:
            continue
        first_link = sort_list.find('a')
        if first_link is None:
            continue
        href = first_link.get('href')
        if href is None:
            continue
        car_image_url.append(car_base + href)
    return car_image_url
def download_image(car_image_url, folder_path):
    """Download every gallery image found on the given pages.

    Each URL in ``car_image_url`` is downloaded and decoded as GB2312. Every
    ``<img>`` inside the ``div`` with class
    ``uibox-con carpic-list03 border-b-solid`` is fetched and written to
    ``folder_path`` as ``<n>.jpg``, where ``n`` counts up from 0 across all
    pages so that a later page never overwrites files from an earlier one.

    Args:
        car_image_url: Iterable of gallery page URLs.
        folder_path: Directory that receives the files; created if missing.

    Returns:
        None. Progress is reported on standard output.
    """
    # Create the target folder; do nothing when it is already there.
    os.makedirs(folder_path, exist_ok=True)
    headers = {'User-Agent': 'Mozilla/5.0'}

    # One running counter for the whole run: restarting it for every page
    # would reuse 0.jpg, 1.jpg, ... and clobber earlier downloads.
    index = 0
    for item in car_image_url:
        response = requests.get(item, headers=headers)
        html = response.content.decode("gb2312", "ignore").encode("utf-8")
        soup = BeautifulSoup(html, 'html.parser')

        gallery = soup.find('div', {'class': 'uibox-con carpic-list03 border-b-solid'})
        if gallery is None:
            # Page has no gallery container; nothing to download here.
            continue

        for img in gallery.find_all('img'):
            src = img.get('src')
            if src is None:
                continue
            # Only protocol-relative sources ("//host/path") need a scheme;
            # prefixing one that already has a scheme would corrupt it.
            image_url = 'http:' + src if src.startswith('//') else src
            img_name = os.path.join(folder_path, '{}.jpg'.format(index))
            image = requests.get(image_url)
            # Write the image data in binary mode; the with-block flushes
            # and closes the file.
            with open(img_name, 'wb') as file:
                file.write(image.content)
            print('第%d張圖片下載完成' % index)
            index += 1
# Script driver: walk from the left-menu listing down to the gallery pages
# and save every image that is found.
folder_path = './car_images'
base_url = 'http://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx?typeId=2%20&brandId=0%20&fctId=0%20&seriesId=0'

# Despite its name, image_url_list holds the brand page URLs returned by
# get_car_brand_url().
image_url_list = get_car_brand_url(base_url)
car_class_list = get_car_brand_class_url(image_url_list)
car_image_url = get_brand_class_image_url(car_class_list)

download_image(car_image_url=car_image_url, folder_path=folder_path)
python爬取照片
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。