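# The images are scraped from the official website of the Chinese (national) server,
# so there are no 4K-class images and only a few sizes to choose from; this script
# therefore downloads only the two sizes the author personally prefers.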
import os
from urllib import request
import requests
import bs4
# Module-level list of failure records; download_image appends one dict
# (built by un_download) for every image it could not retrieve.
un_download_url = []
# Build the record that describes one failed download.
def un_download(file_name, url):
    """Return a failure record for a single image.

    Args:
        file_name: Name of the skin whose image could not be fetched.
        url: The image URL that failed.

    Returns:
        A new dict holding ``file_name`` under ``'name'`` and ``url`` under
        ``'url'``.
    """
    return {'name': file_name, 'url': url}
# Download the images of one skin.
def download_image(url_info, dirpath, file_name):
    """Download a skin's two images into ``dirpath`` as ``1.jpg`` and ``2.jpg``.

    Args:
        url_info: Sequence whose items at index 1 and 2 are the image URLs
            (index 0 is not read here).
        dirpath: Existing directory that receives the files.
        file_name: Skin name, used only in progress messages and in the
            failure record.

    Downloading is best-effort: a failure for one image is reported, recorded
    in the module-level ``un_download_url`` list, and the next image is still
    attempted.
    """
    for index, image_url in enumerate(url_info[1:3], start=1):
        target = os.path.join(dirpath, "%d.jpg" % index)
        try:
            request.urlretrieve(image_url, target)
        except Exception as exc:
            # Catch Exception rather than using a bare ``except`` so that
            # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the script.
            print("%s%s下載失敗: %s" % (file_name, image_url, exc))
            un_download_url.append(un_download(file_name, image_url))
        else:
            # Reported outside the ``try`` so that a printing problem is not
            # mistaken for a failed download.
            print("%s%s下載完成!" % (file_name, image_url))
# Extract the name and the two image URLs of every skin.
def git_image_url(datas):
    """Collect ``[name, mainImg, loadingImg]`` for each skin entry.

    Args:
        datas: Iterable of mappings, each providing the keys ``'name'``,
            ``'mainImg'`` and ``'loadingImg'``.

    Returns:
        A list with one three-item list per entry, in input order.

    Raises:
        KeyError: If an entry lacks one of the three keys.
    """
    return [
        [skin['name'], skin['mainImg'], skin['loadingImg']]
        for skin in datas
    ]
# def repalce_symbol(file_name):
# specific_symbols = ['\\','*','"','?','<','>','/','|']
# for specific_symbol in specific_symbols:
# print(specific_symbol)
# file_name = file_name.replace(':','-')
# print(type(file_name))
# file_name = file_name.repalce('d','')
# return file_name
# Fetch the IDs of all heroes from the hero-list endpoint.
def get_hero_id(timeout=10):
    """Return the ``heroId`` of every entry in the hero list.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A list with the ``heroId`` value of each item found under the
        ``'hero'`` key of the JSON response, in response order.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status.
    """
    id_url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    # Browser-like headers; presumably the server expects them (not verified here).
    header1 = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
        'referer': 'https://lol.qq.com/data/info-heros.shtml'
    }
    resp = requests.get(id_url, headers=header1, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    resp.raise_for_status()
    return [hero['heroId'] for hero in resp.json()['hero']]
# Translation table for skin names used as directory names: ':' becomes '-',
# and the remaining characters that Windows forbids in file names are removed.
_DIR_NAME_TABLE = str.maketrans(':', '-', '\\/*"?<>|')


def _safe_dir_name(name):
    """Return ``name`` with characters unsuitable for a directory name replaced."""
    return name.translate(_DIR_NAME_TABLE)


def main():
    """Download the images of every skin of every hero.

    For each hero ID returned by ``get_hero_id`` the hero's JSON document is
    fetched, and for each entry under its ``'skins'`` key a directory
    ``lol/<skin name>`` (relative to the current working directory) is
    created and filled by ``download_image``. Downloads that failed are
    listed at the end.

    Raises:
        requests.RequestException: If a hero document cannot be fetched.
    """
    for heroId in get_hero_id():
        base_url = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'.format(heroId)
        header2 = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36',
            # The header value is just the URL; it must not repeat the
            # "referer: " field name.
            'referer': 'https://lol.qq.com/data/info-defail.shtml?id={}'.format(heroId)
        }
        resp = requests.get(base_url, headers=header2, timeout=10)
        resp.raise_for_status()
        for url_info in git_image_url(resp.json()['skins']):
            file_name = _safe_dir_name(url_info[0])
            dirpath = os.path.join('lol', file_name)
            # makedirs also creates the parent 'lol' directory on the first
            # run and does not fail when the directory already exists.
            os.makedirs(dirpath, exist_ok=True)
            download_image(url_info, dirpath, file_name)

    # Report what download_image recorded as failed; otherwise the list is
    # collected and silently discarded.
    if un_download_url:
        print("%d 個圖片下載失敗:" % len(un_download_url))
        for fail_info in un_download_url:
            print("%s %s" % (fail_info['name'], fail_info['url']))
# Entry-point guard: run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()