# -*- coding: utf-8 -*-
import json
import ssl
from urllib.request import Request
import urllib
import sys
import time
import hashlib
import os
from threading import Thread
import logging
from queue import Queue
# SECURITY NOTE: this swaps the process-wide default HTTPS context factory for
# one that performs no certificate verification, so every urllib HTTPS request
# made by this script skips certificate checks.
ssl._create_default_https_context = ssl._create_unverified_context

# Launch command: mitmdump -p 8081 -s douyin1.py
keyword = 'shshs'  # sub-folder name; presumably the search keyword being scraped

# Raw string: the original 'E:\douyin' only worked because "\d" is not a
# recognised escape sequence, which newer Python versions warn about.
BASE_PATH = r'E:\douyin'
STORE_PATH = '{}\\{}'.format(BASE_PATH, keyword)  # directory the videos are saved in

# makedirs also creates BASE_PATH when it is missing, and exist_ok removes the
# check-then-create race of "if not exists: mkdir".
os.makedirs(STORE_PATH, exist_ok=True)

# Text file (next to the video directory) that collects the captured links.
TXT_FILENAME = STORE_PATH + '.txt'

# Request headers sent with every video download request.
header = {
    "User-Agent": "com.ss.android.ugc.aweme/251 (Linux; U; Android 4.4.2; zh_CN; MI 6 ; Build/NMF26X; Cronet/58.0.2991.0)",
}

logging.basicConfig(
    filename='{}\\douyin.log'.format(BASE_PATH),
    level=logging.INFO,
    format="%(asctime)s-%(name)s-%(levelno)s-%(lineno)d-%(message)s",
)
logger = logging.getLogger(__name__)

# Bounded work queue shared by the response hook (producer) and the download
# worker threads (consumers); items are "url+name" strings.
q = Queue(maxsize=2000)
def response(flow):
    """mitmproxy ``response`` hook: queue the videos listed in search responses.

    When the request URL of *flow* starts with one of the known search
    endpoints, the response body is parsed as JSON and, for every entry of its
    ``aweme_list``, a ``"<video url>+<md5 of description>"`` string is put on
    the module-level queue ``q``.

    Args:
        flow: the intercepted flow; only ``flow.request.url`` and
            ``flow.response.text`` are read.

    Returns:
        None. Non-matching requests, bodies that are not valid JSON and
        malformed list entries are skipped (the latter two are logged).
    """
    # Responses are parsed only when the request URL starts with one of these.
    search_prefixes = (
        'https://aweme.snssdk.com/aweme/v1/search/',
        'https://api.amemv.com/aweme/v1/search/',
        # The original literal started with a space and so could never match.
        'https://aweme.snssdk.com/aweme/v1/general/search/',
        'https://aweme-hl.snssdk.com/aweme/v1/search/',
    )
    request_url = flow.request.url
    if not request_url.startswith(search_prefixes):
        return

    logger.info(request_url)
    try:
        data = json.loads(flow.response.text)
    except (TypeError, ValueError) as exc:
        logger.warning("search response is not valid JSON: %s", exc)
        return

    # Extract the video URL and the video name of every listed entry.
    aweme_list = data.get('aweme_list') if isinstance(data, dict) else None
    for aweme in aweme_list or []:
        try:
            video_url = aweme['video']['play_addr']['url_list'][0]
            video_name = aweme['desc']
            # Video link: every "play" substring is swapped for "playwm".
            real_url = video_url.replace("play", "playwm")
            # Hash of the video name, used as the de-duplication key.
            name = hashlib.md5(video_name.encode()).hexdigest()
        except (KeyError, IndexError, TypeError, AttributeError) as exc:
            # One malformed entry must not discard the rest of the batch.
            logger.warning("skipping malformed aweme entry: %r", exc)
            continue
        q.put(real_url + "+" + name)
    print("The len of queue is **********************************************:", q.qsize())
def download(q):
    """Worker loop: take ``"url+name"`` items off *q* and save each video.

    For every item the raw string is appended to ``TXT_FILENAME``, then the
    video is fetched with the ``header`` request headers and stored as
    ``<STORE_PATH>\\<md5 of name>.mp4`` unless that file already exists.
    A failed download puts the item back on the queue to be retried.

    Every ``q.get()`` is matched by exactly one ``q.task_done()`` (also on the
    skip and retry paths) so that ``q.join()`` can return.

    Args:
        q: queue of ``"<video url>+<name>"`` strings.

    Returns:
        Never returns; the loop runs until the process ends.
    """
    import tempfile  # local import: only needed for the temporary download file

    while True:
        item = q.get()
        try:
            # Record the captured link in the text file (single write per item).
            with open(TXT_FILENAME, 'a') as f:
                f.write(item + '\n')
            # Split on the LAST "+", so a "+" inside the URL part is kept.
            url, video_name = item.rsplit("+", 1)
        except (OSError, TypeError, ValueError, AttributeError) as e:
            # The item is skipped as before, but it is reported and accounted
            # for instead of being silently lost.
            print("Error:", e)
            q.task_done()
            continue
        print("---------------queue has %s------------" % q.qsize())
        try:
            filename = '{}\\{}.mp4'.format(STORE_PATH, hashlib.md5(video_name.encode()).hexdigest())
            if not os.path.exists(filename):
                r = Request(url, headers=header)
                # Download into a temporary file in the same directory and
                # rename it only on success; a failed attempt therefore never
                # leaves a partial .mp4 that a retry would treat as complete.
                fd, part_path = tempfile.mkstemp(suffix='.part', dir=STORE_PATH)
                try:
                    with os.fdopen(fd, 'wb') as out, \
                            urllib.request.urlopen(r, timeout=60) as resp:
                        for chunk in iter(lambda: resp.read(65536), b''):
                            out.write(chunk)
                    os.replace(part_path, filename)
                except BaseException:
                    try:
                        os.remove(part_path)
                    except OSError:
                        pass
                    raise
                print("download ok:", video_name)
        except Exception as e:
            # Re-queue for another attempt; the task_done() below balances the
            # get() of this attempt, leaving exactly one outstanding task.
            q.put(item)
            q.task_done()
            print("Error:", e)
            time.sleep(0.2)
        else:
            q.task_done()
            time.sleep(0.1)
# Create the 15 download worker threads (each runs download(q)), then start
# them one by one, announcing each start.
workers = [Thread(target=download, args=(q,)) for _ in range(15)]
for worker in workers:
    print("threading start")
    worker.start()

# Wait until every item put on the queue has been marked done, then exit with
# status 1.
# NOTE(review): the workers are non-daemon threads running an endless loop, and
# the exit status is non-zero even on normal completion - confirm both are
# intended.
q.join()
sys.exit(1)
# 抖音小視頻爬取(配合mitmproxy和夜神模擬器)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.