Python爬虫:爬取app短视频

"""
夜神模拟器 + Fiddler + Python:爬取 app 小视频

准备步骤:
1. 下载 Fiddler,配置网络与端口;
2. 下载夜神模拟器,配置网络,下载认证;
3. 下载 app,最后抓包。
"""

class DouYinApp():
    """Download short videos advertised by a paginated JSON feed.

    NOTE: despite the class name, every URL built here targets
    ``c.m.163.com``.

    Call chain for one listing page:
    ``load_app`` -> ``paser_mp4`` -> ``details_url`` -> ``paser_id``
    -> ``get_page``.  ``pool_url`` produces the listing-page URLs.
    """

    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block a worker forever.
    REQUEST_TIMEOUT = 10

    # Characters that are illegal (or act as separators) in file names on
    # common file systems, each mapped to an underscore.
    _UNSAFE_FILENAME_CHARS = str.maketrans(
        {ch: '_' for ch in '\\/:*?"<>|\r\n\t'}
    )

    def get_page(self, url):
        """Fetch *url* and return its decoded JSON body.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
            requests.RequestException: on connection errors or timeout.
            ValueError: if the body is not valid JSON.
        """
        # Function-scope import: the file has no top-level import block.
        import requests

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of trying to parse an error page as JSON.
        response.raise_for_status()
        return response.json()

    def paser_id(self, url):
        """Return the ``skipID`` of every entry under ``tab_list`` at *url*.

        A response without ``tab_list`` yields an empty list.
        """
        # NOTE(review): presumably a page past the end of the feed has no
        # 'tab_list'; treating that as "no items" is an assumption - confirm.
        entries = self.get_page(url).get('tab_list') or []
        return [entry['skipID'] for entry in entries]

    def details_url(self, url):
        """Return one detail-page URL per id found on listing page *url*."""
        template = 'https://c.m.163.com/nc/video/detail/{}.html'
        return [template.format(video_id) for video_id in self.paser_id(url)]

    def paser_mp4(self, url):
        """Collect title and video link for each video on listing page *url*.

        Returns:
            A list of ``{'title': ..., 'mp4_url': ...}`` dicts.  Entries whose
            detail page carries no ``mp4Hd_url`` value are skipped.
        """
        links = []
        for detail_url in self.details_url(url):
            detail = self.get_page(detail_url)
            mp4_url = detail.get('mp4Hd_url')
            if not mp4_url:
                # Nothing downloadable; skipping keeps the rest of the page.
                continue
            title = detail['title']
            links.append({'title': title, 'mp4_url': mp4_url})
            print(title)
        return links

    def save(self, name, contentt):
        """Write the bytes *contentt* to ``<name>.mp4`` in the working directory."""
        with open(str(name) + '.mp4', 'wb') as f:
            f.write(contentt)

    @classmethod
    def _safe_filename(cls, name):
        """Return *name* with unsafe file-name characters replaced by ``_``.

        Surrounding spaces and dots are stripped; an empty result becomes
        ``'untitled'`` so the output file is never just ``.mp4``.
        """
        cleaned = str(name).translate(cls._UNSAFE_FILENAME_CHARS).strip(' .')
        return cleaned or 'untitled'

    def _download(self, url):
        """Return the raw response body of *url* as bytes."""
        # Function-scope import: the file has no top-level import block.
        import requests

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Avoid saving an HTML/JSON error body as if it were a video.
        response.raise_for_status()
        return response.content

    def load_app(self, url):
        """Download and save every video found on listing page *url*."""
        for item in self.paser_mp4(url):
            # NOTE(review): the slice keeps characters 1..7 and so drops the
            # first character of the title - kept as in the original; confirm
            # this is intended.  Equal slices overwrite each other's file.
            file_stem = self._safe_filename(item['title'][1:8])
            self.save(file_stem, self._download(item['mp4_url']))

    def pool_url(self, pages=5, page_size=20):
        """Build the listing-page URLs.

        Args:
            pages: number of listing pages to generate (default 5).
            page_size: value used for the second number in the URL and as
                the step between consecutive offsets (default 20).

        Returns:
            A list of *pages* URLs with offsets ``0, page_size, 2*page_size...``.
        """
        template = ('https://c.m.163.com/nc/subscribe/list/'
                    'T1528640720843/all/{}-{}.html')
        return [template.format(page * page_size, page_size)
                for page in range(pages)]

if __name__ == "__main__":
    # `Pool` was used without ever being imported.  The thread-backed pool
    # is chosen here because the original comment described this step as
    # multi-threading.
    # NOTE(review): confirm a thread pool (not a process pool) was intended.
    from multiprocessing.dummy import Pool

    douyin = DouYinApp()
    print('开始爬取抖音视频。。。。')
    # One task per listing page; map() blocks until every page is done.
    pool = Pool()
    try:
        pool.map(douyin.load_app, douyin.pool_url())
    finally:
        # Always release the workers, even if a page raised.
        pool.close()
        pool.join()

    print('开始爬取抖音视频结束喽。。。。')
    
 

    
    

爬取结果:

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章