# 知乎簡單抓取


import time
from datetime import datetime
from urllib import request
from urllib.parse import quote
from urllib.request import urlopen

import requests

from crawler_project.common_bases import CommonBases


class CrawlerZhiHuInfo(CommonBases):
    """Crawl Zhihu general-search results page by page via the search_v3 API.

    Request headers come from ``get_headers()``, which is not defined in this
    class (presumably provided by ``CommonBases`` -- confirm there).
    """

    # Search endpoint. ``search_key`` must already be percent-encoded.
    SEARCH_URL_TEMPLATE = (
        'https://www.zhihu.com/api/v4/search_v3?t=general&q={search_key}'
        '&correction=1&offset={offset}&limit={limit}&lc_idx=62&show_all_topics=0'
    )
    # Number of results requested per page; also the offset step.
    PAGE_SIZE = 20
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10
    # Stop crawling after this many failed pages in a row.
    MAX_CONSECUTIVE_FAILURES = 3

    def requests_data(self, url):
        """Fetch ``url`` with the crawler headers and return the decoded JSON.

        :param url: fully built request URL
        :return: the parsed JSON body
        :raises ValueError: if the response body is not valid JSON
        :raises requests.RequestException: on network errors or timeout
        """
        headers = self.get_headers()
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(
            url=url, headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        return response.json()

    def treatment_data(self, data):
        """Placeholder for post-processing of fetched data; not implemented.

        :param data: fetched data (currently ignored)
        :return: None
        """
        return None

    def page_info(self, search_key='不方便', max_pages=9999, delay=2):
        """
        Pagination handling: fetch search result pages until the API reports
        the last page, ``max_pages`` is reached, or too many pages fail in a
        row.

        :param search_key: search keyword; percent-encoded before use
        :param max_pages: upper bound on the number of pages requested
        :param delay: seconds to sleep between consecutive requests
        :return: list of the JSON payloads of all successfully fetched pages
        """
        data_lists = []
        encoded_key = quote(search_key, safe='')
        consecutive_failures = 0

        for page_index in range(max_pages):
            # Page number and offset are both derived from the loop index so
            # the logged page number always matches the offset requested.
            pag_num = page_index + 1
            url = self.SEARCH_URL_TEMPLATE.format(
                search_key=encoded_key,
                offset=page_index * self.PAGE_SIZE,
                limit=self.PAGE_SIZE,
            )
            try:
                json_data = self.requests_data(url)
                # An error payload has no paging section; treat it as a failed
                # page instead of crashing with KeyError/TypeError.
                is_end = json_data['paging']['is_end']
            except (ValueError, KeyError, TypeError, requests.RequestException):
                consecutive_failures += 1
                print(f'知乎程序,正在獲取失敗是第{pag_num}頁數據')
                if consecutive_failures >= self.MAX_CONSECUTIVE_FAILURES:
                    # Repeated failures usually mean we are blocked; stop
                    # rather than keep hitting the server.
                    break
                time.sleep(delay)
                continue

            consecutive_failures = 0
            data_lists.append(json_data)
            print(f'知乎程序,正在獲取第{pag_num}頁數據')
            print(json_data)
            if is_end:
                break
            # Throttle only when another request will follow.
            time.sleep(delay)

        print(data_lists)
        return data_lists

    def main(self):
        """
        Main program: run the paginated crawl and print the elapsed time.

        :return: None
        """
        start_time = datetime.now()
        self.page_info()
        end_time = datetime.now()
        print('程序執行時間', end_time - start_time)


# Module-level crawler instance: created on import as well as on direct execution.
crawler_zhihu_info = CrawlerZhiHuInfo()

# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    crawler_zhihu_info.main()






# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章