# Crawl Baidu search result pages for a keyword and print the result titles.
# Run as a script: it prompts for the keyword and the number of pages.
import urllib.request
import re
import urllib.parse
from bs4 import BeautifulSoup


def build_onepage_crawl_function(keyword, number_of_page):
    """Fetch Baidu result pages for *keyword* and print each result title.

    For every page the function prints the ``pn`` offset, the full request
    URL, and then the text of each element matched by ``.tts-title a``.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the URL.
        number_of_page: Number of result pages to fetch. Any value accepted
            by ``int()`` works, so the raw string returned by ``input()``
            can be passed directly.

    Raises:
        ValueError: If ``number_of_page`` cannot be converted by ``int()``.

    Network failures and the 2-second timeout are not caught here; they
    propagate to the caller and stop the crawl at the failing page.
    """
    # Build the request header. The value is a browser User-Agent string
    # (the original literal carried a stray trailing ")" which is removed).
    user_agent_header = (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) "
        "Gecko/20100101 Firefox/78.0",
    )
    opener = urllib.request.build_opener()
    opener.addheaders = [user_agent_header]

    # Percent-encode the search keyword once; it is the same for every page.
    encoded_keyword = urllib.parse.quote(keyword)

    # Fetch the pages one by one.
    for page_index in range(int(number_of_page)):
        # The "pn" query parameter advances in steps of 10 per page. A
        # separate local is used so the caller's argument is not rebound.
        result_offset = page_index * 10
        print("百度搜索頁面鏈接:", result_offset)

        # Full URL: keyword plus page offset.
        url = (
            "https://www.baidu.com/s?wd=" + encoded_keyword
            + "&pn=" + str(result_offset)
        )
        print(url)

        # Close the HTTP response deterministically instead of leaking it.
        with opener.open(url, timeout=2) as response:
            original_html = response.read().decode("utf-8", "ignore")

        soup = BeautifulSoup(original_html, "html.parser")
        # NOTE(review): ".tts-title a" is presumably the result-title link
        # in Baidu's markup - confirm against the live page.
        for title in soup.select(".tts-title a"):
            print(title.text)


# Ask for the keyword and the number of pages to crawl. Guarded so that
# importing this module does not block on stdin or hit the network.
if __name__ == "__main__":
    keyword_input = input("請輸入關鍵詞:")
    number_of_page_input = input("請輸入頁數:")
    build_onepage_crawl_function(keyword_input, number_of_page_input)