BeautifulSoup 解析 HTML——Python 爬蟲

import requests
from bs4 import BeautifulSoup

# Seconds to wait for eastling.org before giving up; without a timeout
# requests would block forever on an unresponsive server.
REQUEST_TIMEOUT = 10


def crawl_main(word, zu):
    """Scrape dialect data for one character from eastling.org (東方語言學網).

    Submits the site's lookup form via POST and extracts the second
    ``<table>`` of the returned page.

    Args:
        word: The character to look up.
        zu: Dialect group to query. The original notes list
            閩語, 吳語, 平話, 客家, 贛語 and 官話 as values.

    Returns:
        A dict with two keys:
            ``'heads'``: text of the ``<th>`` cells in the table's first row.
            ``'cols'``: one list per remaining row, holding the text of
            that row's ``<td>`` cells.
        Both lists are empty when the page has no second table.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    url = "http://eastling.org/fangyan_word_go.php"
    payload = {'word': word, 'zu': zu, 'mode': 'word', 'map': '查 詢'}
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
    }

    ret = requests.post(url, headers=headers, data=payload,
                        timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors instead of parsing an error page.
    ret.raise_for_status()
    # Decode using the encoding detected from the page content itself.
    ret.encoding = ret.apparent_encoding

    soup = BeautifulSoup(ret.text, 'html.parser')
    tables = soup.find_all('table')
    # The data lives in the second table; if it is missing, return an
    # empty result rather than raising a bare IndexError.
    if len(tables) < 2:
        return {'heads': [], 'cols': []}

    trs = tables[1].find_all('tr')
    if not trs:
        return {'heads': [], 'cols': []}

    # The first row carries the column headers.
    heads = [th.get_text() for th in trs[0].find_all('th')]
    # Every row after the first is treated as a data row.
    cols = [[td.get_text() for td in tr.find_all('td')] for tr in trs[1:]]

    return {'heads': heads, 'cols': cols}


if __name__ == '__main__':
    print(crawl_main("好", "粵語"))
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章