selenium+BeautifulSoup 爬蟲

爬取鬥魚(douyu)平臺所有直播間的主播名字與在線人數

from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time

# Walk through every page of the Douyu "all rooms" directory and print, for
# each room, the viewer count followed by the streamer's name.
#
# NOTE(review): PhantomJS and the find_element_by_* helpers are kept because
# the script was written against them; recent Selenium releases removed both,
# so confirm the installed Selenium version before upgrading.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.douyu.com/directory/all")
    # Page 1 is already displayed after the initial load, so the first jump
    # has to target page 2. Starting the counter at 1 would re-submit the
    # current page and print its rooms twice.
    i = 2
    while True:
        # HTML currently rendered by the browser, handed to the parser.
        html = driver.page_source
        soup = bs(html, "lxml")
        # Elements holding the streamer names.
        names = soup.find_all("span", {"class": "dy-name ellipsis fl"})
        # Elements holding the viewer counts.
        nums = soup.find_all("span", {"class": "dy-num fr"})

        # The two lists are paired by position; get_text() pulls the visible
        # text out of each element.
        for name, number in zip(names, nums):
            print("\t觀衆人數:", number.get_text().strip(), end="")
            print("\t主播名字:", name.get_text().strip())

        # The script treats the presence of this class name anywhere in the
        # page as the signal that there is no further page to visit.
        if "shark-pager-disable-next" in driver.page_source:
            break

        # Type the target page number into the pager's jump box and submit.
        # Clearing first keeps leftover text from being combined with the new
        # number (e.g. "1" + "2" becoming "12").
        jump_box = driver.find_element_by_class_name("jumptxt")
        jump_box.clear()
        jump_box.send_keys(str(i))
        driver.find_element_by_class_name("shark-pager-submit").click()
        # Fixed pause so the next page can render before it is parsed.
        time.sleep(5)
        i += 1
finally:
    # Always shut the browser process down, even if scraping fails midway.
    driver.quit()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章