獲取糗事百科文字欄目所有用戶ID

import requests
from lxml import etree
import time

# HTTP request headers passed to requests.get() in get_name(): a desktop
# Chrome User-Agent string plus a captured browser cookie string.
# NOTE(review): the Cookie value contains spaces around '=' and '-', which
# looks like damage from an auto-formatter — confirm against the cookie as
# originally captured from the browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
          'Cookie': 'gr_user_id = c6f58a39 - ea25 - 4f58 - b448 - 545070192c4e;59a81cc7d8c04307ba183d331c373ef6_gr_session_id = e8e4b66f - 440a - 4ae7 - a76a - fe2dd2b34a26;59a81cc7d8c04307ba183d331c373ef6_gr_last_sent_sid_with_cs1 = e8e4b66f - 440a - 4ae7 - a76a - fe2dd2b34a26;59a81cc7d8c04307ba183d331c373ef6_gr_last_sent_cs1 = N % 2FA;59a81cc7d8c04307ba183d331c373ef6_gr_session_id_e8e4b66f - 440a - 4ae7 - a76a - fe2dd2b34a26 = true;grwng_uid = 9ec14ad9 - 5ac0 - 4bb1 - 81c1 - bc60d2685710;abtest_ABTest4SearchDate = b;xzuuid = 79426b52;_uab_collina = 154660443606130958890473;TY_SESSION_ID = 907f32df - c060 - 49ca - b945 - 98215cc03475;rule_math = pvzq3r06hi'}

def get_name(url):
    """Fetch one listing page and print the user name of every post on it.

    The page is downloaded with the module-level ``headers``, parsed with
    lxml, and each post container (a node whose class attribute is exactly
    ``article block untagged mb15 typs_hot``) is queried for the text of
    ``div[1]/a[2]/h2``. Posts for which that path yields no text are
    skipped.

    Args:
        url: Absolute URL of the listing page to scrape.

    Returns:
        None. Names are written to stdout, one per line. Any failure
        (network error, HTTP error status, unparsable page) is printed and
        the function returns without raising, so the caller can continue
        with the next page.
    """
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        res = requests.get(url, headers=headers, timeout=10)
        # Do not try to parse an error page as if it were a listing.
        res.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return

    try:
        html = etree.HTML(res.text)
        # Depending on the lxml version an empty document may yield None
        # rather than raise; either way there is nothing to extract.
        if html is None:
            return
        infos = html.xpath('//*[@class="article block untagged mb15 typs_hot"]')
        for info in infos:
            name = info.xpath('div[1]/a[2]/h2/text()')
            if name:
                print(name[0])
    except Exception as e:
        # Best-effort scraping: report the problem and give up on this page
        # only, matching the original error-handling style.
        print(e)

if __name__ == '__main__':
    # Scrape listing pages 1 through 13 of the "text" section, one at a time.
    for number in range(1, 14):
        get_name('https://www.qiushibaike.com/text/page/{}/'.format(number))
        print("------------------這是一頁的分割線----------------------------")
        # Pause between pages (presumably to avoid hammering the server).
        time.sleep(1)

獲取糗事百科網“文字”欄目所有的用戶ID。重點關注xpath語法。

 html.xpath('//*[@class="article block untagged mb15 typs_hot"]')獲取所有段子DIV。返回一個列表,列表中的每個元素是一個段子DIV對象。

 name = info.xpath('div[1]/a[2]/h2/text()') 再從每一個對象中獲取ID。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章