Python 獲取 CSDN 課程基本信息

目標數據:

代碼:

import requests
from lxml import etree

# Target listing: https://www.csdn.net/gather_4a/NtDakg1sOC1lZHUO0O0O.html?page=1
# Fields scraped for each course: title, price, instructor.
#
# The script fetches listing pages 1-3, extracts one record per course block
# and prints every record at the end.


# URLs of listing pages 1 through 3.
url = ["https://www.csdn.net/gather_4a/NtDakg1sOC1lZHUO0O0O.html?page={}".format(j)
       for j in range(1, 4)]

print(len(url))
# Browser-style User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}

# Accumulates one dict per course block; every value is the list of text
# nodes returned by the corresponding xpath() query (possibly empty).
return_Date = []
for urls in url:
    # requests applies no timeout by default, so a stalled connection would
    # block the script indefinitely; with a timeout it raises instead.
    web_Source_Code = requests.get(urls, headers=headers, timeout=10)

    print(web_Source_Code.status_code)

    if not web_Source_Code.ok:
        # A 4xx/5xx response carries an error body, not a course listing;
        # skip it rather than parse it.
        continue

    html = etree.HTML(web_Source_Code.text)

    # NOTE(review): this is an exact match on the class attribute, including
    # the trailing space - confirm against the live page markup.
    block_1 = html.xpath('//div[@class="course_item "]')

    print('找到目標元素:', len(block_1))

    for block_2 in block_1:
        # The queries below are relative to the current course block.
        course_Name = block_2.xpath('div[@class="course_title"]/a/text()')
        price_Of_Course = block_2.xpath(
            'div[@class="course_lecturer"]/span/text()')
        the_Course_Instructor = block_2.xpath(
            'div[@class="course_lecturer"]/a/span/text()')
        return_Date.append({
            "course_Name": course_Name,
            "price_Of_Course": price_Of_Course,
            "the_Course_Instructor": the_Course_Instructor,
        })


# Print every collected record, one per line.
for date_s in return_Date:
    print(date_s)

 

輸出截圖:

發佈了13 篇原創文章 · 獲贊 3 · 訪問量 2424
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章