目標數據:
代碼:
import requests
from lxml import etree
# Scrape course listings from CSDN, starting at:
# https://www.csdn.net/gather_4a/NtDakg1sOC1lZHUO0O0O.html?page=1
# Fields collected per course: title, price, instructor.

# One listing URL per page number; range(1, 4) yields pages 1, 2 and 3.
url = [
    "https://www.csdn.net/gather_4a/NtDakg1sOC1lZHUO0O0O.html?page={}".format(j)
    for j in range(1, 4)
]
print(len(url))

# A User-Agent header is sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
request_timeout = 10

# Accumulates one dict per course found across all pages.
return_Date = []
for urls in url:
    web_Source_Code = requests.get(
        urls, headers=headers, timeout=request_timeout)
    print(web_Source_Code.status_code)
    # The status code has already been reported above; skip responses that
    # did not succeed instead of parsing an error body as a listing page.
    if not web_Source_Code.ok:
        continue

    html = etree.HTML(web_Source_Code.text)
    # The class attribute is matched exactly, including its trailing space.
    block_1 = html.xpath('//div[@class="course_item "]')
    print('找到目標元素:', len(block_1))

    for block_2 in block_1:
        # Each xpath() call below is relative to the current course node and
        # returns a list of text nodes; the lists are stored as-is.
        course_Name = block_2.xpath('div[@class="course_title"]/a/text()')
        price_Of_Course = block_2.xpath(
            'div[@class="course_lecturer"]/span/text()')
        the_Course_Instructor = block_2.xpath(
            'div[@class="course_lecturer"]/a/span/text()')
        return_Date.append({
            "course_Name": course_Name,
            "price_Of_Course": price_Of_Course,
            "the_Course_Instructor": the_Course_Instructor,
        })

# Print every collected record once all pages have been processed.
for date_s in return_Date:
    print(date_s)
輸出截圖: