首先,找到要爬取的網站:
https://beijing.qfang.com/newhouse/list/n1
用瀏覽器開發者工具裡的箭頭(元素選取工具)點擊頁面上你要爬取的信息,然後複製該元素的 XPath。
把每一項要爬取的信息及其 XPath 記錄下來。
最後把這些 XPath 寫入代碼:
from lxml import etree
import requests
import csv
import time
# Helper that appends one row to qfang.csv (currently disabled)
# def writecsv(item):
#     with open('qfang.csv','a',encoding= 'utf-8') as f:
#         write=csv.writer(f)
#         # guard against write failures
#         try:
#             write.writerow(item)
#         except:
#             print('write error!')
if __name__ == '__main__':
    # Scrape listing pages 1-8 under start_url and print five text fields
    # for every listing found on each page. Judging by the variable names
    # these are name (mingcheng), layout, area, place and price (money).
    headers = {'user-Agent': 'Mozilla/5.0'}
    start_url = "https://beijing.qfang.com/newhouse/list/n"
    for x in range(1, 9):
        url = start_url + str(x)
        # Fetch the page. The timeout keeps a stalled connection from
        # hanging the whole run.
        html = requests.get(url, headers=headers, timeout=10)
        # Pause between requests so the site is not hit too frequently.
        time.sleep(1)
        # Build a selector from the page source.
        selector = etree.HTML(html.text)
        xiaoqulist = selector.xpath('/html/body/div[4]/div/div[1]/div[4]/ul/li')
        # Each <li> is one listing; the field XPaths are relative to it.
        for xiaoqu in xiaoqulist:
            try:
                mingcheng = xiaoqu.xpath('div[2]/div[1]/a/em/text()')[0]
                layout = xiaoqu.xpath('div[2]/div[2]/p[3]/a/text()')[0]
                area = xiaoqu.xpath('div[2]/div[3]/p[1]/text()')[0]
                place = xiaoqu.xpath('div[2]/div[3]/p[3]/text()')[0]
                money = xiaoqu.xpath('div[3]/p[2]/text()')[0]
            except IndexError:
                # At least one field is missing from this listing. Skip it:
                # falling through would print values left over from the
                # previous listing, or raise NameError on the first one.
                continue
            # CSV persistence is currently disabled:
            # item = [mingcheng, layout, area, place, money]
            # writecsv(item)
            print('正在抓取:', mingcheng, layout, area, place, money)
你要爬取的信息