Python爬蟲:Q房網房源信息

#爬蟲項目採取xpath解析
#爬取Q房網的房源詳情信息並保存爲csv文件
#爬取具體內容有:"小區名稱", "戶型", "面積", "裝修", "樓層", "朝向",
#  "售價", "總價/萬", "詳情"

 1、導入模塊

import requests
import time
from lxml import etree
import csv

2、#定義spider_page()函數爬取並返回頁面信息

def spider_page(url, timeout=10):
    """Fetch *url* with browser-like headers and return the response body.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            crawl indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    headers = {
        # Adjacent literals are concatenated by the parser. A backslash
        # continuation *inside* a string literal would instead embed the
        # next line's leading indentation into the header value.
        'user-agent': ('Mozilla/5.0 (Windows NT 10.0; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/70.0.3538.110 Safari/537.36'),
        'upgrade-insecure-requests': '1',
        # NOTE(review): hard-coded session cookie captured from a browser;
        # presumably it expires and has to be refreshed -- confirm.
        'cookie': 'acw_tc=df6fef1a15477176336286817eeb02a7224b5ac26463f80afbe8cf7952; qchatid=d59ef744-850a-427b-9340-264de69f268b; WINDOW_DEVICE_PIXEL_RATIO=1; _ga=GA1.3.1616010054.1547717677; sid=373c20fd-15cf-452e-aaab-d574fa5756c0; _jzqckmp=1; _gid=GA1.3.1123773018.1550143142; cookieId=5068cade-858f-47cd-935a-cb3a511995ac; CITY_NAME=SHENZHEN; sec_tc=AQAAAA9z2QkSZAAAt9QQ9By0mazxhvEk; acw_sc__v2=5c65796b40cb4b7fcb1e52469c34bf5ad61e042a; JSESSIONID=aaaWmDq43YXm_d82KySJw; _qzja=1.1620652135.1547717679057.1550143142187.1550154096058.1550143487410.1550154096058.0.0.0.8.3; _qzjc=1; _qzjto=5.2.0; Hm_lvt_de678bd934b065f76f05705d4e7b662c=1547717676,1550143142,1550154096; Hm_lpvt_de678bd934b065f76f05705d4e7b662c=1550154096; _dc_gtm_UA-47416713-1=1; _jzqa=1.827135515274435000.1547717679.1550143142.1550154097.3; _jzqc=1; _jzqx=1.1547717679.1550154097.3.jzqsr=shenzhen%2Eqfang%2Ecom|jzqct=/sale/f2.jzqsr=shenzhen%2Eqfang%2Ecom|jzqct=/sale; _jzqb=1.1.10.1550154097.1; _qzjb=1.1550154096058.1.0.0.0',
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Pause two seconds after every request to throttle the crawl.
    time.sleep(2)
    return response.text

3、#創建csv保存函數

def csv_data(item):
    """Append *item* as a single row to ``fangwo_info.csv``.

    The file is opened in append mode in the current working directory and
    is created if it does not exist yet.

    Args:
        item: Iterable of cell values making up one CSV row.
    """
    # newline='' hands line-ending control to the csv module, so no extra
    # blank line appears between rows.
    out_file = open('fangwo_info.csv', 'a+', encoding='utf-8', newline='')
    with out_file:
        csv.writer(out_file).writerow(item)

4、# 解析頁面所需內容

def _first_text(node, path):
    """Return the first result of XPath *path* on *node*, or '' if none."""
    found = node.xpath(path)
    return found[0] if found else ''


def paser_info(url):
    """Download one listing page and append every listing to the CSV file.

    Each ``<li>`` under the ``cycleListings`` element becomes one row:
    name, layout, area, decoration, floor, orientation, price, total, and
    the remainder of the title. A field whose node is absent is written as
    an empty string, so one incomplete listing no longer aborts the crawl
    with ``IndexError``. List items without a title link are skipped.

    Args:
        url: Address of the listing page to scrape.
    """
    html = spider_page(url)
    selector = etree.HTML(html)
    if selector is None:
        # NOTE(review): lxml appears to return None when it cannot build a
        # tree (e.g. an empty body) -- guard so the crawl continues.
        return

    for house_info in selector.xpath('//*[@id="cycleListings"]/ul/li'):
        # The title is evaluated once and split at the first space into the
        # name and the remaining description.
        title = _first_text(house_info, './div[1]/p[1]/a/text()')
        if not title:
            continue
        name, _, xiangq = title.partition(' ')

        style = _first_text(house_info, './div[1]/p[2]/span[2]/text()')
        area = _first_text(house_info, './div[1]/p[2]/span[4]/text()')
        decotored = _first_text(house_info, './div[1]/p[2]/span[6]/text()')
        louceng = _first_text(house_info, './div[1]/p[2]/span[8]/text()').strip()
        chaoxiang = _first_text(house_info, './div[1]/p[2]/span[10]/text()')
        total = _first_text(house_info, './div[2]/span[1]/text()')
        price = _first_text(house_info, './div[2]/p/text()')

        info = [name, style, area, decotored, louceng, chaoxiang, price, total, xiangq]
        csv_data(info)
        # Progress output on the console.
        print("正在爬取", name)

 5、#創建主函數

def main():
    """Write the CSV header row, then scrape listing pages 1 through 9."""
    header_row = ["名稱", "戶型", "面積", "裝修", "樓層", "朝向", "售價", "總價/萬", "詳情"]
    csv_data(header_row)
    # Listing pages are addressed as .../sale/f1, .../sale/f2, and so on.
    for page_no in range(1, 10):
        paser_info('https://shenzhen.qfang.com/sale/f%s' % page_no)

6、# 調用函數運行

# Start the crawl only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

最後爬取結果如下:

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章