Python爬蟲:爬取我愛我家網二手房源信息

# xpath爬取
# 爬取小區名稱、戶型、地區、售價、總價

1、導入模塊

import requests
import csv
from lxml import etree

2、創建類

# Scraper class for the 5i5j (我愛我家) second-hand housing listings.
class Woaiwojia:
    """Fetch listing pages, extract fields via XPath, and append them to a CSV.

    The methods visible in this file are ``get_page`` (HTTP download),
    ``parse_page`` (XPath extraction of each listing) and ``csv_info``
    (appending one row to ``info.csv``).
    """

3、類函數定義編寫

# 創建頁面獲取函數
    def get_page(self, url, timeout=10):
        """Download ``url`` with browser-like headers and return its body as text.

        Args:
            url: Address of the page to download; also stored on ``self.url``.
            timeout: Seconds to wait for the server before giving up
                (forwarded to ``requests.get``).

        Returns:
            The decoded response body (``response.text``).

        Raises:
            requests.exceptions.Timeout: The server did not answer within
                ``timeout`` seconds.
            requests.exceptions.HTTPError: The server answered with a 4xx/5xx
                status code.
        """
        self.url = url
        # NOTE(review): the Cookie below is a captured browser session
        # (includes a PHPSESSID) and has presumably expired -- confirm whether
        # the site still requires it before relying on it.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/'
            '537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36',
            'Cookie': 'yfx_c_g_u_id_10000001=_ck19022116084813839206365574151; _ga=GA1.2.172982220.1550736528; ershoufang_BROWSES=41857749%2C42331571; _gid=GA1.2.1753629442.1551407389; _Jo0OQK=3C360A430707C39DC66841396A856BB9F1CDAFCCCBE5DD3EF55A648ADA5CBA77AEE43F896CA59E44D089FA0454846BD97D221FB8F73A12B808A197E69B45975E9E5C57212F12283777C840763663251ADEB840763663251ADEB8B9BB377FBE15866A593CD374DB85252GJ1Z1dg==; PHPSESSID=plv3sri11n4ivdfekjgjrl0qme; domain=bj; yfx_f_l_v_t_10000001=f_t_1550736528365__r_t_1551407385571__v_t_1551423063129__r_c_2; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1550824470,1551407393,1551407583,1551423064; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1551423064'
        }
        # Without an explicit timeout requests waits indefinitely on a stalled
        # connection, which would hang the whole crawl.
        response = requests.get(self.url, headers=headers, timeout=timeout)
        # Fail loudly on error pages instead of handing their HTML to the
        # parser, where they would silently produce zero rows.
        response.raise_for_status()
        return response.text
# 創建解析函數
    def parse_page(self, url):
        """Download one listing page and append each listing's fields to the CSV.

        For every ``<li>`` under the result list, five fields are read with
        relative XPath queries and passed to ``csv_info`` as one row, in the
        order: name, layout, area, price, total price.  A field whose node is
        absent is written as an empty string; an ``<li>`` that yields no field
        at all is skipped.

        Args:
            url: Address of the listing page; also stored on ``self.url``.
        """
        self.url = url
        selector = etree.HTML(self.get_page(self.url))
        items = selector.xpath('/html/body/div[4]/div[1]/div[2]/ul/li')
        # Relative XPath of each column, in output order.
        field_paths = (
            './div[2]/h3/a/text()',                    # name
            './div[2]/div[1]/p[1]/text()',             # layout ("style")
            './div[2]/div[1]/p[2]/a/text()',           # area ("place")
            './div[2]/div[1]/div/p[2]/text()',         # price
            './div[2]/div[1]/div/p[1]/strong/text()',  # total price
        )
        for item in items:
            info = []
            for path in field_paths:
                found = item.xpath(path)
                # xpath() returns a list; indexing [0] on an empty result
                # would raise IndexError and abort the whole page.
                info.append(found[0] if found else '')
            if not any(info):
                # Nothing matched: this <li> does not have the expected
                # listing structure, so there is no row to write.
                continue
            self.csv_info(info)
# 創建保存函數
    def csv_info(self, content):
        """Append ``content`` as a single row to ``info.csv``.

        The file is opened relative to the current working directory in
        append mode with UTF-8 encoding, so existing rows are kept.

        Args:
            content: Iterable of cell values forming one CSV row.
        """
        # newline='' lets the csv module control line endings itself.
        with open('info.csv', mode='a', newline='', encoding='utf-8') as out:
            csv.writer(out).writerow(content)
# 調用運行
if __name__ == '__main__':
    # Imported locally: only this entry point needs it.
    import os

    k = Woaiwojia()
    title = ['名稱', '戶型', '地區', '售價', '總價/萬']
    # csv_info always appends, so write the header only when info.csv is new
    # or empty; otherwise every re-run would insert another header row in the
    # middle of previously collected data.
    if not os.path.exists('info.csv') or os.path.getsize('info.csv') == 0:
        k.csv_info(title)
    # Crawl listing pages 1 and 2 (range's upper bound is exclusive).
    for x in range(1, 3):
        url = 'https://bj.5i5j.com/ershoufang/n%s/' % x
        k.parse_page(url)

最後運行結果如下:

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章