爬蟲-我愛我家

import requests
from lxml import etree
import pymysql

class MysqlHelper(object):
    """Thin wrapper around a single pymysql connection and its cursor.

    The connection settings (host, user, password, port, database and
    charset) are hard-coded in ``__init__``.
    """

    # Constructor: open the connection and create one cursor on it.
    def __init__(self):
        self.db = pymysql.connect(host='127.0.0.1',user='root',password='123456',port=3306,database='py101',charset='utf8')
        self.cursor = self.db.cursor()

    # Execute a modifying statement.
    def mysql_do(self, sql, params=None):
        """Execute ``sql`` and commit the transaction.

        Args:
            sql: The statement text. It may contain ``%s`` placeholders when
                ``params`` is supplied.
            params: Optional values handed to ``cursor.execute`` together
                with ``sql`` so that the driver performs the quoting. The
                default ``None`` keeps the original one-argument behaviour.

        Raises:
            Whatever ``cursor.execute`` or ``commit`` raises; the pending
            transaction is rolled back before the exception propagates.
        """
        try:
            self.cursor.execute(sql, params)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    # Finalizer: release the cursor and the connection.
    def __del__(self):
        # __init__ may have failed before either attribute was assigned, so
        # only close what actually exists (cursor first, then connection).
        for resource in (getattr(self, 'cursor', None), getattr(self, 'db', None)):
            if resource is not None:
                resource.close()

# One shared helper (and therefore one database connection) for the whole
# crawl, instead of opening a new connection for every page and every listing.
mysql_ = MysqlHelper()

# The request headers are the same for every page, so build them once.
headers = {
    'Cookie': 'PHPSESSID=o6recl7t6tqc8csn07dlpkr80r; domain=bj; _ga=GA1.2.1791669657.1534587616; _gid=GA1.2.1815856453.1534587616; yfx_c_g_u_id_10000001=_ck18081818201615933781534712752; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534587619; _Jo0OQK=5F9F79779A59C4F535C10E5888515E6397E0B2430EE3960D60FE4F370CE9BB0C963B04F74F8C29212A4AF076C55792D714DAEE8FCDAD06A33FD1E2ACA96F3391223C57212F12283777C840763663251ADEB840763663251ADEBE0E04E2B3769ED3416B75375CFA1B530GJ1Z1QQ==; yfx_f_l_v_t_10000001=f_t_1534587616590__r_t_1534587616590__v_t_1534594544623__r_c_0; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534594898',
    'Host': 'bj.5i5j.com',
    'Referer': 'https://bj.5i5j.com/zufang/huilongguan/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
}

# Parameterized insert: the driver quotes and escapes every value, so listing
# text containing quotes or backslashes can no longer break the statement.
insert_sql = 'insert into p111(title,particulars,site,Rent_way,money)values(%s,%s,%s,%s,%s)'


def _first(nodes):
    """Return the first XPath result, or None when nothing matched."""
    return nodes[0] if nodes else None


# Pages n1 .. n3 of the listing.
for i in range(1,4):
    url = 'https://bj.5i5j.com/zufang/huilongguan/n{}/'.format(i)
    # A timeout keeps an unresponsive server from blocking the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    html_ele = etree.HTML(response.text)

    li_list = html_ele.xpath('//ul[@class="pList"]/li')
    for li_ele in li_list:
        title_ele = _first(li_ele.xpath('./div[2]/h3/a'))
        title = title_ele.text if title_ele is not None else None
        particulars = _first(li_ele.xpath('.//div[@class="listX"]/p[1]/text()'))

        # The location is the link text, followed by the loose text of the
        # same paragraph when that paragraph has any.
        site_1 = li_ele.xpath('.//div[@class="listX"]/p[2]/a/text()')
        distance = li_ele.xpath('.//div[@class="listX"]/p[2]/text()')
        site_parts = (site_1 + distance)[:2] if distance else site_1[:1]
        site = ''.join(site_parts)

        Rent_way = _first(li_ele.xpath('.//div[@class="jia"]/p[2]/text()'))
        money = _first(li_ele.xpath('.//div[@class="jia"]/p[1]/strong/text()'))

        fields = (title, particulars, site, Rent_way, money)
        if not site or any(value is None for value in fields):
            # A <li> without the expected markup is skipped rather than
            # aborting the whole crawl with an IndexError.
            print('skipping incomplete listing on page {}'.format(i))
            continue

        # Hand the driver plain str objects instead of lxml's str subclass.
        row = tuple(str(value) for value in fields)

        print('*'*50)
        # mogrify() renders the statement exactly as execute() will send it.
        print(mysql_.cursor.mogrify(insert_sql, row))

        # Execute through the helper's cursor so the values travel separately
        # from the statement text and are escaped by the driver.
        mysql_.cursor.execute(insert_sql, row)
        mysql_.db.commit()












發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章