import requests
from lxml import etree
import pymysql
class MysqlHelper(object):
    """Small wrapper around a single pymysql connection and its cursor.

    The constructor arguments default to the values this script has always
    used, so ``MysqlHelper()`` keeps connecting to the same local database.
    """

    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py101', charset='utf8'):
        """Open the connection and create a cursor.

        Args:
            host: MySQL server host name or address.
            user: Account used to log in.
            password: Password for ``user``.
            port: TCP port of the server.
            database: Schema selected after connecting.
            charset: Connection character set.
        """
        # Pre-set both attributes so close()/__del__ stay safe even when
        # pymysql.connect() raises before they are assigned.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def mysql_do(self, sql, params=None):
        """Execute one modifying statement and commit it.

        Args:
            sql: SQL text. Use ``%s`` placeholders when ``params`` is given.
            params: Optional sequence or mapping of values bound by the
                driver, which quotes and escapes them. ``None`` runs ``sql``
                exactly as written.

        Raises:
            Exception: Whatever the driver raises is re-raised after the
                pending transaction has been rolled back.
        """
        try:
            self.cursor.execute(sql, params)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Drop the references first so a second call becomes a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    def __del__(self):
        """Release the database resources when the helper is collected."""
        self.close()
# Scrape the first three result pages of 5i5j rental listings for the
# Huilongguan area and store every listing as one row in table p111.

# One helper (one database connection) is shared by the whole run instead of
# opening a fresh connection for every page and every listing.
mysql_ = MysqlHelper()

# The request headers are the same for every page, so build them once.
headers = {
    'Cookie': 'PHPSESSID=o6recl7t6tqc8csn07dlpkr80r; domain=bj; _ga=GA1.2.1791669657.1534587616; _gid=GA1.2.1815856453.1534587616; yfx_c_g_u_id_10000001=_ck18081818201615933781534712752; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534587619; _Jo0OQK=5F9F79779A59C4F535C10E5888515E6397E0B2430EE3960D60FE4F370CE9BB0C963B04F74F8C29212A4AF076C55792D714DAEE8FCDAD06A33FD1E2ACA96F3391223C57212F12283777C840763663251ADEB840763663251ADEBE0E04E2B3769ED3416B75375CFA1B530GJ1Z1QQ==; yfx_f_l_v_t_10000001=f_t_1534587616590__r_t_1534587616590__v_t_1534594544623__r_c_0; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534594898',
    'Host': 'bj.5i5j.com',
    'Referer': 'https://bj.5i5j.com/zufang/huilongguan/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
}

# %s placeholders let the driver quote and escape the scraped text. Building
# the statement with repr()/format() broke on quotes and backslash escapes in
# the page text and allowed SQL injection.
sql = 'insert into p111(title,particulars,site,Rent_way,money)values(%s,%s,%s,%s,%s)'


def _first(nodes, default=''):
    """Return the first XPath result, or ``default`` when nothing matched."""
    return nodes[0] if nodes else default


for i in range(1, 4):
    url = 'https://bj.5i5j.com/zufang/huilongguan/n{}/'.format(i)
    # Without a timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=15)
    html_ele = etree.HTML(response.text)
    li_list = html_ele.xpath('//ul[@class="pList"]/li')
    for li_ele in li_list:
        title_links = li_ele.xpath('./div[2]/h3/a')
        if not title_links:
            # An <li> without a title link is not a listing; skip it rather
            # than crash the whole run with an IndexError.
            continue
        title = title_links[0].text
        particulars = _first(li_ele.xpath('.//div[@class="listX"]/p[1]/text()'))
        site_1 = li_ele.xpath('.//div[@class="listX"]/p[2]/a/text()')
        distance = li_ele.xpath('.//div[@class="listX"]/p[2]/text()')
        # Location = first link text followed by the first bare text node of
        # the same paragraph; either part may be absent.
        site = ''.join(site_1[:1] + distance[:1])
        Rent_way = _first(li_ele.xpath('.//div[@class="jia"]/p[2]/text()'))
        money = _first(li_ele.xpath('.//div[@class="jia"]/p[1]/strong/text()'))
        values = (title, particulars, site, Rent_way, money)
        print('*' * 50)
        print(sql, values)
        # Execute with driver-side parameter binding, then commit the row.
        mysql_.cursor.execute(sql, values)
        mysql_.db.commit()
爬蟲-我愛我家
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.