# Scrape travel-note data from Qunar (去哪兒)
import pymysql
from lxml import etree
#!/usr/bin/env python
# encoding: utf-8
"""
@author: owen.cai
@contact: [email protected]
@file: qunarspider.py
@time: 2019/9/30 15:01
"""
import pymysql
from lxml import etree
class qunaer(object):
    """Scraper for Qunar travel-note listing pages.

    Each page is fetched and parsed with lxml; (time, title) pairs are
    inserted into the MySQL table ``test.qunar``.
    """

    def __init__(self):
        # Bug fix: these were plain local variables in the original, but
        # mysql_() reads self.mysql_info and the __main__ block reads
        # self.url -- both raised AttributeError.
        self.mysql_info = {
            'host': 'localhost',
            'port': 3306,
            'user': 'root',
            'password': '123456',
            'db': 'test',
            'charset': 'utf8',
            'createdbsql': '''create table if not exists test.qunar(time varchar (50),title varchar (50))''',
        }
        print(self.mysql_info['host'])
        # Bug fix: the __main__ block calls self.url.format(page), so the
        # template must contain the {0} page placeholder (the placeholder-free
        # URL always returned page 1).
        self.url = 'http://travel.qunar.com/travelbook/list.htm?page={0}&order=hot_heat'

    def mysql_(self, sql, params=None):
        """Open a connection, execute one SQL statement, and commit.

        :param sql: SQL text; may contain ``%s`` placeholders.
        :param params: optional sequence bound to the placeholders.
            Parameterized execution lets the driver do quoting/escaping,
            which prevents SQL injection and encoding corruption.
        """
        db = pymysql.Connect(
            host=self.mysql_info['host'],
            port=self.mysql_info['port'],
            user=self.mysql_info['user'],
            password=self.mysql_info['password'],
            db=self.mysql_info['db'],
            charset=self.mysql_info['charset'])
        try:
            # Bug fix: the original leaked both the cursor and the
            # connection on every call (one connection per insert, never
            # closed). The cursor context manager closes the cursor; the
            # finally clause closes the connection even if execute raises.
            with db.cursor() as cursor:
                cursor.execute(sql, params)
            db.commit()
        finally:
            db.close()

    def parse(self, url):
        """Fetch one listing page and insert its (time, title) rows.

        :param url: fully-formatted listing-page URL.
        """
        # NOTE(review): etree.parse on an http URL relies on libxml2's own
        # fetching -- presumably intentional here; no requests dependency.
        response = etree.parse(url, etree.HTMLParser())
        times = response.xpath("//span[@class='days']/text()")
        titles = response.xpath("//h2/a/text()")
        for time, title in zip(times, titles):
            print(time, title)
            # Titles the author explicitly blacklisted (they broke the
            # original string-formatted insert); kept for compatibility.
            if title in ("@王鋆鋆[OCT主題樂園3日遊]It's Show Time五彩繽紛週末樂悠遊","拾童心去珠海長隆海洋王國-邂逅一場神奇的海洋奇緣VS看一場馬戲新巨創《龍秀?》","俯天津之眼?,童年動物園?,民國特色館?遊海洋公園?天津親子3日遊?"):
                continue
            # Bug fix: the original built the SQL with str.format and
            # str(title).encode('utf-8'), which embedded a b'...' repr into
            # the statement and was injection-prone. Parameterized instead.
            self.mysql_('insert into test.qunar values(%s,%s)', (time, title))
if __name__ == "__main__":
    # Bug fix: the original rebound the class name (qunaer = qunaer()),
    # shadowing the class for the rest of the module.
    spider = qunaer()
    for page in range(1, 201):
        print("第{i}頁開始".format(i=page))
        try:
            # One unreachable/bad page should not abort the whole
            # 200-page crawl (the author's commented-out code shows
            # try/except was intended).
            spider.parse(spider.url.format(page))
        except Exception as except_:
            print(except_)
# TODO: next step — rewrite the crawl loop with multithreading (多線程編程)
# Scraped data is stored into MySQL