python etree爬取去哪兒數據

爬取去哪兒數據

import pymysql
from lxml import etree

#!/usr/bin/env python
# encoding: utf-8
"""
@author: owen.cai
@contact: [email protected]
@file: qunarspider.py
@time: 2019/9/30 15:01
"""
import pymysql
from lxml import etree
class qunaer(object):
    """Scrape travel-note listings from travel.qunar.com into MySQL.

    Each listing page yields (days, title) pairs that are inserted into the
    ``test.qunar`` table through a short-lived connection per statement.
    """

    # Listing-page URL template; ``{0}`` is replaced by the page number.
    url = 'http://travel.qunar.com/travelbook/list.htm?page={0}&order=hot_heat'

    # Titles that are never stored.
    # NOTE(review): presumably these were excluded because they contain
    # characters the 'utf8' connection charset could not store -- confirm.
    skip_titles = (
        "@王鋆鋆[OCT主題樂園3日遊]It's Show Time五彩繽紛週末樂悠遊",
        "拾童心去珠海長隆海洋王國-邂逅一場神奇的海洋奇緣VS看一場馬戲新巨創《龍秀?》",
        "俯天津之眼?,童年動物園?,民國特色館?遊海洋公園?天津親子3日遊?",
    )

    # Parameterised insert; values are bound by the driver, never formatted in.
    insert_sql = 'insert into test.qunar values(%s, %s)'

    def __init__(self):
        """Store the MySQL connection settings on the instance."""
        # Must live on ``self``: ``mysql_`` reads ``self.mysql_info``.
        self.mysql_info = {
            'host': 'localhost',
            'port': 3306,
            'user': 'root',
            'password': '123456',
            'db': 'test',
            'charset': 'utf8',
            'createdbsql': '''create table if not exists  test.qunar(time varchar (50),title varchar (50))''',
        }
        print(self.mysql_info['host'])

    def mysql_(self, sql, params=None):
        """Execute one SQL statement and commit it.

        Args:
            sql: The statement to run. Use ``%s`` placeholders when
                ``params`` is given.
            params: Optional sequence (or mapping) of values bound to the
                placeholders by the driver. ``None`` runs ``sql`` verbatim.

        Raises:
            pymysql.MySQLError: If connecting or executing fails.
        """
        db = pymysql.Connect(
            host=self.mysql_info['host'],
            port=self.mysql_info['port'],
            user=self.mysql_info['user'],
            password=self.mysql_info['password'],
            db=self.mysql_info['db'],
            charset=self.mysql_info['charset'])
        try:
            with db.cursor() as cursor:
                cursor.execute(sql, params)
            db.commit()
        finally:
            # Always release the connection, even when execute() raises.
            db.close()

    def parse(self, url):
        """Parse one listing page and store its (days, title) rows.

        Args:
            url: Address of the listing page to parse as HTML.
        """
        response = etree.parse(url, etree.HTMLParser())
        times = response.xpath("//span[@class='days']/text()")
        titles = response.xpath("//h2/a/text()")

        # NOTE(review): zip() pairs the two lists positionally and stops at
        # the shorter one; an entry missing either field would misalign rows.
        for days, title in zip(times, titles):
            days = str(days)
            title = str(title)
            print('''insert into test.qunar values("{time}","{title}")'''.format(time=days, title=title))
            if title in self.skip_titles:
                continue
            self.mysql_(self.insert_sql, (days, title))
if __name__=="__main__":
    # Bind the instance to its own name so the class ``qunaer`` stays usable.
    spider = qunaer()
    # Walk listing pages 1 through 200, formatting the page number into the URL.
    for i in range(1, 201):
        print("第{i}頁開始".format(i=i))
        spider.parse(spider.url.format(i))

    # try:
    #     mysql_('select * from  mtime limit 10')
    #     mysql_(mysql_info['createdbsql'])
    # except Exception as except_:
    #     print(except_)


# Next step: make the crawler multithreaded.

爬取的數據存入mysql 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章