爬蟲--騰訊招聘

import requests
from lxml import etree
import pymysql

class MysqlHelper(object):
    """Small wrapper around one pymysql connection and one cursor.

    The defaults reproduce the connection settings this script has always
    used; pass keyword arguments to target another server or schema.
    """

    # Constructor: open the connection and create the cursor.
    def __init__(self, host='127.0.0.1', user='root', password='123456',
                 port=3306, database='py101', charset='utf8'):
        """Connect to MySQL and create a cursor on the new connection."""
        # Pre-set both attributes so close()/__del__ stay safe even when
        # connect() raises before they would otherwise be assigned.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, user=user, password=password,
                                  port=port, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    # Execute a modifying statement.
    def mysql_do(self, sql, args=None):
        """Execute ``sql`` and commit; roll back and re-raise on failure.

        Args:
            sql: Statement text. May contain ``%s`` placeholders when
                ``args`` is supplied.
            args: Optional parameters handed to the cursor's ``execute``
                so the driver does the quoting. ``None`` (the default)
                executes ``sql`` as-is, as before.

        Raises:
            Whatever the cursor or connection raises; the pending
            transaction is rolled back first.
        """
        try:
            self.cursor.execute(sql, args)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear the references first so a second call becomes a no-op.
        self.cursor = None
        self.db = None
        if cursor is not None:
            cursor.close()
        if db is not None:
            db.close()

    # Finalizer: release the database resources.
    def __del__(self):
        self.close()
# 1. Collect the detail-page URLs from the listing pages.
def a(pages=4):
    """Crawl the job listing pages and pass every detail URL to ``b``.

    Args:
        pages: Number of listing pages to fetch. The ``start`` query
            parameter advances by 10 per page. Defaults to 4.
    """
    list_url = 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start={}'
    headers = {
        'Cookie': 'PHPSESSID=13k2chbffttgbajagbgoivu5v1; pgv_pvi=8645490688; pgv_si=s654212096',
        'Host': 'hr.tencent.com',
        'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start=10',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    # One database connection for the whole crawl instead of a fresh one
    # per listing page.
    mysql_ = MysqlHelper()
    for i in range(pages):
        # A timeout keeps one stalled request from hanging the crawl.
        response = requests.get(list_url.format(i * 10), headers=headers, timeout=10)
        html_ele = etree.HTML(response.text)
        # Drop the first and the last <tr> of the table, as the original
        # slicing did (presumably header and pager rows -- TODO confirm).
        li_list = html_ele.xpath('//table[@class="tablelist"]/tr')[1:-1]
        # Walk the remaining rows.
        for li_ele in li_list:
            hrefs = li_ele.xpath('./td[1]/a/@href')
            if not hrefs:
                # Row without a link in its first cell: nothing to follow.
                continue
            name = hrefs[0]
            print(name)
            particulars_url = 'https://hr.tencent.com/' + name  # detail-page URL
            print(particulars_url)
            b(particulars_url, mysql_)
def _first_or_default(values, default=''):
    """Return the first item of an XPath result list, or ``default`` if empty."""
    return values[0] if values else default


def b(particulars_url,mysql):
    """Fetch one job detail page and insert its fields into table ``p222``.

    Args:
        particulars_url: Absolute URL of the detail page.
        mysql: A ``MysqlHelper``; its ``cursor`` and ``db`` attributes are
            used to run a parameterized insert and commit it.
    """
    headers = {
        'Cookie': 'PHPSESSID=13k2chbffttgbajagbgoivu5v1; pgv_pvi=8645490688; pgv_si=s654212096',
        'Host': 'hr.tencent.com',
        'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    }
    # A timeout keeps one stalled request from hanging the crawl.
    response = requests.get(particulars_url, headers=headers, timeout=10)
    print(response)
    html_ele = etree.HTML(response.text)

    table = '//table[@class="tablelist textl"]'
    position = _first_or_default(html_ele.xpath(table + '/tr[1]/td/text()'))
    if not position:
        # No detail table on this page; skip it instead of crashing.
        print('no position found, skipped:', particulars_url)
        return
    print(position)
    site = _first_or_default(html_ele.xpath(table + '/tr[2]/td[1]/text()'))
    print(site)
    category = _first_or_default(html_ele.xpath(table + '/tr[2]/td[2]/text()'))
    print(category)
    people = _first_or_default(html_ele.xpath(table + '/tr[2]/td[3]/text()'))
    print(people)

    # Keep every bullet of the third row. The old loop overwrote ``duty`` on
    # each pass, so only the last bullet was stored, and an empty list left
    # ``duty`` undefined.
    x_list = html_ele.xpath(table + '/tr[3]/td/ul/li/text()')
    duty = '\n'.join(x_list)
    print(duty)

    # Scraped text is untrusted: let the driver quote the values instead of
    # splicing repr() output into the statement.
    sql = 'insert into p222(position,site,category,people,duty)values(%s,%s,%s,%s,%s)'
    params = (position, site, category, people, duty)
    print(sql, params)
    mysql.cursor.execute(sql, params)
    mysql.db.commit()
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == '__main__':
    a()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章