騰訊招聘爬蟲

#!/usr/bin/env python
# -*- coding: utf-8 -*-


import csv
import urllib
import urllib.request

from bs4 import BeautifulSoup

# URL template for one page of the job listing; the %d placeholder is filled
# in with the integer value of the `start` query parameter.
url = 'https://hr.tencent.com/position.php?&start=%d'


def parse(html, fp):
    """Extract the job rows from one listing page and append them to ``fp``.

    Rows are selected with ``tr[class="odd"]`` / ``tr[class="even"]``.  Each
    row is expected to have this shape::

        <tr class="even">
            <td class="l square"><a href="position_detail.php?id=...">job title</a></td>
            <td>job type</td>
            <td>number</td>
            <td>location</td>
            <td>date</td>
        </tr>

    Args:
        html: Markup of one listing page.
        fp: Writable text file object; one CSV line (five fields, terminated
            by a newline) is written per job row.

    Returns:
        None.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.select('tr[class="odd"],tr[class="even"]')

    # csv.writer quotes any field that contains a comma, a quote or a line
    # break; plain '%s,%s,...' formatting would silently produce a corrupt
    # row for such values.  lineterminator='\n' keeps the line ending that
    # the file has always used.
    writer = csv.writer(fp, lineterminator='\n')

    for row in rows:
        cells = row.select('td')
        if len(cells) < 5:
            # Not a complete job row; skip it instead of raising IndexError
            # and losing the rest of the page.
            continue

        # The title normally sits inside a link; fall back to the cell's own
        # text when the link is missing.
        link = cells[0].find('a')
        title = (link if link is not None else cells[0]).get_text()

        # get_text() yields '' for an empty cell and the combined text for a
        # cell with nested tags, whereas .string yields None in both cases
        # (which used to end up in the file as the literal text "None").
        category, count, location, date = (
            cell.get_text() for cell in cells[1:5]
        )

        # Save the data.
        writer.writerow([title, category, count, location, date])

if __name__ == '__main__':
    # Number of listing pages to fetch, entered interactively.
    page = int(input('請輸入查詢多少頁碼:'))

    # The with-blocks guarantee that the CSV file and every HTTP response are
    # closed even when a request or the parsing step raises.
    with open('./騰訊.csv', mode='a', encoding='utf-8') as fp:
        for p in range(page):
            # The start offset advances by 10 per page (presumably the
            # listing shows 10 rows per page).
            url_job = url % (p * 10)

            with urllib.request.urlopen(url=url_job) as response:
                html = response.read().decode('utf-8')

            # parse() writes the rows to fp itself and returns nothing.
            parse(html, fp)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章