#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import urllib
import urllib.request

from bs4 import BeautifulSoup
# Listing-page URL template; the %d placeholder receives the start offset
# (the script entry point fills it with page index * 10).
url = 'https://hr.tencent.com/position.php?&start=%d'
def parse(html, fp):
    """Extract the job rows of one listing page and write them to ``fp`` as CSV.

    Rows are the ``<tr>`` elements whose class is exactly ``odd`` or ``even``.
    The sample markup this parser was written against looks like::

        <tr class="even">
          <td class="l square"><a target="_blank" href="position_detail.php?id=...">title</a></td>
          <td>category</td>
          <td>count</td>
          <td>location</td>
          <td>date</td>
        </tr>

    Args:
        html: Markup of a single listing page.
        fp: Object with a ``write`` method (e.g. a file opened in text mode);
            one CSV record is written per job row.

    Returns:
        None. All output goes to ``fp``.

    Raises:
        IndexError: If a matched row has fewer than five ``<td>`` cells.
        AttributeError: If the first cell of a row contains no ``<a>`` tag.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.select('tr[class="odd"],tr[class="even"]')

    # csv.writer quotes fields containing commas, quotes or newlines, which
    # plain '%s,%s' formatting would silently turn into a corrupt record.
    # lineterminator='\n' keeps the record ending that was written before.
    writer = csv.writer(fp, lineterminator='\n')

    for row in rows:
        cells = row.select('td')
        title = cells[0].find('a').get_text()
        # ``.string`` is None when a cell does not hold exactly one text node;
        # csv.writer emits None as an empty field instead of the text "None".
        category = cells[1].string
        count = cells[2].string
        location = cells[3].string
        published = cells[4].string
        writer.writerow([title, category, count, location, published])
if __name__ == '__main__':
    # Number of listing pages to fetch; page p is requested with a start
    # offset of p * 10.
    page = int(input('請輸入查詢多少頁碼:'))

    # Append mode keeps rows from earlier runs. The context manager closes
    # the file even when a request, decode or parse step raises.
    with open('./騰訊.csv', mode='a', encoding='utf-8') as fp:
        for p in range(page):
            url_job = url % (p * 10)
            # Release each HTTP response as soon as its body has been read.
            with urllib.request.urlopen(url=url_job) as response:
                html = response.read().decode('utf-8')
            parse(html, fp)