import requests
import xlwt
rowNum=0  # global count of data rows written so far (row 0 of the sheet is the header)
#### Scrape dajie.com job listings for a keyword (python, Java, ...) and write them to an xls file
# Fetch one page of dajie.com job-search results
def main(url):
    """Fetch one page of search results and return the JSON ``data`` payload.

    Args:
        url: full AJAX search URL (keyword, page, etc. already encoded).

    Returns:
        The ``"data"`` object from the endpoint's JSON response.

    Raises:
        requests.HTTPError: if the AJAX request returns an error status.
    """
    firstUrl = 'https://so.dajie.com/job/search'
    session = requests.session()
    # Prime session cookies and set a Referer header first — presumably the
    # AJAX endpoint rejects requests without them (TODO confirm against site).
    session.get(firstUrl, timeout=10)
    session.headers['referer'] = firstUrl
    # timeout prevents the scraper from hanging forever on a stalled server.
    response = session.get(url, timeout=10)
    # Fail loudly on HTTP errors instead of crashing later inside .json().
    response.raise_for_status()
    return response.json()["data"]
# Write each posting's title, salary, company and city from the JSON payload into the xls sheet
def writeXls(sht1, data, total, rowNum):
    """Append job postings to the worksheet, one row per posting.

    Args:
        sht1: worksheet-like object exposing ``write(row, col, value)``
            (an ``xlwt`` sheet in this script).
        data: list of posting dicts with keys ``jobName``, ``salary``,
            ``compName`` and ``pubCity``.
        total: overall cap on data rows across all pages (the site's
            reported result count); coerced with ``int()``.
        rowNum: number of data rows already written (header occupies row 0).

    Returns:
        The updated ``rowNum`` after writing this page's rows.
    """
    for job in data:
        if rowNum >= int(total):
            # Cap reached: stop instead of uselessly iterating the rest.
            break
        rowNum = rowNum + 1
        print(rowNum, job['jobName'], job['salary'], job['compName'], job['pubCity'])
        sht1.write(rowNum, 0, rowNum)  # sequence number column
        sht1.write(rowNum, 1, job['jobName'])
        sht1.write(rowNum, 2, job['salary'])
        sht1.write(rowNum, 3, job['compName'])
        sht1.write(rowNum, 4, job['pubCity'])
    return rowNum
#創建xls
def createXls():
    """Create and return a fresh xlwt workbook."""
    return xlwt.Workbook()
#創建xls中的sheet並命名
def createSheet(xls, keyTxt):
    """Add a sheet named after the search keyword and write its header row.

    Args:
        xls: workbook exposing ``add_sheet(name)``.
        keyTxt: search keyword, used as the sheet name.

    Returns:
        The newly created worksheet, header row already filled.
    """
    headers = ('序號', '職位', '工資', '公司', '地點')
    sheet = xls.add_sheet(keyTxt)
    for col, title in enumerate(headers):
        sheet.write(0, col, title)
    return sheet
#保存xls
def saveXls(xls, path='/Users/User/Downloads/mydata.xls'):
    """Save the workbook to disk.

    Args:
        xls: workbook exposing ``save(path)``.
        path: destination file path. Defaults to the original hard-coded
            location so existing callers keep their behavior.
    """
    xls.save(path)
if __name__ == '__main__':
    keyTxt = 'Java'

    def buildSearchUrl(keyword, page):
        """Build the AJAX search URL for *keyword* at 1-based *page*.

        Produces exactly the same URL the script previously assembled inline
        in two places.
        """
        return ('https://so.dajie.com/job/ajax/search/filter?keyword='
                + keyword
                + '&order=0&city=&recruitType=&salary=&experience=&page='
                + str(page)
                + '&positionFunction=&_CSRFToken=&ajax=1')

    # First page: learn how many pages exist and seed the spreadsheet.
    data1 = main(buildSearchUrl(keyTxt, 1))
    totalPage = data1["totalPage"]  # total result pages for keyTxt, for paging below
    xls = createXls()
    sheet = createSheet(xls, keyTxt)
    rowNum = writeXls(sheet, data1["list"], data1["total"], rowNum)
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX:', rowNum)
    # Remaining pages start at page 2.
    for i in range(totalPage - 1):
        urlNext = buildSearchUrl(keyTxt, i + 2)
        print(urlNext)
        data = main(urlNext)  # fetch this page's postings
        rowNum = writeXls(sheet, data["list"], data["total"], rowNum)  # append to the sheet
    saveXls(xls)
# Scrape dajie.com job listings for a keyword (e.g. python or Java) and save them to an xls file.