爬取大街網keyword爲python或Java等招聘信息,並寫入到xls表格中保存

import requests
import xlwt
rowNum=0
####爬取大街網keyword爲python或Java等招聘信息,並寫入到xls表格中


def main(url, timeout=10):
    """Fetch one Dajie job-search result page and return its ``data`` payload.

    Args:
        url: Full URL of the AJAX search endpoint to query.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the search request returns an error status.
        requests.Timeout: If a request does not complete within ``timeout``.
    """
    firstUrl = 'https://so.dajie.com/job/search'
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        # Hit the search landing page first and reuse it as the referer;
        # presumably the AJAX endpoint needs the cookies set here (unverified).
        session.get(firstUrl, timeout=timeout)
        session.headers['referer'] = firstUrl
        response = session.get(url, timeout=timeout)
        # Fail with a clear HTTP error instead of an opaque JSON decode error.
        response.raise_for_status()
        return response.json()["data"]

def writeXls(sht1, data, total, rowNum):
    """Write job name, salary, company and city of each job into the sheet.

    Example call: ``writeXls(sht1, dataT["list"], dataT["total"], rowNum)``.

    Args:
        sht1: Sheet object exposing ``write(row, col, value)``.
        data: Iterable of job dicts with the keys ``jobName``, ``salary``,
            ``compName`` and ``pubCity``.
        total: Upper bound on the row counter (anything ``int()`` accepts);
            jobs are ignored once ``rowNum`` has reached it.
        rowNum: Index of the last row already written.

    Returns:
        The index of the last row written after processing ``data``.
    """
    columns = ('jobName', 'salary', 'compName', 'pubCity')
    for job in data:
        # Ignore entries once the advertised total has been reached.
        if rowNum >= int(total):
            continue
        rowNum += 1
        values = [job[key] for key in columns]
        print(rowNum, *values)
        # Column 0 carries the running sequence number.
        sht1.write(rowNum, 0, rowNum)
        for col, value in enumerate(values, start=1):
            sht1.write(rowNum, col, value)
    return rowNum

def createXls():
    """Create and return a new, empty xlwt workbook."""
    return xlwt.Workbook()

def createSheet(xls, keyTxt):
    """Add a sheet named ``keyTxt`` to the workbook and write its header row.

    Args:
        xls: Workbook object exposing ``add_sheet(name)``.
        keyTxt: Name given to the new sheet.

    Returns:
        The newly created sheet, with the five header cells filled in row 0.
    """
    headers = ('序號', '職位', '工資', '公司', '地點')
    sheet = xls.add_sheet(keyTxt)
    for column, title in enumerate(headers):
        sheet.write(0, column, title)
    return sheet

def saveXls(xls, path='/Users/User/Downloads/mydata.xls'):
    """Save the workbook to disk.

    Args:
        xls: Workbook object exposing ``save(path)``.
        path: Destination file path. Defaults to the location this script
            originally hard-coded, so existing callers are unaffected.
    """
    xls.save(path)


def _buildSearchUrl(keyTxt, page):
    """Return the Dajie AJAX search URL for ``keyTxt`` at result page ``page``.

    The keyword is URL-encoded, so search terms containing characters such as
    ``+``, ``#``, spaces or non-ASCII text produce a valid query string.
    """
    from urllib.parse import urlencode

    # Parameter order mirrors the URL the script originally assembled by hand.
    query = [
        ('keyword', keyTxt),
        ('order', 0),
        ('city', ''),
        ('recruitType', ''),
        ('salary', ''),
        ('experience', ''),
        ('page', page),
        ('positionFunction', ''),
        ('_CSRFToken', ''),
        ('ajax', 1),
    ]
    return 'https://so.dajie.com/job/ajax/search/filter?' + urlencode(query)


if __name__ == '__main__':
    keyTxt = 'Java'

    # The first page also reports how many pages exist for this keyword,
    # which drives the loop over the remaining pages below.
    data1 = main(_buildSearchUrl(keyTxt, 1))
    totalPage = int(data1["totalPage"])

    xls = createXls()
    sheet = createSheet(xls, keyTxt)
    # Row 0 holds the header, so the data row counter starts from 0.
    rowNum = writeXls(sheet, data1["list"], data1["total"], 0)
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX:' , rowNum)

    # Pages 2..totalPage; page 1 has already been written above.
    for page in range(2, totalPage + 1):
        urlNext = _buildSearchUrl(keyTxt, page)
        print(urlNext)
        data = main(urlNext)  # scrape this page
        # Append this page's jobs below the rows written so far.
        rowNum = writeXls(sheet, data["list"], data["total"], rowNum)

    saveXls(xls)



發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章