GIS就業分析

簡介

  • 本文針對北京、上海、深圳、廣州、杭州、武漢這六個城市進行 GIS 就業分析,分析內容包括:GIS 行業需求量、GIS 開發人員需求佔比、GIS 相關技能,以及薪資情況。
  • 本文數據來源:智聯招聘(zhaopin.com)

數據下載

爬蟲構思方案相對簡單,直接訪問一個接口即可

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __file__: download_zhilian

import requests
import json

import pymongo

# Module-level MongoDB client; it is created as soon as this script is
# imported or run.
client = pymongo.MongoClient(host='localhost', port=27017)

# Scraped postings go into the `gis_zhaoping` collection of the `lvyou` database.
db = client.lvyou
collection = db.gis_zhaoping



# Search endpoint template. The four `{}` placeholders are filled, in order,
# with the `start` offset, `cityId`, `rt` and `x-zp-page-request-id`.
# The keyword (`kw=gis`) and the page size (`pageSize=90`) are fixed here.
url = "https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize=90&cityId={}&salary=0,0&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=gis&kt=3&=0&rt={}&_v=0.53968509&x-zp-page-request-id={}"

# HTTP headers sent with every search request.
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
}


def insert_mongo(all):
    """Store every posting from a list of result pages in MongoDB.

    :param all: iterable of pages, each page being an iterable of posting
        documents
    :return: None
    """
    # Flatten the pages lazily and write the postings one at a time, in order.
    postings = (posting for page in all for posting in page)
    for posting in postings:
        collection.insert_one(posting)


def get_zhiwei(cityid):
    """Download the GIS job postings of one city, page by page.

    Requests up to 20 result pages and stops at the first page whose
    ``data.results`` is empty.

    :param cityid: city id substituted into the ``cityId`` query parameter
    :return: list of pages; each page is the non-empty ``data.results`` list
        returned by the API
    :raises requests.RequestException: if a request times out or the server
        answers with an HTTP error status
    """
    page_size = 90  # must stay in sync with pageSize in the url template
    max_pages = 20
    all_data = []
    for page in range(max_pages):
        req_url = url.format(page * page_size, cityid, 'dc52638bdafe4e749e72a26c3aac727d',
                             '6f3e278b4d344939b526c1f21a74b26e-1557706634617-533000')
        # Without a timeout a stalled connection would hang the crawl forever.
        req = requests.get(url=req_url, headers=headers, timeout=30)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        req.raise_for_status()
        all_json = json.loads(req.text)

        result = all_json['data']['results']
        if not result:
            # An empty page means there is nothing further to fetch.
            print(page)
            break
        all_data.append(result)
    # all_data holds pages, so count the postings inside them rather than
    # the number of pages.
    print("職位數量", sum(len(page_results) for page_results in all_data))
    return all_data




def spider(cityid):
    """Crawl one city: download its postings, then persist them.

    :param cityid: city id passed on to ``get_zhiwei``
    :return: None
    """
    insert_mongo(get_zhiwei(cityid))
    print("完成" , cityid)

def run():
    """Crawl the postings of every city covered by the analysis."""
    # (city name, city id) pairs, visited in this order.
    cities = (
        ("北京", 530),
        ("上海", 538),
        ("深圳", 765),
        ("廣州", 763),
        ("杭州", 653),
        ("武漢", 736),
    )

    for _name, code in cities:
        spider(code)



# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    run()

  • 下載後,其中一條數據如下:
{"_id":"5cd8b7acd58cb320088356c5","applyType":"1","refreshMulscore":"0.0","endDate":"2019-07-02 00:00:00","showLicence":0,"g_weight":0,"extractSkillTag":["測繪","地理信息系統","團隊合作精神","gis(地理信息系統)","遙感","工程測量","團隊合作","jquery","javascript","gis","信息管理與信息系統","溝通能力","團隊領導","學習能力","地理信息","信息系統","數據庫"],"welfare":["五險一金","年終分紅","帶薪年假","定期體檢"],"salary":"15K-20K","SOU_POSITION_ID":"CC120525764J00155066913","score":609.2526,"number":"CC120525764J00155066913","recruitCount":1,"workingExp":{"code":"305","name":"3-5年"},"companyScore":0,"tagIntHighend":0,"jobName":"gis開發工程師","manualScore":"0.0","eduLevel":{"code":"4","name":"本科"},"rootOrgId":12052576,"recentAndTotal":{"applyTotal":"4","exposureTotal":"460","clickTotal":"4","exposureRecent":"0","clickRecent":"0","applyRecent":"0"},"tags":[],"businessArea":"馬連窪","positionLabel":"{\"qualifications\":null,\"chatWindow\":20,\"jobLight\":[\"五險一金\",\"年終分紅\",\"帶薪年假\",\"定期體檢\"],\"role\":null,\"companyTag\":null,\"level\":null,\"refreshLevel\":1,\"skill\":null}","jobTag":{"searchTag":"五險一金,年終分紅,帶薪年假,定期體檢"},"updateDate":"2019-05-13 07:49:28","g_sort":"sort-ps-score-pqks-ranking","city":{"display":"北京","items":[{"code":"530","name":"北京"}]},"saleType":0,"positionURL":"https://jobs.zhaopin.com/CC120525764J00155066913.htm","industry":"160400,160000,300300,210500","extractNormalizedTag":["信息管理與信息系統","溝通能力","gis(地理信息系統)","團隊領導","學習能力","工程測量","javascript"],"duplicated":0,"geo":{"lon":"116.280067","lat":"40.031482"},"vipLevel":1002,"company":{"number":"CZ120525760","size":{"code":"2","name":"20-99人"},"name":"北京天匯創業軟件有限公司","type":{"code":"4","name":"合資"},"url":"https://company.zhaopin.com/CZ120525760.htm"},"seo":"0","jobType":{"display":"軟件/互聯網開發/系統集成,軟件工程師","items":[{"code":"160000","name":"軟件/互聯網開發/系統集成"},{"code":"45","name":"軟件工程師"}]},"g_query":"query-ps-score-3","resumeCount":4,"createDate":"2019-05-10 
13:34:24","companyLogo":"","futureJob":false,"emplType":"全職","g_source":"source-solr-position","staff":{"id":100782870},"SOU_POSITION_SOURCE_TYPE":1,"expandCount":0,"feedbackRation":0,"staffId":100782870,"selected":false,"applied":false,"collected":false,"isShow":false,"timeState":"最新","rate":""}

數據分析

  • 分析部分利用 MongoDB 的查詢與聚合、pandas 的分組求和,以及 matplotlib(plt)的可視化
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __file__: attr
import pymongo

import matplotlib.pyplot as plt
import matplotlib
import pandas as pd

# Font used for plot titles and legends. The path is a raw string so that the
# backslashes are not parsed as (invalid) escape sequences such as "\W".
zhfont1 = matplotlib.font_manager.FontProperties(fname=r'C:\Windows\Fonts\STFANGSO.TTF')


def get_collection():
    """Create a MongoDB client and return the job-posting collection.

    :return: the ``gis_zhaoping`` collection of the ``lvyou`` database
    """
    return pymongo.MongoClient(host='localhost', port=27017).lvyou.gis_zhaoping


def city_quantity_demanded():
    """
    Plot the number of postings per city as a bar chart.

    Postings are grouped by ``city.display`` in MongoDB. The part of each
    display name before the first "-" is used as the city, and counts that
    fall under the same city are summed. The chart is saved as
    "gis需求量.png" and then shown.

    :return: None
    """
    collection = get_collection()
    pipeline = {'_id': "$city.display", 'count': {'$sum': 1}}
    ret = collection.aggregate(
        [
            {'$group': pipeline},
        ]
    )
    cities = []
    counts = []
    for row in ret:
        # Keep only the text before the first "-" so such entries roll up
        # under one city name.
        cities.append(row['_id'].split("-")[0])
        counts.append(row['count'])

    df = pd.DataFrame({"city": cities, "count": counts})
    per_city = df.groupby("city").sum().to_dict()['count']
    names = list(per_city.keys())
    totals = list(per_city.values())

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    plt.bar(range(len(totals)), totals, tick_label=names, label="需求量")

    # Write each bar's value just above it.
    for xx, yy in enumerate(totals):
        plt.text(xx, yy + 0.1, str(yy), ha='center')

    plt.title("gis需求量", fontproperties=zhfont1)
    plt.legend(prop=zhfont1)
    plt.savefig("gis需求量.png")
    plt.show()
    # Release the figure so the next chart does not draw on top of this one
    # when show() returns immediately (non-interactive backends).
    plt.close()


def is_dev():
    """
    Plot the share of developer postings among all postings as a pie chart.

    A posting counts as a developer posting when its ``jobName`` matches the
    regex ".*開發.*". The chart is saved as "開發者需求量.png" and then shown.

    :return: None
    """
    collection = get_collection()
    db_count = collection.count_documents({})
    dev_count = collection.count_documents({'jobName': {'$regex': ".*開發.*"}})

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Two slices: developer postings and everything else.
    plt.pie([dev_count, db_count - dev_count], labels=["開發者需求", "非開發者需求"], autopct='%1.2f%%')

    plt.title("開發者需求量", fontproperties=zhfont1)
    plt.legend(prop=zhfont1)
    plt.savefig("開發者需求量.png")
    plt.show()
    # Release the figure so the next chart does not draw on top of this one
    # when show() returns immediately (non-interactive backends).
    plt.close()


def gis_skill_sort():
    """
    Plot the 20 most frequently requested skills as a bar chart.

    Skills are read from each posting's ``extractSkillTag`` and
    ``extractNormalizedTag`` lists; a missing or empty field contributes
    nothing. Each distinct tag is counted once per posting. The chart is
    saved as "gis技能排名.png" and then shown.

    :return: None
    """
    collection = get_collection()
    tags = []
    for posting in collection.find():
        skill_tags = posting.get('extractSkillTag') or []
        normalized_tags = posting.get('extractNormalizedTag') or []
        # A tag listed in both fields is still a single mention by this
        # posting, so merge the two lists before counting.
        tags.extend(set(skill_tags) | set(normalized_tags))
    tag_counts = list_count(tags)

    df = pd.DataFrame({"profession": list(tag_counts.keys()), "count": list(tag_counts.values())})
    ranking = df.groupby("profession").sum()
    ranking = ranking.sort_values(by=['count'], ascending=False).head(20)
    skill_data = ranking.to_dict()['count']
    print(ranking)
    print(skill_data)
    names = list(skill_data.keys())
    totals = list(skill_data.values())

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.bar(range(len(totals)), totals, tick_label=names, label="技能排名")
    # Write each bar's value just above it.
    for xx, yy in enumerate(totals):
        plt.text(xx, yy + 0.1, str(yy), ha='center')

    # Enlarge the figure and rotate the tick labels.
    fig = plt.gcf()
    fig.set_size_inches(26, 24)
    plt.xticks(rotation=270)
    plt.title("技能排名", fontproperties=zhfont1)
    plt.legend(prop=zhfont1)
    plt.savefig("gis技能排名.png")
    plt.show()
    # Release the figure (and its enlarged size) so the next chart starts
    # from a fresh one when show() returns immediately.
    plt.close()


def list_count(result):
    """
    Count how many times each item occurs in ``result``.

    :param result: iterable of hashable items
    :return: plain dict mapping each distinct item to its number of
        occurrences, in first-seen order
    """
    from collections import Counter

    # iter() makes Counter tally the items themselves even if a mapping is
    # passed in (a bare mapping would be read as ready-made counts).
    return dict(Counter(iter(result)))


def salary_bar(data, title):
    """
    Draw a labelled bar chart from a mapping, save it and show it.

    :param data: mapping of bar label -> bar height
    :param title: chart title; also used as the legend label and as the
        stem of the output file name ("<title>.png")
    :return: None
    """
    labels = list(data.keys())
    heights = list(data.values())

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.bar(range(len(heights)), heights, tick_label=labels, label=title)
    # Write each bar's value just above it.
    for xx, yy in enumerate(heights):
        plt.text(xx, yy + 0.1, str(yy), ha='center')

    plt.xticks(rotation=270)
    plt.title(title, fontproperties=zhfont1)
    plt.legend(prop=zhfont1)
    plt.savefig("{}.png".format(title))
    plt.show()
    # Release the figure so a second call does not draw on top of this chart
    # when show() returns immediately (non-interactive backends).
    plt.close()


def _top_salary_counts(salaries, limit=20):
    """Return the ``limit`` most frequent salary strings mapped to their counts."""
    counts = list_count(salaries)
    df = pd.DataFrame({"profession": list(counts.keys()), "count": list(counts.values())})
    top = df.groupby("profession").sum()
    top = top.sort_values(by=['count'], ascending=False).head(limit)
    return top.to_dict()['count']


def salary_plot():
    """
    Plot the most common salary values for developer and non-developer postings.

    A posting is treated as a developer posting when its ``jobName`` contains
    "開發". For each group the 20 most frequent ``salary`` strings are drawn
    with ``salary_bar``.

    :return: None
    """
    collection = get_collection()
    dev_salary = []
    not_dev_salary = []
    for posting in collection.find():
        salary = posting['salary']
        if "開發" in posting['jobName']:
            dev_salary.append(salary)
        else:
            not_dev_salary.append(salary)

    # Both groups go through the same count -> sort -> top-20 pipeline.
    salary_bar(_top_salary_counts(dev_salary), "開發者薪資")
    salary_bar(_top_salary_counts(not_dev_salary), "非開發者薪資")

# Run every analysis step, in order, when this file is executed as a script.
if __name__ == '__main__':
    for analysis in (city_quantity_demanded, is_dev, gis_skill_sort, salary_plot):
        analysis()

  • gis需求量
    在這裏插入圖片描述
  • gis技能排名
    在這裏插入圖片描述
  • gis開發人員需求佔比
    在這裏插入圖片描述
  • gis薪資情況
    • 開發者
      在這裏插入圖片描述
    • 非開發者
      在這裏插入圖片描述
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章