python 金數據API調用數據

需求:

  1. 前期使用金數據收集數據,現在使用金數據提供的API抓取之前的數據

問題:

  1. 金數據提供了curl和ruby示例,沒有提供python版本的示例。 
  2. 得到的數據只有數據,沒有字段名
  3. 只能一次性獲取前五十條數據,超過五十條只能逐一獲取

curl用法: 

curl -u api_key:api_secret https://jinshuju.net/api/v1/forms/ex27t2

 中文亂碼問題:

curl -u key:secret https://jinshuju.net/api/v1/forms/XXX/entries|iconv -f utf-8 -t gbk

python:

問題解決:

  1. 認證問題:使用 requests.get(url, auth=(user, password)) 傳入 user、password(即 HTTP Basic 認證),而不是按照官方說的手動放在 headers 裏面
  2. 沒有字段名問題:事先下載了一下表格,各取一條數據,然後通過數據比對獲取相應的字段名。
  3. 獲取限制問題:超過50條數據時,需要把返回結果中的 next 值作為參數,反覆請求下一頁,並把每一頁的 data 合併起來
import requests
import json
import pandas as pd



def get_data(user='key', password='secret', form_id='Lo8qfz'):
    """Download every entry of a Jinshuju form through the v1 REST API.

    The first page is requested without a cursor; each following page is
    requested with the ``next`` value returned by the previous page, until
    a response carries no ``next`` value.

    NOTE(review): this assumes the API omits/nulls ``next`` on the last
    page. Confirm against the Jinshuju API documentation.

    Args:
        user: API key, sent as the HTTP Basic auth user name.
        password: API secret, sent as the HTTP Basic auth password.
        form_id: Token of the form whose entries are downloaded.

    Returns:
        A list with the ``data`` items of all pages, in the order received.

    Raises:
        requests.RequestException: If a request fails or returns an HTTP
            error status. The failure message is printed before re-raising.
    """
    # No explicit 'Authorization' header: requests builds it from `auth`
    # and would overwrite any value placed in the headers dict anyway.
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Mobile Safari/537.36',
        'Content-Type': 'application/json',
    }
    host = 'https://jinshuju.net'
    use_class = '/api/v1'
    entries_path = '/forms/' + form_id + '/entries'
    url = host + use_class + entries_path

    def fetch_page(cursor=None):
        """Request one page of entries and return the decoded JSON body."""
        # Passing the cursor through `params` produces '?next=<cursor>' and
        # works whether the cursor is a string or a number.
        params = None if cursor is None else {'next': cursor}
        try:
            response = requests.get(url, auth=(user, password),
                                    headers=headers, params=params)
            response.raise_for_status()
        except requests.RequestException:
            print("數據獲取失敗")
            # Re-raise: there is no response to parse after a failure.
            raise
        response.encoding = response.apparent_encoding
        return json.loads(response.text)

    entries = []
    seen_cursors = set()
    cursor = None
    while True:
        page = fetch_page(cursor)
        entries.extend(page.get('data') or [])
        cursor = page.get('next')
        # Stop on the last page; also stop if the server repeats a cursor,
        # which would otherwise loop forever.
        if not cursor or cursor in seen_cursors:
            break
        seen_cursors.add(cursor)

    return entries

# Download the form entries when the script runs.
# NOTE: get_data() performs HTTP requests, so this line needs network access.
data = get_data()

      

def deal_data(data):
    """Flatten API entries into a DataFrame with one row per entry.

    Each key containing an underscore is renamed to its second
    underscore-separated token (``'field_12'`` -> ``'12'``); keys without
    an underscore are ignored, and so are ``'creator_name'`` and
    ``'serial_number'``. Values are converted as follows:

    * empty list            -> ``''``
    * list of dicts         -> ``'statement,choice'`` pairs joined by ``';'``
    * any other list        -> items joined by ``','``
    * everything else       -> kept unchanged

    A key that is missing from an entry becomes a missing value (NaN) in
    that entry's row. The input dicts are not modified.

    Args:
        data: Iterable of dicts, one per form entry.

    Returns:
        pandas.DataFrame with one row per entry, in input order.
    """
    skipped_keys = {'creator_name', 'serial_number'}
    rows = []
    for entry in data:
        # A fresh dict per entry, so nothing carries over from earlier rows.
        row = {}
        for key, value in entry.items():
            if key in skipped_keys:
                continue
            tokens = key.split('_')
            if len(tokens) < 2:
                continue
            column = tokens[1]
            if not isinstance(value, list):
                row[column] = value
            elif not value:
                row[column] = ''
            elif isinstance(value[0], dict):
                # Built from this field's items only.
                row[column] = ';'.join(
                    ','.join([item['statement'], item['choice']])
                    for item in value
                )
            else:
                row[column] = ','.join(str(item) for item in value)
        rows.append(row)

    # Build the frame in one step; DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(rows)
    
# Flatten the downloaded entries into a table keyed by field number.
df2 = deal_data(data)
# Drop the 'at' column (what deal_data produces for keys such as 'x_at').
df3 = df2.drop('at', axis=1)
# Order the columns numerically rather than lexicographically
# ('2' before '10'). Empty names are skipped; a column named '0' is kept.
namelist2 = sorted(int(name) for name in df3.columns if name)
namelist = [str(number) for number in namelist2]
# NOTE(review): column '20' is excluded from the comparison; the reason is
# not visible here -- confirm with the form definition.
df4 = df3[namelist].drop('20', axis=1)
    
          
## Load the reference rows used to recover the column names.
## The same person is selected from the exported sheet (by 'ident_id') and
## from the API table (by column '22'); missing values are filled with 0.
excel_data = pd.read_csv(r'D:\python_code\ruijin_metaboliaze\CODE_API\info_patients.csv')
match_df = excel_data[excel_data['ident_id'].isin(['130403197605251817'])].fillna(0)
oldmatch_df = (
    df4[df4['22'].isin(['130403197605251817'])]
    .reset_index(drop=True)
    .fillna(0)
)

## Pre-processing: cast these two columns to int so that their text form
## has no decimal part when compared as strings.
match_df = match_df.astype({'weight': int, 'food_preference': int})

## Column-name matching function
def match_colname(match_df, oldmatch_df):
    """Pair columns of two frames by comparing their first-row values.

    For every column of ``match_df`` (in order) the first still-unused
    column of ``oldmatch_df`` is chosen whose first-row value either

    * is ``''`` while the ``match_df`` value equals ``0``, or
    * has the same text as the ``match_df`` value after stripping
      surrounding whitespace from the latter.

    A column of ``oldmatch_df`` is used for at most one match. Neither
    input frame is modified.

    Args:
        match_df: Frame whose column names are the wanted names.
        oldmatch_df: Frame whose column names are to be translated.

    Returns:
        dict mapping ``match_df`` column names to ``oldmatch_df`` column
        names. Columns without a match are absent; the dict is empty when
        either frame has no rows.
    """
    m_dict = {}
    if len(match_df) == 0 or len(oldmatch_df) == 0:
        return m_dict

    # .iloc[0] reads the first row by position; the frames may carry
    # arbitrary row labels (e.g. after boolean filtering).
    candidates = [(label, column.iloc[0]) for label, column in oldmatch_df.items()]

    for wanted_name, column in match_df.items():
        target = column.iloc[0]
        for old_name, value in candidates:
            is_blank_pair = value == '' and target == 0
            if is_blank_pair or str(value) == str(target).strip():
                m_dict[wanted_name] = old_name
                # Remove the matched label so it cannot be paired again.
                candidates = [c for c in candidates if c[0] != old_name]
                break
    return m_dict

# Map each readable column name of the sheet to the numbered API column.
m_dict = match_colname(match_df, oldmatch_df)

# Swap keys and values (numbered column -> readable name) and rename df4.
f_dict = dict(zip(m_dict.values(), m_dict.keys()))
df5 = df4.rename(columns=f_dict)

 

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章