开发背景:一直以来,Python(CPython)受到全局解释器锁(GIL)的影响,多线程的性能较低;asyncio自Python3.4起进入标准库,Python3.5又引入了async/await语法,利用协程对Python语法在并发条件下进行补充。
线程定义:比如你需要完成一个任务,即从1加到100的和,这个任务可以看成是一个进程;然后你请了10个小学生,进行分批计算,每个小学生算10组数字。现在,每个小学生的任务,可以看成是一个线程,并且他们之间算数是互不影响的。
协程定义:协程不是进程或线程,其执行过程更类似于子例程,或者说不带返回值的函数调用-----来自于百度百科。
区别:线程的调度与切换是在操作系统层面上完成,协程的调度与切换则是在用户层面(程序自身)完成。举例:要完成100个网页的访问,从线程的层面上理解,完成这个任务时,每个线程可以负责10个,最后将结果整合;从协程角度考虑,每个网页访问可以看做是一个子例程,进行访问和结果的留存。
凡是并行,都涉及到“切换”、“中间结果缓存”、“暂停环节”。
以上是个人的理解,Python asyncio走的也是这套机制,我们可以看一个多协程抓取api数据的例子:每次循环访问可以看成是一个协程。
PS:给大家安利一个Python的调试组件——better_exceptions,对Python的初学者非常友好;熟练之后还是建议看Log!具体用法可以参考网上教程,个人觉得还是不错的。
# install
pip install better_exceptions
# set the BETTER_EXCEPTIONS environment variable to any value
export BETTER_EXCEPTIONS=1 # Linux / OSX
setx BETTER_EXCEPTIONS 1 # Windows
import requests
import urllib
import json
import pandas as pd
import asyncio
import nest_asyncio
# Apply the nest_asyncio patch to asyncio before any event loop is used.
# NOTE(review): presumably needed because the script was run inside an
# environment that already has a running loop (e.g. a notebook) -- confirm.
nest_asyncio.apply()
import math
# Space-separated input table, loaded once at import time from an absolute
# path on the author's machine. download() reads the 'Province', 'City',
# 'Alias' and 'Subclass' columns of this frame, one row per call.
source_data = pd.read_csv("/Users/hzp/Desktop/City_poi_concat.txt",sep=" ")
# Endpoint and paging settings for the AMap place text-search API.
_AMAP_TEXT_SEARCH_URL = 'https://restapi.amap.com/v3/place/text'
# NOTE(review): the API key is hard-coded in source; consider loading it from
# configuration or an environment variable instead.
_AMAP_KEY = 'f00fffsfsfsffsd'
_PAGE_SIZE = 30
_REQUEST_TIMEOUT = 3  # seconds

# Column order of the DataFrame produced by download().
_POI_COLUMNS = [
    'province_list',
    'urban_list',
    'alias',
    'keyword_list',
    'poi_id',
    'poi_address',
    'poi_location',
    'poi_name',
    'poi_alias_name',
    'poi_type',
    'poi_typecode',
    'business_area',
    'poi_adname',
    'poi_adcode',
]


def _fetch_page(subclass, city, page):
    """Fetch one result page (blocking) and return the decoded JSON dict.

    Returns None when the request fails, times out, or the body is not a
    JSON object, so callers can skip the page instead of crashing.
    """
    query = {
        'types': subclass,
        'city': city,
        'output': 'Json',
        'offset': _PAGE_SIZE,
        'page': str(page),
        'key': _AMAP_KEY,
        'extensions': 'all',
        'citylimit': 'true',
        'children': 1,
    }
    try:
        # Let requests URL-encode the query instead of relying on
        # urllib.parse having been loaded as a side effect of another import.
        response = requests.request(
            "GET", _AMAP_TEXT_SEARCH_URL, params=query, timeout=_REQUEST_TIMEOUT
        )
        payload = json.loads(response.text)
    except (requests.RequestException, ValueError) as exc:
        print("Request failed for page " + str(page) + ": " + repr(exc))
        return None
    return payload if isinstance(payload, dict) else None


def _poi_to_row(poi, province, city, alias, subclass):
    """Convert one POI record from the API into a single output row (dict)."""

    def _value_or_nat(field):
        # Empty or missing optional fields are stored as pd.NaT, matching the
        # marker the original script used for "no value".
        value = poi.get(field)
        return value if value else pd.NaT

    return {
        'province_list': province,
        'urban_list': city,
        'alias': alias,
        'keyword_list': subclass,
        'poi_id': poi['id'],
        'poi_address': poi['address'],
        'poi_location': poi['location'],
        'poi_name': poi['name'],
        'poi_alias_name': _value_or_nat('alias'),
        'poi_type': poi['type'],
        'poi_typecode': poi['typecode'],
        'business_area': _value_or_nat('business_area'),
        'poi_adname': poi['adname'],
        'poi_adcode': poi['adcode'],
    }


def _rows_from_page(payload, province, city, alias, subclass, verbose=False):
    """Build output rows for every well-formed POI in one decoded page."""
    rows = []
    for position, poi in enumerate(payload.get('pois') or []):
        if verbose:
            print("Inner Loop is: " + str(position))
        try:
            # A row is built completely before it is stored, so a malformed
            # record can never leave the columns with unequal lengths.
            rows.append(_poi_to_row(poi, province, city, alias, subclass))
        except (KeyError, TypeError, AttributeError) as exc:
            print("Skipping malformed POI: " + repr(exc))
    return rows


async def download(i):
    """Download every POI page for row ``i`` of ``source_data``.

    The row's 'City' and 'Subclass' values select the query; 'Province' and
    'Alias' are copied into each output row. Page 1 is fetched first, its
    ``count`` field determines how many further pages of ``_PAGE_SIZE``
    results are requested.

    Args:
        i: Index label of the row in the module-level ``source_data`` frame.

    Returns:
        A pandas DataFrame with the columns listed in ``_POI_COLUMNS``; it is
        empty when the first page cannot be fetched or contains no POIs.
    """
    print("Out Layer Loop is: " + str(i))
    province = source_data['Province'][i]
    city = source_data['City'][i]
    alias = source_data['Alias'][i]
    subclass = source_data['Subclass'][i]

    # requests is blocking; run it in the default executor so that other
    # coroutines scheduled on the same loop are not stalled while waiting.
    event_loop = asyncio.get_event_loop()

    first_page = await event_loop.run_in_executor(
        None, _fetch_page, subclass, city, 1
    )
    if first_page is None:
        return pd.DataFrame([], columns=_POI_COLUMNS)

    rows = _rows_from_page(
        first_page, province, city, alias, subclass, verbose=True
    )

    try:
        total = int(first_page.get('count', 0))
    except (TypeError, ValueError):
        # A missing or non-numeric count means there is nothing more to page.
        total = 0
    last_page = math.ceil(total / _PAGE_SIZE)

    # Remaining pages are fetched one after another; a failed page is skipped
    # rather than aborting the whole download.
    for page in range(2, last_page + 1):
        payload = await event_loop.run_in_executor(
            None, _fetch_page, subclass, city, page
        )
        if payload is None:
            continue
        rows.extend(_rows_from_page(payload, province, city, alias, subclass))

    df_result = pd.DataFrame(rows, columns=_POI_COLUMNS)
    return df_result
def run(start=976, stop=None, max_concurrency=5):
    """Run download() for a range of ``source_data`` rows on the event loop.

    Args:
        start: First row index to process. Defaults to 976, the offset the
            original script was hard-coded to start from.
        stop: One past the last row index; defaults to the number of rows in
            ``source_data``.
        max_concurrency: Upper bound on download() coroutines in flight at
            once. Pass 1 to process rows strictly one at a time.

    Returns:
        A list with the result of each download() call, in row order.
    """
    if stop is None:
        stop = source_data.shape[0]
    limit = max(1, int(max_concurrency))

    async def _run_all():
        # Created inside the coroutine so the semaphore belongs to the loop
        # that is actually running the downloads.
        gate = asyncio.Semaphore(limit)

        async def _guarded(i):
            async with gate:
                return await download(i)

        # gather() schedules all rows together; the semaphore keeps the
        # number of simultaneous requests bounded.
        return await asyncio.gather(*(_guarded(i) for i in range(start, stop)))

    return loop.run_until_complete(_run_all())
# Module-level event loop used by run(); it is obtained at import time.
# NOTE(review): asyncio.get_event_loop() called with no running loop is
# deprecated in recent Python versions -- confirm the target version.
loop = asyncio.get_event_loop()

# Start the crawl only when the file is executed as a script.
if __name__ == '__main__':
    run()