接上篇 通過一個示例形象地理解C# async await 非並行異步、並行異步、並行異步的併發量控制
前些天寫了兩篇關於C# async await異步的博客,
第一篇博客看的人多,點贊評論也多,我想應該都看懂了,比較簡單。
第二篇博客看的人少,點讚的也少,沒有評論。
我很納悶,第二篇博客纔是重點,如此吊炸天的代碼,居然沒人評論。
這個代碼,就是.NET圈的頂級大佬也沒有寫過,爲什麼這麼說,這就要說到C# async await的語法糖:
沒有語法糖,代碼一樣寫,java8沒有語法糖,一樣寫出很厲害的代碼。但有了C# async await語法糖,普通的水平一般的業務程序員,哪怕是菜B,也能寫出高吞吐高性能的代碼,這就是意義!
所以我說頂級大佬沒寫過,因爲他們水平高,腦力好,手段多,自然不需要這麼寫。但普通程序員要那樣寫代碼,麻煩不說,BUG頻出。
標題我用了"探索"這個詞,有沒有更好的實踐,讓小白們都會寫的並行異步的實踐?
ElasticSearch的性能
代碼的實用價值,是查詢es。
最近發現es的性能非常好!先給大家看個控制檯輸出的截圖。服務我是部署在服務器上的,真實環境,不是自己電腦。
379次es查詢,僅需0.185秒,當然耗時會有波動,零點幾秒都是正常的,超過1秒也有可能。
es最怕的是什麼,是慢查詢,是條件複雜的查詢,是範圍查詢。
我的策略是多次精確查詢,這樣可以利用es極高的吞吐能力。
並行異步
既然查詢次數多,單線程或者說同步肯定是不行的,必須並行。
並行代碼,python能寫嗎?java能寫嗎?肯定能啊!
但我前同事寫的python多次查詢es寫的就是同步代碼,爲什麼不併行呢?並行肯定可以寫,但是能不寫就不寫,爲什麼?因爲寫起來複雜,不好寫。你以爲自己技術好,腦力好沒問題,但別人呢?
重點是什麼?不僅要寫並行代碼,還要寫的簡單,不破壞代碼原有邏輯結構。
異步方法
大家都會寫的,用async await就行了,很簡單,放個我寫的,代碼主要是在雙循環中多次異步請求(大致看一下先跳過):
/// <summary>
/// xxx查詢
/// </summary>
public async Task<List<AccompanyInfo>> Query2(string strStartTime, string strEndTime, int kpCountThreshold, int countThreshold, int distanceThreshold, int timeThreshold, List<PeopleCluster> peopleClusterList)
{
List<AccompanyInfo> resultList = new List<AccompanyInfo>();
Stopwatch sw = Stopwatch.StartNew();
//創建字典
Dictionary<string, PeopleCluster> clusterIdPeopleDict = new Dictionary<string, PeopleCluster>();
foreach (PeopleCluster peopleCluster in peopleClusterList)
{
foreach (string clusterId in peopleCluster.ClusterIds)
{
if (!clusterIdPeopleDict.ContainsKey(clusterId))
{
clusterIdPeopleDict.Add(clusterId, peopleCluster);
}
}
}
int queryCount = 0;
Dictionary<string, AccompanyInfo> dict = new Dictionary<string, AccompanyInfo>();
foreach (PeopleCluster people1 in peopleClusterList)
{
List<PeopleFeatureInfo> peopleFeatureList = await ServiceFactory.Get<PeopleFeatureQueryService>().Query(strStartTime, strEndTime, people1);
queryCount++;
foreach (PeopleFeatureInfo peopleFeatureInfo1 in peopleFeatureList)
{
DateTime capturedTime = DateTime.ParseExact(peopleFeatureInfo1.captured_time, "yyyyMMddHHmmss", CultureInfo.InvariantCulture);
string strStartTime2 = capturedTime.AddSeconds(-timeThreshold).ToString("yyyyMMddHHmmss");
string strEndTime2 = capturedTime.AddSeconds(timeThreshold).ToString("yyyyMMddHHmmss");
List<PeopleFeatureInfo> peopleFeatureList2 = await ServiceFactory.Get<PeopleFeatureQueryService>().QueryExcludeSelf(strStartTime2, strEndTime2, people1);
queryCount++;
if (peopleFeatureList2.Count > 0)
{
foreach (PeopleFeatureInfo peopleFeatureInfo2 in peopleFeatureList2)
{
string key = null;
PeopleCluster people2 = null;
string people2ClusterId = null;
if (clusterIdPeopleDict.ContainsKey(peopleFeatureInfo2.cluster_id.ToString()))
{
people2 = clusterIdPeopleDict[peopleFeatureInfo2.cluster_id.ToString()];
key = $"{string.Join(",", people1.ClusterIds)}_{string.Join(",", people2.ClusterIds)}";
}
else
{
people2ClusterId = peopleFeatureInfo2.cluster_id.ToString();
key = $"{string.Join(",", people1.ClusterIds)}_{string.Join(",", people2ClusterId)}";
}
double distance = LngLatUtil.CalcDistance(peopleFeatureInfo1.Longitude, peopleFeatureInfo1.Latitude, peopleFeatureInfo2.Longitude, peopleFeatureInfo2.Latitude);
if (distance > distanceThreshold) continue;
AccompanyInfo accompanyInfo;
if (dict.ContainsKey(key))
{
accompanyInfo = dict[key];
}
else
{
accompanyInfo = new AccompanyInfo();
dict.Add(key, accompanyInfo);
}
accompanyInfo.People1 = people1;
if (people2 != null)
{
accompanyInfo.People2 = people2;
}
else
{
accompanyInfo.ClusterId2 = people2ClusterId;
}
AccompanyItem accompanyItem = new AccompanyItem();
accompanyItem.Info1 = peopleFeatureInfo1;
accompanyItem.Info2 = peopleFeatureInfo2;
accompanyInfo.List.Add(accompanyItem);
accompanyInfo.Count++;
resultList.Add(accompanyInfo);
}
}
}
}
resultList = resultList.FindAll(a => (a.People2 != null && a.Count >= kpCountThreshold) || a.Count >= countThreshold);
//去重
int beforeDistinctCount = resultList.Count;
resultList = resultList.DistinctBy(a =>
{
string str1 = string.Join(",", a.People1.ClusterIds);
string str2 = a.People2 != null ? string.Join(",", a.People2.ClusterIds) : string.Empty;
string str3 = a.ClusterId2 ?? string.Empty;
StringBuilder sb = new StringBuilder();
foreach (AccompanyItem item in a.List)
{
var info2 = item.Info2;
sb.Append($"{info2.camera_id},{info2.captured_time},{info2.cluster_id}");
}
return $"{str1}_{str2}_{str3}_{sb}";
}).ToList();
sw.Stop();
string msg = $"xxx查詢,耗時:{sw.Elapsed.TotalSeconds:0.000} 秒,查詢次數:{queryCount},去重:{beforeDistinctCount}-->{resultList.Count}";
Console.WriteLine(msg);
LogUtil.Info(msg);
return resultList;
}
異步方法的並行化
上述代碼是沒有問題的,但也有問題。就是在雙循環中多次請求,雖然用了async await,但不是並行,耗時會很長,如何優化?請看如下代碼:
/// <summary>
/// xxx查詢
/// </summary>
public async Task<List<AccompanyInfo>> Query(string strStartTime, string strEndTime, int kpCountThreshold, int countThreshold, int distanceThreshold, int timeThreshold, List<PeopleCluster> peopleClusterList)
{
List<AccompanyInfo> resultList = new List<AccompanyInfo>();
Stopwatch sw = Stopwatch.StartNew();
//創建字典
Dictionary<string, PeopleCluster> clusterIdPeopleDict = new Dictionary<string, PeopleCluster>();
foreach (PeopleCluster peopleCluster in peopleClusterList)
{
foreach (string clusterId in peopleCluster.ClusterIds)
{
if (!clusterIdPeopleDict.ContainsKey(clusterId))
{
clusterIdPeopleDict.Add(clusterId, peopleCluster);
}
}
}
//組織第一層循環task
Dictionary<PeopleCluster, Task<List<PeopleFeatureInfo>>> tasks1 = new Dictionary<PeopleCluster, Task<List<PeopleFeatureInfo>>>();
foreach (PeopleCluster people1 in peopleClusterList)
{
var task1 = ServiceFactory.Get<PeopleFeatureQueryService>().Query(strStartTime, strEndTime, people1);
tasks1.Add(people1, task1);
}
//計算第一層循環task並緩存結果,組織第二層循環task
Dictionary<string, Task<List<PeopleFeatureInfo>>> tasks2 = new Dictionary<string, Task<List<PeopleFeatureInfo>>>();
Dictionary<PeopleCluster, List<PeopleFeatureInfo>> cache1 = new Dictionary<PeopleCluster, List<PeopleFeatureInfo>>();
foreach (PeopleCluster people1 in peopleClusterList)
{
List<PeopleFeatureInfo> peopleFeatureList = await tasks1[people1];
cache1.Add(people1, peopleFeatureList);
foreach (PeopleFeatureInfo peopleFeatureInfo1 in peopleFeatureList)
{
DateTime capturedTime = DateTime.ParseExact(peopleFeatureInfo1.captured_time, "yyyyMMddHHmmss", CultureInfo.InvariantCulture);
string strStartTime2 = capturedTime.AddSeconds(-timeThreshold).ToString("yyyyMMddHHmmss");
string strEndTime2 = capturedTime.AddSeconds(timeThreshold).ToString("yyyyMMddHHmmss");
var task2 = ServiceFactory.Get<PeopleFeatureQueryService>().QueryExcludeSelf(strStartTime2, strEndTime2, people1);
string task2Key = $"{strStartTime2}_{strEndTime2}_{string.Join(",", people1.ClusterIds)}";
tasks2.TryAdd(task2Key, task2);
}
}
//讀取第一層循環task緩存結果,計算第二層循環task
Dictionary<string, AccompanyInfo> dict = new Dictionary<string, AccompanyInfo>();
foreach (PeopleCluster people1 in peopleClusterList)
{
List<PeopleFeatureInfo> peopleFeatureList = cache1[people1];
foreach (PeopleFeatureInfo peopleFeatureInfo1 in peopleFeatureList)
{
DateTime capturedTime = DateTime.ParseExact(peopleFeatureInfo1.captured_time, "yyyyMMddHHmmss", CultureInfo.InvariantCulture);
string strStartTime2 = capturedTime.AddSeconds(-timeThreshold).ToString("yyyyMMddHHmmss");
string strEndTime2 = capturedTime.AddSeconds(timeThreshold).ToString("yyyyMMddHHmmss");
string task2Key = $"{strStartTime2}_{strEndTime2}_{string.Join(",", people1.ClusterIds)}";
List<PeopleFeatureInfo> peopleFeatureList2 = await tasks2[task2Key];
if (peopleFeatureList2.Count > 0)
{
foreach (PeopleFeatureInfo peopleFeatureInfo2 in peopleFeatureList2)
{
string key = null;
PeopleCluster people2 = null;
string people2ClusterId = null;
if (clusterIdPeopleDict.ContainsKey(peopleFeatureInfo2.cluster_id.ToString()))
{
people2 = clusterIdPeopleDict[peopleFeatureInfo2.cluster_id.ToString()];
key = $"{string.Join(",", people1.ClusterIds)}_{string.Join(",", people2.ClusterIds)}";
}
else
{
people2ClusterId = peopleFeatureInfo2.cluster_id.ToString();
key = $"{string.Join(",", people1.ClusterIds)}_{string.Join(",", people2ClusterId)}";
}
double distance = LngLatUtil.CalcDistance(peopleFeatureInfo1.Longitude, peopleFeatureInfo1.Latitude, peopleFeatureInfo2.Longitude, peopleFeatureInfo2.Latitude);
if (distance > distanceThreshold) continue;
AccompanyInfo accompanyInfo;
if (dict.ContainsKey(key))
{
accompanyInfo = dict[key];
}
else
{
accompanyInfo = new AccompanyInfo();
dict.Add(key, accompanyInfo);
}
accompanyInfo.People1 = people1;
if (people2 != null)
{
accompanyInfo.People2 = people2;
}
else
{
accompanyInfo.ClusterId2 = people2ClusterId;
}
AccompanyItem accompanyItem = new AccompanyItem();
accompanyItem.Info1 = peopleFeatureInfo1;
accompanyItem.Info2 = peopleFeatureInfo2;
accompanyInfo.List.Add(accompanyItem);
accompanyInfo.Count++;
resultList.Add(accompanyInfo);
}
}
}
}
resultList = resultList.FindAll(a => (a.People2 != null && a.Count >= kpCountThreshold) || a.Count >= countThreshold);
//去重
int beforeDistinctCount = resultList.Count;
resultList = resultList.DistinctBy(a =>
{
string str1 = string.Join(",", a.People1.ClusterIds);
string str2 = a.People2 != null ? string.Join(",", a.People2.ClusterIds) : string.Empty;
string str3 = a.ClusterId2 ?? string.Empty;
StringBuilder sb = new StringBuilder();
foreach (AccompanyItem item in a.List)
{
var info2 = item.Info2;
sb.Append($"{info2.camera_id},{info2.captured_time},{info2.cluster_id}");
}
return $"{str1}_{str2}_{str3}_{sb}";
}).ToList();
//排序
foreach (AccompanyInfo item in resultList)
{
item.List.Sort((a, b) => -string.Compare(a.Info1.captured_time, b.Info1.captured_time));
}
sw.Stop();
string msg = $"xxx查詢,耗時:{sw.Elapsed.TotalSeconds:0.000} 秒,查詢次數:{tasks1.Count + tasks2.Count},去重:{beforeDistinctCount}-->{resultList.Count}";
Console.WriteLine(msg);
LogUtil.Info(msg);
return resultList;
}
上述代碼說明
- 爲了使異步並行化,雙循環要寫三遍。第一遍只需寫第一層循環,省了第二層。第二遍沒有數據處理的那層子循環。第三遍是最全的。
- 和普通的異步相比,第一、二遍雙循環是多出來的,第三遍雙循環代碼結構和普通的異步代碼結構是一樣的。
- 寫的時候,可以先寫好普通的異步方法,然後再改造成並行化的異步方法。
你爲什麼說.NET圈的頂級大佬沒有寫過?
- 不吹個牛,博客沒人看,沒人點贊啊?!
- 牛B的是C#,由於C#語法糖,把牛B的代碼寫簡單了,纔是真的牛B。
- 我倒是希望有大佬寫個更好的實踐,把我這種寫法淘汰掉,因爲這是我能想到的最容易控制的寫法了。
並行代碼,很多人都會寫,java、python也能寫,但問題是,比較菜的普通業務程序員,如何無腦寫這種並行代碼?
最差的寫法,例如java的CompletableFuture,結合業務代碼,寫法就很複雜了。真的沒法無腦寫。
其次的寫法,例如:
List<PeopleFeatureInfo>[] listArray = await Task.WhenAll(tasks2.Values);
在雙循環體中,怎麼拿結果?肯定能寫,但又要思考怎麼寫了不是?
而我的寫法,在雙循環體中是可以直接拿結果的:
List<PeopleFeatureInfo> list = await tasks2[task2Key];
- 只放C#代碼沒有說服力,我一個同事python寫的很6,他寫的挖掘代碼很多都是並行,放一段代碼:
def get_es_multiprocess(index_list, people_list, core_percent, rev_clusterid_idcard_dict):
'''
多進程讀取es數據,轉爲整個數據幀,按時間排序
:return: 規模較大的數據幀
'''
col_list = ["cluster_id", "camera_id", "captured_time"]
pool = Pool(processes=int(mp.cpu_count() * core_percent))
input_list = [(i, people_list, col_list) for i in index_list]
res = pool.map(get_es, input_list)
if not res:
return None
pool.close()
pool.join()
df_all = pd.DataFrame(columns=col_list+['longitude', 'latitude'])
for df in res:
df_all = pd.concat([df_all, df])
# 這裏強制轉換爲字符串!
df_all['cluster_id_'] = df_all['cluster_id'].apply(lambda x: rev_clusterid_idcard_dict[str(x)])
del df_all['cluster_id']
df_all.rename(columns={'cluster_id_': 'cluster_id'}, inplace=True)
df_all.sort_values(by='captured_time', inplace=True)
print('=' * 100)
print('整個數據(聚類前):')
print(df_all.info())
cluster_id_list = [(i, df) for i, df in df_all.groupby(['cluster_id'])]
cluster_id_list_split = [j for j in func(cluster_id_list, 1000000)]
# todo 縮小數據集,用於調試!
data_all = df_all.iloc[:, :]
return data_all, cluster_id_list_split
上述python代碼解析
- 核心代碼:
res = pool.map(get_es, input_list)
pool.join()
...省略
其中get_es是查詢es的方法,他寫的應該不是異步方法,不過這個不是重點
2. res是查詢結果,通過並行的方式一把查出來,放到res中,然後把結果再解出來
3. 注意,這只是單循環,想想雙層循環怎麼寫
4. pool.join()是阻塞當前線程的,這個不好
5. 同事註釋中寫的是"多進程",是寫錯了嗎?實際是多線程?還是就是多進程?
6. 當然,python是有async await異步寫法的,應該不比C#差,只是同事沒有用,可能是因爲他用的python版本不夠新
7. python代碼,字符串太多,字符串是最不好維護的。C#中的字符串裏面都是強類型。
把腦力活變成體力活
照葫蘆畫瓢,把腦力活變成體力活,我又寫了一個方法(業務邏輯不重要,看並行異步的使用):
/// <summary>
/// xxx查詢
/// </summary>
public async Task<List<SameVehicleInfo>> Query(string strStartTime, string strEndTime, int kpCountThreshold, int timeThreshold, List<PeopleCluster> peopleClusterList)
{
List<SameVehicleInfo> resultList = new List<SameVehicleInfo>();
Stopwatch sw = Stopwatch.StartNew();
//組織第一層循環task,查xxx
Dictionary<PeopleCluster, Task<List<PeopleFeatureInfo>>> tasks1 = new Dictionary<PeopleCluster, Task<List<PeopleFeatureInfo>>>();
foreach (PeopleCluster people1 in peopleClusterList)
{
var task1 = ServiceFactory.Get<PeopleFeatureQueryService>().Query(strStartTime, strEndTime, people1);
tasks1.Add(people1, task1);
}
//計算第一層循環task並緩存結果,組織第二層循環task,精確搜xxx
Dictionary<string, Task<List<MotorVehicleInfo>>> tasks2 = new Dictionary<string, Task<List<MotorVehicleInfo>>>();
Dictionary<PeopleCluster, List<PeopleFeatureInfo>> cache1 = new Dictionary<PeopleCluster, List<PeopleFeatureInfo>>();
foreach (PeopleCluster people1 in peopleClusterList)
{
List<PeopleFeatureInfo> peopleFeatureList = await tasks1[people1];
cache1.Add(people1, peopleFeatureList);
foreach (PeopleFeatureInfo peopleFeatureInfo1 in peopleFeatureList)
{
string task2Key = $"{peopleFeatureInfo1.camera_id}_{peopleFeatureInfo1.captured_time}";
var task2 = ServiceFactory.Get<MotorVehicleQueryService>().QueryExact(peopleFeatureInfo1.camera_id, peopleFeatureInfo1.captured_time);
tasks2.TryAdd(task2Key, task2);
}
}
//讀取第一層循環task緩存結果,計算第二層循環task
Dictionary<PersonVehicleKey, PersonVehicleInfo> dictPersonVehicle = new Dictionary<PersonVehicleKey, PersonVehicleInfo>();
foreach (PeopleCluster people1 in peopleClusterList)
{
List<PeopleFeatureInfo> peopleFeatureList = cache1[people1];
foreach (PeopleFeatureInfo peopleFeatureInfo1 in peopleFeatureList)
{
string task2Key = $"{peopleFeatureInfo1.camera_id}_{peopleFeatureInfo1.captured_time}";
List<MotorVehicleInfo> motorVehicleList = await tasks2[task2Key];
motorVehicleList = motorVehicleList.DistinctBy(a => a.plate_no).ToList();
foreach (MotorVehicleInfo motorVehicleInfo in motorVehicleList)
{
PersonVehicleKey key = new PersonVehicleKey(people1, motorVehicleInfo.plate_no);
PersonVehicleInfo personVehicleInfo;
if (dictPersonVehicle.ContainsKey(key))
{
personVehicleInfo = dictPersonVehicle[key];
}
else
{
personVehicleInfo = new PersonVehicleInfo()
{
People = people1,
PlateNo = motorVehicleInfo.plate_no,
List = new List<PeopleFeatureInfo>()
};
dictPersonVehicle.Add(key, personVehicleInfo);
}
personVehicleInfo.List.Add(peopleFeatureInfo1);
}
}
}
//比對xxx
List<PersonVehicleKey> keys = dictPersonVehicle.Keys.ToList();
Dictionary<string, SameVehicleInfo> dict = new Dictionary<string, SameVehicleInfo>();
for (int i = 0; i < keys.Count - 1; i++)
{
for (int j = i + 1; j < keys.Count; j++)
{
var key1 = keys[i];
var key2 = keys[j];
var personVehicle1 = dictPersonVehicle[key1];
var personVehicle2 = dictPersonVehicle[key2];
if (key1.PlateNo == key2.PlateNo)
{
foreach (PeopleFeatureInfo peopleFeature1 in personVehicle1.List)
{
double minTimeDiff = double.MaxValue;
int minIndex = -1;
for (int k = 0; k < personVehicle2.List.Count; k++)
{
PeopleFeatureInfo peopleFeature2 = personVehicle2.List[k];
DateTime capturedTime1 = DateTime.ParseExact(peopleFeature1.captured_time, "yyyyMMddHHmmss", CultureInfo.InvariantCulture);
DateTime capturedTime2 = DateTime.ParseExact(peopleFeature2.captured_time, "yyyyMMddHHmmss", CultureInfo.InvariantCulture);
var timeDiff = Math.Abs(capturedTime2.Subtract(capturedTime1).TotalSeconds);
if (timeDiff < minTimeDiff)
{
minTimeDiff = timeDiff;
minIndex = k;
}
}
if (minIndex >= 0 && minTimeDiff <= timeThreshold * 60)
{
PeopleCluster people1 = key1.People;
PeopleCluster people2 = key2.People;
PeopleFeatureInfo peopleFeatureInfo2 = personVehicle2.List[minIndex];
string key = $"{string.Join(",", people1.ClusterIds)}_{string.Join(",", people2.ClusterIds)}"; ;
SameVehicleInfo accompanyInfo;
if (dict.ContainsKey(key))
{
accompanyInfo = dict[key];
}
else
{
accompanyInfo = new SameVehicleInfo();
dict.Add(key, accompanyInfo);
}
accompanyInfo.People1 = people1;
accompanyInfo.People2 = people2;
SameVehicleItem accompanyItem = new SameVehicleItem();
accompanyItem.Info1 = peopleFeature1;
accompanyItem.Info2 = peopleFeatureInfo2;
accompanyInfo.List.Add(accompanyItem);
accompanyInfo.Count++;
resultList.Add(accompanyInfo);
}
}
}
}
}
resultList = resultList.FindAll(a => a.Count >= kpCountThreshold);
//篩選,排除xxx
resultList = resultList.FindAll(a =>
{
if (string.Join(",", a.People1.ClusterIds) == string.Join(",", a.People2.ClusterIds))
{
return false;
}
return true;
});
//去重
int beforeDistinctCount = resultList.Count;
resultList = resultList.DistinctBy(a =>
{
string str1 = string.Join(",", a.People1.ClusterIds);
string str2 = string.Join(",", a.People2.ClusterIds);
StringBuilder sb = new StringBuilder();
foreach (SameVehicleItem item in a.List)
{
var info2 = item.Info2;
sb.Append($"{info2.camera_id},{info2.captured_time},{info2.cluster_id}");
}
return $"{str1}_{str2}_{sb}";
}).ToList();
//排序
foreach (SameVehicleInfo item in resultList)
{
item.List.Sort((a, b) => -string.Compare(a.Info1.captured_time, b.Info1.captured_time));
}
sw.Stop();
string msg = $"xxx查詢,耗時:{sw.Elapsed.TotalSeconds:0.000} 秒,查詢次數:{tasks1.Count + tasks2.Count},去重:{beforeDistinctCount}-->{resultList.Count}";
Console.WriteLine(msg);
LogUtil.Info(msg);
return resultList;
}
未完,待補充
XXX
- 我們開發的低代碼平臺很牛B,C#:我就是低代碼!
- 我們開發的平臺很牛B,支持寫腳本、自定義腳本,C#:我就是腳本!
- 我們用spark、flink分佈式,性能牛B,C#:並行異步性能吊炸天,內存給大些,單機就可以。C#:我的並行異步的性能,能把es幹掛,只要不是計算密集型,只要內存夠,不用spark、flink,單機簡單啊,只要es是集羣就行了。