場景:輸入一個矩陣,返回倒排索引後的矩陣。矩陣值是連續的,需要分箱。
def genInvertedIndex(X, bin_len=0.1):
    """Build a binary inverted index over the distinct values of a matrix.

    Every distinct value found in ``X`` is used as its own bin edge. Entry
    ``[k, i]`` of the result is 1 when row ``i`` of ``X`` contains the
    ``k``-th smallest distinct value (counting from 1), and 0 otherwise.

    Parameters
    ----------
    X : array-like, shape (n, m, ...)
        Input values with at least two dimensions. Trailing dimensions are
        flattened per row.
    bin_len : float, optional
        Accepted for backward compatibility but not used: the bins are the
        distinct values of ``X`` themselves, not fixed-width intervals.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(w + 1, n + 1)``, where ``w`` is the number of
        distinct values in ``X``. Row 0 and column ``n`` are always zero:
        ``np.digitize`` maps a value equal to the k-th edge to index ``k + 1``
        so no value lands in row 0, and the trailing column is never written.
        Both are kept so the returned shape stays the same for callers.

    Raises
    ------
    ValueError
        If ``X`` has fewer than two dimensions.
    """
    X = np.asarray(X)
    if X.ndim < 2:
        raise ValueError(
            "X must have at least 2 dimensions, got %d" % X.ndim
        )

    n_rows = X.shape[0]
    # Sorted distinct values double as the bin edges.
    bins = np.unique(X)
    X_i = np.zeros((len(bins) + 1, n_rows + 1))

    if X.size:
        # One vectorized digitize call instead of one call per scalar.
        slots = np.digitize(X.reshape(n_rows, -1), bins)
        # Pair every slot with the index of the row it came from; the column
        # vector broadcasts against the (n_rows, m) slot matrix.
        X_i[slots, np.arange(n_rows)[:, None]] = 1
    return X_i
測試代碼:
# Driver snippet: X is expected to already hold the raw matrix data
# (it is defined outside this excerpt). Convert it to an ndarray and show it.
X = np.array(X)
print(X)
# Build the inverted index for X and print the resulting 0/1 matrix.
X_i = genInvertedIndex(X)
print(X_i)
結果:
[[-0.355 -0.36 -0.375 ... -0.17 -0.17 -0.165]
[-0.09 -0.06 -0.005 ... -0.12 -0.1 -0.1 ]
[-0.085 -0.105 -0.14 ... -0.315 -0.325 -0.33 ]
...
[-0.485 -0.38 -0.25 ... -0.425 -0.43 -0.44 ]
[-0.085 -0.105 -0.13 ... -0.38 -0.38 -0.355]
[-0.405 -0.395 -0.4 ... -0.415 -0.41 -0.38 ]]
[[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 1. 0.]
...
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]
[0. 0. 0. ... 0. 0. 0.]]