import pandas as pd
# Build a one-column frame and translate each key into its integer code.
df = pd.DataFrame({"key": ["aa", "bb", "cc"]})
# Named `key_to_code` rather than `dict` so the builtin `dict` type stays
# usable for the rest of the module.
key_to_code = {"aa": 1, "bb": 3, "cc": 2}
# Series.map looks each value up in the mapping; per the pandas docs, values
# with no matching key come back as NaN.
df_map = df["key"].map(key_to_code)
print("raw:\n", df.head())
print("mapped:\n", df_map)
raw:
   key
0  aa
1  bb
2  cc
mapped:
 0    1
1    3
2    2
Name: key, dtype: int64
from sklearn.preprocessing import LabelEncoder
# Label-encode several categorical columns into one shared integer id space:
# each column receives its own contiguous range of ids, so a value in one
# column never shares an id with a value in another column.
df = pd.DataFrame({"f1": ["aa", "bb", "cc"], "f2": ["bb", "bb", "cc1"]})
co_feature = pd.DataFrame()  # not used by the code visible here
dict_data = {}
index = 0  # first id that has not yet been handed out to any column
for col in ["f1", "f2"]:
    le = LabelEncoder()
    # Per-column codes run from 0 to len(le.classes_) - 1.
    row = le.fit_transform(df[col])
    dict_data[col] = row + index
    # Advance the offset cumulatively. Assigning `max(row) + 1` here would
    # reset it to the size of the current column alone, so with three or more
    # columns a later range would overlap an earlier one.
    index += len(le.classes_)
print(dict_data)
編碼結果:
{'f1': array([0, 1, 2]), 'f2': array([3, 3, 4])}