如何用Python構建機器學習模型？

原創

2021-05-20 16:03

{"type":"doc","content":[{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"本文，我們將通過 Python 語言包，來構建一些機器學習模型。"}]}]},{"type":"heading","attrs":{"align":null,"level":2},"content":[{"type":"text","text":"構建機器學習模型的模板"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"該 Notebook 包含了用於創建主要機器學習算法所需的代碼模板。在 scikit-learn 中，我們已經準備好了幾個算法。只需調整參數，給它們輸入數據，進行訓練，生成模型，最後進行預測。"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"1.線性迴歸"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"對於線性迴歸，我們需要從 sklearn 庫中導入 linear_model。我們準備好訓練和測試數據，然後將預測模型實例化爲一個名爲線性迴歸 LinearRegression 算法的對象，它是 linear_model 包的一個類，從而創建預測模型。之後我們利用擬合函數對算法進行訓練，並利用得分來評估模型。最後，我們將係數打印出來，用模型進行新的預測。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn import linear_model\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create linear regression object\nlinear = linear_model.LinearRegression()\n\n# Train the model with training data and check the score\nlinear.fit(x_train, y_train)\nlinear.score(x_train, y_train)\n\n# Collect coefficients\nprint('Coefficient: \\n', linear.coef_)\nprint('Intercept: \\n', linear.intercept_)\n\n# Make predictions\npredicted_values = 
linear.predict(x_test)"}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"2.邏輯迴歸"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"在本例中，從線性迴歸到邏輯迴歸唯一改變的是我們要使用的算法。我們將 LinearRegression 改爲 LogisticRegression。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn.linear_model import LogisticRegression\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create logistic regression object\nmodel = LogisticRegression()\n\n# Train the model with training data and checking the score\nmodel.fit(x_train, y_train)\nmodel.score(x_train, y_train)\n\n# Collect coefficients\nprint('Coefficient: \\n', model.coef_)\nprint('Intercept: \\n', model.intercept_)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"3.決策樹"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"我們再次將算法更改爲 DecisionTreeRegressor："}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn import tree\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create Decision Tree Regressor Object\nmodel = 
tree.DecisionTreeRegressor()\n\n# Create Decision Tree Classifier Object\nmodel = tree.DecisionTreeClassifier()\n\n# Train the model with training data and checking the score\nmodel.fit(x_train, y_train)\nmodel.score(x_train, y_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"4.樸素貝葉斯"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"我們再次將算法更改爲 GaussianNB："}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn.naive_bayes import GaussianNB\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create GaussianNB object\nmodel = GaussianNB()\n\n# Train the model with training data \nmodel.fit(x_train, y_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"5.支持向量機"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"在本例中，我們使用 SVM 庫的 SVC 類。如果是 SVR，它就是一個迴歸函數："}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn import svm\n\n# Create training and test subsets\nx_train = 
train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create SVM Classifier object \nmodel = svm.SVC()\n\n# Train the model with training data and checking the score\nmodel.fit(x_train, y_train)\nmodel.score(x_train, y_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"6.K- 最近鄰"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"在 KNeighborsClassifier 算法中，我們有一個名爲 n_neighbors 的超參數，我們正是通過它來對這個算法進行調整。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn.neighbors import KNeighborsClassifier\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create KNeighbors Classifier Objects \nmodel = KNeighborsClassifier(n_neighbors = 6) # default value = 5\n\n# Train the model with training data\nmodel.fit(x_train, y_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"7.K- 均值"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn.cluster import KMeans\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = 
test_dataset_predictor_variables\n\n# Create KMeans objects \nmodel = KMeans(n_clusters = 3, random_state = 0)\n\n# Train the model with training data\nmodel.fit(x_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"8.隨機森林"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Create Random Forest Classifier objects \nmodel = RandomForestClassifier()\n\n# Train the model with training data \nmodel.fit(x_train, y_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"9.降維"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn import decomposition\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Creating PCA decomposition object\npca = decomposition.PCA(n_components = k)\n\n# Creating Factor analysis decomposition object\nfa = decomposition.FactorAnalysis()\n\n# Reduce the size of the training set using PCA\nreduced_train = pca.fit_transform(x_train)\n\n# Reduce the size of the test set using PCA\nreduced_test = 
pca.transform(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"10.梯度提升和 AdaBoost"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"python"},"content":[{"type":"text","text":"# Import modules\nfrom sklearn.ensemble import GradientBoostingClassifier\n\n# Create training and test subsets\nx_train = train_dataset_predictor_variables\ny_train = train_dataset_predicted_variable\n\nx_test = test_dataset_predictor_variables\n\n# Creating Gradient Boosting Classifier object\nmodel = GradientBoostingClassifier(n_estimators = 100, learning_rate = 1.0, max_depth = 1, random_state = 0)\n\n# Training the model with training data \nmodel.fit(x_train, y_train)\n\n# Make predictions\npredicted_values = model.predict(x_test)"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"我們的工作將是把這些算法中的每一個塊轉化爲一個項目。首先，定義一個業務問題，對數據進行預處理，訓練算法，調整超參數，獲得可驗證的結果，在這個過程中不斷迭代，直到我們達到滿意的精度，做出理想的預測。"}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong"}],"text":"原文鏈接："}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"https:\/\/levelup.gitconnected.com\/10-templates-for-building-machine-learning-models-with-notebook-282c4eb0987f"}]}]}