199 lines
6.0 KiB
Plaintext
199 lines
6.0 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 导入必要的库\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import numpy as np\n",
|
||
"from sklearn.datasets import make_classification\n",
|
||
"from sklearn.linear_model import LogisticRegression\n",
|
||
"from sklearn.metrics import accuracy_score"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def generate_and_plot_dataset():\n",
|
||
" # 生成一个逻辑回归的数据集\n",
|
||
" x, y = make_classification(n_samples=100, n_features=2, \n",
|
||
" n_informative=2, n_redundant=0, \n",
|
||
" n_clusters_per_class=1, random_state=42)\n",
|
||
" # 可视化数据集\n",
|
||
" plt.scatter(x[:, 0], x[:, 1], c=y, cmap='viridis')\n",
|
||
" plt.xlabel('Feature 1')\n",
|
||
" plt.ylabel('Feature 2')\n",
|
||
" plt.title('Logistic Regression Dataset')\n",
|
||
" plt.show()\n",
|
||
" return x,y\n",
|
||
"x,y = generate_and_plot_dataset()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"def logistic_regression_analysis(x, y):\n",
|
||
" # 创建逻辑回归模型\n",
|
||
" model = LogisticRegression()\n",
|
||
"\n",
|
||
" # 拟合模型\n",
|
||
" model.fit(x, y)\n",
|
||
"\n",
|
||
" # 预测\n",
|
||
" y_pred = model.predict(x)\n",
|
||
"\n",
|
||
" # 计算准确率\n",
|
||
" accuracy = accuracy_score(y, y_pred)\n",
|
||
" print(f'模型准确率: {accuracy:.2f}')\n",
|
||
"\n",
|
||
" # 绘制决策边界\n",
|
||
" # 创建网格以绘制决策边界\n",
|
||
" xx, yy = np.meshgrid(np.arange(x[:, 0].min() - 1, x[:, 0].max() + 1, 0.01),\n",
|
||
" np.arange(x[:, 1].min() - 1, x[:, 1].max() + 1, 0.01))\n",
|
||
"\n",
|
||
" # 预测网格点的类别\n",
|
||
" Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n",
|
||
" Z = Z.reshape(xx.shape)\n",
|
||
"\n",
|
||
" # 绘制决策边界\n",
|
||
" plt.contourf(xx, yy, Z, alpha=0.8, cmap='viridis')\n",
|
||
" plt.scatter(x[:, 0], x[:, 1], c=y, edgecolors='k', marker='o', cmap='viridis')\n",
|
||
" plt.xlabel('Feature 1')\n",
|
||
" plt.ylabel('Feature 2')\n",
|
||
" plt.title('Logistic Regression Decision Boundary')\n",
|
||
" plt.show()\n",
|
||
"\n",
|
||
"# 调用函数\n",
|
||
"logistic_regression_analysis(x, y)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 手动实现\n",
|
||
"$$\n",
|
||
"f_{\\vec{w},b}(x) = g(z) = \\frac{1}{1 + e^{-(\\vec{w} \\cdot \\vec{x} + b)}}\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"J(\\vec{w},b) = -\\frac{1}{m} \\sum_{i=1}^{m} \\left( y^{(i)}\\ln(f_{\\vec{w},b}(x^{(i)})) + (1-y^{(i)})\\ln(1-f_{\\vec{w},b}(x^{(i)})) \\right)\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"w_j = w_j - \\alpha \\frac{\\partial J(\\vec{w},b)}{\\partial w_j}\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"b = b - \\alpha \\frac{\\partial J(\\vec{w},b)}{\\partial b}\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"\\frac{\\partial J(\\vec{w},b)}{\\partial w_j} = \\frac{1}{m} \\sum_{i=1}^{m} (f_{\\vec{w},b}(x^{(i)}) - y^{(i)})x_j^{(i)}\n",
|
||
"$$\n",
|
||
"\n",
|
||
"$$\n",
|
||
"\\frac{\\partial J(\\vec{w},b)}{\\partial b} = \\frac{1}{m} \\sum_{i=1}^{m} (f_{\\vec{w},b}(x^{(i)}) - y^{(i)})\n",
|
||
"$$"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def compute_cost_vectorized(w, b):\n",
|
||
" m = x.shape[0]\n",
|
||
" z = np.dot(x, w) + b\n",
|
||
" f_wb = sigmoid(z)\n",
|
||
" cost = (-1/m) * np.sum(y * np.log(f_wb) + (1 - y) * np.log(1 - f_wb))\n",
|
||
" return cost\n",
|
||
" \n",
|
||
"def sigmoid(z):\n",
|
||
" return 1/(1+np.exp(-z))\n",
|
||
" \n",
|
||
"def gradient_descent(w, b, alpha, num_iterations):\n",
|
||
" m = len(x)\n",
|
||
" for i in range(num_iterations):\n",
|
||
" z = np.dot(w, x.T) + b\n",
|
||
" f_wb = sigmoid(z)\n",
|
||
" w -= alpha * 1/m * np.dot(x.T, (f_wb-y))\n",
|
||
" b -= alpha * 1/m * np.sum(f_wb-y)\n",
|
||
" return w, b"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"if __name__ == \"__main__\":\n",
|
||
" w = np.zeros(x.shape[1])\n",
|
||
" b = 0\n",
|
||
" alpha = 0.04\n",
|
||
" num_iterations = 50000\n",
|
||
" w, b = gradient_descent(w, b, alpha, num_iterations)\n",
|
||
" print(w, b)\n",
|
||
" loss = compute_cost_vectorized(w, b)\n",
|
||
" print(loss)\n",
|
||
" \n",
|
||
" plt.figure(dpi=600)\n",
|
||
" # 绘制数据点\n",
|
||
" plt.scatter(x[:, 0], x[:, 1], c=y, cmap='viridis', edgecolors='k')\n",
|
||
"\n",
|
||
" # 计算决策边界\n",
|
||
" x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1\n",
|
||
" y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1\n",
|
||
" xx = np.linspace(x_min, x_max, 100)\n",
|
||
" # 计算对应的y值: w1*x + w2*y + b = 0 => y = -(w1*x + b)/w2\n",
|
||
" if w[1] != 0:\n",
|
||
" yy = -(w[0] * xx + b) / w[1]\n",
|
||
" plt.plot(xx, yy, color='red', label='Decision Boundary')\n",
|
||
" else:\n",
|
||
" # 当w2=0时,决策边界为垂直线x = -b/w1\n",
|
||
" x_boundary = -b / w[0]\n",
|
||
" plt.axvline(x=x_boundary, color='red', label='Decision Boundary')\n",
|
||
"\n",
|
||
" plt.xlabel('Feature 1')\n",
|
||
" plt.ylabel('Feature 2')\n",
|
||
" plt.title('Logistic Regression Decision Boundary')\n",
|
||
" plt.legend()\n",
|
||
" plt.show()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "pt",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.14"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|