{ "cells": [ { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "# 引入库\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import os\n", "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 检查os位置\n", "print(os.getcwd())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "生成数据方式\n", "$$ x_1 = x, x_2 = x^2, x_3 = \\sqrt{x} $$\n", "$$ y = 1.35x_1 + 0.75x_2 + 2.1x_3 + 2.89 $$" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "# 生成数据\n", "def generate_data():\n", " w = np.array([1.35, 0.75, 2.1]) # 权重\n", " b = 2.89 # 偏置\n", " x_min = 1\n", " x_max = 9\n", " x = np.linspace(x_min, x_max, 10) # 均匀分布\n", " X = np.array([x, x**2, np.sqrt(x)]) # 特征矩阵3x10\n", " y = np.dot(w, X) + b # 1x10 一维向量不区分行向量和列向量\n", " y += np.random.normal(scale=0.5, size=y.shape)\n", " data = np.column_stack((X.T, y)) # 10x4\n", " return data\n", "\n", "# 保存数据\n", "def save_data(filename, data):\n", " np.savetxt(filename, data, delimiter=',')\n", " print(f\"{filename} 已成功创建并写入数据。\")\n", "\n", "# 生成并保存数据\n", "data = generate_data()\n", "#save_data('./1_data.txt', data)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "# 读取数据\n", "#points = np.genfromtxt(\"./1_data.txt\", delimiter=',')\n", "\n", "scaler = StandardScaler()\n", "points = data\n", "\n", "m = len(points[:,0])\n", "x = points[:, :3] # 10x3\n", "y = points[:,3] # 1x10" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "原函数:\n", "$$\n", "\\vec{w} = {\\begin{bmatrix} w_1 & w_2 & w_3 & \\cdots & w_n \\end{bmatrix}}^T\n", "$$\n", "\n", "$$\n", "\\vec{x} = \\begin{bmatrix} x_1 & x_2 & x_3 & \\cdots & x_n \\end{bmatrix}\n", "$$\n", "\n", "$$\n", "f_{\\vec{w} \\cdot,b}(\\vec{x}) = \\vec{w} \\cdot \\vec{x} + b\n", "$$\n", "\n", "损失函数: \n", "\n", "$$\n", "\\text{MSE} = \\frac{1}{2m} \\sum_{i=1}^{m} \\left( y^{(i)} - \\hat{y}^{(i)} \\right)^2\n", "$$\n", "\n", "梯度下降:\n", "\n", "分别对每个w和b求偏导数,然后更新w和b\n", " " ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "# 定义损失函数\n", "def compute_loss(w, b):\n", " return np.sum((y - (np.dot(w, x.T) + b)) ** 2) / (2 * m) # w 1x3 x.T 3x10 y 1x10 y-np.dot(w, x.T) 1x10 sum=number\n", "\n", "# 定义梯度下降\n", "def gradient_descent(w, b, alpha, num_iter):\n", " loss_history = []\n", " for _ in range(num_iter):\n", " error = y - np.dot(w, x.T) - b # 1x10\n", " # 计算梯度\n", " dw = -np.dot(x.T , error) / m # dw 1x3 \n", " db = -np.sum(error) / m # db 1x1\n", " # 更新w和b\n", " w -= alpha * dw\n", " b -= alpha * db\n", " loss_history.append(compute_loss(w, b))\n", " return w, b, loss_history" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "if __name__ == \"__main__\":\n", " # 初始化w和b\n", " w = np.zeros(x.shape[1])\n", " b = 0.0\n", " # 设置学习率\n", " alpha = 0.01\n", " # 设置迭代次数\n", " num_iter = 10000\n", " x = scaler.fit_transform(x) # 特征缩放\n", " # 进行梯度下降\n", " w, b, loss_history = gradient_descent(w, b, alpha, num_iter)\n", " print(\"w:\", w)\n", " print(\"b:\", b)\n", " \n", " # 计算损失\n", " loss = compute_loss(w, b)\n", " print(\"loss:\", loss)\n", "\n", " # 绘制\n", " plt.figure(dpi=600)\n", " plt.plot(range(num_iter), loss_history)\n", " plt.xlabel('Iteration')\n", " plt.ylabel('Loss')\n", " plt.title('Loss History')\n", " # 显示学习率\n", " plt.text(num_iter * 0.6, max(loss_history) * 0.7, f'Learning Rate: 
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prediction\n",
    "def predict(x, w, b):\n",
    "    x_scaled = scaler.transform(x)\n",
    "    y = np.dot(w, x_scaled.T) + b\n",
    "    return y\n",
    "\n",
    "def original_predict(x):\n",
    "    w = np.array([1.35, 0.75, 2.1])\n",
    "    b = 2.89\n",
    "    return np.dot(w, x.T) + b\n",
    "\n",
    "x_new = np.array([\n",
    "    [2, 4, np.sqrt(2)],\n",
    "    [3, 9, np.sqrt(3)],\n",
    "    [4, 16, np.sqrt(4)]\n",
    "])\n",
    "y_pred = predict(x_new, w, b)\n",
    "print(\"Predicted values:\", y_pred)\n",
    "y_pred_original = original_predict(x_new)\n",
    "print(\"True (noise-free) values:\", y_pred_original)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Mistakes encountered while coding\n",
    "\n",
    "In the gradient descent routine, `x.T` and `error` were multiplied element-wise; matrix multiplication (`np.dot`) is required.\n",
    "\n",
    "Before feature scaling was added, a large learning rate caused overflow and the model failed to converge.\n",
    "\n",
    "After adding feature scaling, a coding mistake scaled the targets as well, so later predictions were wrong."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "model = make_pipeline(LinearRegression())\n",
    "# Fit the model (x has already been standardized above)\n",
    "model.fit(x, y)\n",
    "\n",
    "# Predict on the new points\n",
    "y_pred_sklearn = model.predict(scaler.transform(x_new))\n",
    "print(\"sklearn predictions:\", y_pred_sklearn)\n",
    "y_pred_original = original_predict(x_new)\n",
    "print(\"True (noise-free) values:\", y_pred_original)\n",
    "# MSE between the sklearn predictions and the noise-free function on the new points\n",
    "mse_new = mean_squared_error(y_pred_original, y_pred_sklearn)\n",
    "print(\"MSE on the new points:\", mse_new)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pt",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}