241005
@@ -39,7 +39,7 @@ def gradient_descent_runner(points, starting_b, starting_w, learningRate, num_it
|
||||
|
||||
def run():
|
||||
points_np = np.genfromtxt("data1.csv", delimiter=',').astype(np.float32)
|
||||
points = torch.tensor(points_np, device='cuda:5')
|
||||
points = torch.tensor(points_np, device='cuda')
|
||||
learning_rate = 0.0001
|
||||
initial_b = 0.0
|
||||
initial_w = 0.0
|
||||
|
||||
BIN
linear regression/m1.pth
Normal file
76
linear regression/m1.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Select the compute device: Apple's Metal Performance Shaders (MPS) backend
# when it is available, otherwise fall back to the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"使用设备: {device}")

# Generate synthetic sample data following y = 3x + 2 + noise.
torch.manual_seed(0)  # fixed seed so the generated noise is reproducible
X = torch.linspace(-10, 10, steps=100).reshape(-1, 1)  # 100 inputs as one column
y = 3 * X + 2 + torch.randn(X.size()) * 2  # standard-normal noise scaled by 2

# Wrap the tensors in a dataset and a shuffling loader with mini-batches of 10.
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=10, shuffle=True)
|
||||
|
||||
|
||||
# Linear regression model definition.
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression built from one ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(1, 1)  # input and output are both 1-dimensional

    def forward(self, x):
        """Return the output of the linear layer applied to ``x``."""
        return self.linear(x)
|
||||
|
||||
|
||||
# Instantiate the model and move it to the selected device.
model = LinearRegressionModel().to(device)

# Loss function and optimizer.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model.
num_epochs = 100
for epoch in range(num_epochs):
    for batch_X, batch_y in dataloader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        # Forward pass.
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report every 10th epoch. `loss` is the loss of the last mini-batch
    # processed in this epoch, not an average over the epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

# Save the model parameters. Only the state_dict is written, not the module
# object itself, so the printed message says "parameters" rather than
# "entire model".
torch.save(model.state_dict(), 'm1.pth')
print("模型参数已保存为 m1.pth")

# Evaluate the model on an evenly spaced grid with gradients disabled.
model.eval()
with torch.no_grad():
    X_test = torch.linspace(-10, 10, steps=100).reshape(-1, 1).to(device)
    y_pred = model(X_test).cpu()

# Plot the noisy training data together with the fitted line.
plt.scatter(X.numpy(), y.numpy(), label='真实数据')
plt.plot(X_test.cpu().numpy(), y_pred.numpy(), color='red', label='预测线')
plt.legend()
plt.xlabel('X')
plt.ylabel('y')
plt.title('线性回归结果')
plt.show()
|
||||
56
linear regression/m1test.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Linear regression model structure (the saved state_dict is loaded into an
# instance of this class below).
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression built from one ``nn.Linear(1, 1)`` layer."""

    def __init__(self):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(1, 1)  # input and output are both 1-dimensional

    def forward(self, x):
        """Return the output of the linear layer applied to ``x``."""
        return self.linear(x)
|
||||
|
||||
|
||||
def main():
    """Load the parameters stored in ``m1.pth`` and plot the model's predictions.

    Prints the selected device, the checkpoint size in bytes and the number
    of model parameters, then shows a matplotlib figure comparing the
    prediction line with the line y = 3x + 2.
    """
    import os  # local import: only needed for the checkpoint size report

    # Use Apple's Metal Performance Shaders (MPS) backend when available,
    # otherwise fall back to the CPU.
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"使用设备: {device}")

    # Instantiate the model and load the saved parameters.
    model = LinearRegressionModel().to(device)
    # map_location remaps tensors that were saved on another device onto the
    # device selected above, so the checkpoint also loads on CPU-only machines.
    model.load_state_dict(torch.load('m1.pth', map_location=device))
    size = os.path.getsize('m1.pth')
    print(f"模型文件大小: {size} 字节")
    model.eval()

    # Report the model size as the total number of parameter elements.
    model_size = sum(p.numel() for p in model.parameters())
    print(f"模型大小: {model_size} 个参数")
    print("模型参数已加载")

    # Generate test inputs.
    X_test = torch.linspace(-10, 10, steps=100).reshape(-1, 1).to(device)

    # Predict with the loaded model; no gradients are needed for inference.
    with torch.no_grad():
        y_pred = model(X_test).cpu()

    # Move the test inputs to the CPU and convert everything to NumPy arrays.
    X_test_numpy = X_test.cpu().numpy()
    y_pred_numpy = y_pred.numpy()

    # Visualise the predictions against the line y = 3x + 2.
    plt.scatter(X_test_numpy, 3 * X_test_numpy + 2, label='真实线性关系', color='blue')
    plt.plot(X_test_numpy, y_pred_numpy, color='red', label='模型预测线')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('y')
    plt.title('加载模型后的线性回归预测结果')
    plt.show()


if __name__ == "__main__":
    main()
|
||||
9
mlcode/22.py
Normal file
@@ -0,0 +1,9 @@
|
||||
import numpy as np
|
||||
|
||||
# Linear model f = w . x + b evaluated for a single example.
w = np.array([1, 2, 3])  # weight vector
b = 4  # scalar bias
x = np.array([10, 20, 30])  # input vector, same length as w

f = np.dot(w, x) + b  # dot product of w and x, plus the bias

print(f)
|
||||
@@ -1,52 +0,0 @@
|
||||
# Line properties
|
||||
lines.linewidth: 4
|
||||
lines.solid_capstyle: butt
|
||||
|
||||
# Legend properties
|
||||
legend.fancybox: true
|
||||
|
||||
# Color and cycle properties
|
||||
axes.prop_cycle: cycler('color', ['#0096FF', '#FF9300', '#FF40FF', '#7030A0', '#C00000'])
|
||||
axes.facecolor: '#ffffff' # white
|
||||
axes.labelsize: large
|
||||
axes.axisbelow: true
|
||||
axes.grid: False
|
||||
axes.edgecolor: '#f0f0f0'
|
||||
axes.linewidth: 3.0
|
||||
axes.titlesize: x-large
|
||||
|
||||
# Patch properties
|
||||
patch.edgecolor: '#f0f0f0'
|
||||
patch.linewidth: 0.5
|
||||
|
||||
# SVG properties
|
||||
svg.fonttype: path
|
||||
|
||||
# Grid properties
|
||||
grid.linestyle: '-'
|
||||
grid.linewidth: 1.0
|
||||
grid.color: '#cbcbcb'
|
||||
|
||||
# Ticks properties
|
||||
xtick.major.size: 0
|
||||
xtick.minor.size: 0
|
||||
ytick.major.size: 0
|
||||
ytick.minor.size: 0
|
||||
|
||||
# Savefig properties
|
||||
savefig.edgecolor: '#f0f0f0'
|
||||
savefig.facecolor: '#f0f0f0'
|
||||
|
||||
# Figure properties
|
||||
figure.facecolor: '#ffffff' # white
|
||||
|
||||
# Font properties
|
||||
font.family: sans-serif
|
||||
font.style: normal
|
||||
font.variant: normal
|
||||
font.weight: normal
|
||||
font.stretch: normal
|
||||
font.size: 8.0
|
||||
|
||||
# Text properties
|
||||
text.color: black
|
||||
916
week2/C1_W2_Lab01_Python_Numpy_Vectorization_Soln.ipynb
Normal file
@@ -0,0 +1,916 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Optional Lab: Python, NumPy and Vectorization\n",
|
||||
"A brief introduction to some of the scientific computing used in this course. In particular the NumPy scientific computing package and its use with python.\n",
|
||||
"\n",
|
||||
"# Outline\n",
|
||||
"- [ 1.1 Goals](#toc_40015_1.1)\n",
|
||||
"- [ 1.2 Useful References](#toc_40015_1.2)\n",
|
||||
"- [2 Python and NumPy <a name='Python and NumPy'></a>](#toc_40015_2)\n",
|
||||
"- [3 Vectors](#toc_40015_3)\n",
|
||||
"- [ 3.1 Abstract](#toc_40015_3.1)\n",
|
||||
"- [ 3.2 NumPy Arrays](#toc_40015_3.2)\n",
|
||||
"- [ 3.3 Vector Creation](#toc_40015_3.3)\n",
|
||||
"- [ 3.4 Operations on Vectors](#toc_40015_3.4)\n",
|
||||
"- [4 Matrices](#toc_40015_4)\n",
|
||||
"- [ 4.1 Abstract](#toc_40015_4.1)\n",
|
||||
"- [ 4.2 NumPy Arrays](#toc_40015_4.2)\n",
|
||||
"- [ 4.3 Matrix Creation](#toc_40015_4.3)\n",
|
||||
"- [ 4.4 Operations on Matrices](#toc_40015_4.4)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np # it is an unofficial standard to use np for numpy\n",
|
||||
"import time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_1.1\"></a>\n",
|
||||
"## 1.1 Goals\n",
|
||||
"In this lab, you will:\n",
|
||||
"- Review the features of NumPy and Python that are used in Course 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_1.2\"></a>\n",
|
||||
"## 1.2 Useful References\n",
|
||||
"- NumPy Documentation including a basic introduction: [NumPy.org](https://NumPy.org/doc/stable/)\n",
|
||||
"- A challenging feature topic: [NumPy Broadcasting](https://NumPy.org/doc/stable/user/basics.broadcasting.html)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_2\"></a>\n",
|
||||
"# 2 Python and NumPy <a name='Python and NumPy'></a>\n",
|
||||
"Python is the programming language we will be using in this course. It has a set of numeric data types and arithmetic operations. NumPy is a library that extends the base capabilities of python to add a richer data set including more numeric types, vectors, matrices, and many matrix functions. NumPy and python work together fairly seamlessly. Python arithmetic operators work on NumPy data types and many NumPy functions will accept python data types.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3\"></a>\n",
|
||||
"# 3 Vectors\n",
|
||||
"<a name=\"toc_40015_3.1\"></a>\n",
|
||||
"## 3.1 Abstract\n",
|
||||
"<img align=\"right\" src=\"./images/C1_W2_Lab04_Vectors.PNG\" style=\"width:340px;\" >Vectors, as you will use them in this course, are ordered arrays of numbers. In notation, vectors are denoted with lower case bold letters such as $\\mathbf{x}$. The elements of a vector are all the same type. A vector does not, for example, contain both characters and numbers. The number of elements in the array is often referred to as the *dimension* though mathematicians may prefer *rank*. The vector shown has a dimension of $n$. The elements of a vector can be referenced with an index. In math settings, indexes typically run from 1 to n. In computer science and these labs, indexing will typically run from 0 to n-1. In notation, elements of a vector, when referenced individually will indicate the index in a subscript, for example, the $0^{th}$ element, of the vector $\\mathbf{x}$ is $x_0$. Note, the x is not bold in this case. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.2\"></a>\n",
|
||||
"## 3.2 NumPy Arrays\n",
|
||||
"\n",
|
||||
"NumPy's basic data structure is an indexable, n-dimensional *array* containing elements of the same type (`dtype`). Right away, you may notice we have overloaded the term 'dimension'. Above, it was the number of elements in the vector, here, dimension refers to the number of indexes of an array. A one-dimensional or 1-D array has one index. In Course 1, we will represent vectors as NumPy 1-D arrays. \n",
|
||||
"\n",
|
||||
" - 1-D array, shape (n,): n elements indexed [0] through [n-1]\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.3\"></a>\n",
|
||||
"## 3.3 Vector Creation\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Data creation routines in NumPy will generally have a first parameter which is the shape of the object. This can either be a single value for a 1-D result or a tuple (n,m,...) specifying the shape of the result. Below are examples of creating vectors using these routines."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"np.zeros(4) : a = [0. 0. 0. 0.], a shape = (4,), a data type = float64\n",
|
||||
"np.zeros(4,) : a = [0. 0. 0. 0.], a shape = (4,), a data type = float64\n",
|
||||
"np.random.random_sample(4): a = [0.04076162 0.01386783 0.16757523 0.9833998 ], a shape = (4,), a data type = float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill arrays with value\n",
|
||||
"a = np.zeros(4); print(f\"np.zeros(4) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.zeros((4,)); print(f\"np.zeros(4,) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.random.random_sample(4); print(f\"np.random.random_sample(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Some data creation routines do not take a shape tuple:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"np.arange(4.): a = [0. 1. 2. 3.], a shape = (4,), a data type = float64\n",
|
||||
"np.random.rand(4): a = [0.37476357 0.06946621 0.79627083 0.96134986], a shape = (4,), a data type = float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill arrays with value but do not accept shape as input argument\n",
|
||||
"a = np.arange(4.); print(f\"np.arange(4.): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.random.rand(4); print(f\"np.random.rand(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Values can be specified manually as well. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"np.array([5,4,3,2]): a = [5 4 3 2], a shape = (4,), a data type = int64\n",
|
||||
"np.array([5.,4,3,2]): a = [5. 4. 3. 2.], a shape = (4,), a data type = float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill with user specified values\n",
|
||||
"a = np.array([5,4,3,2]); print(f\"np.array([5,4,3,2]): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.array([5.,4,3,2]); print(f\"np.array([5.,4,3,2]): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These have all created a one-dimensional vector `a` with four elements. `a.shape` returns the dimensions. Here we see a.shape = `(4,)` indicating a 1-d array with 4 elements. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4\"></a>\n",
|
||||
"## 3.4 Operations on Vectors\n",
|
||||
"Let's explore some operations using vectors.\n",
|
||||
"<a name=\"toc_40015_3.4.1\"></a>\n",
|
||||
"### 3.4.1 Indexing\n",
|
||||
"Elements of vectors can be accessed via indexing and slicing. NumPy provides a very complete set of indexing and slicing capabilities. We will explore only the basics needed for the course here. Reference [Slicing and Indexing](https://NumPy.org/doc/stable/reference/arrays.indexing.html) for more details. \n",
|
||||
"**Indexing** means referring to *an element* of an array by its position within the array. \n",
|
||||
"**Slicing** means getting a *subset* of elements from an array based on their indices. \n",
|
||||
"NumPy starts indexing at zero so the 3rd element of a vector $\\mathbf{a}$ is `a[2]`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0 1 2 3 4 5 6 7 8 9]\n",
|
||||
"a[2].shape: () a[2] = 2, Accessing an element returns a scalar\n",
|
||||
"a[-1] = 9\n",
|
||||
"The error message you'll see is:\n",
|
||||
"index 10 is out of bounds for axis 0 with size 10\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#vector indexing operations on 1-D vectors\n",
|
||||
"a = np.arange(10)\n",
|
||||
"print(a)\n",
|
||||
"\n",
|
||||
"#access an element\n",
|
||||
"print(f\"a[2].shape: {a[2].shape} a[2] = {a[2]}, Accessing an element returns a scalar\")\n",
|
||||
"\n",
|
||||
"# access the last element, negative indexes count from the end\n",
|
||||
"print(f\"a[-1] = {a[-1]}\")\n",
|
||||
"\n",
|
||||
"#indexes must be within the range of the vector or they will produce an error\n",
|
||||
"try:\n",
|
||||
" c = a[10]\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"The error message you'll see is:\")\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.2\"></a>\n",
|
||||
"### 3.4.2 Slicing\n",
|
||||
"Slicing creates an array of indices using a set of three values (`start:stop:step`). A subset of values is also valid. Its use is best explained by example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a = [0 1 2 3 4 5 6 7 8 9]\n",
|
||||
"a[2:7:1] = [2 3 4 5 6]\n",
|
||||
"a[2:7:2] = [2 4 6]\n",
|
||||
"a[3:] = [3 4 5 6 7 8 9]\n",
|
||||
"a[:3] = [0 1 2]\n",
|
||||
"a[:] = [0 1 2 3 4 5 6 7 8 9]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#vector slicing operations\n",
|
||||
"a = np.arange(10)\n",
|
||||
"print(f\"a = {a}\")\n",
|
||||
"\n",
|
||||
"#access 5 consecutive elements (start:stop:step)\n",
|
||||
"c = a[2:7:1]; print(\"a[2:7:1] = \", c)\n",
|
||||
"\n",
|
||||
"# access 3 elements separated by two \n",
|
||||
"c = a[2:7:2]; print(\"a[2:7:2] = \", c)\n",
|
||||
"\n",
|
||||
"# access all elements index 3 and above\n",
|
||||
"c = a[3:]; print(\"a[3:] = \", c)\n",
|
||||
"\n",
|
||||
"# access all elements below index 3\n",
|
||||
"c = a[:3]; print(\"a[:3] = \", c)\n",
|
||||
"\n",
|
||||
"# access all elements\n",
|
||||
"c = a[:]; print(\"a[:] = \", c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.3\"></a>\n",
|
||||
"### 3.4.3 Single vector operations\n",
|
||||
"There are a number of useful operations that involve operations on a single vector."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a : [1 2 3 4]\n",
|
||||
"b = -a : [-1 -2 -3 -4]\n",
|
||||
"b = np.sum(a) : 10\n",
|
||||
"b = np.mean(a): 2.5\n",
|
||||
"b = a**2 : [ 1 4 9 16]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.array([1,2,3,4])\n",
|
||||
"print(f\"a : {a}\")\n",
|
||||
"# negate elements of a\n",
|
||||
"b = -a \n",
|
||||
"print(f\"b = -a : {b}\")\n",
|
||||
"\n",
|
||||
"# sum all elements of a, returns a scalar\n",
|
||||
"b = np.sum(a) \n",
|
||||
"print(f\"b = np.sum(a) : {b}\")\n",
|
||||
"\n",
|
||||
"b = np.mean(a)\n",
|
||||
"print(f\"b = np.mean(a): {b}\")\n",
|
||||
"\n",
|
||||
"b = a**2\n",
|
||||
"print(f\"b = a**2 : {b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.4\"></a>\n",
|
||||
"### 3.4.4 Vector Vector element-wise operations\n",
|
||||
"Most of the NumPy arithmetic, logical and comparison operations apply to vectors as well. These operators work on an element-by-element basis. For example \n",
|
||||
"$$ c_i = a_i + b_i $$"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Binary operators work element wise: [0 0 6 8]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.array([ 1, 2, 3, 4])\n",
|
||||
"b = np.array([-1,-2, 3, 4])\n",
|
||||
"print(f\"Binary operators work element wise: {a + b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Of course, for this to work correctly, the vectors must be of the same size:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The error message you'll see is:\n",
|
||||
"operands could not be broadcast together with shapes (4,) (2,) \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#try a mismatched vector operation\n",
|
||||
"c = np.array([1, 2])\n",
|
||||
"try:\n",
|
||||
" d = a + c\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"The error message you'll see is:\")\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.5\"></a>\n",
|
||||
"### 3.4.5 Scalar Vector operations\n",
|
||||
"Vectors can be 'scaled' by scalar values. A scalar value is just a number. The scalar multiplies all the elements of the vector."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"b = 5 * a : [ 5 10 15 20]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.array([1, 2, 3, 4])\n",
|
||||
"\n",
|
||||
"# multiply a by a scalar\n",
|
||||
"b = 5 * a \n",
|
||||
"print(f\"b = 5 * a : {b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.6\"></a>\n",
|
||||
"### 3.4.6 Vector Vector dot product\n",
|
||||
"The dot product is a mainstay of Linear Algebra and NumPy. This is an operation used extensively in this course and should be well understood. The dot product is shown below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"./images/C1_W2_Lab04_dot_notrans.gif\" width=800> "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The dot product multiplies the values in two vectors element-wise and then sums the result.\n",
|
||||
"Vector dot product requires the dimensions of the two vectors to be the same. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's implement our own version of the dot product below:\n",
|
||||
"\n",
|
||||
"**Using a for loop**, implement a function which returns the dot product of two vectors. The function to return given inputs $a$ and $b$:\n",
|
||||
"$$ x = \\sum_{i=0}^{n-1} a_i b_i $$\n",
|
||||
"Assume both `a` and `b` are the same shape."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def my_dot(a, b): \n",
|
||||
" \"\"\"\n",
|
||||
" Compute the dot product of two vectors\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" a (ndarray (n,)): input vector \n",
|
||||
" b (ndarray (n,)): input vector with same dimension as a\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" x (scalar): \n",
|
||||
" \"\"\"\n",
|
||||
" x=0\n",
|
||||
" for i in range(a.shape[0]):\n",
|
||||
" x = x + a[i] * b[i]\n",
|
||||
" return x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"my_dot(a, b) = 24\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# test 1-D\n",
|
||||
"a = np.array([1, 2, 3, 4])\n",
|
||||
"b = np.array([-1, 4, 3, 2])\n",
|
||||
"print(f\"my_dot(a, b) = {my_dot(a, b)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note, the dot product is expected to return a scalar value. \n",
|
||||
"\n",
|
||||
"Let's try the same operations using `np.dot`. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"NumPy 1-D np.dot(a, b) = 24, np.dot(a, b).shape = () \n",
|
||||
"NumPy 1-D np.dot(b, a) = 24, np.dot(a, b).shape = () \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# test 1-D\n",
|
||||
"a = np.array([1, 2, 3, 4])\n",
|
||||
"b = np.array([-1, 4, 3, 2])\n",
|
||||
"c = np.dot(a, b)\n",
|
||||
"print(f\"NumPy 1-D np.dot(a, b) = {c}, np.dot(a, b).shape = {c.shape} \") \n",
|
||||
"c = np.dot(b, a)\n",
|
||||
"print(f\"NumPy 1-D np.dot(b, a) = {c}, np.dot(a, b).shape = {c.shape} \")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Above, you will note that the results for 1-D matched our implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.7\"></a>\n",
|
||||
"### 3.4.7 The Need for Speed: vector vs for loop\n",
|
||||
"We utilized the NumPy library because it improves speed and memory efficiency. Let's demonstrate:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"np.dot(a, b) = 2501072.5817\n",
|
||||
"Vectorized version duration: 197.7768 ms \n",
|
||||
"my_dot(a, b) = 2501072.5817\n",
|
||||
"loop version duration: 8699.8875 ms \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.random.seed(1)\n",
|
||||
"a = np.random.rand(10000000) # very large arrays\n",
|
||||
"b = np.random.rand(10000000)\n",
|
||||
"\n",
|
||||
"tic = time.time() # capture start time\n",
|
||||
"c = np.dot(a, b)\n",
|
||||
"toc = time.time() # capture end time\n",
|
||||
"\n",
|
||||
"print(f\"np.dot(a, b) = {c:.4f}\")\n",
|
||||
"print(f\"Vectorized version duration: {1000*(toc-tic):.4f} ms \")\n",
|
||||
"\n",
|
||||
"tic = time.time() # capture start time\n",
|
||||
"c = my_dot(a,b)\n",
|
||||
"toc = time.time() # capture end time\n",
|
||||
"\n",
|
||||
"print(f\"my_dot(a, b) = {c:.4f}\")\n",
|
||||
"print(f\"loop version duration: {1000*(toc-tic):.4f} ms \")\n",
|
||||
"\n",
|
||||
"del(a);del(b) #remove these big arrays from memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"So, vectorization provides a large speed up in this example. This is because NumPy makes better use of available data parallelism in the underlying hardware. GPUs and modern CPUs implement Single Instruction, Multiple Data (SIMD) pipelines allowing multiple operations to be issued in parallel. This is critical in Machine Learning where the data sets are often very large."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_12345_3.4.8\"></a>\n",
|
||||
"### 3.4.8 Vector Vector operations in Course 1\n",
|
||||
"Vector Vector operations will appear frequently in course 1. Here is why:\n",
|
||||
"- Going forward, our examples will be stored in an array, `X_train` of dimension (m,n). This will be explained more in context, but here it is important to note it is a 2 Dimensional array or matrix (see next section on matrices).\n",
|
||||
"- `w` will be a 1-dimensional vector of shape (n,).\n",
|
||||
"- we will perform operations by looping through the examples, extracting each example to work on individually by indexing X. For example:`X[i]`\n",
|
||||
"- `X[i]` returns a value of shape (n,), a 1-dimensional vector. Consequently, operations involving `X[i]` are often vector-vector. \n",
|
||||
"\n",
|
||||
"That is a somewhat lengthy explanation, but aligning and understanding the shapes of your operands is important when performing vector operations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"X[1] has shape (1,)\n",
|
||||
"w has shape (1,)\n",
|
||||
"c has shape ()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# show common Course 1 example\n",
|
||||
"X = np.array([[1],[2],[3],[4]])\n",
|
||||
"w = np.array([2])\n",
|
||||
"c = np.dot(X[1], w)\n",
|
||||
"\n",
|
||||
"print(f\"X[1] has shape {X[1].shape}\")\n",
|
||||
"print(f\"w has shape {w.shape}\")\n",
|
||||
"print(f\"c has shape {c.shape}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4\"></a>\n",
|
||||
"# 4 Matrices\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.1\"></a>\n",
|
||||
"## 4.1 Abstract\n",
|
||||
"Matrices are two-dimensional arrays. The elements of a matrix are all of the same type. In notation, matrices are denoted with capital, bold letters such as $\\mathbf{X}$. In this and other labs, `m` is often the number of rows and `n` the number of columns. The elements of a matrix can be referenced with a two-dimensional index. In math settings, numbers in the index typically run from 1 to n. In computer science and these labs, indexing will run from 0 to n-1.  \n",
|
||||
"<figure>\n",
|
||||
" <center> <img src=\"./images/C1_W2_Lab04_Matrices.PNG\" alt='missing' width=900></center>\n",
|
||||
" <figcaption> Generic Matrix Notation, 1st index is row, 2nd is column </figcaption>\n",
|
||||
"</figure>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.2\"></a>\n",
|
||||
"## 4.2 NumPy Arrays\n",
|
||||
"\n",
|
||||
"NumPy's basic data structure is an indexable, n-dimensional *array* containing elements of the same type (`dtype`). These were described earlier. Matrices have a two-dimensional (2-D) index [m,n].\n",
|
||||
"\n",
|
||||
"In Course 1, 2-D matrices are used to hold training data. Training data is $m$ examples by $n$ features creating an (m,n) array. Course 1 does not do operations directly on matrices but typically extracts an example as a vector and operates on that. Below you will review: \n",
|
||||
"- data creation\n",
|
||||
"- slicing and indexing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.3\"></a>\n",
|
||||
"## 4.3 Matrix Creation\n",
|
||||
"The same functions that created 1-D vectors will create 2-D or n-D arrays. Here are some examples\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below, the shape tuple is provided to achieve a 2-D result. Notice how NumPy uses brackets to denote each dimension. Notice further that NumPy, when printing, will print one row per line.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a shape = (1, 5), a = [[0. 0. 0. 0. 0.]]\n",
|
||||
"a shape = (2, 1), a = [[0.]\n",
|
||||
" [0.]]\n",
|
||||
"a shape = (1, 1), a = [[0.44236513]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.zeros((1, 5)) \n",
|
||||
"print(f\"a shape = {a.shape}, a = {a}\") \n",
|
||||
"\n",
|
||||
"a = np.zeros((2, 1)) \n",
|
||||
"print(f\"a shape = {a.shape}, a = {a}\") \n",
|
||||
"\n",
|
||||
"a = np.random.random_sample((1, 1)) \n",
|
||||
"print(f\"a shape = {a.shape}, a = {a}\") "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"One can also manually specify data. Dimensions are specified with additional brackets matching the format in the printing above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" a shape = (3, 1), np.array: a = [[5]\n",
|
||||
" [4]\n",
|
||||
" [3]]\n",
|
||||
" a shape = (3, 1), np.array: a = [[5]\n",
|
||||
" [4]\n",
|
||||
" [3]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill with user specified values\n",
|
||||
"a = np.array([[5], [4], [3]]); print(f\" a shape = {a.shape}, np.array: a = {a}\")\n",
|
||||
"a = np.array([[5], # One can also\n",
|
||||
" [4], # separate values\n",
|
||||
" [3]]); #into separate rows\n",
|
||||
"print(f\" a shape = {a.shape}, np.array: a = {a}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.4\"></a>\n",
|
||||
"## 4.4 Operations on Matrices\n",
|
||||
"Let's explore some operations using matrices."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.4.1\"></a>\n",
|
||||
"### 4.4.1 Indexing\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Matrices include a second index. The two indexes describe [row, column]. Access can either return an element or a row/column. See below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a.shape: (3, 2), \n",
|
||||
"a= [[0 1]\n",
|
||||
" [2 3]\n",
|
||||
" [4 5]]\n",
|
||||
"\n",
|
||||
"a[2,0].shape: (), a[2,0] = 4, type(a[2,0]) = <class 'numpy.int64'> Accessing an element returns a scalar\n",
|
||||
"\n",
|
||||
"a[2].shape: (2,), a[2] = [4 5], type(a[2]) = <class 'numpy.ndarray'>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#vector indexing operations on matrices\n",
|
||||
"a = np.arange(6).reshape(-1, 2) #reshape is a convenient way to create matrices\n",
|
||||
"print(f\"a.shape: {a.shape}, \\na= {a}\")\n",
|
||||
"\n",
|
||||
"#access an element\n",
|
||||
"print(f\"\\na[2,0].shape: {a[2, 0].shape}, a[2,0] = {a[2, 0]}, type(a[2,0]) = {type(a[2, 0])} Accessing an element returns a scalar\\n\")\n",
|
||||
"\n",
|
||||
"#access a row\n",
|
||||
"print(f\"a[2].shape: {a[2].shape}, a[2] = {a[2]}, type(a[2]) = {type(a[2])}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is worth drawing attention to the last example. Accessing a matrix by just specifying the row will return a *1-D vector*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Reshape** \n",
|
||||
"The previous example used [reshape](https://numpy.org/doc/stable/reference/generated/numpy.reshape.html) to shape the array. \n",
|
||||
"`a = np.arange(6).reshape(-1, 2) ` \n",
|
||||
"This line of code first created a *1-D Vector* of six elements. It then reshaped that vector into a *2-D* array using the reshape command. This could have been written: \n",
|
||||
"`a = np.arange(6).reshape(3, 2) ` \n",
|
||||
"To arrive at the same 3 row, 2 column array.\n",
|
||||
"The -1 argument tells the routine to compute the number of rows given the size of the array and the number of columns.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.4.2\"></a>\n",
|
||||
"### 4.4.2 Slicing\n",
|
||||
"Slicing creates an array of indices using a set of three values (`start:stop:step`). A subset of values is also valid. Its use is best explained by example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a = \n",
|
||||
"[[ 0 1 2 3 4 5 6 7 8 9]\n",
|
||||
" [10 11 12 13 14 15 16 17 18 19]]\n",
|
||||
"a[0, 2:7:1] = [2 3 4 5 6] , a[0, 2:7:1].shape = (5,) a 1-D array\n",
|
||||
"a[:, 2:7:1] = \n",
|
||||
" [[ 2 3 4 5 6]\n",
|
||||
" [12 13 14 15 16]] , a[:, 2:7:1].shape = (2, 5) a 2-D array\n",
|
||||
"a[:,:] = \n",
|
||||
" [[ 0 1 2 3 4 5 6 7 8 9]\n",
|
||||
" [10 11 12 13 14 15 16 17 18 19]] , a[:,:].shape = (2, 10)\n",
|
||||
"a[1,:] = [10 11 12 13 14 15 16 17 18 19] , a[1,:].shape = (10,) a 1-D array\n",
|
||||
"a[1] = [10 11 12 13 14 15 16 17 18 19] , a[1].shape = (10,) a 1-D array\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#vector 2-D slicing operations\n",
|
||||
"a = np.arange(20).reshape(-1, 10)\n",
|
||||
"print(f\"a = \\n{a}\")\n",
|
||||
"\n",
|
||||
"#access 5 consecutive elements (start:stop:step)\n",
|
||||
"print(\"a[0, 2:7:1] = \", a[0, 2:7:1], \", a[0, 2:7:1].shape =\", a[0, 2:7:1].shape, \"a 1-D array\")\n",
|
||||
"\n",
|
||||
"#access 5 consecutive elements (start:stop:step) in two rows\n",
|
||||
"print(\"a[:, 2:7:1] = \\n\", a[:, 2:7:1], \", a[:, 2:7:1].shape =\", a[:, 2:7:1].shape, \"a 2-D array\")\n",
|
||||
"\n",
|
||||
"# access all elements\n",
|
||||
"print(\"a[:,:] = \\n\", a[:,:], \", a[:,:].shape =\", a[:,:].shape)\n",
|
||||
"\n",
|
||||
"# access all elements in one row (very common usage)\n",
|
||||
"print(\"a[1,:] = \", a[1,:], \", a[1,:].shape =\", a[1,:].shape, \"a 1-D array\")\n",
|
||||
"# same as\n",
|
||||
"print(\"a[1] = \", a[1], \", a[1].shape =\", a[1].shape, \"a 1-D array\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_5.0\"></a>\n",
|
||||
"## Congratulations!\n",
|
||||
"In this lab you mastered the features of Python and NumPy that are needed for Course 1."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"dl_toc_settings": {
|
||||
"rndtag": "40015"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
},
|
||||
"toc-autonumbering": false
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
737
week2/C1_W2_Lab02_Multiple_Variable_Soln.ipynb
Normal file
864
week2/C1_W2_Lab03_Feature_Scaling_and_Learning_Rate_Soln.ipynb
Normal file
505
week2/C1_W2_Lab04_FeatEng_PolyReg_Soln.ipynb
Normal file
277
week2/C1_W2_Lab05_Sklearn_GD_Soln.ipynb
Normal file
241
week2/C1_W2_Lab06_Sklearn_Normal_Soln.ipynb
Normal file
@@ -0,0 +1,241 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Optional Lab: Linear Regression using Scikit-Learn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There is an open-source, commercially usable machine learning toolkit called [scikit-learn](https://scikit-learn.org/stable/index.html). This toolkit contains implementations of many of the algorithms that you will work with in this course.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Goals\n",
|
||||
"In this lab you will:\n",
|
||||
"- Utilize scikit-learn to implement linear regression using a close form solution based on the normal equation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"You will utilize functions from scikit-learn as well as matplotlib and NumPy. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"from lab_utils_multi import load_house_data\n",
|
||||
"plt.style.use('./deeplearning.mplstyle')\n",
|
||||
"np.set_printoptions(precision=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40291_2\"></a>\n",
|
||||
"# Linear Regression, closed-form solution\n",
|
||||
"Scikit-learn has the [linear regression model](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression) which implements a closed-form linear regression.\n",
|
||||
"\n",
|
||||
"Let's use the data from the early labs - a house with 1000 square feet sold for \\\\$300,000 and a house with 2000 square feet sold for \\\\$500,000.\n",
|
||||
"\n",
|
||||
"| Size (1000 sqft) | Price (1000s of dollars) |\n",
|
||||
"| ----------------| ------------------------ |\n",
|
||||
"| 1 | 300 |\n",
|
||||
"| 2 | 500 |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the data set"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train = np.array([1.0, 2.0]) #features\n",
|
||||
"y_train = np.array([300, 500]) #target value"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create and fit the model\n",
|
||||
"The code below performs regression using scikit-learn. \n",
|
||||
"The first step creates a regression object. \n",
|
||||
"The second step utilizes one of the methods associated with the object, `fit`. This performs regression, fitting the parameters to the input data. The toolkit expects a two-dimensional X matrix."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"linear_model = LinearRegression()\n",
|
||||
"#X must be a 2-D Matrix\n",
|
||||
"linear_model.fit(X_train.reshape(-1, 1), y_train) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### View Parameters \n",
|
||||
"The $\\mathbf{w}$ and $\\mathbf{b}$ parameters are referred to as 'coefficients' and 'intercept' in scikit-learn."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"b = linear_model.intercept_\n",
|
||||
"w = linear_model.coef_\n",
|
||||
"print(f\"w = {w:}, b = {b:0.2f}\")\n",
|
||||
"print(f\"'manual' prediction: f_wb = wx+b : {1200*w + b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Make Predictions\n",
|
||||
"\n",
|
||||
"Calling the `predict` function generates predictions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred = linear_model.predict(X_train.reshape(-1, 1))\n",
|
||||
"\n",
|
||||
"print(\"Prediction on training set:\", y_pred)\n",
|
||||
"\n",
|
||||
"X_test = np.array([[1200]])\n",
|
||||
"print(f\"Prediction for 1200 sqft house: ${linear_model.predict(X_test)[0]:0.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Second Example\n",
|
||||
"The second example is from an earlier lab with multiple features. The final parameter values and predictions are very close to the results from the un-normalized 'long-run' from that lab. That un-normalized run took hours to produce results, while this is nearly instantaneous. The closed-form solution work well on smaller data sets such as these but can be computationally demanding on larger data sets. \n",
|
||||
">The closed-form solution does not require normalization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load the dataset\n",
|
||||
"X_train, y_train = load_house_data()\n",
|
||||
"X_features = ['size(sqft)','bedrooms','floors','age']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"linear_model = LinearRegression()\n",
|
||||
"linear_model.fit(X_train, y_train) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"b = linear_model.intercept_\n",
|
||||
"w = linear_model.coef_\n",
|
||||
"print(f\"w = {w:}, b = {b:0.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f\"Prediction on training set:\\n {linear_model.predict(X_train)[:4]}\" )\n",
|
||||
"print(f\"prediction using w,b:\\n {(X_train @ w + b)[:4]}\")\n",
|
||||
"print(f\"Target values \\n {y_train[:4]}\")\n",
|
||||
"\n",
|
||||
"x_house = np.array([1200, 3,1, 40]).reshape(-1,4)\n",
|
||||
"x_house_predict = linear_model.predict(x_house)[0]\n",
|
||||
"print(f\" predicted price of a house with 1200 sqft, 3 bedrooms, 1 floor, 40 years old = ${x_house_predict*1000:0.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Congratulations!\n",
|
||||
"In this lab you:\n",
|
||||
"- utilized an open-source machine learning toolkit, scikit-learn\n",
|
||||
"- implemented linear regression using a close-form solution from that toolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,730 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Optional Lab: Python, NumPy and Vectorization\n",
|
||||
"A brief introduction to some of the scientific computing used in this course. In particular the NumPy scientific computing package and its use with python.\n",
|
||||
"\n",
|
||||
"# Outline\n",
|
||||
"- [ 1.1 Goals](#toc_40015_1.1)\n",
|
||||
"- [ 1.2 Useful References](#toc_40015_1.2)\n",
|
||||
"- [2 Python and NumPy <a name='Python and NumPy'></a>](#toc_40015_2)\n",
|
||||
"- [3 Vectors](#toc_40015_3)\n",
|
||||
"- [ 3.1 Abstract](#toc_40015_3.1)\n",
|
||||
"- [ 3.2 NumPy Arrays](#toc_40015_3.2)\n",
|
||||
"- [ 3.3 Vector Creation](#toc_40015_3.3)\n",
|
||||
"- [ 3.4 Operations on Vectors](#toc_40015_3.4)\n",
|
||||
"- [4 Matrices](#toc_40015_4)\n",
|
||||
"- [ 4.1 Abstract](#toc_40015_4.1)\n",
|
||||
"- [ 4.2 NumPy Arrays](#toc_40015_4.2)\n",
|
||||
"- [ 4.3 Matrix Creation](#toc_40015_4.3)\n",
|
||||
"- [ 4.4 Operations on Matrices](#toc_40015_4.4)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np # it is an unofficial standard to use np for numpy\n",
|
||||
"import time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_1.1\"></a>\n",
|
||||
"## 1.1 Goals\n",
|
||||
"In this lab, you will:\n",
|
||||
"- Review the features of NumPy and Python that are used in Course 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_1.2\"></a>\n",
|
||||
"## 1.2 Useful References\n",
|
||||
"- NumPy Documentation including a basic introduction: [NumPy.org](https://NumPy.org/doc/stable/)\n",
|
||||
"- A challenging feature topic: [NumPy Broadcasting](https://NumPy.org/doc/stable/user/basics.broadcasting.html)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_2\"></a>\n",
|
||||
"# 2 Python and NumPy <a name='Python and NumPy'></a>\n",
|
||||
"Python is the programming language we will be using in this course. It has a set of numeric data types and arithmetic operations. NumPy is a library that extends the base capabilities of python to add a richer data set including more numeric types, vectors, matrices, and many matrix functions. NumPy and python work together fairly seamlessly. Python arithmetic operators work on NumPy data types and many NumPy functions will accept python data types.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3\"></a>\n",
|
||||
"# 3 Vectors\n",
|
||||
"<a name=\"toc_40015_3.1\"></a>\n",
|
||||
"## 3.1 Abstract\n",
|
||||
"<img align=\"right\" src=\"./images/C1_W2_Lab04_Vectors.PNG\" style=\"width:340px;\" >Vectors, as you will use them in this course, are ordered arrays of numbers. In notation, vectors are denoted with lower case bold letters such as $\\mathbf{x}$. The elements of a vector are all the same type. A vector does not, for example, contain both characters and numbers. The number of elements in the array is often referred to as the *dimension* though mathematicians may prefer *rank*. The vector shown has a dimension of $n$. The elements of a vector can be referenced with an index. In math settings, indexes typically run from 1 to n. In computer science and these labs, indexing will typically run from 0 to n-1. In notation, elements of a vector, when referenced individually will indicate the index in a subscript, for example, the $0^{th}$ element, of the vector $\\mathbf{x}$ is $x_0$. Note, the x is not bold in this case. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.2\"></a>\n",
|
||||
"## 3.2 NumPy Arrays\n",
|
||||
"\n",
|
||||
"NumPy's basic data structure is an indexable, n-dimensional *array* containing elements of the same type (`dtype`). Right away, you may notice we have overloaded the term 'dimension'. Above, it was the number of elements in the vector, here, dimension refers to the number of indexes of an array. A one-dimensional or 1-D array has one index. In Course 1, we will represent vectors as NumPy 1-D arrays. \n",
|
||||
"\n",
|
||||
" - 1-D array, shape (n,): n elements indexed [0] through [n-1]\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.3\"></a>\n",
|
||||
"## 3.3 Vector Creation\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Data creation routines in NumPy will generally have a first parameter which is the shape of the object. This can either be a single value for a 1-D result or a tuple (n,m,...) specifying the shape of the result. Below are examples of creating vectors using these routines."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill arrays with value\n",
|
||||
"a = np.zeros(4); print(f\"np.zeros(4) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.zeros((4,)); print(f\"np.zeros(4,) : a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.random.random_sample(4); print(f\"np.random.random_sample(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Some data creation routines do not take a shape tuple:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill arrays with value but do not accept shape as input argument\n",
|
||||
"a = np.arange(4.); print(f\"np.arange(4.): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.random.rand(4); print(f\"np.random.rand(4): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"values can be specified manually as well. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill with user specified values\n",
|
||||
"a = np.array([5,4,3,2]); print(f\"np.array([5,4,3,2]): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")\n",
|
||||
"a = np.array([5.,4,3,2]); print(f\"np.array([5.,4,3,2]): a = {a}, a shape = {a.shape}, a data type = {a.dtype}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These have all created a one-dimensional vector `a` with four elements. `a.shape` returns the dimensions. Here we see a.shape = `(4,)` indicating a 1-d array with 4 elements. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4\"></a>\n",
|
||||
"## 3.4 Operations on Vectors\n",
|
||||
"Let's explore some operations using vectors.\n",
|
||||
"<a name=\"toc_40015_3.4.1\"></a>\n",
|
||||
"### 3.4.1 Indexing\n",
|
||||
"Elements of vectors can be accessed via indexing and slicing. NumPy provides a very complete set of indexing and slicing capabilities. We will explore only the basics needed for the course here. Reference [Slicing and Indexing](https://NumPy.org/doc/stable/reference/arrays.indexing.html) for more details. \n",
|
||||
"**Indexing** means referring to *an element* of an array by its position within the array. \n",
|
||||
"**Slicing** means getting a *subset* of elements from an array based on their indices. \n",
|
||||
"NumPy starts indexing at zero so the 3rd element of an vector $\\mathbf{a}$ is `a[2]`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#vector indexing operations on 1-D vectors\n",
|
||||
"a = np.arange(10)\n",
|
||||
"print(a)\n",
|
||||
"\n",
|
||||
"#access an element\n",
|
||||
"print(f\"a[2].shape: {a[2].shape} a[2] = {a[2]}, Accessing an element returns a scalar\")\n",
|
||||
"\n",
|
||||
"# access the last element, negative indexes count from the end\n",
|
||||
"print(f\"a[-1] = {a[-1]}\")\n",
|
||||
"\n",
|
||||
"#indexs must be within the range of the vector or they will produce and error\n",
|
||||
"try:\n",
|
||||
" c = a[10]\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"The error message you'll see is:\")\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.2\"></a>\n",
|
||||
"### 3.4.2 Slicing\n",
|
||||
"Slicing creates an array of indices using a set of three values (`start:stop:step`). A subset of values is also valid. Its use is best explained by example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#vector slicing operations\n",
|
||||
"a = np.arange(10)\n",
|
||||
"print(f\"a = {a}\")\n",
|
||||
"\n",
|
||||
"#access 5 consecutive elements (start:stop:step)\n",
|
||||
"c = a[2:7:1]; print(\"a[2:7:1] = \", c)\n",
|
||||
"\n",
|
||||
"# access 3 elements separated by two \n",
|
||||
"c = a[2:7:2]; print(\"a[2:7:2] = \", c)\n",
|
||||
"\n",
|
||||
"# access all elements index 3 and above\n",
|
||||
"c = a[3:]; print(\"a[3:] = \", c)\n",
|
||||
"\n",
|
||||
"# access all elements below index 3\n",
|
||||
"c = a[:3]; print(\"a[:3] = \", c)\n",
|
||||
"\n",
|
||||
"# access all elements\n",
|
||||
"c = a[:]; print(\"a[:] = \", c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.3\"></a>\n",
|
||||
"### 3.4.3 Single vector operations\n",
|
||||
"There are a number of useful operations that involve operations on a single vector."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = np.array([1,2,3,4])\n",
|
||||
"print(f\"a : {a}\")\n",
|
||||
"# negate elements of a\n",
|
||||
"b = -a \n",
|
||||
"print(f\"b = -a : {b}\")\n",
|
||||
"\n",
|
||||
"# sum all elements of a, returns a scalar\n",
|
||||
"b = np.sum(a) \n",
|
||||
"print(f\"b = np.sum(a) : {b}\")\n",
|
||||
"\n",
|
||||
"b = np.mean(a)\n",
|
||||
"print(f\"b = np.mean(a): {b}\")\n",
|
||||
"\n",
|
||||
"b = a**2\n",
|
||||
"print(f\"b = a**2 : {b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.4\"></a>\n",
|
||||
"### 3.4.4 Vector Vector element-wise operations\n",
|
||||
"Most of the NumPy arithmetic, logical and comparison operations apply to vectors as well. These operators work on an element-by-element basis. For example \n",
|
||||
"$$ \\mathbf{a} + \\mathbf{b} = \\sum_{i=0}^{n-1} a_i + b_i $$"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = np.array([ 1, 2, 3, 4])\n",
|
||||
"b = np.array([-1,-2, 3, 4])\n",
|
||||
"print(f\"Binary operators work element wise: {a + b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Of course, for this to work correctly, the vectors must be of the same size:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#try a mismatched vector operation\n",
|
||||
"c = np.array([1, 2])\n",
|
||||
"try:\n",
|
||||
" d = a + c\n",
|
||||
"except Exception as e:\n",
|
||||
" print(\"The error message you'll see is:\")\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.5\"></a>\n",
|
||||
"### 3.4.5 Scalar Vector operations\n",
|
||||
"Vectors can be 'scaled' by scalar values. A scalar value is just a number. The scalar multiplies all the elements of the vector."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = np.array([1, 2, 3, 4])\n",
|
||||
"\n",
|
||||
"# multiply a by a scalar\n",
|
||||
"b = 5 * a \n",
|
||||
"print(f\"b = 5 * a : {b}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.6\"></a>\n",
|
||||
"### 3.4.6 Vector Vector dot product\n",
|
||||
"The dot product is a mainstay of Linear Algebra and NumPy. This is an operation used extensively in this course and should be well understood. The dot product is shown below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img src=\"./images/C1_W2_Lab04_dot_notrans.gif\" width=800> "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The dot product multiplies the values in two vectors element-wise and then sums the result.\n",
|
||||
"Vector dot product requires the dimensions of the two vectors to be the same. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's implement our own version of the dot product below:\n",
|
||||
"\n",
|
||||
"**Using a for loop**, implement a function which returns the dot product of two vectors. The function to return given inputs $a$ and $b$:\n",
|
||||
"$$ x = \\sum_{i=0}^{n-1} a_i b_i $$\n",
|
||||
"Assume both `a` and `b` are the same shape."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def my_dot(a, b): \n",
|
||||
" \"\"\"\n",
|
||||
" Compute the dot product of two vectors\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" a (ndarray (n,)): input vector \n",
|
||||
" b (ndarray (n,)): input vector with same dimension as a\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" x (scalar): \n",
|
||||
" \"\"\"\n",
|
||||
" x=0\n",
|
||||
" for i in range(a.shape[0]):\n",
|
||||
" x = x + a[i] * b[i]\n",
|
||||
" return x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test 1-D\n",
|
||||
"a = np.array([1, 2, 3, 4])\n",
|
||||
"b = np.array([-1, 4, 3, 2])\n",
|
||||
"print(f\"my_dot(a, b) = {my_dot(a, b)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note, the dot product is expected to return a scalar value. \n",
|
||||
"\n",
|
||||
"Let's try the same operations using `np.dot`. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test 1-D\n",
|
||||
"a = np.array([1, 2, 3, 4])\n",
|
||||
"b = np.array([-1, 4, 3, 2])\n",
|
||||
"c = np.dot(a, b)\n",
|
||||
"print(f\"NumPy 1-D np.dot(a, b) = {c}, np.dot(a, b).shape = {c.shape} \") \n",
|
||||
"c = np.dot(b, a)\n",
|
||||
"print(f\"NumPy 1-D np.dot(b, a) = {c}, np.dot(a, b).shape = {c.shape} \")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Above, you will note that the results for 1-D matched our implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_3.4.7\"></a>\n",
|
||||
"### 3.4.7 The Need for Speed: vector vs for loop\n",
|
||||
"We utilized the NumPy library because it improves speed memory efficiency. Let's demonstrate:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"np.random.seed(1)\n",
|
||||
"a = np.random.rand(10000000) # very large arrays\n",
|
||||
"b = np.random.rand(10000000)\n",
|
||||
"\n",
|
||||
"tic = time.time() # capture start time\n",
|
||||
"c = np.dot(a, b)\n",
|
||||
"toc = time.time() # capture end time\n",
|
||||
"\n",
|
||||
"print(f\"np.dot(a, b) = {c:.4f}\")\n",
|
||||
"print(f\"Vectorized version duration: {1000*(toc-tic):.4f} ms \")\n",
|
||||
"\n",
|
||||
"tic = time.time() # capture start time\n",
|
||||
"c = my_dot(a,b)\n",
|
||||
"toc = time.time() # capture end time\n",
|
||||
"\n",
|
||||
"print(f\"my_dot(a, b) = {c:.4f}\")\n",
|
||||
"print(f\"loop version duration: {1000*(toc-tic):.4f} ms \")\n",
|
||||
"\n",
|
||||
"del(a);del(b) #remove these big arrays from memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"So, vectorization provides a large speedup in this example. This is because NumPy makes better use of available data parallelism in the underlying hardware. GPUs and modern CPUs implement Single Instruction, Multiple Data (SIMD) pipelines allowing multiple operations to be issued in parallel. This is critical in Machine Learning where the data sets are often very large."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_12345_3.4.8\"></a>\n",
|
||||
"### 3.4.8 Vector Vector operations in Course 1\n",
|
||||
"Vector Vector operations will appear frequently in course 1. Here is why:\n",
|
||||
"- Going forward, our examples will be stored in an array, `X_train` of dimension (m,n). This will be explained more in context, but here it is important to note it is a 2 Dimensional array or matrix (see next section on matrices).\n",
|
||||
"- `w` will be a 1-dimensional vector of shape (n,).\n",
|
||||
"- We will perform operations by looping through the examples, extracting each example to work on individually by indexing X. For example: `X[i]`\n",
|
||||
"- `X[i]` returns a value of shape (n,), a 1-dimensional vector. Consequently, operations involving `X[i]` are often vector-vector. \n",
|
||||
"\n",
|
||||
"That is a somewhat lengthy explanation, but aligning and understanding the shapes of your operands is important when performing vector operations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# show common Course 1 example\n",
|
||||
"X = np.array([[1],[2],[3],[4]])\n",
|
||||
"w = np.array([2])\n",
|
||||
"c = np.dot(X[1], w)\n",
|
||||
"\n",
|
||||
"print(f\"X[1] has shape {X[1].shape}\")\n",
|
||||
"print(f\"w has shape {w.shape}\")\n",
|
||||
"print(f\"c has shape {c.shape}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4\"></a>\n",
|
||||
"# 4 Matrices\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.1\"></a>\n",
|
||||
"## 4.1 Abstract\n",
|
||||
"Matrices are two-dimensional arrays. The elements of a matrix are all of the same type. In notation, matrices are denoted with a capital, bold letter such as $\\mathbf{X}$. In this and other labs, `m` is often the number of rows and `n` the number of columns. The elements of a matrix can be referenced with a two-dimensional index. In math settings, numbers in the index typically run from 1 to n. In computer science and these labs, indexing will run from 0 to n-1. \n",
|
||||
"<figure>\n",
|
||||
" <center> <img src=\"./images/C1_W2_Lab04_Matrices.PNG\" alt='missing' width=900></center>\n",
|
||||
" <figcaption> Generic Matrix Notation, 1st index is row, 2nd is column </figcaption>\n",
|
||||
"</figure>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.2\"></a>\n",
|
||||
"## 4.2 NumPy Arrays\n",
|
||||
"\n",
|
||||
"NumPy's basic data structure is an indexable, n-dimensional *array* containing elements of the same type (`dtype`). These were described earlier. Matrices have a two-dimensional (2-D) index [m,n].\n",
|
||||
"\n",
|
||||
"In Course 1, 2-D matrices are used to hold training data. Training data is $m$ examples by $n$ features creating an (m,n) array. Course 1 does not do operations directly on matrices but typically extracts an example as a vector and operates on that. Below you will review: \n",
|
||||
"- data creation\n",
|
||||
"- slicing and indexing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.3\"></a>\n",
|
||||
"## 4.3 Matrix Creation\n",
|
||||
"The same functions that created 1-D vectors will create 2-D or n-D arrays. Here are some examples\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below, the shape tuple is provided to achieve a 2-D result. Notice how NumPy uses brackets to denote each dimension. Notice further that NumPy, when printing, will print one row per line.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = np.zeros((1, 5)) \n",
|
||||
"print(f\"a shape = {a.shape}, a = {a}\") \n",
|
||||
"\n",
|
||||
"a = np.zeros((2, 1)) \n",
|
||||
"print(f\"a shape = {a.shape}, a = {a}\") \n",
|
||||
"\n",
|
||||
"a = np.random.random_sample((1, 1)) \n",
|
||||
"print(f\"a shape = {a.shape}, a = {a}\") "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"One can also manually specify data. Dimensions are specified with additional brackets matching the format in the printing above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NumPy routines which allocate memory and fill with user specified values\n",
|
||||
"a = np.array([[5], [4], [3]]); print(f\" a shape = {a.shape}, np.array: a = {a}\")\n",
|
||||
"a = np.array([[5], # One can also\n",
|
||||
" [4], # separate values\n",
|
||||
" [3]]); #into separate rows\n",
|
||||
"print(f\" a shape = {a.shape}, np.array: a = {a}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.4\"></a>\n",
|
||||
"## 4.4 Operations on Matrices\n",
|
||||
"Let's explore some operations using matrices."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.4.1\"></a>\n",
|
||||
"### 4.4.1 Indexing\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Matrices include a second index. The two indexes describe [row, column]. Access can either return an element or a row/column. See below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#vector indexing operations on matrices\n",
|
||||
"a = np.arange(6).reshape(-1, 2) #reshape is a convenient way to create matrices\n",
|
||||
"print(f\"a.shape: {a.shape}, \\na= {a}\")\n",
|
||||
"\n",
|
||||
"#access an element\n",
|
||||
"print(f\"\\na[2,0].shape: {a[2, 0].shape}, a[2,0] = {a[2, 0]}, type(a[2,0]) = {type(a[2, 0])} Accessing an element returns a scalar\\n\")\n",
|
||||
"\n",
|
||||
"#access a row\n",
|
||||
"print(f\"a[2].shape: {a[2].shape}, a[2] = {a[2]}, type(a[2]) = {type(a[2])}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is worth drawing attention to the last example. Accessing a matrix by just specifying the row will return a *1-D vector*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Reshape** \n",
|
||||
"The previous example used [reshape](https://numpy.org/doc/stable/reference/generated/numpy.reshape.html) to shape the array. \n",
|
||||
"`a = np.arange(6).reshape(-1, 2) ` \n",
|
||||
"This line of code first created a *1-D Vector* of six elements. It then reshaped that vector into a *2-D* array using the reshape command. This could have been written: \n",
|
||||
"`a = np.arange(6).reshape(3, 2) ` \n",
|
||||
"To arrive at the same 3 row, 2 column array.\n",
|
||||
"The -1 argument tells the routine to compute the number of rows given the size of the array and the number of columns.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_4.4.2\"></a>\n",
|
||||
"### 4.4.2 Slicing\n",
|
||||
"Slicing creates an array of indices using a set of three values (`start:stop:step`). A subset of values is also valid. Its use is best explained by example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#vector 2-D slicing operations\n",
|
||||
"a = np.arange(20).reshape(-1, 10)\n",
|
||||
"print(f\"a = \\n{a}\")\n",
|
||||
"\n",
|
||||
"#access 5 consecutive elements (start:stop:step)\n",
|
||||
"print(\"a[0, 2:7:1] = \", a[0, 2:7:1], \", a[0, 2:7:1].shape =\", a[0, 2:7:1].shape, \"a 1-D array\")\n",
|
||||
"\n",
|
||||
"#access 5 consecutive elements (start:stop:step) in two rows\n",
|
||||
"print(\"a[:, 2:7:1] = \\n\", a[:, 2:7:1], \", a[:, 2:7:1].shape =\", a[:, 2:7:1].shape, \"a 2-D array\")\n",
|
||||
"\n",
|
||||
"# access all elements\n",
|
||||
"print(\"a[:,:] = \\n\", a[:,:], \", a[:,:].shape =\", a[:,:].shape)\n",
|
||||
"\n",
|
||||
"# access all elements in one row (very common usage)\n",
|
||||
"print(\"a[1,:] = \", a[1,:], \", a[1,:].shape =\", a[1,:].shape, \"a 1-D array\")\n",
|
||||
"# same as\n",
|
||||
"print(\"a[1] = \", a[1], \", a[1].shape =\", a[1].shape, \"a 1-D array\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a name=\"toc_40015_5.0\"></a>\n",
|
||||
"## Congratulations!\n",
|
||||
"In this lab you mastered the features of Python and NumPy that are needed for Course 1."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"dl_toc_settings": {
|
||||
"rndtag": "40015"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.10"
|
||||
},
|
||||
"toc-autonumbering": false
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
100
week2/data/houses.txt
Normal file
@@ -0,0 +1,100 @@
|
||||
9.520000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,6.500000000000000000e+01,2.715000000000000000e+02
|
||||
1.244000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.400000000000000000e+01,3.000000000000000000e+02
|
||||
1.947000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.700000000000000000e+01,5.098000000000000114e+02
|
||||
1.725000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,4.200000000000000000e+01,3.940000000000000000e+02
|
||||
1.959000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.500000000000000000e+01,5.400000000000000000e+02
|
||||
1.314000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.400000000000000000e+01,4.150000000000000000e+02
|
||||
8.640000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,6.600000000000000000e+01,2.300000000000000000e+02
|
||||
1.836000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.700000000000000000e+01,5.600000000000000000e+02
|
||||
1.026000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,4.300000000000000000e+01,2.940000000000000000e+02
|
||||
3.194000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,8.700000000000000000e+01,7.182000000000000455e+02
|
||||
7.880000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,8.000000000000000000e+01,2.000000000000000000e+02
|
||||
1.200000000000000000e+03,2.000000000000000000e+00,2.000000000000000000e+00,1.700000000000000000e+01,3.020000000000000000e+02
|
||||
1.557000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.800000000000000000e+01,4.680000000000000000e+02
|
||||
1.430000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+01,3.741999999999999886e+02
|
||||
1.220000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.500000000000000000e+01,3.880000000000000000e+02
|
||||
1.092000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,6.400000000000000000e+01,2.820000000000000000e+02
|
||||
8.480000000000000000e+02,1.000000000000000000e+00,1.000000000000000000e+00,1.700000000000000000e+01,3.118000000000000114e+02
|
||||
1.682000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.300000000000000000e+01,4.010000000000000000e+02
|
||||
1.768000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.800000000000000000e+01,4.498000000000000114e+02
|
||||
1.040000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,4.400000000000000000e+01,3.010000000000000000e+02
|
||||
1.652000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,2.100000000000000000e+01,5.020000000000000000e+02
|
||||
1.088000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,3.500000000000000000e+01,3.400000000000000000e+02
|
||||
1.316000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.400000000000000000e+01,4.002819999999999823e+02
|
||||
1.593000000000000000e+03,0.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+01,5.720000000000000000e+02
|
||||
9.720000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,7.300000000000000000e+01,2.640000000000000000e+02
|
||||
1.097000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,3.700000000000000000e+01,3.040000000000000000e+02
|
||||
1.004000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,5.100000000000000000e+01,2.980000000000000000e+02
|
||||
9.040000000000000000e+02,3.000000000000000000e+00,1.000000000000000000e+00,5.500000000000000000e+01,2.198000000000000114e+02
|
||||
1.694000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.300000000000000000e+01,4.906999999999999886e+02
|
||||
1.073000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.000000000000000000e+02,2.169600000000000080e+02
|
||||
1.419000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.900000000000000000e+01,3.681999999999999886e+02
|
||||
1.164000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,5.200000000000000000e+01,2.800000000000000000e+02
|
||||
1.935000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.200000000000000000e+01,5.268700000000000045e+02
|
||||
1.216000000000000000e+03,2.000000000000000000e+00,2.000000000000000000e+00,7.400000000000000000e+01,2.370000000000000000e+02
|
||||
2.482000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,1.600000000000000000e+01,5.624260000000000446e+02
|
||||
1.200000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.800000000000000000e+01,3.698000000000000114e+02
|
||||
1.840000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.000000000000000000e+01,4.600000000000000000e+02
|
||||
1.851000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,5.700000000000000000e+01,3.740000000000000000e+02
|
||||
1.660000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.900000000000000000e+01,3.900000000000000000e+02
|
||||
1.096000000000000000e+03,2.000000000000000000e+00,2.000000000000000000e+00,9.700000000000000000e+01,1.580000000000000000e+02
|
||||
1.775000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.800000000000000000e+01,4.260000000000000000e+02
|
||||
2.030000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,4.500000000000000000e+01,3.900000000000000000e+02
|
||||
1.784000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,1.070000000000000000e+02,2.777740000000000009e+02
|
||||
1.073000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.000000000000000000e+02,2.169600000000000080e+02
|
||||
1.552000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.600000000000000000e+01,4.258000000000000114e+02
|
||||
1.953000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.600000000000000000e+01,5.040000000000000000e+02
|
||||
1.224000000000000000e+03,2.000000000000000000e+00,2.000000000000000000e+00,1.200000000000000000e+01,3.290000000000000000e+02
|
||||
1.616000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.600000000000000000e+01,4.640000000000000000e+02
|
||||
8.160000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,5.800000000000000000e+01,2.200000000000000000e+02
|
||||
1.349000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.100000000000000000e+01,3.580000000000000000e+02
|
||||
1.571000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.400000000000000000e+01,4.780000000000000000e+02
|
||||
1.486000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,5.700000000000000000e+01,3.340000000000000000e+02
|
||||
1.506000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.600000000000000000e+01,4.269800000000000182e+02
|
||||
1.097000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.700000000000000000e+01,2.900000000000000000e+02
|
||||
1.764000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.400000000000000000e+01,4.630000000000000000e+02
|
||||
1.208000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.400000000000000000e+01,3.908000000000000114e+02
|
||||
1.470000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.400000000000000000e+01,3.540000000000000000e+02
|
||||
1.768000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,8.400000000000000000e+01,3.500000000000000000e+02
|
||||
1.654000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.900000000000000000e+01,4.600000000000000000e+02
|
||||
1.029000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.000000000000000000e+01,2.370000000000000000e+02
|
||||
1.120000000000000000e+03,2.000000000000000000e+00,2.000000000000000000e+00,1.600000000000000000e+01,2.883039999999999736e+02
|
||||
1.150000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.200000000000000000e+01,2.820000000000000000e+02
|
||||
8.160000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,3.900000000000000000e+01,2.490000000000000000e+02
|
||||
1.040000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.500000000000000000e+01,3.040000000000000000e+02
|
||||
1.392000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.400000000000000000e+01,3.320000000000000000e+02
|
||||
1.603000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.900000000000000000e+01,3.518000000000000114e+02
|
||||
1.215000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.300000000000000000e+01,3.100000000000000000e+02
|
||||
1.073000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.000000000000000000e+02,2.169600000000000080e+02
|
||||
2.599000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,2.200000000000000000e+01,6.663360000000000127e+02
|
||||
1.431000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,5.900000000000000000e+01,3.300000000000000000e+02
|
||||
2.090000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.600000000000000000e+01,4.800000000000000000e+02
|
||||
1.790000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,4.900000000000000000e+01,3.303000000000000114e+02
|
||||
1.484000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.600000000000000000e+01,3.480000000000000000e+02
|
||||
1.040000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.500000000000000000e+01,3.040000000000000000e+02
|
||||
1.431000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.200000000000000000e+01,3.840000000000000000e+02
|
||||
1.159000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,5.300000000000000000e+01,3.160000000000000000e+02
|
||||
1.547000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.200000000000000000e+01,4.303999999999999773e+02
|
||||
1.983000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.200000000000000000e+01,4.500000000000000000e+02
|
||||
1.056000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,5.300000000000000000e+01,2.840000000000000000e+02
|
||||
1.180000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,9.900000000000000000e+01,2.750000000000000000e+02
|
||||
1.358000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.700000000000000000e+01,4.140000000000000000e+02
|
||||
9.600000000000000000e+02,3.000000000000000000e+00,1.000000000000000000e+00,5.100000000000000000e+01,2.580000000000000000e+02
|
||||
1.456000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.600000000000000000e+01,3.780000000000000000e+02
|
||||
1.446000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.500000000000000000e+01,3.500000000000000000e+02
|
||||
1.208000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,1.500000000000000000e+01,4.120000000000000000e+02
|
||||
1.553000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.600000000000000000e+01,3.730000000000000000e+02
|
||||
8.820000000000000000e+02,3.000000000000000000e+00,1.000000000000000000e+00,4.900000000000000000e+01,2.250000000000000000e+02
|
||||
2.030000000000000000e+03,4.000000000000000000e+00,2.000000000000000000e+00,4.500000000000000000e+01,3.900000000000000000e+02
|
||||
1.040000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.200000000000000000e+01,2.673999999999999773e+02
|
||||
1.616000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.600000000000000000e+01,4.640000000000000000e+02
|
||||
8.030000000000000000e+02,2.000000000000000000e+00,1.000000000000000000e+00,8.000000000000000000e+01,1.740000000000000000e+02
|
||||
1.430000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,2.100000000000000000e+01,3.400000000000000000e+02
|
||||
1.656000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,6.100000000000000000e+01,4.300000000000000000e+02
|
||||
1.541000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,1.600000000000000000e+01,4.400000000000000000e+02
|
||||
9.480000000000000000e+02,3.000000000000000000e+00,1.000000000000000000e+00,5.300000000000000000e+01,2.160000000000000000e+02
|
||||
1.224000000000000000e+03,2.000000000000000000e+00,2.000000000000000000e+00,1.200000000000000000e+01,3.290000000000000000e+02
|
||||
1.432000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,4.300000000000000000e+01,3.880000000000000000e+02
|
||||
1.660000000000000000e+03,3.000000000000000000e+00,2.000000000000000000e+00,1.900000000000000000e+01,3.900000000000000000e+02
|
||||
1.212000000000000000e+03,3.000000000000000000e+00,1.000000000000000000e+00,2.000000000000000000e+01,3.560000000000000000e+02
|
||||
1.050000000000000000e+03,2.000000000000000000e+00,1.000000000000000000e+00,6.500000000000000000e+01,2.578000000000000114e+02
|
||||
124
week2/deeplearning.mplstyle
Normal file
@@ -0,0 +1,124 @@
|
||||
# see https://matplotlib.org/stable/tutorials/introductory/customizing.html
|
||||
lines.linewidth: 4
|
||||
lines.solid_capstyle: butt
|
||||
|
||||
legend.fancybox: true
|
||||
|
||||
# Verdana for non-math text,
|
||||
# Cambria Math
|
||||
|
||||
#Blue (Crayon-Aqua) 0096FF
|
||||
#Dark Red C00000
|
||||
#Orange (Apple Orange) FF9300
|
||||
#Black 000000
|
||||
#Magenta FF40FF
|
||||
#Purple 7030A0
|
||||
|
||||
axes.prop_cycle: cycler('color', ['0096FF', 'FF9300', 'FF40FF', '7030A0', 'C00000'])
|
||||
#axes.facecolor: f0f0f0 # grey
|
||||
axes.facecolor: ffffff # white
|
||||
axes.labelsize: large
|
||||
axes.axisbelow: true
|
||||
axes.grid: False
|
||||
axes.edgecolor: f0f0f0
|
||||
axes.linewidth: 3.0
|
||||
axes.titlesize: x-large
|
||||
|
||||
patch.edgecolor: f0f0f0
|
||||
patch.linewidth: 0.5
|
||||
|
||||
svg.fonttype: path
|
||||
|
||||
grid.linestyle: -
|
||||
grid.linewidth: 1.0
|
||||
grid.color: cbcbcb
|
||||
|
||||
xtick.major.size: 0
|
||||
xtick.minor.size: 0
|
||||
ytick.major.size: 0
|
||||
ytick.minor.size: 0
|
||||
|
||||
savefig.edgecolor: f0f0f0
|
||||
savefig.facecolor: f0f0f0
|
||||
|
||||
#figure.subplot.left: 0.08
|
||||
#figure.subplot.right: 0.95
|
||||
#figure.subplot.bottom: 0.07
|
||||
|
||||
#figure.facecolor: f0f0f0 # grey
|
||||
figure.facecolor: ffffff # white
|
||||
|
||||
## ***************************************************************************
|
||||
## * FONT *
|
||||
## ***************************************************************************
|
||||
## The font properties used by `text.Text`.
|
||||
## See https://matplotlib.org/api/font_manager_api.html for more information
|
||||
## on font properties. The 6 font properties used for font matching are
|
||||
## given below with their default values.
|
||||
##
|
||||
## The font.family property can take either a concrete font name (not supported
|
||||
## when rendering text with usetex), or one of the following five generic
|
||||
## values:
|
||||
## - 'serif' (e.g., Times),
|
||||
## - 'sans-serif' (e.g., Helvetica),
|
||||
## - 'cursive' (e.g., Zapf-Chancery),
|
||||
## - 'fantasy' (e.g., Western), and
|
||||
## - 'monospace' (e.g., Courier).
|
||||
## Each of these values has a corresponding default list of font names
|
||||
## (font.serif, etc.); the first available font in the list is used. Note that
|
||||
## for font.serif, font.sans-serif, and font.monospace, the first element of
|
||||
## the list (a DejaVu font) will always be used because DejaVu is shipped with
|
||||
## Matplotlib and is thus guaranteed to be available; the other entries are
|
||||
## left as examples of other possible values.
|
||||
##
|
||||
## The font.style property has three values: normal (or roman), italic
|
||||
## or oblique. The oblique style will be used for italic, if it is not
|
||||
## present.
|
||||
##
|
||||
## The font.variant property has two values: normal or small-caps. For
|
||||
## TrueType fonts, which are scalable fonts, small-caps is equivalent
|
||||
## to using a font size of 'smaller', or about 83%% of the current font
|
||||
## size.
|
||||
##
|
||||
## The font.weight property has effectively 13 values: normal, bold,
|
||||
## bolder, lighter, 100, 200, 300, ..., 900. Normal is the same as
|
||||
## 400, and bold is 700. bolder and lighter are relative values with
|
||||
## respect to the current weight.
|
||||
##
|
||||
## The font.stretch property has 11 values: ultra-condensed,
|
||||
## extra-condensed, condensed, semi-condensed, normal, semi-expanded,
|
||||
## expanded, extra-expanded, ultra-expanded, wider, and narrower. This
|
||||
## property is not currently implemented.
|
||||
##
|
||||
## The font.size property is the default font size for text, given in points.
|
||||
## 10 pt is the standard value.
|
||||
##
|
||||
## Note that font.size controls default text sizes. To configure
|
||||
## special text sizes tick labels, axes, labels, title, etc., see the rc
|
||||
## settings for axes and ticks. Special text sizes can be defined
|
||||
## relative to font.size, using the following values: xx-small, x-small,
|
||||
## small, medium, large, x-large, xx-large, larger, or smaller
|
||||
|
||||
|
||||
font.family: sans-serif
|
||||
font.style: normal
|
||||
font.variant: normal
|
||||
font.weight: normal
|
||||
font.stretch: normal
|
||||
font.size: 12.0
|
||||
|
||||
font.serif: DejaVu Serif, Bitstream Vera Serif, Computer Modern Roman, New Century Schoolbook, Century Schoolbook L, Utopia, ITC Bookman, Bookman, Nimbus Roman No9 L, Times New Roman, Times, Palatino, Charter, serif
|
||||
font.sans-serif: Verdana, DejaVu Sans, Bitstream Vera Sans, Computer Modern Sans Serif, Lucida Grande, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
|
||||
font.cursive: Apple Chancery, Textile, Zapf Chancery, Sand, Script MT, Felipa, Comic Neue, Comic Sans MS, cursive
|
||||
font.fantasy: Chicago, Charcoal, Impact, Western, Humor Sans, xkcd, fantasy
|
||||
font.monospace: DejaVu Sans Mono, Bitstream Vera Sans Mono, Computer Modern Typewriter, Andale Mono, Nimbus Mono L, Courier New, Courier, Fixed, Terminal, monospace
|
||||
|
||||
|
||||
## ***************************************************************************
|
||||
## * TEXT *
|
||||
## ***************************************************************************
|
||||
## The text properties used by `text.Text`.
|
||||
## See https://matplotlib.org/api/artist_api.html#module-matplotlib.text
|
||||
## for more information on text properties
|
||||
#text.color: black
|
||||
|
||||
BIN
week2/images/C1_W2_L1_S1_Lecture_b.png
Normal file
|
After Width: | Height: | Size: 83 KiB |
BIN
week2/images/C1_W2_L1_S1_model.png
Normal file
|
After Width: | Height: | Size: 76 KiB |
BIN
week2/images/C1_W2_L1_S1_trainingdata.png
Normal file
|
After Width: | Height: | Size: 86 KiB |
BIN
week2/images/C1_W2_L1_S2_Lectureb.png
Normal file
|
After Width: | Height: | Size: 133 KiB |
BIN
week2/images/C1_W2_L2_S1_Lecture_GD.png
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
week2/images/C1_W2_Lab02_GoalOfRegression.PNG
Normal file
|
After Width: | Height: | Size: 105 KiB |
BIN
week2/images/C1_W2_Lab03_alpha_to_big.PNG
Normal file
|
After Width: | Height: | Size: 60 KiB |
BIN
week2/images/C1_W2_Lab03_lecture_learningrate.PNG
Normal file
|
After Width: | Height: | Size: 84 KiB |
BIN
week2/images/C1_W2_Lab03_lecture_slopes.PNG
Normal file
|
After Width: | Height: | Size: 67 KiB |
BIN
week2/images/C1_W2_Lab04_Figures And animations.pptx
Normal file
BIN
week2/images/C1_W2_Lab04_Matrices.PNG
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
week2/images/C1_W2_Lab04_Vectors.PNG
Normal file
|
After Width: | Height: | Size: 5.8 KiB |
BIN
week2/images/C1_W2_Lab04_dot_notrans.gif
Normal file
|
After Width: | Height: | Size: 1.6 MiB |
BIN
week2/images/C1_W2_Lab06_LongRun.PNG
Normal file
|
After Width: | Height: | Size: 302 KiB |
BIN
week2/images/C1_W2_Lab06_ShortRun.PNG
Normal file
|
After Width: | Height: | Size: 363 KiB |
BIN
week2/images/C1_W2_Lab06_contours.PNG
Normal file
|
After Width: | Height: | Size: 37 KiB |
BIN
week2/images/C1_W2_Lab06_featurescalingheader.PNG
Normal file
|
After Width: | Height: | Size: 68 KiB |
BIN
week2/images/C1_W2_Lab06_learningrate.PNG
Normal file
|
After Width: | Height: | Size: 76 KiB |
BIN
week2/images/C1_W2_Lab06_scale.PNG
Normal file
|
After Width: | Height: | Size: 65 KiB |
BIN
week2/images/C1_W2_Lab07_FeatureEngLecture.PNG
Normal file
|
After Width: | Height: | Size: 93 KiB |
112
week2/lab_utils_common.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""
|
||||
lab_utils_common.py
|
||||
functions common to all optional labs, Course 1, Week 2
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Apply the course plotting style (path is relative to the working directory).
plt.style.use('./deeplearning.mplstyle')

# Course color palette as hex RGB strings.
dlblue = '#0096ff'
dlorange = '#FF9300'
dldarkred = '#C00000'
dlmagenta = '#FF40FF'
dlpurple = '#7030A0'

# The same palette as an ordered list and as a name -> color mapping.
dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]
dlc = {
    'dlblue': '#0096ff',
    'dlorange': '#FF9300',
    'dldarkred': '#C00000',
    'dlmagenta': '#FF40FF',
    'dlpurple': '#7030A0',
}
|
||||
|
||||
|
||||
##########################################################
|
||||
# Regression Routines
|
||||
##########################################################
|
||||
|
||||
#Function to calculate the cost
|
||||
def compute_cost_matrix(X, y, w, b, verbose=False):
    """
    Computes the squared-error cost for linear regression (vectorized)
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
      verbose : (Boolean) If true, print out intermediate value f_wb
    Returns
      cost: (scalar) sum of squared prediction errors scaled by 1/(2*m)
    """
    num_examples = X.shape[0]

    # Predictions for all examples in a single matrix product.
    predictions = X @ w + b
    residuals = predictions - y
    total_cost = (1 / (2 * num_examples)) * np.sum(residuals ** 2)

    if verbose:
        print("f_wb:")
        print(predictions)

    return total_cost
|
||||
|
||||
def compute_gradient_matrix(X, y, w, b):
    """
    Computes the gradient of the squared-error cost for linear regression (vectorized)

    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
    Returns (in this order)
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b.
      dj_dw (ndarray): The gradient of the cost w.r.t. the parameters w,
                       computed as X.T @ (X @ w + b - y) / m.
    """
    num_examples, _ = X.shape
    residuals = (X @ w + b) - y
    scale = 1 / num_examples

    grad_w = scale * (X.T @ residuals)
    grad_b = scale * np.sum(residuals)

    # NOTE: the bias gradient comes first in the returned tuple.
    return grad_b, grad_w
|
||||
|
||||
|
||||
# Loop version of multi-variable compute_cost
|
||||
def compute_cost(X, y, w, b):
    """
    Compute the squared-error cost one example at a time
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
    Returns
      cost (scalar)    : sum of squared errors divided by 2*m
    """
    num_examples = X.shape[0]
    squared_error_sum = 0.0
    for i, x_i in enumerate(X):
        prediction = np.dot(x_i, w) + b  # (n,)(n,) = scalar
        squared_error_sum = squared_error_sum + (prediction - y[i]) ** 2
    return squared_error_sum / (2 * num_examples)
|
||||
|
||||
def compute_gradient(X, y, w, b):
    """
    Computes the gradient of the squared-error cost one example at a time
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter
    Returns (in this order)
      dj_db (scalar): The gradient of the cost w.r.t. the parameter b.
      dj_dw (ndarray Shape (n,)): The gradient of the cost w.r.t. the parameters w.
    """
    num_examples, num_features = X.shape
    grad_w = np.zeros((num_features,))
    grad_b = 0.

    for i in range(num_examples):
        residual = (np.dot(X[i], w) + b) - y[i]
        # Accumulate residual * feature for every feature of this example.
        grad_w = grad_w + residual * X[i]
        grad_b = grad_b + residual

    # NOTE: the bias gradient comes first in the returned tuple.
    return grad_b / num_examples, grad_w / num_examples
|
||||
|
||||
569
week2/lab_utils_multi.py
Normal file
@@ -0,0 +1,569 @@
|
||||
import numpy as np
|
||||
import copy
|
||||
import math
|
||||
from scipy.stats import norm
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits.mplot3d import axes3d
|
||||
from matplotlib.ticker import MaxNLocator
|
||||
# Course color palette as hex RGB strings.
dlblue = '#0096ff'
dlorange = '#FF9300'
dldarkred = '#C00000'
dlmagenta = '#FF40FF'
dlpurple = '#7030A0'

# Apply the course plotting style (path is relative to the working directory).
plt.style.use('./deeplearning.mplstyle')
|
||||
|
||||
def load_data_multi():
    """
    Load the comma-separated data set stored in data/ex1data2.txt
    Returns
      X : first two columns of the file
      y : third column of the file
    """
    table = np.loadtxt("data/ex1data2.txt", delimiter=',')
    features, targets = table[:, :2], table[:, 2]
    return features, targets
|
||||
|
||||
##########################################################
|
||||
# Plotting Routines
|
||||
##########################################################
|
||||
|
||||
def plt_house_x(X, y,f_wb=None, ax=None):
    '''
    Scatter-plot house prices against size, optionally with a prediction line.
    Args:
      X    : values plotted on the horizontal axis
      y    : values plotted on the vertical axis
      f_wb : optional predictions; drawn as a line over X when not None
      ax   : axes to draw on; a new single-axes figure is created when falsy
    '''
    if not ax:
        _, ax = plt.subplots(1, 1)

    ax.set_title("Housing Prices")
    ax.set_xlabel('Size (1000 sqft)')
    ax.set_ylabel('Price (in 1000s of dollars)')

    ax.scatter(X, y, marker='x', c='r', label="Actual Value")
    if f_wb is not None:
        ax.plot(X, f_wb, c=dlblue, label="Our Prediction")
    ax.legend()
|
||||
|
||||
|
||||
def mk_cost_lines(x,y,w,b, ax):
    '''
    Draw a dotted vertical line from each data point to the model line w*x+b,
    annotate it with that point's cost, and write the cost sum on the axes.
    Args:
      x, y : paired data points
      w, b : parameters of the line used for the prediction
      ax   : axes to draw on
    '''
    terms = []
    ctot = 0
    label = 'cost for point'
    for x_p, y_p in zip(x, y):
        f_wb_p = w*x_p + b
        c_p = ((f_wb_p - y_p)**2)/2
        c_p_txt = c_p/1000
        ax.vlines(x_p, y_p, f_wb_p, lw=3, color=dlpurple, ls='dotted', label=label)
        label = ''  # only the first line gets a legend entry
        midpoint = [x_p, y_p + (f_wb_p - y_p)/2]
        ax.annotate(f'{c_p_txt:0.0f}', xy=midpoint, xycoords='data', color=dlpurple,
                    xytext=(5, 0), textcoords='offset points')
        terms.append(f"{c_p_txt:0.0f} +")
        ctot += c_p
    ctot = ctot/(len(x))

    cstr = "cost = (1/2m)*1000*(" + "".join(terms)
    # Drop the final character (the trailing "+") before closing the bracket.
    cstr = cstr[:-1] + f") = {ctot:0.0f}"
    ax.text(0.15, 0.02, cstr, transform=ax.transAxes, color=dlpurple)
|
||||
|
||||
|
||||
def inbounds(a,b,xlim,ylim):
    """
    Return True when both points lie strictly inside the given limits
    Args:
      a, b : points given as (x, y) pairs
      xlim : (low, high) bounds for the x coordinate
      ylim : (low, high) bounds for the y coordinate
    Returns
      True if both x values are strictly inside xlim and both y values
      are strictly inside ylim, otherwise False
    """
    xlow, xhigh = xlim
    ylow, yhigh = ylim
    ax, ay = a
    bx, by = b

    if not (xlow < ax < xhigh and xlow < bx < xhigh):
        return False
    if not (ylow < ay < yhigh and ylow < by < yhigh):
        return False
    return True
|
||||
|
||||
from mpl_toolkits.mplot3d import axes3d
|
||||
def plt_contour_wgrad(x, y, hist, ax, w_range=[-100, 500, 5], b_range=[-500, 500, 5],
                      contours = [0.1,50,1000,5000,10000,25000,50000],
                      resolution=5, w_final=200, b_final=100,step=10 ):
    """
    Draw cost contours over a (w, b) grid and overlay the gradient descent path
    Args:
      x, y       : training data passed to compute_cost
      hist       : sequence of visited points; each entry is indexed as
                   [0] (plotted on the w axis) and [1] (plotted on the b axis)
      ax         : axes to draw on
      w_range    : [start, stop, step] arguments for np.arange along w
      b_range    : [start, stop, step] arguments for np.arange along b
      contours   : cost levels at which contour lines are drawn
      resolution : minimum distance from the previous arrow tip before a new
                   arrow is drawn
      w_final, b_final : point marked with dotted guide lines
      step       : only every step-th entry of hist is considered

    The list defaults are only read, never modified.
    """
    b0, w0 = np.meshgrid(np.arange(*b_range), np.arange(*w_range))
    # Force a float grid: zeros_like would copy the integer dtype produced by
    # np.arange on integer ranges and silently truncate every cost value.
    z = np.zeros_like(b0, dtype=float)
    for idx in np.ndindex(w0.shape):
        z[idx] = compute_cost(x, y, w0[idx], b0[idx])

    CS = ax.contour(w0, b0, z, contours, linewidths=2,
                    colors=[dlblue, dlorange, dldarkred, dlmagenta, dlpurple])
    ax.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
    ax.set_xlabel("w")
    ax.set_ylabel("b")
    ax.set_title('Contour plot of cost J(w,b), vs b,w with path of gradient descent')

    # Dotted guide lines from the axes edges to the final (w, b) point.
    w = w_final
    b = b_final
    ax.hlines(b, ax.get_xlim()[0], w, lw=2, color=dlpurple, ls='dotted')
    ax.vlines(w, ax.get_ylim()[0], b, lw=2, color=dlpurple, ls='dotted')

    base = hist[0]
    for point in hist[0::step]:
        edist = np.sqrt((base[0] - point[0])**2 + (base[1] - point[1])**2)
        # NOTE(review): point == hist[-1] relies on the entries comparing to a
        # single truth value (e.g. lists); confirm hist never holds ndarrays.
        if edist > resolution or point == hist[-1]:
            if inbounds(point, base, ax.get_xlim(), ax.get_ylim()):
                # Annotate the axes that was passed in, not pyplot's current axes.
                ax.annotate('', xy=point, xytext=base, xycoords='data',
                            arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 3},
                            va='center', ha='center')
            base = point
|
||||
|
||||
|
||||
# plots p1 vs p2. Prange is an array of entries [min, max, steps]. In feature scaling lab.
|
||||
def plt_contour_multi(x, y, w, b, ax, prange, p1, p2, title="", xlabel="", ylabel=""):
    """
    Contour plot of the cost while two parameters are swept over a grid
    Args:
      x, y   : training data passed to compute_cost
      w, b   : parameter values held fixed for everything that is not swept;
               w is not modified
      ax     : axes to draw on
      prange : list indexed by p1 / p2; each entry holds the arguments for
               np.linspace (start, stop, number of points)
      p1, p2 : parameters to sweep; a value <= 3 selects w[p], 4 selects b
      title, xlabel, ylabel : text for the axes
    """
    contours = [1e2, 2e2,3e2,4e2, 5e2, 6e2, 7e2,8e2,1e3, 1.25e3,1.5e3, 1e4, 1e5, 1e6, 1e7]
    px, py = np.meshgrid(np.linspace(*(prange[p1])), np.linspace(*(prange[p2])))
    z = np.zeros_like(px)

    # Work on a private float copy so the caller's w is not overwritten by the
    # sweep and integer-typed w does not truncate the grid values.
    w_ij = np.array(w, dtype=float)
    for i, j in np.ndindex(px.shape):
        b_ij = b
        if p1 <= 3: w_ij[p1] = px[i, j]
        if p1 == 4: b_ij = px[i, j]
        if p2 <= 3: w_ij[p2] = py[i, j]
        if p2 == 4: b_ij = py[i, j]

        z[i][j] = compute_cost(x, y, w_ij, b_ij)

    CS = ax.contour(px, py, z, contours, linewidths=2,
                    colors=[dlblue, dlorange, dldarkred, dlmagenta, dlpurple])
    ax.clabel(CS, inline=1, fmt='%1.2e', fontsize=10)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title, fontsize=14)
|
||||
|
||||
|
||||
def plt_equal_scale(X_train, X_norm, y_train):
    """
    Show side-by-side cost contours over w[0], w[1] for the unnormalized and
    the normalized features, using hard-coded parameter values and ranges
    Args:
      X_train : unnormalized features (left plot)
      X_norm  : normalized features (right plot)
      y_train : target values used for both plots
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Left plot: unnormalized features.
    raw_ranges = [
        [ 0.238-0.045, 0.238+0.045, 50],
        [-25.77326319-0.045, -25.77326319+0.045, 50],
        [-50000, 0, 50],
        [-1500, 0, 50],
        [0, 200000, 50],
    ]
    raw_w = np.array([0.23844318, -25.77326319, -58.11084634, -1.57727192])
    raw_b = 235
    plt_contour_multi(X_train, y_train, raw_w, raw_b, axes[0], raw_ranges, 0, 1,
                      title='Unnormalized, J(w,b), vs w[0],w[1]',
                      xlabel= "w[0] (size(sqft))", ylabel="w[1] (# bedrooms)")

    # Right plot: normalized features.
    norm_ranges = [
        [ 111-50, 111+50, 75],
        [-16.75-50,-16.75+50, 75],
        [-28.5-8, -28.5+8, 50],
        [-37.1-16,-37.1+16, 50],
        [376-150, 376+150, 50],
    ]
    norm_w = np.array([111.1972, -16.75480051, -28.51530411, -37.17305735])
    norm_b = 376.949151515151
    plt_contour_multi(X_norm, y_train, norm_w, norm_b, axes[1], norm_ranges, 0, 1,
                      title='Normalized, J(w,b), vs w[0],w[1]',
                      xlabel= "w[0] (normalized size(sqft))", ylabel="w[1] (normalized # bedrooms)")

    fig.suptitle("Cost contour with equal scale", fontsize=18)
    # Leave the top 5% of the figure free for the suptitle.
    fig.tight_layout(rect=(0,0,1,0.95))
    plt.show()
|
||||
|
||||
def plt_divergence(p_hist, J_hist, x_train,y_train):
    """
    Plot a gradient descent path on top of the cost curve and the cost surface
    Args:
      p_hist  : sequence of visited parameters; entry [0] is plotted on the
                w axis and entry [1] on the b axis
      J_hist  : cost for each entry of p_hist
      x_train, y_train : training data passed to compute_cost
    """
    num_points = len(p_hist)
    x = np.zeros(num_points)
    y = np.zeros(num_points)
    v = np.zeros(num_points)
    for i, params in enumerate(p_hist):
        x[i] = params[0]
        y[i] = params[1]
        v[i] = J_hist[i]

    fig = plt.figure(figsize=(12,5))
    plt.subplots_adjust( wspace=0 )
    gs = fig.add_gridspec(1, 5)
    fig.suptitle("Cost escalates when learning rate is too large")

    #===============
    #  First subplot: cost as a function of w with b fixed
    #===============
    ax = fig.add_subplot(gs[:2])

    fix_b = 100
    w_array = np.arange(-70000, 70000, 1000)
    # dtype=float: w_array is an integer array, and zeros_like would otherwise
    # create an integer buffer that truncates every stored cost.
    cost = np.zeros_like(w_array, dtype=float)
    for i, tmp_w in enumerate(w_array):
        cost[i] = compute_cost(x_train, y_train, tmp_w, fix_b)

    ax.plot(w_array, cost)
    ax.plot(x, v, c=dlmagenta)
    ax.set_title("Cost vs w, b set to 100")
    ax.set_ylabel('Cost')
    ax.set_xlabel('w')
    ax.xaxis.set_major_locator(MaxNLocator(2))

    #===============
    #  Second subplot: cost surface over (w, b)
    #===============
    tmp_b, tmp_w = np.meshgrid(np.arange(-35000, 35000, 500), np.arange(-70000, 70000, 500))
    # Same integer-truncation concern as above.
    z = np.zeros_like(tmp_b, dtype=float)
    for idx in np.ndindex(tmp_w.shape):
        z[idx] = compute_cost(x_train, y_train, tmp_w[idx], tmp_b[idx])

    ax = fig.add_subplot(gs[2:], projection='3d')
    ax.plot_surface(tmp_w, tmp_b, z, alpha=0.3, color=dlblue)
    ax.xaxis.set_major_locator(MaxNLocator(2))
    ax.yaxis.set_major_locator(MaxNLocator(2))

    ax.set_xlabel('w', fontsize=16)
    ax.set_ylabel('b', fontsize=16)
    ax.set_zlabel('\ncost', fontsize=16)
    plt.title('Cost vs (b, w)')
    # Customize the view angle
    ax.view_init(elev=20., azim=-65)
    ax.plot(x, y, v, c=dlmagenta)
|
||||
|
||||
# draw derivative line
|
||||
# y = m*(x - x1) + y1
|
||||
def add_line(dj_dx, x1, y1, d, ax):
    """
    Draw a short dashed line of slope dj_dx through (x1, y1) and label it
    Args:
      dj_dx  : slope of the line; also shown (as an integer) in the label
      x1, y1 : point the line passes through; marked with a dot
      d      : half-width of the line along the x axis
      ax     : axes to draw on
    """
    xs = np.linspace(x1-d, x1+d,50)
    # Point-slope form: y = m*(x - x1) + y1
    ys = dj_dx*(xs - x1) + y1

    ax.scatter(x1, y1, color=dlblue, s=50)
    ax.plot(xs, ys, '--', c=dldarkred, zorder=10, linewidth = 1)

    # The label is pushed further right for the point at x1 == 200.
    if x1 == 200:
        xoff = 30
    else:
        xoff = 10
    slope_label = r"$\frac{\partial J}{\partial w}$ =%d" % dj_dx
    ax.annotate(slope_label, fontsize=14,
                xy=(x1, y1), xycoords='data',
                xytext=(xoff, 10), textcoords='offset points',
                arrowprops=dict(arrowstyle="->"),
                horizontalalignment='left', verticalalignment='top')
|
||||
|
||||
def plt_gradients(x_train,y_train, f_compute_cost, f_compute_gradient):
    """
    Plot cost vs w with tangent lines, and a quiver plot of the gradient
    Args:
      x_train, y_train   : training data forwarded to both callbacks
      f_compute_cost     : called as f_compute_cost(x, y, w, b), returns the cost
      f_compute_gradient : called as f_compute_gradient(x, y, w, b); its result
                           is unpacked here as (dj_dw, dj_db). Note that
                           compute_gradient in this module returns the pair in
                           the opposite order (dj_db, dj_dw).
    """
    #===============
    #  First subplot: cost curve with tangent lines
    #===============
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    left, right = axes[0], axes[1]

    fix_b = 100
    w_array = np.linspace(0, 400, 50)
    cost = np.zeros_like(w_array)
    for i, w_val in enumerate(w_array):
        cost[i] = f_compute_cost(x_train, y_train, w_val, fix_b)

    left.plot(w_array, cost, linewidth=1)
    left.set_title("Cost vs w, with gradient; b set to 100")
    left.set_ylabel('Cost')
    left.set_xlabel('w')

    # Tangent lines at three values of w, with b fixed at 100.
    for w_val in [100, 200, 300]:
        fix_b = 100
        dj_dw, dj_db = f_compute_gradient(x_train, y_train, w_val, fix_b)
        j = f_compute_cost(x_train, y_train, w_val, fix_b)
        add_line(dj_dw, w_val, j, 30, left)

    #===============
    #  Second subplot: gradient field over a (w, b) grid
    #===============
    tmp_b, tmp_w = np.meshgrid(np.linspace(-200, 200, 10), np.linspace(-100, 600, 10))
    U = np.zeros_like(tmp_w)
    V = np.zeros_like(tmp_b)
    for idx in np.ndindex(tmp_w.shape):
        U[idx], V[idx] = f_compute_gradient(x_train, y_train, tmp_w[idx], tmp_b[idx])

    # Arrow colors derived from the two gradient components.
    n = -2
    color_array = np.sqrt(((V-n)/2)**2 + ((U-n)/2)**2)

    right.set_title('Gradient shown in quiver plot')
    Q = right.quiver(tmp_w, tmp_b, U, V, color_array, units='width', )
    right.quiverkey(Q, 0.9, 0.9, 2, r'$2 \frac{m}{s}$', labelpos='E', coordinates='figure')
    right.set_xlabel("w")
    right.set_ylabel("b")
|
||||
|
||||
def norm_plot(ax, data):
    """
    Histogram of data with a normal pdf (same mean and std) drawn over it
    Args:
      ax   : axes to draw the histogram on
      data : values to plot
    """
    lowest = np.min(data)
    highest = np.max(data)
    # Extend the bin range by 20% of the data range on both sides.
    margin = (highest - lowest)*0.2
    edges = np.linspace(lowest - margin, highest + margin, 50)
    _, bins, _ = ax.hist(data, edges, color="xkcd:azure")

    density = norm.pdf(bins, loc=np.mean(data), scale=np.std(data))

    # The pdf goes on a twin y axis, which is then hidden.
    overlay = ax.twinx()
    overlay.plot(bins, density, color="orangered", lw=2)
    overlay.set_ylim(bottom=0)
    overlay.axis('off')
|
||||
|
||||
def plot_cost_i_w(X,y,hist):
    """
    Plot cost vs iteration, and cost vs w[0] with the descent path overlaid
    Args:
      X, y : training data passed to compute_cost
      hist : dict with keys "params" (entries whose [0] element is w),
             "cost" and "iter"
    """
    ws = np.array([entry[0] for entry in hist["params"]])
    w0_path = ws[:, 0]

    # Range of w[0] values, widened to cover the extremes of the path.
    rng = max(abs(w0_path.min()), abs(w0_path.max()))
    wr = np.linspace(-rng+0.27, rng+0.27, 20)
    # The remaining weights and b are held at fixed values.
    cst = [compute_cost(X, y, np.array([w0, -32, -67, -1.46]), 221) for w0 in wr]

    fig, axes = plt.subplots(1, 2, figsize=(12, 3))
    left, right = axes[0], axes[1]

    left.plot(hist["iter"], (hist["cost"]))
    left.set_title("Cost vs Iteration")
    left.set_xlabel("iteration")
    left.set_ylabel("Cost")

    right.plot(wr, cst)
    right.set_title("Cost vs w[0]")
    right.set_xlabel("w[0]")
    right.set_ylabel("Cost")
    right.plot(ws[:, 0], hist["cost"])
    plt.show()
|
||||
|
||||
|
||||
##########################################################
|
||||
# Regression Routines
|
||||
##########################################################
|
||||
|
||||
def compute_gradient_matrix(X, y, w, b):
    """
    Computes the gradient of the squared-error cost for linear regression (vectorized)

    Args:
      X : (array_like Shape (m,n)) variable such as house size
      y : (array_like) actual value
      w : (array_like) Values of parameters of the model
      b : (scalar )    Values of parameter of the model
    Returns (in this order)
      dj_db: (scalar) The gradient of the cost w.r.t. the parameter b.
      dj_dw: (array_like) The gradient of the cost w.r.t. the parameters w,
             computed as X.T @ (X @ w + b - y) / m.
    """
    num_examples, _ = X.shape
    residuals = (X @ w + b) - y
    scale = 1 / num_examples

    grad_w = scale * (X.T @ residuals)
    grad_b = scale * np.sum(residuals)

    # NOTE: the bias gradient comes first in the returned tuple.
    return grad_b, grad_w
|
||||
|
||||
#Function to calculate the cost
|
||||
def compute_cost_matrix(X, y, w, b, verbose=False):
    """
    Computes the squared-error cost for linear regression (vectorized)
    Args:
      X : (array_like Shape (m,n)) variable such as house size
      y : (array_like Shape (m,)) actual value
      w : (array_like Shape (n,)) parameters of the model
      b : (scalar               ) parameter of the model
      verbose : (Boolean) If true, print out intermediate value f_wb
    Returns
      cost: (scalar) sum of squared prediction errors scaled by 1/(2*m)
    """
    num_examples, _ = X.shape

    # Predictions for all examples in a single matrix product.
    predictions = X @ w + b
    residuals = predictions - y
    total_cost = (1 / (2 * num_examples)) * np.sum(residuals ** 2)

    if verbose:
        print("f_wb:")
        print(predictions)

    return total_cost
|
||||
|
||||
# Loop version of multi-variable compute_cost
|
||||
def compute_cost(X, y, w, b):
    """
    Compute the squared-error cost one example at a time
    Args:
      X : (ndarray): Shape (m,n) matrix of examples with multiple features
      y : (ndarray): target value of each example
      w : (ndarray): Shape (n)   parameters for prediction
      b : (scalar):              parameter  for prediction
    Returns
      cost: (scalar) sum of squared errors divided by 2*m, passed through
            np.squeeze
    """
    num_examples = X.shape[0]
    squared_error_sum = 0.0
    for i, x_i in enumerate(X):
        prediction = np.dot(x_i, w) + b
        squared_error_sum = squared_error_sum + (prediction - y[i]) ** 2
    mean_half_error = squared_error_sum / (2 * num_examples)
    return np.squeeze(mean_half_error)
|
||||
|
||||
def compute_gradient(X, y, w, b):
    """
    Computes the gradient of the squared-error cost one example at a time
    Args:
      X : (ndarray Shape (m,n)) matrix of examples
      y : (ndarray Shape (m,))  target value of each example
      w : (ndarray Shape (n,))  parameters of the model
      b : (scalar)              parameter of the model
    Returns (in this order)
      dj_db : (scalar)             The gradient of the cost w.r.t. the parameter b.
      dj_dw : (ndarray Shape (n,)) The gradient of the cost w.r.t. the parameters w.
    """
    num_examples, num_features = X.shape
    grad_w = np.zeros((num_features,))
    grad_b = 0.

    for i in range(num_examples):
        residual = (np.dot(X[i], w) + b) - y[i]
        # Accumulate residual * feature for every feature of this example.
        grad_w = grad_w + residual * X[i]
        grad_b = grad_b + residual

    # NOTE: the bias gradient comes first in the returned tuple.
    return grad_b / num_examples, grad_w / num_examples
|
||||
|
||||
#This version saves more values and is more verbose than the assignment versions
|
||||
def gradient_descent_houses(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent, taking num_iters gradient steps with
    learning rate alpha, and prints a progress table along the way.

    The progress table prints w[0..3] and dj_dw[0..3], so the model needs at
    least four features.

    Args:
      X : (array_like Shape (m,n)    matrix of examples
      y : (array_like Shape (m,))    target value of each example
      w_in : (array_like Shape (n,)) Initial values of parameters of the model
      b_in : (scalar)                Initial value of parameter of the model
      cost_function: called as cost_function(X, y, w, b), returns the cost
      gradient_function: called as gradient_function(X, y, w, b), returns
                         (dj_db, dj_dw)
      alpha : (float) Learning rate
      num_iters : (int) number of iterations to run gradient descent
    Returns
      w : (array_like Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
      hist : (dict) lists stored under "cost", "params", "grads" and "iter",
          one entry per saved iteration
    """
    # History of saved iterations, primarily for graphing later.
    hist = {"cost": [], "params": [], "grads": [], "iter": []}

    w = copy.deepcopy(w_in)  # avoid modifying the caller's w
    b = b_in
    save_interval = np.ceil(num_iters/10000)  # prevent resource exhaustion for long runs
    print_interval = math.ceil(num_iters/10)  # about 10 progress rows in total

    print("Iteration Cost w0 w1 w2 w3 b djdw0 djdw1 djdw2 djdw3 djdb ")
    print("---------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------|")

    for i in range(num_iters):
        # Gradient at the current parameters, then one descent step.
        dj_db, dj_dw = gradient_function(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost, parameters and gradients at each save interval.
        if i == 0 or i % save_interval == 0:
            hist["cost"].append(cost_function(X, y, w, b))
            hist["params"].append([w, b])
            hist["grads"].append([dj_dw, dj_db])
            hist["iter"].append(i)

        if i % print_interval == 0:
            cst = cost_function(X, y, w, b)
            print(f"{i:9d} {cst:0.5e} {w[0]: 0.1e} {w[1]: 0.1e} {w[2]: 0.1e} {w[3]: 0.1e} {b: 0.1e} {dj_dw[0]: 0.1e} {dj_dw[1]: 0.1e} {dj_dw[2]: 0.1e} {dj_dw[3]: 0.1e} {dj_db: 0.1e}")

    return w, b, hist
|
||||
|
||||
def run_gradient_descent(X,y,iterations=1000, alpha = 1e-6):
    """
    Run gradient_descent_houses from all-zero parameters and print the result
    Args:
      X : (ndarray Shape (m,n)) matrix of examples
      y : target value of each example
      iterations : number of gradient descent steps
      alpha      : learning rate
    Returns
      (w_out, b_out, hist_out) as returned by gradient_descent_houses
    """
    _, num_features = X.shape

    # Start from w = 0, b = 0.
    w_start = np.zeros(num_features)
    b_start = 0

    w_out, b_out, hist_out = gradient_descent_houses(
        X, y, w_start, b_start,
        compute_cost, compute_gradient_matrix, alpha, iterations)
    print(f"w,b found by gradient descent: w: {w_out}, b: {b_out:0.2f}")

    return w_out, b_out, hist_out
|
||||
|
||||
# compact extraction of hist data
|
||||
#x = hist["iter"]
|
||||
#J = np.array([ p for p in hist["cost"]])
|
||||
#ws = np.array([ p[0] for p in hist["params"]])
|
||||
#dj_ws = np.array([ p[0] for p in hist["grads"]])
|
||||
|
||||
#bs = np.array([ p[1] for p in hist["params"]])
|
||||
|
||||
def run_gradient_descent_feng(X,y,iterations=1000, alpha = 1e-6):
    """
    Run gradient_descent from all-zero parameters and print the result
    Args:
      X : (ndarray Shape (m,n)) matrix of examples
      y : target value of each example
      iterations : number of gradient descent steps
      alpha      : learning rate
    Returns
      (w_out, b_out); the history returned by gradient_descent is discarded
    """
    _, num_features = X.shape

    # Start from w = 0, b = 0.
    w_start = np.zeros(num_features)
    b_start = 0

    w_out, b_out, _ = gradient_descent(
        X, y, w_start, b_start,
        compute_cost, compute_gradient_matrix, alpha, iterations)
    print(f"w,b found by gradient descent: w: {w_out}, b: {b_out:0.4f}")

    return w_out, b_out
|
||||
|
||||
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent, taking num_iters gradient steps with
    learning rate alpha, and prints the cost at regular intervals.

    Args:
      X : (array_like Shape (m,n)    matrix of examples
      y : (array_like Shape (m,))    target value of each example
      w_in : (array_like Shape (n,)) Initial values of parameters of the model
      b_in : (scalar)                Initial value of parameter of the model
      cost_function: called as cost_function(X, y, w, b), returns the cost
      gradient_function: called as gradient_function(X, y, w, b), returns
                         (dj_db, dj_dw)
      alpha : (float) Learning rate
      num_iters : (int) number of iterations to run gradient descent
    Returns
      w : (array_like Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
      hist : (dict) lists stored under "cost", "params", "grads" and "iter",
          one entry per saved iteration
    """
    # History of saved iterations, primarily for graphing later.
    hist = {"cost": [], "params": [], "grads": [], "iter": []}

    w = copy.deepcopy(w_in)  # avoid modifying the caller's w
    b = b_in
    save_interval = np.ceil(num_iters/10000)  # prevent resource exhaustion for long runs
    print_interval = math.ceil(num_iters/10)  # about 10 progress lines in total

    for i in range(num_iters):
        # Gradient at the current parameters, then one descent step.
        dj_db, dj_dw = gradient_function(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Save cost, parameters and gradients at each save interval.
        if i == 0 or i % save_interval == 0:
            hist["cost"].append(cost_function(X, y, w, b))
            hist["params"].append([w, b])
            hist["grads"].append([dj_dw, dj_db])
            hist["iter"].append(i)

        if i % print_interval == 0:
            cst = cost_function(X, y, w, b)
            print(f"Iteration {i:9d}, Cost: {cst:0.5e}")

    return w, b, hist
|
||||
|
||||
def load_house_data():
    """
    Load the comma-separated data set stored in ./data/houses.txt,
    skipping its first row
    Returns
      X : first four columns of the file
      y : fifth column of the file
    """
    table = np.loadtxt("./data/houses.txt", delimiter=',', skiprows=1)
    features, targets = table[:, :4], table[:, 4]
    return features, targets
|
||||
|
||||
def zscore_normalize_features(X,rtn_ms=False):
    """
    returns z-score normalized X by column
    Args:
      X : (numpy array (m,n))
      rtn_ms : (Boolean) if true, also return the column means and
               standard deviations used for the normalization
    Returns
      X_norm: (numpy array (m,n)) input normalized by column
      mu, sigma: column means and standard deviations (only when rtn_ms is true)

    A column whose standard deviation is 0 is divided by zero.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu) / sigma

    return (X_norm, mu, sigma) if rtn_ms else X_norm
|
||||
|
||||
|
||||