diff --git a/linear regression/1_cuda.py b/linear regression/1_cuda.py
index be818aa..634e697 100644
--- a/linear regression/1_cuda.py
+++ b/linear regression/1_cuda.py
@@ -2,6 +2,7 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
+
 # Linear regression training code
 def compute_error_for_line_given_points(b, w, points):
     totalError = 0
@@ -12,6 +13,7 @@ def compute_error_for_line_given_points(b, w, points):
         totalError += (y - (w * x + b)) ** 2
     return totalError / N
 
+
 def step_gradient(b_current, w_current, points, learningRate):
     b_gradient = torch.tensor(0.0, device=points.device)
     w_gradient = torch.tensor(0.0, device=points.device)
@@ -25,25 +27,29 @@ def step_gradient(b_current, w_current, points, learningRate):
     new_w = w_current - (learningRate * w_gradient)
     return [new_b, new_w]
 
+
 def gradient_descent_runner(points, starting_b, starting_w, learningRate, num_iterations):
     b = torch.tensor(starting_b, device=points.device)
     w = torch.tensor(starting_w, device=points.device)
     for i in range(num_iterations):
         b, w = step_gradient(b, w, points, learningRate)
+        print("round:", i)
     return [b, w]
 
+
 def run():
     points_np = np.genfromtxt("data1.csv", delimiter=',').astype(np.float32)
-    points = torch.tensor(points_np, device='cuda')
+    points = torch.tensor(points_np, device='cuda:5')
     learning_rate = 0.0001
     initial_b = 0.0
     initial_w = 0.0
     num_iterations = 100000
     [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
     print("After gradient descent at b={0}, w={1}, error={2}".format(b.item(), w.item(),
-          compute_error_for_line_given_points(b, w, points)))
+                                                                     compute_error_for_line_given_points(b, w, points)))
     return b.item(), w.item()
 
+
 # Run linear regression
 final_b, final_w = run()
 
diff --git a/linear regression/print1.png b/linear regression/print1.png
index 1b6dbdd..153b7af 100644
Binary files a/linear regression/print1.png and b/linear regression/print1.png differ
diff --git a/mnist/README.md b/mnist/README.md
new file mode 100644
index 0000000..0f727ad
--- /dev/null
+++ b/mnist/README.md
@@ -0,0 +1,40 @@
+# No deep learning, just function mapping
+
+$$
+X = [v_1, v_2, \ldots, v_{784}] \\
+X: [1, d_x]
+$$
+
+$$
+H_1 = XW_1 + b_1 \\
+W_1: [d_x, d_1] \\
+b_1: [d_1]
+$$
+
+$$
+H_2 = H_1W_2 + b_2 \\
+W_2: [d_1, d_2] \\
+b_2: [d_2]
+$$
+
+$$
+H_3 = H_2W_3 + b_3 \\
+W_3: [d_2, 10] \\
+b_3: [10]
+$$
+
+## Loss
+
+$$
+H_3: [1, 10] \\
+Y \in \{0, 1, 2, \ldots, 9\},\ \text{one-hot encoded} \\
+e.g.\ 1 \Rightarrow [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] \\
+e.g.\ 3 \Rightarrow [0, 0, 0, 1, 0, 0, 0, 0, 0, 0] \\
+\text{loss: Euclidean distance between } H_3 \text{ and } Y
+$$
+
+## In a nutshell
+
+$$
+pred = \{[XW_1 + b_1]W_2 + b_2\}W_3 + b_3
+$$
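
As a companion to the new `mnist/README.md`, here is a minimal PyTorch sketch of the three-layer mapping it describes. This is an illustrative sketch, not code from the repository: the hidden sizes `d1 = 256` and `d2 = 64`, the random initialization scale, and the `forward` helper are all assumptions.

```python
import torch

# Illustrative hidden sizes; the README only names them d1 and d2 (assumed values).
dx, d1, d2 = 784, 256, 64

# Parameters in the README's row-vector convention: H = X @ W + b.
W1, b1 = torch.randn(dx, d1) * 0.01, torch.zeros(d1)
W2, b2 = torch.randn(d1, d2) * 0.01, torch.zeros(d2)
W3, b3 = torch.randn(d2, 10) * 0.01, torch.zeros(10)

def forward(X):
    # pred = {[X W1 + b1] W2 + b2} W3 + b3 -- pure affine maps, no activations.
    H1 = X @ W1 + b1     # [1, d1]
    H2 = H1 @ W2 + b2    # [1, d2]
    return H2 @ W3 + b3  # [1, 10]

# One-hot target and squared Euclidean distance loss, as in the README's Loss section.
X = torch.rand(1, dx)   # stands in for a flattened 28x28 image
Y = torch.zeros(1, 10)
Y[0, 3] = 1.0           # label 3 -> one-hot
loss = ((forward(X) - Y) ** 2).sum()
print(loss.item())
```

Because there is no nonlinearity between the layers, the whole composition is itself a single affine map, which is exactly the README's point: no deep learning, just function mapping.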