# [SemiFlow: A Hands-On Deep Learning Framework 01] Starting with an Example

March 24, 2020

## Data Example


First, we generate a toy binary classification dataset: two Gaussian clusters of 200 points each.

```python
import numpy as np
import matplotlib.pyplot as plt
from Dense import Dense

# Class A: 200 points around (6, 6); class B: 200 points around (1, 1)
Asamples = np.random.multivariate_normal([6, 6], [[1, 0], [0, 1]], 200)
Bsamples = np.random.multivariate_normal([1, 1], [[1, 0], [0, 1]], 200)
plt.figure()
plt.plot(Asamples[:, 0], Asamples[:, 1], 'r.')
plt.plot(Bsamples[:, 0], Bsamples[:, 1], 'b.')
plt.show()
```


```python
# Stack the two clusters; label class A as 1 and class B as 0
x_train = np.vstack((Asamples, Bsamples))
y_train = np.append(np.ones(200), np.zeros(200))
```


After stacking, $x_{train}$ has shape $400 \times 2$ (400 samples, 2 features), and $y_{train}$ is a vector of 400 labels.
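A quick sanity check of the shapes:

```python
print(x_train.shape)  # (400, 2)
print(y_train.shape)  # (400,)
```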

## Basic Elements of a Neural Network

At a minimum, our network class needs a `train` method and an `evaluate` method:

```python
class Dense:
    def __init__(self):
        """ Do something """
        pass

    def train(self, x_train, y_train, learning_rate=0.01):
        """ Train neural network """
        pass

    def evaluate(self, x_test, y_test):
        """ Evaluate test data """
        pass
```


The network itself takes the two input features, passes them through one hidden layer of four ReLU units, and produces a single sigmoid output:

$$H^1 = h(W^1 x + b^1)$$

$$Z = W^2 H^1 + b^2$$

$$g = \frac{1}{1+e^{-Z}}$$

where $h(a) = \max(0, a)$ is the ReLU activation and $g$ is the predicted probability of class 1.
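To see the shapes involved, here is a small vectorized sketch of these three equations. It is my own illustration, not part of the SemiFlow code, and `W1`, `b1`, `W2`, `b2` are hypothetical names:

```python
import numpy as np

rng = np.random.default_rng(0)
W1, b1 = rng.random((4, 2)), rng.random(4)  # first layer: 2 inputs -> 4 hidden units
W2, b2 = rng.random((1, 4)), rng.random(1)  # second layer: 4 hidden units -> 1 output

def forward(x):
    h1 = np.maximum(0, W1 @ x + b1)  # H^1 = h(W^1 x + b^1), ReLU
    z = W2 @ h1 + b2                 # Z = W^2 H^1 + b^2
    return 1.0 / (1.0 + np.exp(-z))  # g = sigmoid(Z)

print(forward(np.array([6.0, 6.0])))  # a probability in (0, 1); the weights are untrained
```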

Instead of weight matrices, this first version stores every weight and bias as a separate scalar attribute. Filling in the `__init__` stub from above:

```python
def __init__(self):
    # biases of the four hidden units (first layer)
    self._b11 = np.random.rand(1)
    self._b12 = np.random.rand(1)
    self._b13 = np.random.rand(1)
    self._b14 = np.random.rand(1)
    # weights from x1 to the 4 hidden nodes
    self._w111 = np.random.rand(1)
    self._w112 = np.random.rand(1)
    self._w113 = np.random.rand(1)
    self._w114 = np.random.rand(1)
    # weights from x2 to the 4 hidden nodes
    self._w121 = np.random.rand(1)
    self._w122 = np.random.rand(1)
    self._w123 = np.random.rand(1)
    self._w124 = np.random.rand(1)
    # weights from the 4 hidden nodes to the output node
    self._w211 = np.random.rand(1)
    self._w221 = np.random.rand(1)
    self._w231 = np.random.rand(1)
    self._w241 = np.random.rand(1)
    # bias of the output node (second layer)
    self._b2 = np.random.rand(1)
    # placeholders overwritten by the forward pass
    self._logits = np.random.rand(1)
    self._h1 = np.random.rand(1)
    self._h2 = np.random.rand(1)
    self._h3 = np.random.rand(1)
    self._h4 = np.random.rand(1)
    self._pred = np.random.rand(1)
```
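Counting the parameters: $2 \times 4 + 4 = 12$ in the first layer (eight weights plus four biases) and $4 + 1 = 5$ in the second, so the network has 17 trainable parameters in total.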


OK, now we have the network structure; next comes the training setup. We still need:

• A loss function, to measure how far the network's predictions are from the targets
• Forward propagation, to compute the activations from the input through to the output
• Backpropagation, to update the parameters from the output back to the input via the chain rule of derivatives
• A learning rate, to control the step size of each parameter update

For this two-class problem with a single sigmoid output $g$, we use the binary cross-entropy loss, averaged over the $N$ training samples:

$$L(\mathbf{y}, \mathbf{g})=-\frac{1}{N} \sum_{n=1}^{N}\left[y_{n} \ln g_{n}+\left(1-y_{n}\right) \ln \left(1-g_{n}\right)\right]$$
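The snippets below also call two helpers, `sigmoid` and `binary_cross_entropy`, that are not shown in the post. A minimal sketch of what they might look like, reusing the `np` import from the top (the `eps` clipping is my own safeguard against `log(0)`):

```python
def sigmoid(z):
    # logistic function, mapping a logit to a probability in (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    # average binary cross-entropy; clip predictions to avoid log(0)
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
```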

```python
def _ForwardPropagation(self, x):
    # cache the inputs for use in backpropagation
    self._x1 = x[0]
    self._x2 = x[1]
    # hidden layer: four ReLU units
    self._h1 = np.maximum(0, self._w111 * x[0] + self._w121 * x[1] + self._b11)
    self._h2 = np.maximum(0, self._w112 * x[0] + self._w122 * x[1] + self._b12)
    self._h3 = np.maximum(0, self._w113 * x[0] + self._w123 * x[1] + self._b13)
    self._h4 = np.maximum(0, self._w114 * x[0] + self._w124 * x[1] + self._b14)
    # output layer: logit, then sigmoid probability
    self._logits = (self._b2 + self._w211 * self._h1 + self._w221 * self._h2
                    + self._w231 * self._h3 + self._w241 * self._h4)
    self._pred = sigmoid(self._logits)
    return self._pred
```
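Backpropagation starts from the derivative of the loss with respect to the logit. Combining the sigmoid with binary cross-entropy, the chain rule collapses to a remarkably simple expression:

$$\frac{\partial L}{\partial Z}=\frac{\partial L}{\partial g} \cdot \frac{\partial g}{\partial Z}=\left(\frac{1-y}{1-g}-\frac{y}{g}\right) g(1-g)=g-y$$

which is exactly the first line of `_BackPropagation` below.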


```python
def _BackPropagation(self, y_true, learning_rate):
    # derivative of binary cross-entropy w.r.t. the logit (see above)
    d_L_d_logits = self._pred - y_true

    # gradients of the second-layer weights and bias
    d_L_d_W211 = d_L_d_logits * self._h1
    d_L_d_W221 = d_L_d_logits * self._h2
    d_L_d_W231 = d_L_d_logits * self._h3
    d_L_d_W241 = d_L_d_logits * self._h4
    d_L_d_b2 = d_L_d_logits

    # gradients flowing back into the hidden units
    d_L_d_h1 = d_L_d_logits * self._w211
    d_L_d_h2 = d_L_d_logits * self._w221
    d_L_d_h3 = d_L_d_logits * self._w231
    d_L_d_h4 = d_L_d_logits * self._w241

    # ReLU gradient: 1 where the unit is active, 0 otherwise
    d_h1_d_h1 = (self._h1 > 0).astype(float)
    d_h1_d_w111 = d_h1_d_h1 * self._x1
    d_h1_d_w121 = d_h1_d_h1 * self._x2
    d_h1_d_b11 = d_h1_d_h1

    d_h2_d_h2 = (self._h2 > 0).astype(float)
    d_h2_d_w112 = d_h2_d_h2 * self._x1
    d_h2_d_w122 = d_h2_d_h2 * self._x2
    d_h2_d_b12 = d_h2_d_h2

    d_h3_d_h3 = (self._h3 > 0).astype(float)
    d_h3_d_w113 = d_h3_d_h3 * self._x1
    d_h3_d_w123 = d_h3_d_h3 * self._x2
    d_h3_d_b13 = d_h3_d_h3

    d_h4_d_h4 = (self._h4 > 0).astype(float)
    d_h4_d_w114 = d_h4_d_h4 * self._x1
    d_h4_d_w124 = d_h4_d_h4 * self._x2
    d_h4_d_b14 = d_h4_d_h4

    # Update parameters (plain gradient descent step)
    self._w111 -= learning_rate * d_L_d_h1 * d_h1_d_w111
    self._w121 -= learning_rate * d_L_d_h1 * d_h1_d_w121
    self._w112 -= learning_rate * d_L_d_h2 * d_h2_d_w112
    self._w122 -= learning_rate * d_L_d_h2 * d_h2_d_w122
    self._w113 -= learning_rate * d_L_d_h3 * d_h3_d_w113
    self._w123 -= learning_rate * d_L_d_h3 * d_h3_d_w123
    self._w114 -= learning_rate * d_L_d_h4 * d_h4_d_w114
    self._w124 -= learning_rate * d_L_d_h4 * d_h4_d_w124

    self._b11 -= learning_rate * d_L_d_h1 * d_h1_d_b11
    self._b12 -= learning_rate * d_L_d_h2 * d_h2_d_b12
    self._b13 -= learning_rate * d_L_d_h3 * d_h3_d_b13
    self._b14 -= learning_rate * d_L_d_h4 * d_h4_d_b14
    self._w211 -= learning_rate * d_L_d_W211
    self._w221 -= learning_rate * d_L_d_W221
    self._w231 -= learning_rate * d_L_d_W231
    self._w241 -= learning_rate * d_L_d_W241
    self._b2 -= learning_rate * d_L_d_b2
```


```python
def train(self, x_train, y_train, learning_rate=0.01, epochs=100):
    for epoch in range(epochs):
        # pure SGD: update the parameters after every single sample
        for i, x in enumerate(x_train):
            self._ForwardPropagation(x)
            self._BackPropagation(y_train[i], learning_rate)

        if epoch % 10 == 0:
            # log the average loss over the whole training set
            preds = np.zeros(y_train.shape[0])
            for i in range(y_train.shape[0]):
                preds[i] = self._ForwardPropagation(x_train[i])
            loss = binary_cross_entropy(y_train, preds)
            print("[Epoch %2d] loss : %.3f" % (epoch, loss))
```


The `evaluate` function is even easier. Here I use accuracy as the evaluation measure.

```python
def evaluate(self, x_test, y_test):
    preds = np.zeros(y_test.shape[0])
    for i in range(y_test.shape[0]):
        preds[i] = self._ForwardPropagation(x_test[i])
    num = 0
    for i in range(x_test.shape[0]):
        # threshold the predicted probability at 0.5
        y = 1 if preds[i] >= 0.5 else 0
        if y_test[i] == y:
            num += 1
    accuracy = num / x_test.shape[0]
    print("[Train accuracy: %.3f]" % accuracy)
```


## Testing
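To put everything together, here is a hypothetical driver (the `Dense` class above is assumed to live in `Dense.py`, matching the import at the top). I pass `epochs=101` so that epoch 100 is logged, and evaluate on the training data, which is why the log says "Train accuracy":

```python
model = Dense()
model.train(x_train, y_train, learning_rate=0.01, epochs=101)
model.evaluate(x_train, y_train)
```

This produces a log like the following: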

```
[Epoch 0] loss : 4.786
[Epoch 10] loss : 0.061
[Epoch 20] loss : 0.011
[Epoch 30] loss : 0.006
[Epoch 40] loss : 0.004
[Epoch 50] loss : 0.004
[Epoch 60] loss : 0.003
[Epoch 70] loss : 0.003
[Epoch 80] loss : 0.002
[Epoch 90] loss : 0.002
[Epoch 100] loss : 0.002
[Train accuracy: 1.000]
```
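The loss drops quickly and the training accuracy reaches 1.000, which is expected: the two clusters are centered at (6, 6) and (1, 1) with unit covariance, so they are very well separated.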

## Other SemiFlow Articles

Dong Wang

A final-year master's student in computer science at Uppsala University, Sweden. I am interested in deep learning, computer vision, and optimization, and I am actively looking for a Ph.D. position.