1
import numpy as np

目标

使用numpy实现多层感知机的正向和反向传播

层次构建

全连接层

正向传播

正向传播的公式为:$Y = f(W \times X + b)$,其中,Y为输出,X为输入,W为权值,b为偏置,f为激活函数(在下文的代码中,激活函数作为独立的层实现,全连接层本身只计算 $W \times X + b$)

反向传播

对于反向传播,已知上一层传回的梯度为dY,对应的反向传播公式为:$dW = dY \cdot X^T$,$db = \sum dY$(沿样本维度求和),$dX = W^T \cdot dY$

代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class numpy_fc(object):
    """Fully connected layer computing ``weight . data + bias``.

    ``weight`` has shape ``(out_channel, in_channel)`` and ``bias`` has shape
    ``(out_channel, 1)``, so inputs are expected with ``in_channel`` rows.
    ``optim`` is a callable that maps a gradient to the update which is
    added to the matching parameter in :meth:`step`.
    """

    def __init__(self, in_channel, out_channel, optim):
        # Caches that forward()/backward() fill in later.
        self.out_data = None
        self.weight_grad = None
        self.bias_grad = None
        self.in_data = np.zeros((1, in_channel))
        self.optimizer = optim
        # Small random weights (standard normal scaled by 0.1), zero biases.
        initial_weight = np.random.randn(out_channel, in_channel) * 0.1
        self.weight = np.float64(initial_weight)
        self.bias = np.zeros((out_channel, 1), dtype=np.float64)

    def forward(self, data):
        """Cache ``data`` and return ``weight . data + bias``."""
        self.in_data = data
        projected = np.dot(self.weight, data)
        self.out_data = projected + self.bias
        return self.out_data

    def backward(self, grad):
        """Store parameter gradients and return the gradient w.r.t. the input.

        ``grad`` is the gradient of the loss with respect to this layer's
        output; the input used is the one cached by the last forward() call.
        """
        upstream = np.dot(self.weight.T, grad)
        self.weight_grad = np.dot(grad, self.in_data.T)
        # Sum along axis 1 and restore the column shape used by ``bias``.
        self.bias_grad = np.sum(grad, axis=1).reshape((-1, 1))
        return upstream

    def step(self):
        """Add the optimizer's update for each stored gradient in place."""
        weight_delta = self.optimizer(self.weight_grad)
        self.weight += weight_delta
        bias_delta = self.optimizer(self.bias_grad)
        self.bias += bias_delta

代码测试

1
2
3
4
5
6
7
# Shape smoke test for the dense layer: 16 inputs, 8 outputs, 10 columns.
# No optimizer is supplied because step() is never called here.
test_fc = numpy_fc(16, 8, None)
fc_input = np.random.rand(16, 10)
test_fc_forward = test_fc.forward(fc_input)
print(test_fc_forward.shape)
# The forward output is reused as a stand-in upstream gradient.
test_fc_back = test_fc.backward(test_fc_forward)
print(test_fc_back.shape)
# Each gradient should have the same shape as the parameter it belongs to.
for grad_arr, param_arr in (
    (test_fc.weight_grad, test_fc.weight),
    (test_fc.bias_grad, test_fc.bias),
):
    print(grad_arr.shape, param_arr.shape)
(8, 10)
(16, 10)
(8, 16) (8, 16)
(8, 1) (8, 1)

激活函数

sigmoid函数

sigmoid函数是常用的二分类问题输出层激活函数,前向传播和反向传播分别如下所示:前向传播 $y = \frac{1}{1 + e^{-x}}$,反向传播 $dx = dy \cdot y \cdot (1 - y)$

1
2
3
4
5
6
7
8
9
10
11
12
13
class numpy_sigmoid(object):
    """Element-wise logistic sigmoid layer with no trainable parameters."""

    def __init__(self):
        # Output of the most recent forward() call, reused by backward().
        self.result = None

    def forward(self, data):
        """Return ``1 / (1 + exp(-data))`` and cache it."""
        denominator = 1 + np.exp(-data)
        self.result = 1 / denominator
        return self.result

    def backward(self, grad):
        """Scale ``grad`` by the sigmoid derivative ``result * (1 - result)``."""
        scaled = grad * self.result
        return scaled * (1 - self.result)

    def step(self):
        """No parameters to update."""
        pass

relu函数

relu是现阶段最常用的隐层激活函数,前向传播和反向传播如下所示:前向传播 $y = \max(0, x)$;反向传播中,当 $x > 0$ 时 $dx = dy$,否则 $dx = 0$

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
class numpy_relu(object):
    """Element-wise ReLU layer (``max(data, 0)``) with no trainable parameters."""

    def __init__(self):
        # Output of the most recent forward() call, reused by backward().
        self.result = None

    def forward(self, data):
        """Return ``data`` with negative entries replaced by zero.

        A new array is allocated, so the caller's ``data`` is left untouched
        (it may be another layer's cached output).
        """
        self.result = np.maximum(data, 0)
        return self.result

    def backward(self, grad):
        """Pass ``grad`` through where the forward output was positive.

        The mask is built as a temporary rather than written into
        ``self.result``, because that array is also what forward() returned
        to the caller and must keep its values.
        """
        return grad * (self.result > 0)

    def step(self):
        """No parameters to update."""
        pass

其他组件构建

代价函数

MSE(均方误差)

MSE代价函数的前向传播和反向传播为:前向传播 $L = \sum (\hat{y} - y)^2$,反向传播 $\frac{\partial L}{\partial \hat{y}} = 2(\hat{y} - y)$

1
2
3
4
def MES_loss(y_pre, y):
    """Sum-of-squared-errors loss and its gradient.

    Args:
        y_pre: predictions.
        y: targets, broadcastable against ``y_pre``.

    Returns:
        ``(loss, loss_back)`` where ``loss`` is ``sum((y_pre - y) ** 2)`` and
        ``loss_back`` is its derivative with respect to ``y_pre``.
    """
    diff = y_pre - y
    loss = np.sum(diff ** 2)
    # d/dy_pre of (y_pre - y)^2 is 2 * (y_pre - y). The sign must be kept so
    # that over- and under-predictions are pushed in opposite directions.
    loss_back = 2 * diff
    return loss, loss_back

交叉熵

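交叉熵的前向传播和反向传播分别为:前向传播 $L = -\sum \left[ y \log \hat{y} + (1 - y) \log (1 - \hat{y}) \right]$,反向传播 $\frac{\partial L}{\partial \hat{y}} = -\frac{y}{\hat{y}} + \frac{1 - y}{1 - \hat{y}}$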

1
2
3
4
def Cross_loss(y_pre, y):
    """Binary cross-entropy loss (summed) and its gradient.

    Args:
        y_pre: predicted probabilities.
        y: targets, same shape as ``y_pre``.

    Returns:
        ``(loss, loss_back)`` where
        ``loss = -sum(y*log(p) + (1-y)*log(1-p))`` and ``loss_back`` is the
        derivative of ``loss`` with respect to ``y_pre``.
    """
    # Keep probabilities strictly inside (0, 1) so log() and the divisions
    # stay finite when a prediction saturates at exactly 0 or 1.
    eps = 1e-12
    prob = np.clip(y_pre, eps, 1 - eps)
    loss = -np.sum(y * np.log(prob) + (1 - y) * np.log(1 - prob))
    # d/dp of -[y*log(p) + (1-y)*log(1-p)] is -y/p + (1-y)/(1-p); the two
    # terms carry opposite signs.
    loss_back = (1 - y) / (1 - prob) - y / prob
    return loss, loss_back

带交叉熵的softmax函数

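softmax函数是多分类问题常用的输出激活函数,一般与交叉熵代价函数结合使用,组合函数(softmax+交叉熵)的前向传播如下:$p_i = \frac{e^{z_i}}{\sum_j e^{z_j}}$,$L = -\sum_i y_i \log p_i$

反向传播如下:$\frac{\partial L}{\partial z_i} = p_i - y_i$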

详细推导可参见这里

1
2
3
4
5
6
7
8
def Softmax_cross_loss(y_pre, y):
    """Softmax followed by cross-entropy, with the combined gradient.

    The softmax is normalised along axis 0, so for 2-D input each column is
    treated as one set of class scores.

    Args:
        y_pre: raw scores (logits).
        y: target distribution with the same shape as ``y_pre``.

    Returns:
        ``(loss, loss_back)`` where ``loss = -sum(y * log(softmax))`` and
        ``loss_back = softmax - y`` is the gradient with respect to ``y_pre``.
    """
    # Subtracting the per-column maximum leaves the softmax unchanged but
    # keeps exp() from overflowing to inf (inf / inf would give NaN).
    shifted = y_pre - np.max(y_pre, axis=0, keepdims=True)
    log_norm = np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))
    # Working in log space avoids log(0) when a probability underflows.
    log_softmax = shifted - log_norm
    softmax = np.exp(log_softmax)
    loss = -np.sum(y * log_softmax)
    loss_back = softmax - y
    return loss, loss_back
1
# Smoke call on random 2x4 inputs; the (loss, gradient) pair is displayed.
# NOTE(review): the second argument is random noise rather than one-hot
# labels, which is presumably why the displayed loss is negative -- confirm.
Softmax_cross_loss(np.random.randn(2,4),np.random.randn(2,4))
(-4.9084963417988003,
 array([[-0.09065384,  0.07506358,  0.32789286,  1.26735185],
        [ 1.93958915,  0.01316283,  1.20922904,  2.87550082]]))

优化器SGD

随机梯度下降优化器是一种比较简单的优化方法,优化公式如下:$W \leftarrow W - \eta \cdot \nabla W$,其中 $\eta$ 为学习率,$\nabla W$ 为参数的梯度

1
2
3
4
5
6
7
class optim_sgd(object):
    """Plain gradient-descent rule: maps a gradient to ``-learning_rate * grad``.

    The returned value is meant to be *added* to the parameter by the caller.
    """

    def __init__(self, learning_rate):
        super(optim_sgd, self).__init__()
        self.learning_rate = learning_rate

    def __call__(self, grad):
        """Return the parameter update for ``grad``."""
        negative_rate = -self.learning_rate
        return negative_rate * grad

其他组件

导入数据集——乳腺癌数据集

下载数据集

1
2
import re
import pandas as pd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# CSV location of the UCI "breast-cancer-wisconsin" data file.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
# Attribute listing as free text; it is parsed into column names just below.
# The parsing is whitespace-sensitive, so do not reformat this literal.
data_label = """ 1. Sample code number 1id number
2. Clump Thickness 1 - 10
3. Uniformity of Cell Size 1 - 10
4. Uniformity of Cell Shape 1 - 10
5. Marginal Adhesion 1 - 10
6. Single Epithelial Cell Size 1 - 10
7. Bare Nuclei 1 - 10
8. Bland Chromatin 1 - 10
9. Normal Nucleoli 1 - 10
10. Mitoses 1 - 10
11. Class 2 for benign, 4 for malignant)
"""
# findall grabs each ". <words> <digit>" fragment; x[2:] drops the leading
# ". ", and the sub() strips every "whitespace + digit" run so that only the
# attribute name text remains (11 names, one per column).
data_label = [re.sub(r"\s+\d","",x[2:]) for x in re.findall(r"\. [\w\s]+\d",data_label)]
# print(data_label)
# The parsed names are supplied as the column names when reading the CSV.
data = pd.read_csv(data_url,names=data_label)
# data["Bare Nuclei"] = data["Bare Nuclei"].map(int)
# Prints the column summary; "Bare Nuclei" shows up as dtype object here.
print(data.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
Sample code numberid number    699 non-null int64
Clump Thickness                699 non-null int64
Uniformity of Cell Size        699 non-null int64
Uniformity of Cell Shape       699 non-null int64
Marginal Adhesion              699 non-null int64
Single Epithelial Cell Size    699 non-null int64
Bare Nuclei                    699 non-null object
Bland Chromatin                699 non-null int64
Normal Nucleoli                699 non-null int64
Mitoses                        699 non-null int64
Class                          699 non-null int64
dtypes: int64(10), object(1)
memory usage: 60.1+ KB
None

清洗数据集

1
2
3
4
# Treat "?" cells as missing and drop every row that contains one.
data = data.replace(to_replace="?", value=np.nan).dropna(how='any')
# With the placeholders gone, the remaining values can be converted to int.
bare_nuclei = data["Bare Nuclei"].map(int)
data["Bare Nuclei"] = bare_nuclei
print(data.info())
<class 'pandas.core.frame.DataFrame'>
Int64Index: 683 entries, 0 to 698
Data columns (total 11 columns):
Sample code numberid number    683 non-null int64
Clump Thickness                683 non-null int64
Uniformity of Cell Size        683 non-null int64
Uniformity of Cell Shape       683 non-null int64
Marginal Adhesion              683 non-null int64
Single Epithelial Cell Size    683 non-null int64
Bare Nuclei                    683 non-null int64
Bland Chromatin                683 non-null int64
Normal Nucleoli                683 non-null int64
Mitoses                        683 non-null int64
Class                          683 non-null int64
dtypes: int64(11)
memory usage: 64.0 KB
None

切分数据集

1
2
3
4
5
from sklearn.model_selection import train_test_split
# Columns 1..9 are the features; column 10 ("Class") is the target.
# Column 0 (the sample code number) is left out.
feature_columns = data_label[1:10]
target_column = data_label[10]
x_train, x_test, y_train, y_test = train_test_split(
    data[feature_columns],
    data[target_column],
    test_size=0.25,
    random_state=1,
)
print(x_train.shape, x_test.shape)
print(y_train.shape)
# Class balance of the training labels.
print(pd.value_counts(y_train))
(512, 9) (171, 9)
(512,)
2    333
4    179
Name: Class, dtype: int64

标准化

1
2
3
4
5
6
7
8
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features only, then apply the same
# transformation to both splits.
ss = StandardScaler()
ss.fit(x_train)
x_train_ss = ss.transform(x_train)
x_test_ss = ss.transform(x_test)
# To experiment with unscaled inputs, use x_train.values / x_test.values.
print(type(x_train_ss))
<class 'numpy.ndarray'>

1
2
3
4
5
6
def y_standard(data):
    """Map the class codes 2 and 4 to 0 and 1 via ``data / 2 - 1``."""
    halved = data / 2
    return halved - 1
# Convert both label splits to 0/1 codes and keep them as plain arrays.
y_train_ss = y_standard(y_train).values
y_test_ss = y_standard(y_test).values
# Show the class balance of each split, training first.
for encoded_labels in (y_train_ss, y_test_ss):
    print(pd.value_counts(encoded_labels))
0.0    333
1.0    179
dtype: int64
0.0    111
1.0     60
dtype: int64

制作可迭代数据集

1
2
3
4
5
6
import random
def dataset(data, lable, batch_size=100, epoch=10):
    """Yield ``epoch`` random mini-batches of ``(data, lable)`` rows.

    Each batch is built from ``batch_size`` row indices drawn independently
    with ``random.randint``, so a row may appear more than once in a batch.
    Despite its name, ``epoch`` is simply the number of batches produced.
    """
    for _batch in range(epoch):
        last_row = data.shape[0] - 1
        picks = []
        for _draw in range(batch_size):
            picks.append(random.randint(0, last_row))
        yield data[picks], lable[picks]
1
2
3
4
# print(x_train_ss,type(y_train_ss))
# Pull a single batch from the generator and show the shapes it yields.
first_batch = next(dataset(x_train_ss, y_train_ss, batch_size=100))
print(first_batch[0].shape, first_batch[1].shape)
(100, 9) (100,)

独热码编码

1
2
3
4
5
def onehot(data, tp_num):
    """One-hot encode class ids into a ``(len(data), tp_num)`` float array.

    Row ``i`` is all zeros except for a 1 at column ``int(data[i])``.
    """
    sample_count = data.shape[0]
    encoded = np.zeros((sample_count, tp_num))
    row = 0
    while row < sample_count:
        class_id = int(data[row])
        encoded[row, class_id] = 1
        row += 1
    return encoded
1
2
# Sanity check: class ids 0 and 1 with two classes.
test_onehot = np.arange(2)
# Bare expression so the notebook displays the encoded array.
onehot(test_onehot,2)
array([[ 1.,  0.],
       [ 0.,  1.]])

网络

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
class numpy_network_base(object):
    """Sequential container that chains layers exposing forward/backward/step."""

    def __init__(self, network_list):
        # Layers in the order they are applied during the forward pass.
        self.network = network_list

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the last output."""
        activation = x
        for stage in self.network:
            activation = stage.forward(activation)
        return activation

    def backward(self, grad):
        """Propagate ``grad`` through the layers in reverse order.

        Works on a copy of ``grad`` and returns the gradient produced by the
        first layer.
        """
        flowing = grad.copy()
        position = len(self.network) - 1
        while position >= 0:
            flowing = self.network[position].backward(flowing)
            position -= 1
        return flowing

    def step(self):
        """Ask every layer, in order, to apply its parameter update."""
        for stage in self.network:
            stage.step()

准确率计算

1
2
3
def accuracy(y_pre, lable):
    """Fraction of columns whose arg-max row index equals ``lable``.

    ``y_pre`` holds one score per class along axis 0; the predicted class of
    each column is the index of its largest score.
    """
    predicted = np.argmax(y_pre, axis=0)
    hits = predicted == lable
    return np.mean(np.int8(hits))
1
2
3
# Values grow row by row, so the last row (index 3) is the arg-max of every column.
a = np.arange(4*8).reshape((4,8))
# Labels are all 3, matching that arg-max, so the expected accuracy is 1.0.
b = np.ones((1,8)) * 3
accuracy(a,b)
1.0

网络训练与测试

网络搭建

1
2
3
4
5
6
7
8
9
10
11
12
13
# Two-layer perceptron: 9 features -> 20 hidden units (ReLU) -> 2 class scores,
# each dense layer with its own SGD rule (learning rate 0.001).
network = numpy_network_base([numpy_fc(9,20,optim_sgd(0.001)),numpy_relu(),numpy_fc(20,2,optim_sgd(0.001))])
for i,(din,lable) in enumerate(dataset(x_train_ss,y_train_ss,epoch=10,batch_size=100)):
    # Layers work on (features, batch) arrays, hence the transpose of the batch.
    result = network.forward(din.T)
    # Softmax_cross_loss normalises along axis 0, so it must receive the
    # scores as (classes, batch). Passing the transposed (batch, classes)
    # arrays would normalise each class across the samples of the batch
    # instead of normalising each sample across its classes.
    loss,grad = Softmax_cross_loss(result,onehot(lable,2).T)
    print(accuracy(result,lable))
    # grad already has the (classes, batch) layout of the network output.
    network.backward(grad)
    network.step()
0.19
0.32
0.86
0.96
0.94
0.93
0.9
0.96
0.98
0.95

网络测试

1
2
# Evaluate on the held-out split; the network expects (features, samples),
# so the scaled test matrix is transposed before the forward pass.
result = network.forward(x_test_ss.T)
# Fraction of test samples whose arg-max class equals the 0/1 label.
print(accuracy(result,y_test_ss))
0.982456140351