零基础入门深度学习(4) - 卷积神经网络
卷积层的前向传播:
\begin{align}\notag x_j^l = f(\sum_ {i\in M_j} x_i^{l-1} * k_{ij}^l + b_j^l)\end{align}
def forward(self, input_array):
    """Compute the convolution layer's output for ``input_array``.

    The input is kept on ``self.input_array``, run through the ``padding``
    helper with ``self.zero_padding`` (kept on ``self.padded_input_array``),
    convolved with every filter into the matching entry of
    ``self.output_array``, and finally ``self.activator.forward`` is applied
    through ``element_wise_op``.

    The result is left in ``self.output_array``; nothing is returned.
    """
    self.input_array = input_array
    self.padded_input_array = padding(input_array, self.zero_padding)
    for f in range(self.filter_number):
        # Named ``conv_filter`` so the builtin ``filter`` is not shadowed.
        conv_filter = self.filters[f]
        conv(self.padded_input_array, conv_filter.get_weights(),
             self.output_array[f], self.stride, conv_filter.get_bias())
    element_wise_op(self.output_array, self.activator.forward)


def conv(input_array, kernel_array, output_array, stride, bias):
    """Slide ``kernel_array`` over ``input_array`` and fill ``output_array``.

    For every output position ``(i, j)`` the window returned by
    ``get_patch`` is multiplied element-wise with ``kernel_array``, summed,
    and ``bias`` is added. The kernel is applied as given (it is not
    flipped here). Works for 2D and 3D inputs alike.

    Args:
        input_array: array to convolve over.
        kernel_array: kernel; its last two axes are (height, width).
        output_array: 2D array written in place; its shape decides how many
            positions are evaluated.
        stride: step between neighbouring windows.
        bias: scalar added to every output value.
    """
    output_height = output_array.shape[0]
    output_width = output_array.shape[1]
    kernel_height = kernel_array.shape[-2]
    kernel_width = kernel_array.shape[-1]
    for i in range(output_height):
        for j in range(output_width):
            patch = get_patch(input_array, i, j,
                              kernel_width, kernel_height, stride)
            output_array[i, j] = (patch * kernel_array).sum() + bias


def get_patch(input_array, i, j, filter_width, filter_height, stride):
    """Return the window of ``input_array`` used for output position (i, j).

    The window starts at row ``i * stride`` and column ``j * stride`` and
    spans ``filter_height`` x ``filter_width`` on the last two axes. Any
    leading axes (e.g. the depth axis of a 3D input) are kept whole, so
    2D and 3D inputs are handled by the same expression.

    Raises:
        ValueError: if ``input_array`` has fewer than two dimensions.
    """
    if input_array.ndim < 2:
        raise ValueError(
            'get_patch expects an array with at least 2 dimensions, got %d'
            % input_array.ndim)
    start_i = i * stride
    start_j = j * stride
    # Ellipsis keeps every leading axis intact and slices only the last two.
    return input_array[...,
                       start_i: start_i + filter_height,
                       start_j: start_j + filter_width]
池化(下采样)层的前向传播:
\begin{align}\notag x_j^l = f(\beta_j^l down(x_j^{l-1}) + b_j^l)\end{align}
def forward(self, input_array):
    """Fill ``self.output_array`` with the maximum of each pooling window.

    For every channel ``d`` and output position ``(i, j)`` the window is
    taken from ``input_array[d]`` via ``get_patch`` (using the layer's
    filter size and stride) and its largest value is stored at
    ``self.output_array[d, i, j]``. Nothing is returned.
    """
    # The output sizes are cast to int before being used as loop bounds.
    rows = int(self.output_height)
    cols = int(self.output_width)
    for d in range(self.channel_number):
        for i in range(rows):
            for j in range(cols):
                window = get_patch(input_array[d], i, j,
                                   self.filter_width,
                                   self.filter_height,
                                   self.stride)
                self.output_array[d, i, j] = window.max()
卷积层的反向传播(将误差项传递到上一层):
\begin{align}\notag\delta_j^l = f^\prime(u_j^l)\circ \operatorname{conv2}(\delta_j^{l+1},\operatorname{rot180}(k_j^{l+1}),\text{'full'})\end{align}
def bp_sensitivity_map(self, sensitivity_array, activator):
    """Compute the sensitivity map that is passed to the previous layer.

    Args:
        sensitivity_array: sensitivity map of this layer.
        activator: activation function of the previous layer; its
            ``backward`` is applied element-wise to a copy of
            ``self.input_array``.

    The result is stored in ``self.delta_array``; nothing is returned.
    """
    # Account for the stride: spread the sensitivities onto the grid a
    # stride-1 convolution would have produced.
    expanded_array = self.expand_sensitivity_map(sensitivity_array)
    # "Full" convolution: pad the sensitivity map. The padding cells of the
    # original input would receive an error term as well, but it is not
    # propagated any further, so it is not computed.
    expanded_width = expanded_array.shape[2]
    # Floor division on purpose: a pad width has to be an integer, and
    # ``/`` yields a float on Python 3.
    zp = int((self.input_width + self.filter_width - 1
              - expanded_width) // 2)
    padded_array = padding(expanded_array, zp)
    # Accumulator for the sensitivity map handed to the previous layer.
    self.delta_array = self.create_delta_array()
    # With several filters, the map passed upstream is the sum of the
    # maps obtained from each filter.
    for f in range(self.filter_number):
        # Named ``conv_filter`` so the builtin ``filter`` is not shadowed.
        conv_filter = self.filters[f]
        # Rotate the weights by 180 degrees in the (height, width) plane.
        flipped_weights = np.rot90(conv_filter.get_weights(), 2, (1, 2))
        # Contribution of this single filter.
        delta_array = self.create_delta_array()
        for d in range(delta_array.shape[0]):
            conv(padded_array[f], flipped_weights[d],
                 delta_array[d], 1, 0)
        self.delta_array += delta_array
    # Element-wise product with the derivative of the activation function.
    derivative_array = np.array(self.input_array)
    element_wise_op(derivative_array, activator.backward)
    self.delta_array *= derivative_array


def expand_sensitivity_map(self, sensitivity_array):
    """Return ``sensitivity_array`` spread onto the stride-1 output grid.

    The returned array has the spatial size the layer's output would have
    with stride 1. The value at ``[:, i, j]`` of the input is copied to
    ``[:, i * stride, j * stride]``; every other cell is zero.
    """
    depth = sensitivity_array.shape[0]
    # Output size the layer would have with a stride of 1.
    expanded_width = (self.input_width - self.filter_width
                      + 2 * self.zero_padding + 1)
    expanded_height = (self.input_height - self.filter_height
                       + 2 * self.zero_padding + 1)
    expand_array = np.zeros((depth, expanded_height, expanded_width))
    # Copy the error terms from the original sensitivity map.
    for i in range(int(self.output_height)):
        for j in range(int(self.output_width)):
            i_pos = i * self.stride
            j_pos = j * self.stride
            expand_array[:, i_pos, j_pos] = sensitivity_array[:, i, j]
    return expand_array


def create_delta_array(self):
    """Return a zero array of shape (channel_number, input_height, input_width)."""
    return np.zeros((self.channel_number,
                     self.input_height,
                     self.input_width))
池化(下采样)层的反向传播(将误差项传递到上一层):
\begin{align}\notag\delta_j^l = \beta_j^{l+1}(f^\prime(u_j^l) \circ up(\delta_j^{l+1}))\end{align}
def backward(self, input_array, sensitivity_array):
    """Route each pooled sensitivity back to the cell that produced the max.

    ``self.delta_array`` is reset to zeros with the shape of
    ``input_array``. For every channel ``d`` and output position
    ``(i, j)``, the pooling window is taken from ``input_array[d]`` with
    ``get_patch``, and ``sensitivity_array[d, i, j]`` is added at the
    window cell reported by ``get_max_index``. All other cells stay zero.
    Nothing is returned.

    Args:
        input_array: the input that was given to the forward pass.
        sensitivity_array: sensitivity map of this layer, indexed as
            ``[d, i, j]``.
    """
    self.delta_array = np.zeros(input_array.shape)
    for d in range(self.channel_number):
        for i in range(int(self.output_height)):
            for j in range(int(self.output_width)):
                patch_array = get_patch(input_array[d], i, j,
                                        self.filter_width,
                                        self.filter_height,
                                        self.stride)
                # NOTE(review): presumably (row, col) of the maximum inside
                # the patch -- get_max_index is defined elsewhere; confirm.
                max_row, max_col = get_max_index(patch_array)
                # Accumulate rather than assign: when windows overlap
                # (stride < filter size) one input cell can be the maximum
                # of several windows and must receive the sum of their
                # sensitivities. Without overlap this equals assignment,
                # because the array starts out as zeros.
                self.delta_array[d,
                                 i * self.stride + max_row,
                                 j * self.stride + max_col] += \
                    sensitivity_array[d, i, j]
相关推荐
demm 2020-09-18
sunxinyu 2020-09-17
walegahaha 2020-08-15
cherry0 2020-08-15
georgesale 2020-08-14
fengzhimohan 2020-07-23
wenxuegeng 2020-06-14
wenxuegeng 2020-06-08
cherry0 2020-06-06
hnyzyty 2020-06-05
hnyzyty 2020-06-03
wenxuegeng 2020-06-03
walegahaha 2020-06-03
cherry0 2020-06-03
zhaorui0 2020-06-01
kuankeTech 2020-06-01
hnyzyty 2020-05-12
georgesale 2020-05-10
hnyzyty 2020-05-08
hnyzyty 2020-05-05
walegahaha 2020-05-05
wenxuegeng 2020-05-04
玉来愈宏的随笔 2020-05-02
liqing 2020-04-19