This article walks through code implementations of several classic convolutional neural network architectures.

LeNet - the prototype network

import tensorflow as tf  
import input_data

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

sess = tf.InteractiveSession()

# Training data
x = tf.placeholder("float", shape=[None, 784])
# Training labels
y_ = tf.placeholder("float", shape=[None, 10])
# Reshape x into a 4-D tensor: dim 1 is the number of samples, dims 2 and 3 are the image
# height and width, dim 4 is the number of channels (1 for grayscale)
x_image = tf.reshape(x, [-1, 28, 28, 1])

# Layer 1: convolution
# Filter size 5x5, input depth 1, output depth 32
conv1_weights = tf.get_variable("conv1_weights", [5, 5, 1, 32], initializer=tf.truncated_normal_initializer(stddev=0.1))
conv1_biases = tf.get_variable("conv1_biases", [32], initializer=tf.constant_initializer(0.0))
# Stride 1, zero ("SAME") padding
conv1 = tf.nn.conv2d(x_image, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
# ReLU activation for non-linearity
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

# Layer 2: max pooling
# Pooling window 2x2, stride 2, zero padding
pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 3: convolution
conv2_weights = tf.get_variable("conv2_weights", [5, 5, 32, 64], initializer=tf.truncated_normal_initializer(stddev=0.1))  # filter size 5x5, input depth 32, output depth 64
conv2_biases = tf.get_variable("conv2_biases", [64], initializer=tf.constant_initializer(0.0))
conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')  # stride 1, zero padding
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

# Layer 4: max pooling
# Pooling window 2x2, stride 2, zero padding
pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 5: fully connected
fc1_weights = tf.get_variable("fc1_weights", [7 * 7 * 64, 1024], initializer=tf.truncated_normal_initializer(stddev=0.1))  # 7*7*64=3136: flatten the previous output into a feature vector
fc1_baises = tf.get_variable("fc1_baises", [1024], initializer=tf.constant_initializer(0.1))
pool2_vector = tf.reshape(pool2, [-1, 7 * 7 * 64])
fc1 = tf.nn.relu(tf.matmul(pool2_vector, fc1_weights) + fc1_baises)

# Dropout layer to reduce overfitting
keep_prob = tf.placeholder(tf.float32)
fc1_dropout = tf.nn.dropout(fc1, keep_prob)

# Layer 6: fully connected
fc2_weights = tf.get_variable("fc2_weights", [1024, 10], initializer=tf.truncated_normal_initializer(stddev=0.1))  # 1024 hidden units, 10 output classes
fc2_biases = tf.get_variable("fc2_biases", [10], initializer=tf.constant_initializer(0.1))
fc2 = tf.matmul(fc1_dropout, fc2_weights) + fc2_biases

# Layer 7: output layer
# softmax
y_conv = tf.nn.softmax(fc2)

# Cross-entropy loss
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))

# Choose an optimizer and let it minimize the loss (backpropagation)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# tf.argmax() returns the index of the largest value along a dimension, here the predicted
# and the true class; check whether the two indices match (y_conv holds the 10 class probabilities)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))

# Average the matches to get the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Start training
sess.run(tf.global_variables_initializer())
for i in range(10000):
    batch = mnist.train.next_batch(100)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})  # no dropout during evaluation
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})  # 50% dropout during training


# Accuracy on the test set
print("test accuracy %g" % accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

AlexNet

Layer 1: convolution

The input to this layer is the raw image pixels; for the MNIST dataset that is 28x28x1. The filters are 5x5 with a depth of 6 and no zero padding, so the output size is 28-5+1=24, also with a depth of 6.

Layer 2: pooling

Takes the output of layer 1 as input; the filter size is 2x2 with stride 2.

Layer 3: convolution

Kernel size 5x5, depth 16, again without zero padding, stride 1.

Layer 4: pooling

2x2 kernel, stride 2.

Layer 5: fully connected

Kernel 5x5, 120 output nodes.

Layer 6: fully connected

120 input nodes, 84 output nodes.

Layer 7: fully connected

84 inputs, 10 outputs.
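
The seven-layer stack just described can be sketched compactly with the TF 1.x tf.layers API. This is only a minimal illustration assuming MNIST-sized 28x28x1 inputs; the function name build_seven_layer_net is illustrative and not part of the original code.

import tensorflow as tf

def build_seven_layer_net(images):
    # images: [batch, 28, 28, 1]
    conv1 = tf.layers.conv2d(images, 6, 5, padding='VALID', activation=tf.nn.relu)   # 24x24x6, no zero padding
    pool1 = tf.layers.max_pooling2d(conv1, 2, 2)                                     # 12x12x6
    conv2 = tf.layers.conv2d(pool1, 16, 5, padding='VALID', activation=tf.nn.relu)   # 8x8x16
    pool2 = tf.layers.max_pooling2d(conv2, 2, 2)                                     # 4x4x16
    flat = tf.layers.flatten(pool2)
    fc1 = tf.layers.dense(flat, 120, activation=tf.nn.relu)                          # layer 5: 120 nodes
    fc2 = tf.layers.dense(fc1, 84, activation=tf.nn.relu)                            # layer 6: 84 nodes
    logits = tf.layers.dense(fc2, 10)                                                # layer 7: 10 classes
    return logits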
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import argparse
import sys

import input_data
import tensorflow as tf

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# Network hyper-parameters
learning_rate = 1e-4
training_iters = 300000
batch_size = 64
display_step = 20

# Network parameters
n_input = 784   # input dimensionality
n_classes = 10  # number of label classes
dropout = 0.5   # keep probability used for dropout

# Placeholder inputs
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# Convolution op
def conv2d(name, l_input, w, b, k):
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(l_input, w, strides=[1, k, k, 1],
                                                  padding='SAME'), b), name=name)

# Max-pooling op
def max_pool(name, l_input, k1, k2):
    return tf.nn.max_pool(l_input, ksize=[1, k1, k1, 1], strides=[1, k2, k2, 1], padding='SAME', name=name)

# Local response normalization op
def norm(name, l_input, lsize=4):
    return tf.nn.lrn(l_input, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)

# The whole network
def alex_net(_X, _weights, _biases, _dropout):
    # Reshape the input vector into an image
    _X = tf.reshape(_X, shape=[-1, 28, 28, 1])

    # Convolution
    conv1 = conv2d('conv1', _X, _weights['wc1'], _biases['bc1'], 2)
    # Normalization
    norm1 = norm('norm1', conv1, lsize=4)
    # Pooling
    pool1 = max_pool('pool1', norm1, k1=3, k2=2)
    # Dropout
    norm1 = tf.nn.dropout(pool1, _dropout)

    # Convolution
    conv2 = conv2d('conv2', norm1, _weights['wc2'], _biases['bc2'], 1)
    # Normalization
    norm2 = norm('norm2', conv2, lsize=4)
    # Pooling
    pool2 = max_pool('pool2', norm2, k1=3, k2=2)
    # Dropout
    norm2 = tf.nn.dropout(pool2, _dropout)

    # Convolution
    conv3 = conv2d('conv3', norm2, _weights['wc3'], _biases['bc3'], 1)
    # Normalization
    norm3 = norm('norm3', conv3, lsize=4)
    # Pooling (disabled)
    # pool3 = max_pool('pool3', norm3, k=2)
    # Dropout
    norm3 = tf.nn.dropout(norm3, _dropout)
    '''
    # Convolution
    conv4 = conv2d('conv4', norm3, _weights['wc4'], _biases['bc4'], 1)
    # Normalization
    norm4 = norm('norm4', conv4, lsize=4)
    # Pooling (disabled)
    # pool3 = max_pool('pool3', norm3, k=2)
    # Dropout
    norm4 = tf.nn.dropout(norm4, _dropout)

    # Convolution
    conv5 = conv2d('conv5', norm4, _weights['wc5'], _biases['bc5'], 1)
    # Normalization
    norm5 = norm('norm5', conv5, lsize=4)
    # Pooling
    pool5 = max_pool('pool5', norm5, k1=3, k2=2)
    # Dropout
    norm5 = tf.nn.dropout(pool5, _dropout)
    '''
    # Fully connected layer: flatten the feature maps into a vector first
    dense1 = tf.reshape(norm3, [-1, _weights['wd1'].get_shape().as_list()[0]])
    dense1 = tf.nn.dropout(tf.nn.relu(tf.matmul(dense1, _weights['wd1']) + _biases['bd1'], name='fc1'), _dropout)
    # Fully connected layer
    dense2 = tf.nn.relu(tf.matmul(dense1, _weights['wd2']) + _biases['bd2'], name='fc2')  # ReLU activation

    # Output layer
    out = tf.matmul(dense2, _weights['out']) + _biases['out']
    return out

# Store all the network weights and biases as dictionaries
weights = {
    'wc1': tf.Variable(tf.random_normal([3, 3, 1, 64])),
    'wc2': tf.Variable(tf.random_normal([3, 3, 64, 128])),
    'wc3': tf.Variable(tf.random_normal([3, 3, 128, 256])),
    'wd1': tf.Variable(tf.random_normal([4*4*256, 1024])),
    'wd2': tf.Variable(tf.random_normal([1024, 1024])),
    'out': tf.Variable(tf.random_normal([1024, 10]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([64])),
    'bc2': tf.Variable(tf.random_normal([128])),
    'bc3': tf.Variable(tf.random_normal([256])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'bd2': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Build the model
pred = alex_net(x, weights, biases, keep_prob)

# Loss function and training step
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost)

# Evaluate the network
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize all shared variables
init = tf.global_variables_initializer()

# Launch the training
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reaching the maximum number of iterations
    while step * batch_size < training_iters:
        # Fetch a batch of data
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
        if step % display_step == 0:
            # Compute the accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            # Compute the loss
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) +
                  ", Training Accuracy = " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    # Test accuracy with dropout still applied (for comparison)
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: mnist.test.images[:256],
                                                             y: mnist.test.labels[:256],
                                                             keep_prob: 0.5}))
    print('**********************')
    # Test accuracy without dropout
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: mnist.test.images[:256],
                                                             y: mnist.test.labels[:256],
                                                             keep_prob: 1.0}))

SPPNet - spatial pyramid pooling

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import pandas as pd

def spp_layer(input_, levels=4, name='SPP_layer', pool_type='max_pool'):
    '''
    Multiple Level SPP layer.

    Works for levels=[1, 2, 3, 6].
    '''
    shape = input_.get_shape().as_list()

    with tf.variable_scope(name):
        for l in range(levels):
            # Pooling parameters for this pyramid level
            l = l + 1
            ksize = [1, np.ceil(shape[1] / l + 1).astype(np.int32), np.ceil(shape[2] / l + 1).astype(np.int32), 1]
            strides = [1, np.floor(shape[1] / l + 1).astype(np.int32), np.floor(shape[2] / l + 1).astype(np.int32), 1]

            if pool_type == 'max_pool':
                pool = tf.nn.max_pool(input_, ksize=ksize, strides=strides, padding='SAME')
                pool = tf.reshape(pool, (shape[0], -1))
            else:
                pool = tf.nn.avg_pool(input_, ksize=ksize, strides=strides, padding='SAME')
                pool = tf.reshape(pool, (shape[0], -1))
            print("Pool Level {:}: shape {:}".format(l, pool.get_shape().as_list()))
            if l == 1:
                x_flatten = tf.reshape(pool, (shape[0], -1))
            else:
                x_flatten = tf.concat((x_flatten, pool), axis=1)  # concatenate the pyramid levels
            print("Pool Level {:}: shape {:}".format(l, x_flatten.get_shape().as_list()))
            # pool_outputs.append(tf.reshape(pool, [tf.shape(pool)[1], -1]))

    return x_flatten
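
A quick sanity check of spp_layer: the 8x13x13x256 feature-map shape below is an arbitrary illustrative choice, and a fully static batch dimension is assumed because the reshape above uses shape[0] at graph-construction time.

if __name__ == '__main__':
    feature_map = tf.placeholder(tf.float32, [8, 13, 13, 256])  # [batch, height, width, channels]
    spp_out = spp_layer(feature_map, levels=4)                   # one fixed-length vector per image
    print(spp_out.get_shape().as_list())                         # 1+4+9+16 bins per channel for a 13x13 map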

VGGNet


from datetime import datetime
import tensorflow as tf
import math
import time

batch_size = 32
num_batches = 100

# Create a convolution layer and append its parameters to the parameter list
# input_op: input tensor  name: layer name  kh/kw: kernel height/width
# n_out: number of output channels  dh/dw: stride height/width  p: parameter list
def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    # Number of input channels
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + "w", shape=[kh, kw, n_in, n_out], dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding='SAME')
        bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
        biases = tf.Variable(bias_init_val, trainable=True, name='b')
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        p += [kernel, biases]
        return activation

# Fully connected layer
def fc_op(input_op, name, n_out, p):
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + 'w', shape=[n_in, n_out], dtype=tf.float32,
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        biases = tf.Variable(tf.constant(0.1, shape=[n_out], dtype=tf.float32), name='b')
        # tf.nn.relu_layer() multiplies input_op by kernel, adds the bias b and applies ReLU
        activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)
        p += [kernel, biases]
        return activation

# Max-pooling layer
def mpool_op(input_op, name, kh, kw, dh, dw):
    return tf.nn.max_pool(input_op, ksize=[1, kh, kw, 1], strides=[1, dh, dw, 1], padding='SAME', name=name)

# Network definition
def inference_op(input_op, keep_prob):
    p = []
    conv1_1 = conv_op(input_op, name='conv1_1', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name='conv1_2', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = mpool_op(conv1_2, name='pool1', kh=2, kw=2, dw=2, dh=2)

    conv2_1 = conv_op(pool1, name='conv2_1', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name='conv2_2', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = mpool_op(conv2_2, name='pool2', kh=2, kw=2, dw=2, dh=2)

    conv3_1 = conv_op(pool2, name='conv3_1', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name='conv3_2', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name='conv3_3', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = mpool_op(conv3_3, name='pool3', kh=2, kw=2, dw=2, dh=2)

    conv4_1 = conv_op(pool3, name='conv4_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name='conv4_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name='conv4_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = mpool_op(conv4_3, name='pool4', kh=2, kw=2, dw=2, dh=2)

    conv5_1 = conv_op(pool4, name='conv5_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name='conv5_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name='conv5_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = mpool_op(conv5_3, name='pool5', kh=2, kw=2, dw=2, dh=2)

    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name="resh1")

    fc6 = fc_op(resh1, name="fc6", n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name='fc6_drop')
    fc7 = fc_op(fc6_drop, name="fc7", n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name="fc7_drop")
    fc8 = fc_op(fc7_drop, name="fc8", n_out=1000, p=p)
    softmax = tf.nn.softmax(fc8)
    predictions = tf.argmax(softmax, 1)
    return predictions, softmax, fc8, p

def time_tensorflow_run(session, target, feed, info_string):
    num_steps_burn_in = 10        # warm-up iterations
    total_duration = 0.0          # total time
    total_duration_squared = 0.0  # sum of squared times, used for the variance
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target, feed_dict=feed)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:  # only count iterations after the warm-up
            if not i % 10:
                print('%s:step %d,duration = %.3f' % (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches                    # mean time per batch
    vr = total_duration_squared / num_batches - mn * mn  # variance
    sd = math.sqrt(vr)                                   # standard deviation
    print('%s: %s across %d steps, %.3f +/- %.3f sec/batch' % (datetime.now(), info_string, num_batches, mn, sd))

def run_benchmark():
    with tf.Graph().as_default():
        image_size = 224  # input image size
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3], dtype=tf.float32, stddev=1e-1))
        keep_prob = tf.placeholder(tf.float32)
        prediction, softmax, fc8, p = inference_op(images, keep_prob)
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        time_tensorflow_run(sess, prediction, {keep_prob: 1.0}, "Forward")
        # Simulate a training pass
        objective = tf.nn.l2_loss(fc8)     # use an L2 loss on fc8
        grad = tf.gradients(objective, p)  # gradients of the loss w.r.t. all model parameters
        time_tensorflow_run(sess, grad, {keep_prob: 0.5}, "Forward-backward")
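
run_benchmark() above is only defined, never invoked; to actually run the forward and forward-backward timings, an entry point along these lines (not in the original) can be appended:

if __name__ == '__main__':
    run_benchmark()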

GoogLeNet

What follows is an implementation of the Inception V3 network.


# -*- coding:utf-8 -*-
import tensorflow as tf
from datetime import datetime
import time
import math

slim=tf.contrib.slim
#产生截断的正态分布
trunc_normal =lambda stddev:tf.truncated_normal_initializer(0.0,stddev)
parameters =[] #储存参数


#why?为什么要定义这个函数?
#因为若事先定义好slim.conv2d各种默认参数,包括激活函数、标准化器,后面定义卷积层将会非常容易:
# 1.代码整体美观
# 2.网络设计的工作量会大大减轻

def inception_v3_arg_scope(weight_decay=0.00004,
stddev=0.1,
batch_norm_var_collection='moving_vars'):
"""
#定义inception_v3_arg_scope(),
#用来生成网络中经常用到的函数的默认参数(卷积的激活函数、权重初始化方式、标准化器等)
:param weight_decay: 权值衰减系数
:param stddev: 标准差
:param batch_norm_var_collection:
:return:
"""
batch_norm_params={
'decay':0.9997, #衰减系数decay
'epsilon':0.001, #极小值
'updates_collections':tf.GraphKeys.UPDATE_OPS,
'variables_collections':{
'beta':None,
'gamma':None,
'moving_mean':[batch_norm_var_collection],
'moving_variance':[batch_norm_var_collection],
}

}

with slim.arg_scope([slim.conv2d,slim.fully_connected],
weights_regularizer=slim.l2_regularizer(weight_decay)):
"""
slim.arg_scope()是一个非常有用的工具,可以给函数的参数自动赋予某些默认值

例如:
slim.arg_scope([slim.conv2d,slim.fully_connected],weights_regularizer=slim.l2_regularizer(weight_decay)):
会对[slim.conv2d,slim.fully_connected]这两个函数的参数自动赋值,
将参数weights_regularizer的默认值设为slim.l2_regularizer(weight_decay)

备注:使用了slim.arg_scope后就不需要每次重复设置参数,只需在修改时设置即可。
"""
# 设置默认值:对slim.conv2d函数的几个参数赋予默认值
with slim.arg_scope(
[slim.conv2d],
weights_initializer=tf.truncated_normal_initializer(stddev=stddev), #权重初始化
activation_fn=tf.nn.relu, #激励函数
normalizer_fn=slim.batch_norm, #标准化器
normalizer_params=batch_norm_params ) as sc: #normalizer_params标准化器的参数

return sc #返回定义好的scope


def inception_V3_base(input,scope=None):

end_points= {}
# 第一部分--基础部分:卷积和池化交错
with tf.variable_scope(scope,'inception_V3',[input]):
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1,padding='VALID'):
net1=slim.conv2d(input,32,[3,3],stride=2,scope='conv2d_1a_3x3')
net2 = slim.conv2d(net1, 32, [3, 3],scope='conv2d_2a_3x3')
net3 = slim.conv2d(net2, 64, [3, 3], padding='SAME',
scope='conv2d_2b_3x3')
net4=slim.max_pool2d(net3,[3,3],stride=2,scope='maxPool_3a_3x3')
net5 = slim.conv2d(net4, 80, [1, 1], scope='conv2d_4a_3x3')
net6 = slim.conv2d(net5, 192, [3, 3], padding='SAME',
scope='conv2d_4b_3x3')
net = slim.max_pool2d(net6, [3, 3], stride=2, scope='maxPool_5a_3x3')

#第二部分--Inception模块组:inception_1\inception_2\inception_2
with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
stride=1,padding='SAME'):
#inception_1:第一个模块组(共含3个inception_module)
#inception_1_m1: 第一组的1号module
with tf.variable_scope('inception_1_m1'):
with tf.variable_scope('Branch_0'):
branch_0=slim.conv2d(net,64,[1,1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 64, [5, 5],
scope='conv2d_1b_5x5')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 64, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 96, [3, 3],
scope='conv2d_2b_3x3')
branch2_3 = slim.conv2d(branch2_2, 96, [3, 3],
scope='conv2d_2c_3x3')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 32, [1, 1],
scope='conv2d_3b_1x1')
#使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net=tf.concat([branch_0,branch1_2,branch2_3,branch3_2],3)

# inception_1_m2: 第一组的 2号module
with tf.variable_scope('inception_1_m2'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 64, [5, 5],
scope='conv2d_1b_5x5')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 64, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 96, [3, 3],
scope='conv2d_2b_3x3')
branch2_3 = slim.conv2d(branch2_2, 96, [3, 3],
scope='conv2d_2c_3x3')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 64, [1, 1],
scope='conv2d_3b_1x1')
# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_2, branch2_3, branch3_2], 3)

# inception_1_m2: 第一组的 3号module
with tf.variable_scope('inception_1_m3'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 64, [5, 5],
scope='conv2d_1b_5x5')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 64, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 96, [3, 3],
scope='conv2d_2b_3x3')
branch2_3 = slim.conv2d(branch2_2, 96, [3, 3],
scope='conv2d_2c_3x3')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 64, [1, 1],
scope='conv2d_3b_1x1')
# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_2, branch2_3, branch3_2], 3)

#inception_2:第2个模块组(共含5个inception_module)
# inception_2_m1: 第2组的 1号module
with tf.variable_scope('inception_2_m1'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 384, [3, 3],stride=2,
padding='VALID',scope='conv2d_0a_3x3')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 64, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 96, [3, 3],
scope='conv2d_1b_3x3')
branch1_3 = slim.conv2d(branch1_2, 96, [3, 3],
stride=2,
padding='VALID',
scope='conv2d_1c_3x3')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.max_pool2d(net, [3, 3],
stride=2,
padding='VALID',
scope='maxPool_2a_3x3')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_3, branch2_1], 3)

# inception_2_m2: 第2组的 2号module
with tf.variable_scope('inception_2_m2'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 128, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 128, [1, 7],
scope='conv2d_1b_1x7')
branch1_3 = slim.conv2d(branch1_2, 128, [7, 1],
scope='conv2d_1c_7x1')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 128, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 128, [7, 1],
scope='conv2d_2b_7x1')
branch2_3 = slim.conv2d(branch2_2, 128, [1, 7],
scope='conv2d_2c_1x7')
branch2_4 = slim.conv2d(branch2_3, 128, [7, 1],
scope='conv2d_2d_7x1')
branch2_5 = slim.conv2d(branch2_4, 128, [1, 7],
scope='conv2d_2e_1x7')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 192, [1, 1],
scope='conv2d_3b_1x1')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_3, branch2_5,branch3_2], 3)


# inception_2_m3: 第2组的 3号module
with tf.variable_scope('inception_2_m3'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 160, [1, 7],
scope='conv2d_1b_1x7')
branch1_3 = slim.conv2d(branch1_2, 192, [7, 1],
scope='conv2d_1c_7x1')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 160, [7, 1],
scope='conv2d_2b_7x1')
branch2_3 = slim.conv2d(branch2_2, 160, [1, 7],
scope='conv2d_2c_1x7')
branch2_4 = slim.conv2d(branch2_3, 160, [7, 1],
scope='conv2d_2d_7x1')
branch2_5 = slim.conv2d(branch2_4, 192, [1, 7],
scope='conv2d_2e_1x7')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 192, [1, 1],
scope='conv2d_3b_1x1')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_3, branch2_5,branch3_2], 3)

# inception_2_m4: 第2组的 4号module
with tf.variable_scope('inception_2_m4'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 160, [1, 7],
scope='conv2d_1b_1x7')
branch1_3 = slim.conv2d(branch1_2, 192, [7, 1],
scope='conv2d_1c_7x1')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 160, [7, 1],
scope='conv2d_2b_7x1')
branch2_3 = slim.conv2d(branch2_2, 160, [1, 7],
scope='conv2d_2c_1x7')
branch2_4 = slim.conv2d(branch2_3, 160, [7, 1],
scope='conv2d_2d_7x1')
branch2_5 = slim.conv2d(branch2_4, 192, [1, 7],
scope='conv2d_2e_1x7')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 192, [1, 1],
scope='conv2d_3b_1x1')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_3, branch2_5,branch3_2], 3)

# inception_2_m5: 第2组的 5号module
with tf.variable_scope('inception_2_m5'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 160, [1, 7],
scope='conv2d_1b_1x7')
branch1_3 = slim.conv2d(branch1_2, 192, [7, 1],
scope='conv2d_1c_7x1')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 160, [7, 1],
scope='conv2d_2b_7x1')
branch2_3 = slim.conv2d(branch2_2, 160, [1, 7],
scope='conv2d_2c_1x7')
branch2_4 = slim.conv2d(branch2_3, 160, [7, 1],
scope='conv2d_2d_7x1')
branch2_5 = slim.conv2d(branch2_4, 192, [1, 7],
scope='conv2d_2e_1x7')
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 192, [1, 1],
scope='conv2d_3b_1x1')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_3, branch2_5,branch3_2], 3)
#将inception_2_m5存储到end_points中,作为Auxiliary Classifier辅助模型的分类
end_points['inception_2_m5']=net

# 第3组
# inception_3_m1: 第3组的 1号module
with tf.variable_scope('inception_3_m1'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 192, [1, 1],scope='conv2d_0a_1x1')
branch_0 = slim.conv2d(branch_0,320, [3, 3],
stride=2,
padding='VALID',
scope='conv2d_0b_3x3')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 192, [1, 1], scope='conv2d_1a_1x1')
branch1_2 = slim.conv2d(branch1_1, 192, [1, 7],
scope='conv2d_1b_1x7')
branch1_3 = slim.conv2d(branch1_2, 192, [7, 1],
scope='conv2d_1c_7x1')
branch1_4 = slim.conv2d(branch1_3, 192, [3, 3],
stride=2,
padding='VALID',
scope='conv2d_1c_3x3')
with tf.variable_scope('Branch_2'):
branch2_1 = slim.max_pool2d(net, [3, 3],
stride=2,
padding='VALID',
scope='maxPool_3a_3x3')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_4, branch2_1], 3)

# inception_3_m2: 第3组的 2号module
with tf.variable_scope('inception_3_m2'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 320, [1, 1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 384, [1, 1], scope='conv2d_1a_1x1')
#特殊
branch1_2 = tf.concat([
slim.conv2d(branch1_1, 384, [1, 3], scope='conv2d_1a_1x3'),
slim.conv2d(branch1_1, 384, [3, 1], scope='conv2d_1a_3x1')
], 3)
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 488, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 384, [3, 3],
scope='conv2d_2b_3x3')
branch2_3 = tf.concat([
slim.conv2d(branch2_2, 384, [1, 3], scope='conv2d_1a_1x3'),
slim.conv2d(branch2_2, 384, [3, 1], scope='conv2d_1a_3x1')
], 3)
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 192, [1, 1],
scope='conv2d_3b_1x1')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_2, branch2_3,branch3_2], 3)

# inception_3_m3: 第3组的 3号module
with tf.variable_scope('inception_3_m3'):
with tf.variable_scope('Branch_0'):
branch_0 = slim.conv2d(net, 320, [1, 1],scope='conv2d_0a_1x1')
with tf.variable_scope('Branch_1'):
branch1_1 = slim.conv2d(net, 384, [1, 1], scope='conv2d_1a_1x1')
#特殊
branch1_2 = tf.concat([
slim.conv2d(branch1_1, 384, [1, 3], scope='conv2d_1a_1x3'),
slim.conv2d(branch1_1, 384, [3, 1], scope='conv2d_1a_3x1')
], 3)
with tf.variable_scope('Branch_2'):
branch2_1 = slim.conv2d(net, 488, [1, 1], scope='conv2d_2a_1x1')
branch2_2 = slim.conv2d(branch2_1, 384, [3, 3],
scope='conv2d_2b_3x3')
branch2_3 = tf.concat([
slim.conv2d(branch2_2, 384, [1, 3], scope='conv2d_1a_1x3'),
slim.conv2d(branch2_2, 384, [3, 1], scope='conv2d_1a_3x1')
], 3)
with tf.variable_scope('Branch_3'):
branch3_1 = slim.avg_pool2d(net, [3, 3], scope='avgPool_3a_3x3')
branch3_2 = slim.conv2d(branch3_1, 192, [1, 1],
scope='conv2d_3b_1x1')

# 使用concat将4个分支的输出合并到一起(在第三个维度合并,即输出通道上合并)
net = tf.concat([branch_0, branch1_2, branch2_3,branch3_2], 3)

return net,end_points
############################## 卷积部分完成 ########################################


#第三部分:全局平均池化、softmax、Auxiliary Logits
def inception_v3(input,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.8,
prediction_fn=slim.softmax,
spatial_squeeze=True,
reuse=None,
scope='inceptionV3'):
with tf.variable_scope(scope,'inceptionV3',[input,num_classes],
reuse=reuse) as scope:
with slim.arg_scope([slim.batch_norm,slim.dropout],
is_training=is_training):
net,end_points=inception_V3_base(input,scope=scope)

#Auxiliary Logits
with slim.arg_scope([slim.conv2d,slim.max_pool2d,slim.avg_pool2d],
stride=1,padding='SAME'):
aux_logits=end_points['inception_2_m5']
with tf.variable_scope('Auxiliary_Logits'):
aux_logits=slim.avg_pool2d(
aux_logits,[5,5],stride=3,padding='VALID',
scope='AvgPool_1a_5x5' )
aux_logits=slim.conv2d(aux_logits,128,[1,1],
scope='conv2d_1b_1x1')
aux_logits=slim.conv2d(aux_logits,768,[5,5],
weights_initializer=trunc_normal(0.01),
padding='VALID',
scope='conv2d_2a_5x5')
aux_logits = slim.conv2d(aux_logits, num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
weights_initializer=trunc_normal(0.001),
scope='conv2d_2b_1x1')
if spatial_squeeze:
aux_logits =tf.squeeze(aux_logits,[1,2],name='SpatialSqueeze')
end_points['Auxiliary_Logits']=aux_logits

with tf.variable_scope('Logits'):
net=slim.avg_pool2d(net,[8,8],padding='VALID',
scope='avgPool_1a_8x8')
net=slim.dropout(net,keep_prob=dropout_keep_prob,
scope='dropout_1b')
end_points['PreLogits']=net
logits=slim.conv2d(net,num_classes,[1,1],activation_fn=None,
normalizer_fn=None,
scope='conv2d_1c_1x1')
if spatial_squeeze:
logits=tf.squeeze(logits,[1,2],name='SpatialSqueeze')
end_points['Logits']=logits
end_points['Predictions']=prediction_fn(logits,scope='Predictions')

return logits,end_points

########################### 构建完成


def time_compute(session, target, info_string):
num_batch = 100 #100
num_step_burn_in = 10 # 预热轮数,头几轮迭代有显存加载、cache命中等问题可以因此跳过
total_duration = 0.0 # 总时间
total_duration_squared = 0.0
for i in range(num_batch + num_step_burn_in):
start_time = time.time()
_ = session.run(target )
duration = time.time() - start_time
if i >= num_step_burn_in:
if i % 10 == 0: # 每迭代10次显示一次duration
print("%s: step %d,duration=%.5f " % (datetime.now(), i - num_step_burn_in, duration))
total_duration += duration
total_duration_squared += duration * duration
time_mean = total_duration / num_batch
time_variance = total_duration_squared / num_batch - time_mean * time_mean
time_stddev = math.sqrt(time_variance)
# 迭代完成,输出
print("%s: %s across %d steps,%.3f +/- %.3f sec per batch " %
(datetime.now(), info_string, num_batch, time_mean, time_stddev))


def main():
with tf.Graph().as_default():
batch_size=32
height,weight=299,299
input=tf.random_uniform( (batch_size,height,weight,3) )
with slim.arg_scope(inception_v3_arg_scope()):
logits,end_points=inception_v3(input,is_training=False)

init=tf.global_variables_initializer()
sess=tf.Session()
# 将网络结构图写到文件中
writer = tf.summary.FileWriter('logs/', sess.graph)
sess.run(init)
num_batches=100
time_compute(sess,logits,'Forward')

if __name__=='__main__':
main()

ResNet - residual network

import os
import config
import random
import numpy as np
import tensorflow as tf
from config import resnet_config
from data_loader import DataLoader
from eval.evaluate import accuracy


class ResNet(object):
def __init__(self,
depth=resnet_config.depth,
height=config.height,
width=config.width,
channel=config.channel,
num_classes=config.num_classes,
learning_rate=resnet_config.learning_rate,
learning_decay_rate=resnet_config.learning_decay_rate,
learning_decay_steps=resnet_config.learning_decay_steps,
epoch=resnet_config.epoch,
batch_size=resnet_config.batch_size,
model_path=resnet_config.model_path,
summary_path=resnet_config.summary_path):
"""

:param depth:
"""
self.depth = depth
self.height = height
self.width = width
self.channel = channel
self.learning_rate = learning_rate
self.learning_decay_rate = learning_decay_rate
self.learning_decay_steps = learning_decay_steps
self.epoch = epoch
self.batch_size = batch_size
self.num_classes = num_classes
self.model_path = model_path
self.summary_path = summary_path
self.num_block_dict = {18: [2, 2, 2, 2],
34: [3, 4, 6, 3],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3]}
self.bottleneck_dict = {18: False,
34: False,
50: True,
101: True}
self.filter_out = [64, 128, 256, 512]
self.filter_out_last_layer = [256, 512, 1024, 2048]
self.conv_out_depth = self.filter_out[-1] if self.depth < 50 else self.filter_out_last_layer[-1]
assert self.depth in self.num_block_dict, 'depth should be in [18,34,50,101]'
self.num_block = self.num_block_dict[self.depth]
self.bottleneck = self.bottleneck_dict[self.depth]
self.input_x = tf.placeholder(tf.float32, shape=[None, self.height, self.width, self.channel], name='input_x')
self.input_y = tf.placeholder(tf.float32, shape=[None, self.num_classes], name='input_y')
self.prediction = None
self.loss = None
self.acc = None
self.global_step = None
self.data_loader = DataLoader()
self.model()

def model(self):
# first convolution layers
x = self.conv(x=self.input_x, k_size=7, filters_out=64, strides=2, activation=True, name='First_Conv')
x = tf.layers.max_pooling2d(x, pool_size=[3, 3], strides=2, padding='same', name='max_pool')
x = self.stack_block(x)
x = tf.layers.average_pooling2d(x, pool_size=x.get_shape()[1:3], strides=1, name='average_pool')
x = tf.reshape(x, [-1, 1 * 1 * self.conv_out_depth])
fc_W = tf.truncated_normal_initializer(stddev=0.1)
logits = tf.layers.dense(inputs=x, units=self.num_classes,kernel_initializer=fc_W)

# 预测值
self.prediction = tf.argmax(logits,axis=-1)
# 计算准确率
self.acc = accuracy(logits, self.input_y)
# 损失值
self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.input_y))
# 全局步数
self.global_step = tf.train.get_or_create_global_step()
# 递减学习率
learning_rate = tf.train.exponential_decay(learning_rate=self.learning_rate,
global_step=self.global_step,
decay_rate=self.learning_decay_rate,
decay_steps=self.learning_decay_steps,
staircase=True)
self.optimize = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)

def stack_block(self, input_x):
for stack in range(4):
stack_strides = 1 if stack == 0 else 2
stack_name = 'stack_%s' % stack
with tf.name_scope(stack_name):
for block in range(self.num_block[stack]):
shortcut = input_x
block_strides = stack_strides if block == 0 else 1
block_name = stack_name + '_block_%s' % block
with tf.name_scope(block_name):
if self.bottleneck:
for layer in range(3):
with tf.name_scope(block_name + '_layer_%s' % layer):
filters = self.filter_out[stack] if layer < 2 else self.filter_out_last_layer[stack]
k_size = 3 if layer == 1 else 1
layer_strides = block_strides if layer < 1 else 1
activation = True if layer < 2 else False
layer_name = block_name + '_conv_%s' % layer
input_x = self.conv(x=input_x, filters_out=filters, k_size=k_size,
strides=layer_strides, activation=activation, name=layer_name)
else:
for layer in range(2):
with tf.name_scope(block_name + '_layer_%s' % layer):
filters = self.filter_out[stack]
k_size = 3
layer_strides = block_strides if layer < 1 else 1
activation = True if layer < 1 else False
layer_name = block_name + '_conv_%s' % layer
input_x = self.conv(x=input_x, filters_out=filters, k_size=k_size,
strides=layer_strides, activation=activation, name=layer_name)
shortcut_depth = shortcut.get_shape()[-1]
input_x_depth = input_x.get_shape()[-1]
with tf.name_scope('shortcut_connect'):
if shortcut_depth != input_x_depth:
connect_k_size = 1
connect_strides = block_strides
connect_filter = filters
shortcut_name = block_name + '_shortcut'
shortcut = self.conv(x=shortcut, filters_out=connect_filter, k_size=connect_k_size,
strides=connect_strides, activation=False, name=shortcut_name)
input_x = tf.nn.relu(shortcut + input_x)

return input_x

def conv(self, x, k_size, filters_out, strides, activation, name):
x = tf.layers.conv2d(x, filters=filters_out, kernel_size=k_size, strides=strides, padding='same', name=name)
x = tf.layers.batch_normalization(x, name=name + '_BN')
if activation:
x = tf.nn.relu(x)
return x

def fit(self, train_id_list, valid_img, valid_label):
"""
training model
:return:
"""
# 模型存储路径初始化
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
if not os.path.exists(self.summary_path):
os.makedirs(self.summary_path)

# train_steps初始化
train_steps = 0
best_valid_acc = 0.0

# summary初始化
tf.summary.scalar('loss', self.loss)
merged = tf.summary.merge_all()

# session初始化
sess = tf.Session()
writer = tf.summary.FileWriter(self.summary_path, sess.graph)
saver = tf.train.Saver(max_to_keep=10)
sess.run(tf.global_variables_initializer())
for epoch in range(self.epoch):
shuffle_id_list = random.sample(train_id_list.tolist(), len(train_id_list))
batch_num = int(np.ceil(len(shuffle_id_list) / self.batch_size))
train_id_batch = np.array_split(shuffle_id_list, batch_num)
for i in range(batch_num):
this_batch = train_id_batch[i]
batch_img, batch_label = self.data_loader.get_batch_data(this_batch)
train_steps += 1
feed_dict = {self.input_x: batch_img, self.input_y: batch_label}
_, train_loss, train_acc = sess.run([self.optimize, self.loss, self.acc], feed_dict=feed_dict)
if train_steps % 1 == 0:
val_loss, val_acc = sess.run([self.loss, self.acc],
feed_dict={self.input_x: valid_img, self.input_y: valid_label})
msg = 'epoch:%s | steps:%s | train_loss:%.4f | val_loss:%.4f | train_acc:%.4f | val_acc:%.4f' % (
epoch, train_steps, train_loss, val_loss, train_acc, val_acc)
print(msg)
summary = sess.run(merged, feed_dict={self.input_x: valid_img, self.input_y: valid_label})
writer.add_summary(summary, global_step=train_steps)
if val_acc >= best_valid_acc:
best_valid_acc = val_acc
saver.save(sess, save_path=self.model_path, global_step=train_steps)

sess.close()

def predict(self, x):
"""
predicting
:param x:
:return:
"""
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.global_variables())
ckpt = tf.train.get_checkpoint_state(self.model_path)
saver.restore(sess, ckpt.model_checkpoint_path)

prediction = sess.run(self.prediction, feed_dict={self.input_x: x})
return prediction
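
The class above pulls its defaults from the project's config, resnet_config and data_loader modules, which are not shown here. Assuming those modules are importable, a minimal driver could look like the sketch below; the id arrays are placeholders for whatever DataLoader.get_batch_data() expects, not values from the original code.

import numpy as np

if __name__ == '__main__':
    model = ResNet(depth=50)                        # the graph is built inside __init__ via self.model()
    train_ids = np.arange(1000)                     # hypothetical training-sample ids
    valid_imgs, valid_labels = model.data_loader.get_batch_data(np.arange(100))
    model.fit(train_ids, valid_imgs, valid_labels)  # trains and checkpoints the best validation accuracy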

DenseNet - densely connected network

import tensorflow as tf
import tensorflow.contrib.slim as slim

def conv_layer(input, filters, kernel_size, stride=1, layer_name="conv"):
    with tf.name_scope(layer_name):
        net = slim.conv2d(input, filters, kernel_size, scope=layer_name)
        return net

class DenseNet():
    def __init__(self, x, nb_blocks, filters, sess):
        self.nb_blocks = nb_blocks
        self.filters = filters
        self.model = self.build_model(x)
        self.sess = sess

    def bottleneck_layer(self, x, scope):
        # [BN --> ReLU --> conv 1x1 --> BN --> ReLU --> conv 3x3]
        with tf.name_scope(scope):
            x = slim.batch_norm(x)
            x = tf.nn.relu(x)
            x = conv_layer(x, self.filters, kernel_size=(1, 1), layer_name=scope + '_conv1')
            x = slim.batch_norm(x)
            x = tf.nn.relu(x)
            x = conv_layer(x, self.filters, kernel_size=(3, 3), layer_name=scope + '_conv2')
            return x

    def transition_layer(self, x, scope):
        # [BN --> conv 1x1 --> avg_pool 2x2]
        with tf.name_scope(scope):
            x = slim.batch_norm(x)
            x = conv_layer(x, self.filters, kernel_size=(1, 1), layer_name=scope + '_conv1')
            x = slim.avg_pool2d(x, 2)
            return x

    def dense_block(self, input_x, nb_layers, layer_name):
        with tf.name_scope(layer_name):
            layers_concat = []
            layers_concat.append(input_x)
            x = self.bottleneck_layer(input_x, layer_name + '_bottleN_' + str(0))
            layers_concat.append(x)
            for i in range(nb_layers):
                x = tf.concat(layers_concat, axis=3)
                x = self.bottleneck_layer(x, layer_name + '_bottleN_' + str(i + 1))
                layers_concat.append(x)
            return x

    def build_model(self, input_x):
        x = conv_layer(input_x, self.filters, kernel_size=(7, 7), layer_name='conv0')
        x = slim.max_pool2d(x, (3, 3))
        for i in range(self.nb_blocks):
            print(i)
            x = self.dense_block(x, 4, 'dense_' + str(i))
            x = self.transition_layer(x, 'trans_' + str(i))
        return x
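
A minimal usage sketch: the 32x32x3 placeholder and the growth rate of 12 below are illustrative choices, not values from the original code.

if __name__ == '__main__':
    x = tf.placeholder(tf.float32, [None, 32, 32, 3])
    sess = tf.Session()
    densenet = DenseNet(x, nb_blocks=3, filters=12, sess=sess)  # 'filters' acts as the growth rate
    print(densenet.model.get_shape().as_list())                 # shape of the final feature map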