TensorFlow简单卷积神经网络（CNN）实现

数据集

本次依然使用MNIST数据集来进行卷积神经网络训练。

MNIST是一个入门级的计算机视觉数据集，它包含各种手写数字图片：

之前用简单神经网络进行手写识别的博文：http://www.omegaxyz.com/2018/04/26/tensorflow_mnist_simple/

简单卷积神经网络的流程

第一层卷积操作

需要输入的参数值有卷积核尺寸大小，即patch、图像通道数，卷积核的数目，代表会出现多少个卷积特征图像。

对于每一个卷积核都有一个对应的偏置量。

图片乘以卷积核，并加上偏置量，卷积结果28*28*32。

对卷积结果做2*2、步长为2的最大池化（每个窗口取最大值），池化结果14*14*32。

第二层卷积操作

32通道卷积，卷积出64个特征。

原图像尺寸28*28，第一轮图像缩小为14*14，共有32张，第二轮后图像缩小为7*7，共有64张。

第三层全连接操作

二维张量，第一个参数7*7*64的patch，也可以认为是只有一行7*7*64个数据的卷积，第二个参数代表卷积个数共1024个。

全连接操作，每张图片的结果是1*1024的向量：1*(7*7*64)的行向量乘以(7*7*64)*1024的权重矩阵，其中行向量与权重矩阵的每一列相乘得到一个数（前行向量后列向量），共1024个。matmul实现最基本的矩阵相乘，不同于tf.nn.conv2d的遍历相乘。

第四层输出操作

二维张量，1*1024矩阵卷积，共10个卷积，对应我们开始的ys长度为10。

最后的分类，结果为1*1*10 ，Softmax和Sigmoid都是基于logistic分类算法，一个是多分类一个是二分类。

注意

dropout操作，减少过拟合：训练时以keep_prob的概率保留上一层的每个输出，被丢弃的输出置为0，被保留的输出按1/keep_prob放大（keep_prob=0.5时即乘以2），防止评测曲线出现震荡。

使用占位符，由dropout自动确定scale，也可以自定义，比如0.5，根据TensorFlow文档可知，程序中真实使用的值为1/0.5=2，也就是某些输入乘以2，同时某些输入乘以0。

代码

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST dataset from the MNIST_DATA/ directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_DATA/', one_hot=True)

# An InteractiveSession installs itself as the default session, which is what
# lets simple_cnn() call .run() / .eval() without passing a session.
sess = tf.InteractiveSession()


def weight_variable(shape):
    """Create a trainable weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    standard deviation 0.1 (mean left at the TensorFlow default); per the
    TensorFlow documentation, samples further than two standard deviations
    from the mean are re-drawn.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A ``tf.Variable`` holding the randomly initialised weights.
    """
    return tf.Variable(initial_value=tf.truncated_normal(shape, stddev=0.1))


# Bias definition
def bias_variable(shape):
    """Create a trainable bias variable of the given shape.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A ``tf.Variable`` whose elements all start at 0.1.
    """
    return tf.Variable(initial_value=tf.constant(0.1, shape=shape))


# Convolution
def conv_op(in_tensor, kernel, strides=[1, 1, 1, 1], padding='SAME'):
    """Apply a 2-D convolution to ``in_tensor`` with ``kernel``.

    Thin wrapper around ``tf.nn.conv2d``. With the defaults the kernel moves
    one step at a time in every dimension and 'SAME' padding zero-pads the
    borders.

    Args:
        in_tensor: Input tensor passed to ``tf.nn.conv2d``.
        kernel: Filter tensor passed to ``tf.nn.conv2d``.
        strides: Stride for each input dimension.
        padding: Padding scheme, e.g. 'SAME'.

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    return tf.nn.conv2d(in_tensor, kernel, strides=strides, padding=padding)


# Pooling
def max_pool_2x2(in_tensor, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME'):
    """Max-pool ``in_tensor``, by default over 2x2 windows with stride 2.

    Thin wrapper around ``tf.nn.max_pool``. With the defaults each spatial
    dimension is halved, so the amount of data shrinks by a factor of four.

    Args:
        in_tensor: Input tensor, typically the output of a convolution.
        ksize: Pooling window size for each input dimension.
        strides: Stride for each input dimension.
        padding: Padding scheme, e.g. 'SAME'.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    return tf.nn.max_pool(in_tensor, ksize, strides=strides, padding=padding)


def simple_cnn():
    """Build, train and evaluate a small two-convolution CNN on MNIST.

    Network: 28x28x1 image -> 5x5 conv (32 maps) + ReLU -> 2x2 max-pool
    -> 5x5 conv (64 maps) + ReLU -> 2x2 max-pool -> fully connected (1024)
    + ReLU -> dropout -> fully connected (10) -> softmax.

    Training runs 400 Adam steps (learning rate 1e-4) on batches of 50. The
    accuracy on the current batch is printed every 100 steps and the accuracy
    on the test set is printed at the end.

    Relies on the module-level ``mnist`` dataset and on a default TensorFlow
    session being installed, because ``.run()`` / ``.eval()`` are called
    without an explicit session.

    Returns:
        None.
    """
    # Flattened 28*28 images; None leaves the number of images open.
    x = tf.placeholder(tf.float32, [None, 784])
    # One-hot targets for the ten digit classes 0-9.
    y_ = tf.placeholder(tf.float32, [None, 10])
    # Restore the image layout: -1 infers the number of images, the trailing
    # 1 is the single grayscale channel.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Parameter shapes. Convolution kernels are
    # [patch_height, patch_width, in_channels, out_channels].
    w1 = [5, 5, 1, 32]
    b1 = [32]
    w2 = [5, 5, 32, 64]
    b2 = [64]
    wfc1 = [7*7*64, 1024]
    bfc1 = [1024]
    wfc2 = [1024, 10]
    bfc2 = [10]

    # First convolution layer
    W_conv1 = weight_variable(w1)
    b_conv1 = bias_variable(b1)
    h_conv1 = tf.nn.relu(conv_op(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Second convolution layer
    W_conv2 = weight_variable(w2)
    b_conv2 = bias_variable(b2)
    h_conv2 = tf.nn.relu(conv_op(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer: flatten each image's pooled feature maps into
    # one row of 7*7*64 values and multiply by a [7*7*64, 1024] matrix.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    W_fc1 = weight_variable(wfc1)
    b_fc1 = bias_variable(bfc1)
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout; keep_prob is fed at run time (0.5 when training, 1.0 when
    # evaluating).
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer: one logit per digit class.
    W_fc2 = weight_variable(wfc2)
    b_fc2 = bias_variable(bfc2)
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(logits)

    # Loss is computed from the raw logits. Taking tf.log of the softmax
    # output instead yields -inf/NaN as soon as a probability underflows to 0.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Fraction of images whose most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    tf.global_variables_initializer().run()
    for i in range(400):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            # Dropout is disabled (keep_prob=1.0) while measuring accuracy.
            train_accuracy = accuracy.eval(
                feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuarcy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print("test accuracy %g" % accuracy.eval(
        feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# Run the training/evaluation demo only when this file is executed as a script.
if __name__ == '__main__':
    simple_cnn()

100

101

102

103

104

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST dataset from the MNIST_DATA/ directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_DATA/', one_hot=True)

# An InteractiveSession installs itself as the default session, which is what
# lets simple_cnn() call .run() / .eval() without passing a session.
sess = tf.InteractiveSession()

def weight_variable(shape):
    """Create a trainable weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution with
    standard deviation 0.1 (mean left at the TensorFlow default); per the
    TensorFlow documentation, samples further than two standard deviations
    from the mean are re-drawn.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A ``tf.Variable`` holding the randomly initialised weights.
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    weight = tf.Variable(initial_value=initial)
    return weight

# Bias definition
def bias_variable(shape):
    """Create a trainable bias variable of the given shape.

    Args:
        shape: Shape of the variable to create.

    Returns:
        A ``tf.Variable`` whose elements all start at 0.1.
    """
    initial = tf.constant(0.1, shape=shape)
    bias = tf.Variable(initial_value=initial)
    return bias

# Convolution
def conv_op(in_tensor, kernel, strides=[1, 1, 1, 1], padding='SAME'):
    """Apply a 2-D convolution to ``in_tensor`` with ``kernel``.

    Thin wrapper around ``tf.nn.conv2d``. With the defaults the kernel moves
    one step at a time in every dimension and 'SAME' padding zero-pads the
    borders.

    Args:
        in_tensor: Input tensor passed to ``tf.nn.conv2d``.
        kernel: Filter tensor passed to ``tf.nn.conv2d``.
        strides: Stride for each input dimension.
        padding: Padding scheme, e.g. 'SAME'.

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    conv_out = tf.nn.conv2d(in_tensor, kernel, strides=strides, padding=padding)
    return conv_out

# Pooling
def max_pool_2x2(in_tensor, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME'):
    """Max-pool ``in_tensor``, by default over 2x2 windows with stride 2.

    Thin wrapper around ``tf.nn.max_pool``. With the defaults each spatial
    dimension is halved, so the amount of data shrinks by a factor of four.

    Args:
        in_tensor: Input tensor, typically the output of a convolution.
        ksize: Pooling window size for each input dimension.
        strides: Stride for each input dimension.
        padding: Padding scheme, e.g. 'SAME'.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    max_pool = tf.nn.max_pool(in_tensor, ksize, strides=strides, padding=padding)
    return max_pool

def simple_cnn():
    """Build, train and evaluate a small two-convolution CNN on MNIST.

    Network: 28x28x1 image -> 5x5 conv (32 maps) + ReLU -> 2x2 max-pool
    -> 5x5 conv (64 maps) + ReLU -> 2x2 max-pool -> fully connected (1024)
    + ReLU -> dropout -> fully connected (10) -> softmax.

    Training runs 400 Adam steps (learning rate 1e-4) on batches of 50. The
    accuracy on the current batch is printed every 100 steps and the accuracy
    on the test set is printed at the end.

    Relies on the module-level ``mnist`` dataset and on a default TensorFlow
    session being installed, because ``.run()`` / ``.eval()`` are called
    without an explicit session.

    Returns:
        None.
    """
    # Flattened 28*28 images; None leaves the number of images open.
    x = tf.placeholder(tf.float32, [None, 784])
    # One-hot targets for the ten digit classes 0-9.
    y_ = tf.placeholder(tf.float32, [None, 10])
    # Restore the image layout: -1 infers the number of images, the trailing
    # 1 is the single grayscale channel.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Parameter shapes. Convolution kernels are
    # [patch_height, patch_width, in_channels, out_channels].
    w1 = [5, 5, 1, 32]
    b1 = [32]
    w2 = [5, 5, 32, 64]
    b2 = [64]
    wfc1 = [7*7*64, 1024]
    bfc1 = [1024]
    wfc2 = [1024, 10]
    bfc2 = [10]

    # First convolution layer
    W_conv1 = weight_variable(w1)
    b_conv1 = bias_variable(b1)
    h_conv1 = tf.nn.relu(conv_op(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Second convolution layer
    W_conv2 = weight_variable(w2)
    b_conv2 = bias_variable(b2)
    h_conv2 = tf.nn.relu(conv_op(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer: flatten each image's pooled feature maps into
    # one row of 7*7*64 values and multiply by a [7*7*64, 1024] matrix.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    W_fc1 = weight_variable(wfc1)
    b_fc1 = bias_variable(bfc1)
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Dropout; keep_prob is fed at run time (0.5 when training, 1.0 when
    # evaluating).
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer: one logit per digit class.
    W_fc2 = weight_variable(wfc2)
    b_fc2 = bias_variable(bfc2)
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(logits)

    # Loss is computed from the raw logits. Taking tf.log of the softmax
    # output instead yields -inf/NaN as soon as a probability underflows to 0.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Fraction of images whose most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    tf.global_variables_initializer().run()
    for i in range(400):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            # Dropout is disabled (keep_prob=1.0) while measuring accuracy.
            train_accuracy = accuracy.eval(
                feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print('step %d, training accuarcy %g' % (i, train_accuracy))
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    print("test accuracy %g" % accuracy.eval(
        feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

# Run the training/evaluation demo only when this file is executed as a script.
if __name__ == '__main__':
    simple_cnn()

结果：

显然这个结果比简单双层神经网络的效果要好。

TensorFlow简单卷积神经（CNN）网络实现

数据集

简单卷积神经网络的流程

第一层卷积操作

第二层卷积操作

第三层全连接操作

第四层输出操作

代码

大模型中的RepE表征工程

大模型也是一种优化器（LLM as Optimizer）

全栈开发与快速部署Demo

学术idea自动发现与生成

自回归语言模型（language model）Python实现

粉丝期待的三体电影宇宙（近四十部电影与电视剧集）

基于历史对比学习的时序知识图谱推理

泰拉瑞亚Terriaria快速部署Linux服务器

iPad生产力指南——编程

DeepScience：学术趋势预测与分析

留下评论取消回复

数据集

简单卷积神经网络的流程

第一层卷积操作

第二层卷积操作

第三层全连接操作

第四层输出操作

代码

相关文章

留下评论取消回复