Python通过TensorFlow卷积神经网络实现猫狗识别_Python

这份数据集来源于Kaggle，数据集有12500只猫和12500只狗。在这里简单介绍下整体思路

处理数据
设计神经网络
进行训练测试

1. 数据处理

将图片数据处理为 TensorFlow 能够识别的数据格式，并将数据划分为批次。

第一步get_files() 方法读取图片，然后根据图片名，添加猫狗 label，然后再将 image和label 放到数组中，打乱顺序返回
第二步 get_batch() 方法将第一步处理好的图片和 label 数组转化为 tensorflow 能够识别的格式，然后对图片进行裁剪或补充并做标准化处理，最后分批次返回。

新建数据处理文件，文件名 input_data.py

				?

									import tensorflow as tf

									import os 

									import numpy as np

									def get_files(file_dir):
									    """Collect cat/dog image paths from a directory and return them shuffled.

									    A file is classified by the part of its name before the first '.':
									    names containing 'cat' get label 0, names containing 'dog' get label 1,
									    and any other file is skipped.

									    Args:
									        file_dir: Directory holding the image files (e.g. ``cat.0.jpg``).
									            A trailing path separator is optional.

									    Returns:
									        A tuple ``(image_list, label_list)`` of equal length holding the
									        file paths and their integer labels, shuffled with one shared
									        permutation so that each path stays aligned with its label.
									    """
									    cats = []
									    dogs = []
									    for file_name in os.listdir(file_dir):
									        prefix = file_name.split('.')[0]
									        # os.path.join works whether or not file_dir ends with a separator.
									        if 'cat' in prefix:
									            cats.append(os.path.join(file_dir, file_name))
									        elif 'dog' in prefix:
									            dogs.append(os.path.join(file_dir, file_name))

									    image_list = cats + dogs
									    label_list = [0] * len(cats) + [1] * len(dogs)

									    # Shuffle paths and labels with the same index permutation; this keeps
									    # labels as plain ints instead of round-tripping them through strings.
									    order = np.random.permutation(len(image_list))
									    image_list = [image_list[i] for i in order]
									    label_list = [label_list[i] for i in order]
									    return image_list, label_list

									# 测试 get_files

									# imgs , label = get_files('/Users/yangyibo/GitWork/pythonLean/AI/猫狗识别/testImg/')

									# for i in imgs:

									# print("img:",i)

									# for i in label:

									# print('label:',i)

									# 测试 get_files end

									# image_W ,image_H 指定图片大小，batch_size 每批读取的个数 ，capacity队列中 最多容纳元素的个数

									def get_batch(image, label, image_W, image_H, batch_size, capacity):
									    """Build queue-based tensors that yield standardized image batches.

									    Args:
									        image: List of JPEG file paths.
									        label: List of integer labels aligned with ``image``.
									        image_W: Target image width in pixels.
									        image_H: Target image height in pixels.
									        batch_size: Number of examples per batch.
									        capacity: Maximum number of elements held in the batching queue.

									    Returns:
									        A tuple ``(image_batch, label_batch)``: a float32 tensor of shape
									        ``[batch_size, image_H, image_W, 3]`` and an int32 tensor of shape
									        ``[batch_size]``.
									    """
									    # Convert the Python lists into tensors TensorFlow can slice.
									    image = tf.cast(image, tf.string)
									    label = tf.cast(label, tf.int32)

									    # Queue that emits one (path, label) pair at a time.
									    input_queue = tf.train.slice_input_producer([image, label])
									    label = input_queue[1]

									    # Read the raw file and decode it as a 3-channel (RGB) JPEG.
									    image_contents = tf.read_file(input_queue[0])
									    image = tf.image.decode_jpeg(image_contents, channels=3)

									    # Center-crop or zero-pad to the requested size. The API expects
									    # (target_height, target_width), so height must be passed first.
									    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)

									    # Per-image standardization: subtract the mean and divide by the
									    # (adjusted) standard deviation.
									    image = tf.image.per_image_standardization(image)

									    # Assemble batches; num_threads is the number of enqueueing threads.
									    image_batch, label_batch = tf.train.batch([image, label],
									                                              batch_size=batch_size,
									                                              num_threads=64,
									                                              capacity=capacity)

									    # Pin the label batch to a static [batch_size] shape.
									    label_batch = tf.reshape(label_batch, [batch_size])
									    image_batch = tf.cast(image_batch, tf.float32)
									    return image_batch, label_batch

									# test get_batch

									# import matplotlib.pyplot as plt

									# BATCH_SIZE = 2

									# CAPACITY = 256 

									# IMG_W = 208

									# IMG_H = 208

									# train_dir = '/Users/yangyibo/GitWork/pythonLean/AI/猫狗识别/testImg/'

									# image_list, label_list = get_files(train_dir)

									# image_batch, label_batch = get_batch(image_list, label_list, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

									# with tf.Session() as sess:

									# i = 0

									# # Coordinator 和 start_queue_runners 监控 queue 的状态，不停的入队出队

									# coord = tf.train.Coordinator()

									# threads = tf.train.start_queue_runners(coord=coord)

									# # coord.should_stop() 返回 true 时也就是 数据读完了应该调用 coord.request_stop()

									# try: 

									#  while not coord.should_stop() and i<1:

									#   # 测试一个步

									#   img, label = sess.run([image_batch, label_batch])

									#   for j in np.arange(BATCH_SIZE):

									#    print('label: %d' %label[j])

									#    # 因为是个4D 的数据所以第一个为 索引 其他的为冒号就行了

									#    plt.imshow(img[j,:,:,:])

									#    plt.show()

									#   i+=1

									# # 队列中没有数据

									# except tf.errors.OutOfRangeError:

									#  print('done!')

									# finally:

									#  coord.request_stop()

									# coord.join(threads)

									 # sess.close()

2. 设计神经网络

利用卷积神经网络处理，网络结构为：

				?

									# conv1 卷积层 1

									# pooling1_lrn 池化层 1

									# conv2 卷积层 2

									# pooling2_lrn 池化层 2

									# local3 全连接层 1

									# local4 全连接层 2

									# softmax 全连接层 3

新建神经网络文件，文件名 model.py

				?

									#coding=utf-8 

									import tensorflow as tf 

									def _weights_and_biases(shape, stddev, weights_name='weights'):
									    """Create a layer's weight and bias variables in the current variable scope."""
									    weights = tf.get_variable(weights_name,
									                              shape=shape,
									                              dtype=tf.float32,
									                              initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32))
									    # One bias per output unit / feature map (the last dimension of shape).
									    biases = tf.get_variable('biases',
									                             shape=[shape[-1]],
									                             dtype=tf.float32,
									                             initializer=tf.constant_initializer(0.1))
									    return weights, biases


									def inference(images, batch_size, n_classes):
									    """Build the CNN graph and return the unnormalized class scores.

									    Layers: conv1 -> max-pool + LRN -> conv2 -> LRN + max-pool ->
									    two 128-unit fully connected layers -> linear output layer.

									    Args:
									        images: Image batch tensor with 3 channels (the first kernel has
									            shape [3, 3, 3, 16]).
									        batch_size: Number of images per batch; used to flatten the
									            feature maps to ``[batch_size, -1]``.
									        n_classes: Number of output classes.

									    Returns:
									        Logits tensor of shape ``[batch_size, n_classes]``; no softmax is
									        applied here.
									    """
									    # conv1: 3x3 kernels, 3 input channels, 16 feature maps.
									    with tf.variable_scope('conv1') as scope:
									        weights, biases = _weights_and_biases([3, 3, 3, 16], stddev=0.1)
									        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
									        pre_activation = tf.nn.bias_add(conv, biases)
									        conv1 = tf.nn.relu(pre_activation, name=scope.name)

									    # pool1 and norm1
									    with tf.variable_scope('pooling1_lrn') as scope:
									        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pooling1')
									        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

									    # conv2: 3x3 kernels, 16 input channels, 16 feature maps.
									    with tf.variable_scope('conv2') as scope:
									        weights, biases = _weights_and_biases([3, 3, 16, 16], stddev=0.1)
									        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
									        pre_activation = tf.nn.bias_add(conv, biases)
									        conv2 = tf.nn.relu(pre_activation, name='conv2')

									    # pool2 and norm2 (normalization first, then stride-1 pooling)
									    with tf.variable_scope('pooling2_lrn') as scope:
									        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
									        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME', name='pooling2')

									    # local3: flatten the feature maps and apply the first dense layer.
									    with tf.variable_scope('local3') as scope:
									        reshape = tf.reshape(pool2, shape=[batch_size, -1])
									        dim = reshape.get_shape()[1].value
									        weights, biases = _weights_and_biases([dim, 128], stddev=0.005)
									        # Built inside the scope so the op is grouped with its variables,
									        # consistent with the other layers.
									        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

									    # local4
									    with tf.variable_scope('local4') as scope:
									        weights, biases = _weights_and_biases([128, 128], stddev=0.005)
									        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

									    # Linear output layer. The weight variable keeps its original name
									    # 'softmax_linear' so existing checkpoints still load.
									    with tf.variable_scope('softmax_linear') as scope:
									        weights, biases = _weights_and_biases([128, n_classes], stddev=0.005,
									                                              weights_name='softmax_linear')
									        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

									    return softmax_linear

									def losses(logits, labels):
									    """Compute the mean sparse softmax cross-entropy loss.

									    Args:
									        logits: Unnormalized class scores, one row per example.
									        labels: Integer class indices, one per example.

									    Returns:
									        Scalar tensor with the batch-mean cross-entropy. A scalar summary
									        of the loss is also registered.
									    """
									    with tf.variable_scope('loss') as scope:
									        # Parenthesised continuation: the call no longer relies on a
									        # trailing backslash, which breaks when a blank line follows it.
									        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
									            logits=logits, labels=labels, name='xentropy_per_example')
									        loss = tf.reduce_mean(cross_entropy, name='loss')
									        tf.summary.scalar(scope.name + '/loss', loss)
									    return loss

									def trainning(loss, learning_rate):
									    """Create the Adam training op that minimizes ``loss``.

									    Args:
									        loss: Scalar loss tensor to minimize.
									        learning_rate: Learning rate passed to the Adam optimizer.

									    Returns:
									        The op that applies one optimization step and increments the
									        non-trainable ``global_step`` variable.
									    """
									    with tf.name_scope('optimizer'):
									        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
									        # Step counter; excluded from training.
									        step_counter = tf.Variable(0, name='global_step', trainable=False)
									        minimize_op = adam.minimize(loss, global_step=step_counter)
									    return minimize_op

									def evaluation(logits, labels):
									    """Compute top-1 accuracy of ``logits`` against ``labels``.

									    Args:
									        logits: Class scores, one row per example.
									        labels: Integer class indices, one per example.

									    Returns:
									        Scalar float16 tensor with the fraction of examples whose label is
									        the top-scoring class. A scalar summary is also registered.
									    """
									    with tf.variable_scope('accuracy') as scope:
									        # 1.0 where the true label is the top prediction, else 0.0.
									        hits = tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float16)
									        accuracy = tf.reduce_mean(hits)
									        tf.summary.scalar(scope.name + '/accuracy', accuracy)
									    return accuracy

3. 训练数据，并将训练的模型存储

				?

									import os 

									import numpy as np 

									import tensorflow as tf 

									import input_data  

									import model 

									# Two output neurons: the scores for cat and dog.
									N_CLASSES = 2
									# Images are cropped/padded to this size; larger images train more slowly.
									IMG_W = IMG_H = 208
									# Number of examples per batch.
									BATCH_SIZE = 32
									# Maximum number of elements held in the input queue.
									CAPACITY = 256
									# Number of training steps; should be >= 10000.
									MAX_STEP = 15000
									# Learning rate; an initial value <= 0.0001 is recommended.
									learning_rate = 0.0001

									def run_training(train_dir=None, logs_train_dir=None):
									    """Train the cat/dog classifier and periodically save checkpoints.

									    Args:
									        train_dir: Directory with the training images. Defaults to the
									            path that was previously hard-coded.
									        logs_train_dir: Directory for TensorBoard summaries and model
									            checkpoints. Defaults to the path that was previously
									            hard-coded.
									    """
									    if train_dir is None:
									        train_dir = '/Users/yangyibo/GitWork/pythonLean/AI/猫狗识别/img/'
									    if logs_train_dir is None:
									        logs_train_dir = '/Users/yangyibo/GitWork/pythonLean/AI/猫狗识别/saveNet/'

									    # Input pipeline: shuffled file list -> batched tensors.
									    train, train_label = input_data.get_files(train_dir)
									    train_batch, train_label_batch = input_data.get_batch(train,
									                                                          train_label,
									                                                          IMG_W,
									                                                          IMG_H,
									                                                          BATCH_SIZE,
									                                                          CAPACITY)

									    # Model, loss, optimizer step and accuracy metric.
									    train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
									    train_loss = model.losses(train_logits, train_label_batch)
									    train_op = model.trainning(train_loss, learning_rate)
									    train__acc = model.evaluation(train_logits, train_label_batch)

									    summary_op = tf.summary.merge_all()
									    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')

									    # The context manager closes the session even if training raises.
									    with tf.Session() as sess:
									        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
									        saver = tf.train.Saver()
									        sess.run(tf.global_variables_initializer())

									        # The coordinator and queue runners keep the input queues filled.
									        coord = tf.train.Coordinator()
									        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
									        try:
									            for step in range(MAX_STEP):
									                if coord.should_stop():
									                    break

									                if step % 50 == 0:
									                    # Fetch the summary in the same run as the training
									                    # step, so it describes the batch whose loss is
									                    # printed and no extra batch is dequeued.
									                    _, tra_loss, tra_acc, summary_str = sess.run(
									                        [train_op, train_loss, train__acc, summary_op])
									                    print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
									                    train_writer.add_summary(summary_str, step)
									                else:
									                    sess.run(train_op)

									                # Save a checkpoint every 2000 steps and at the last step.
									                if step % 2000 == 0 or (step + 1) == MAX_STEP:
									                    saver.save(sess, checkpoint_path, global_step=step)
									        except tf.errors.OutOfRangeError:
									            print('Done training -- epoch limit reached')
									        finally:
									            # Always stop the queue threads and flush the summaries,
									            # whichever way the loop ended.
									            coord.request_stop()
									            coord.join(threads)
									            train_writer.close()

									# Start training only when this file is executed as a script, so that
									# importing the module does not kick off a training run.
									if __name__ == '__main__':
									    run_training()