TensorFlow 入门 ---- 手势识别-摩杜云开发者社区

1 - 导入TensorFlow库

import numpy as np
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
import tf_utils
import time
%matplotlib inline
np.random.seed(1)

Exercise----1

=============================

# Toy example: build and evaluate the squared-error loss (y - y_hat)**2.
y_hat = tf.constant(36, name='y_hat') # define y_hat as the constant 36
y = tf.constant(39, name='y')         # define y as the constant 39

loss = tf.Variable((y-y_hat)**2, name='loss')   # create a variable holding the loss
init = tf.global_variables_initializer()  # initializer op; it takes effect when run via sess.run(init)
sess = tf.Session()  # session used to evaluate the graph (it is reused and closed further below)
sess.run(init)          # initialize the variables
print(sess.run(loss))  # evaluate the loss and print it

占位符是一个对象，它的值只能在稍后指定。要指定占位符的值，可以使用一个feed字典（feed_dict变量）来传入。接下来，我们为x创建一个占位符，这样就可以在稍后运行会话时传入一个数字。

# Placeholder whose value is supplied at run time through feed_dict.
x = tf.placeholder(tf.int64, name='x')
print(sess.run(2*x, feed_dict={x:3}))  # feed x = 3 and evaluate 2*x
sess.close()  # release the session created earlier

1.1 - 线性函数

让我们通过计算以下等式来开始编程：$Y = WX + b$，其中W和X是随机矩阵，b是随机向量。
我们计算WX+b，其中W，X和b是从随机正态分布中抽取的。 W的维度是（4,3），X是（3,1），b是（4,1）。我们开始定义一个shape=（3,1）的常量X：

def linear_function():
    """
    Evaluate the linear map Y = WX + b on fixed random inputs.

    NumPy's global seed is reset to 1 on every call, so the sampled values
    (and therefore the result) are the same each time:
        X - standard-normal sample of shape (3, 1)
        W - standard-normal sample of shape (4, 3)
        b - standard-normal sample of shape (4, 1)

    Returns:
        result - the value of WX + b obtained by running the graph in a session
    """

    np.random.seed(1)  # side effect: reseeds NumPy's global generator

    # The sampling order (X, then W, then b) determines the drawn values.
    X = np.random.randn(3,1)
    W = np.random.randn(4,3)
    b = np.random.randn(4,1)

    Y = tf.matmul(W,X) + b

    # The context manager closes the session even if sess.run raises.
    with tf.Session() as sess:
        result = sess.run(Y)
    return result

# Run the demo and print the evaluated Y = WX + b.
print('result = ' + str(linear_function()))

result = [[-2.15657382]
 [ 2.95891446]
 [-1.08926781]
 [-0.84538042]]

1.2 - 计算sigmoid

def sigmoid(z):
    """
    Compute the sigmoid of z by evaluating a small TensorFlow graph.

    Args:
        z - input value, fed into a float32 placeholder

    Returns:
        the value of sigmoid(z) produced by the session
    """
    z_input = tf.placeholder(tf.float32, name='x')
    activation = tf.sigmoid(z_input)

    # The session is closed automatically when the with-block exits.
    with tf.Session() as session:
        return session.run(activation, feed_dict={z_input: z})

# Evaluate the sigmoid at a large positive input and at zero.
print ("sigmoid(12) = " + str(sigmoid(12)))
print ("sigmoid(0) = " + str(sigmoid(0)))

sigmoid(12) = 0.999994
sigmoid(0) = 0.5

1.3 - 计算成本

1.4 - 使用独热编码（0、1编码）

独热编码 ------> one_hot_coding

很多时候在深度学习中，标签向量y的取值范围是从0到C−1，其中C是指分类的类别数量。例如当C=4时，你可能需要把y转换成如下的独热矩阵：

def one_hot_matrix(lables, C):
    """
    Build a one-hot matrix in which row i corresponds to class i and column j
    to training example j: entry (i, j) is 1 when example j has label i.

    Args:
        lables - vector of class labels (name kept as spelled for callers)
        C - number of classes, used as the depth of the one-hot axis

    Returns:
        one_hot - the evaluated one-hot matrix
    """
    depth = tf.constant(C, name='C')
    # axis=0 places the depth (class) dimension on the rows.
    one_hot_op = tf.one_hot(indices=lables, depth=depth, axis=0)

    # The context manager closes the session even if evaluation fails.
    with tf.Session() as sess:
        one_hot = sess.run(one_hot_op)
    return one_hot

# Demo 1: six labels encoded with 4 classes.
lables = np.array([1, 2, 3, 0, 2, 1])
one_hot = one_hot_matrix(lables, 4)
print(str(one_hot))
print("------------------------------------")
# Demo 2: nine labels (1 through 9) encoded with depth 10.
lable2 = np.array([1,2,3,4,5,6,7,8,9])
two_hot = one_hot_matrix(lable2, 10)
print(str(two_hot))

[[ 0.  0.  0.  1.  0.  0.]
 [ 1.  0.  0.  0.  0.  1.]
 [ 0.  1.  0.  0.  1.  0.]
 [ 0.  0.  1.  0.  0.  0.]]
------------------------------------
[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.]]

1.5 - 初始化为0和1

现在我们将学习如何用0或者1初始化一个向量，我们要用到tf.ones()和tf.zeros()，给定这些函数一个维度值那么它们将会返回全是1或0的满足条件的向量/矩阵

def ones(shape):
    """
    Return an array of the given shape filled with ones.

    Args:
        shape - shape passed to tf.ones, e.g. [3, 1]

    Returns:
        the evaluated all-ones array
    """
    ones_op = tf.ones(shape)
    # The context manager closes the session even if evaluation fails.
    with tf.Session() as sess:
        return sess.run(ones_op)

def zeros(shape):
    """
    Return an array of the given shape filled with zeros.

    Args:
        shape - shape passed to tf.zeros, e.g. [4, 1]

    Returns:
        the evaluated all-zeros array
    """
    zeros_op = tf.zeros(shape)
    # The context manager closes the session even if evaluation fails.
    with tf.Session() as sess:
        return sess.run(zeros_op)

# Print a (3, 1) array of ones and a (4, 1) array of zeros.
print('ones = ' + str(ones([3,1])))
print('zeros= ' + str(zeros([4,1])))

ones = [[ 1.]
 [ 1.]
 [ 1.]]
zeros= [[ 0.]
 [ 0.]
 [ 0.]
 [ 0.]]

2 - 使用TensorFlow构建你的第一个神经网络

# Load the train/test images, labels and class list through the tf_utils helper.
X_train_orig , Y_train_orig , X_test_orig , Y_test_orig , classes = tf_utils.load_dataset()

# Display one training image and print its label.
index = 111
plt.imshow(X_train_orig[index])
print('Y = ' + str(np.squeeze(Y_train_orig[:, index])))

Y = 2

TensorFlow 入门 ---- 手势识别_tensorflow

数字二

# Question: in X_train_orig.reshape(X_train_orig.shape[0], -1), why -1?
# Answer: in reshape(number, -1), `number` is the row count and NumPy infers
# the column count as (total number of elements) / number.

# Small demonstration: a (4, 5) array reshaped to 10 rows gets 2 columns.
test_one = np.random.randn(4,5)
print(test_one.shape)
print(test_one.reshape(10, -1))

(4, 5)
[[ 0.58281521 -1.10061918]
 [ 1.14472371  0.90159072]
 [ 0.50249434  0.90085595]
 [-0.68372786 -0.12289023]
 [-0.93576943 -0.26788808]
 [ 0.53035547 -0.69166075]
 [-0.39675353 -0.6871727 ]
 [-0.84520564 -0.67124613]
 [-0.0126646  -1.11731035]
 [ 0.2344157   1.65980218]]

和往常一样，我们要对数据集进行扁平化，然后再除以255以归一化数据。除此之外，我们还需要把每个标签转化为独热向量，像上面的图一样。

# Flatten each image into a vector and transpose so that examples are columns.
X_train_flatten = X_train_orig.reshape(X_train_orig.shape[0], -1).T # each column is one example
X_test_flatten = X_test_orig.reshape(X_test_orig.shape[0],-1).T
print(X_train_flatten.shape)
print(X_train_orig.shape)

# Normalize the data by dividing by 255
X_train = X_train_flatten /255
X_test = X_test_flatten/255

# Convert the labels to one-hot matrices with 6 classes
Y_train = tf_utils.convert_to_one_hot(Y_train_orig, 6)
Y_test = tf_utils.convert_to_one_hot(Y_test_orig, 6)

# Report the number of examples (second axis) and the resulting shapes.
print("训练集样本数 = " + str(X_train.shape[1]))
print("测试集样本数 = " + str(X_test.shape[1]))
print("X_train.shape: " + str(X_train.shape))
print("Y_train.shape: " + str(Y_train.shape))
print("X_test.shape: " + str(X_test.shape))
print("Y_test.shape: " + str(Y_test.shape))

(12288, 1080)
(1080, 64, 64, 3)
训练集样本数 = 1080
测试集样本数 = 120
X_train.shape: (12288, 1080)
Y_train.shape: (6, 1080)
X_test.shape: (12288, 120)
Y_test.shape: (6, 120)

我们的目标是构建能够高准确度识别手势符号的算法。要做到这一点，你要建立一个TensorFlow模型，这个模型几乎和你之前在猫识别中用numpy实现的模型一样（但现在使用softmax输出）。这是把你的numpy实现与TensorFlow实现进行比较的一个好机会。

目前的模型是：LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX，即原来的SIGMOID输出层已经换成了SOFTMAX。当类别多于两个时，SOFTMAX层是SIGMOID的推广。

2.1 - 创建placeholders

def create_placeholders(n_x, n_y):
    """
    Create the input and label placeholders for a TensorFlow session.

    Args:
        n_x - size of one flattened image vector (64*64*3 = 12288)
        n_y - number of classes (labels 0 to 5, so n_y = 6)

    Returns:
        X - float32 placeholder for the inputs, shape [n_x, None]
        Y - float32 placeholder for the labels, shape [n_y, None]

    Note:
        The second dimension is None so the number of examples stays
        flexible; it differs between training and testing.
    """
    inputs = tf.placeholder(dtype=tf.float32, shape=[n_x, None], name='X')
    targets = tf.placeholder(dtype=tf.float32, shape=[n_y, None], name='Y')
    return inputs, targets

# Create the placeholders for 12288 input features and 6 classes, then print them.
X, Y = create_placeholders(12288,6)
print('X = ' + str(X))
print('Y = ' + str(Y))

X = Tensor("X_2:0", shape=(12288, ?), dtype=float32)
Y = Tensor("Y_2:0", shape=(6, ?), dtype=float32)

2.2 - 初始化参数

初始化tensorflow中的参数，我们将使用Xavier初始化权重和用零来初始化偏差

def initialize_parameters():
    """
    Create the weights and biases of the three-layer network.

    Weights use Xavier initialization (seed=1); biases start at zero:
        W1 : [25, 12288]    b1 : [25, 1]
        W2 : [12, 25]       b2 : [12, 1]
        W3 : [6, 12]        b3 : [6, 1]

    Returns:
        parameters - dict mapping 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' to the
                     created variables
    """
    tf.set_random_seed(1)

    # (rows, columns) of each weight matrix, in network order.
    layer_shapes = [(25, 12288), (12, 25), (6, 12)]

    parameters = {}
    for layer, (n_out, n_in) in enumerate(layer_shapes, start=1):
        w_name = 'W' + str(layer)
        b_name = 'b' + str(layer)
        # Same creation order as writing them out by hand: W then b per layer.
        parameters[w_name] = tf.get_variable(
            w_name, [n_out, n_in],
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters[b_name] = tf.get_variable(
            b_name, [n_out, 1], initializer=tf.zeros_initializer())
    return parameters

tf.reset_default_graph() # clear the default graph stack and reset the global default graph
with tf.Session() as sess:
    # Only the variable objects are printed here; nothing is evaluated.
    parameters = initialize_parameters()
    print("W1 = " + str(parameters["W1"]))
    print("b1 = " + str(parameters["b1"]))
    print("W2 = " + str(parameters["W2"]))
    print("b2 = " + str(parameters["b2"]))

W1 = <tf.Variable 'W1:0' shape=(25, 12288) dtype=float32_ref>
b1 = <tf.Variable 'b1:0' shape=(25, 1) dtype=float32_ref>
W2 = <tf.Variable 'W2:0' shape=(12, 25) dtype=float32_ref>
b2 = <tf.Variable 'b2:0' shape=(12, 1) dtype=float32_ref>

2.3 - 前向传播

我们将要在TensorFlow中实现前向传播，该函数将接受一个字典参数并完成前向传播，它会用到以下代码：

1. tf.add(…) ：加法
2. tf.matmul(… , …) ：矩阵乘法
3. tf.nn.relu(…) ：Relu激活函数

def forward_propagation(X, parameters):
    """
    Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Args:
        X - input tensor
        parameters - dict holding 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'

    Returns:
        Z3 - output of the last LINEAR unit (no softmax is applied here)
    """
    W1, b1, W2, b2, W3, b3 = (
        parameters[key] for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3')
    )

    # Ops are created in the same order as the layers, so the auto-generated
    # op names stay stable.
    hidden1 = tf.nn.relu(tf.add(tf.matmul(W1, X), b1))
    hidden2 = tf.nn.relu(tf.add(tf.matmul(W2, hidden1), b2))
    return tf.add(tf.matmul(W3, hidden2), b3)

# Rebuild the graph from scratch and print the symbolic output tensor Z3.
tf.reset_default_graph()
with tf.Session() as sess:
    X,Y = create_placeholders(12288, 6)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    print('Z3 = ' + str(Z3))

Z3 = Tensor("Add_2:0", shape=(6, ?), dtype=float32)

2.4 - 计算成本

def compute_cost(Z3,Y):
    """
    Build the softmax cross-entropy cost, averaged with tf.reduce_mean.

    Args:
        Z3 - output of the last LINEAR unit
        Y - label tensor

    Returns:
        cost - scalar tensor holding the mean cross-entropy
    """
    # Both tensors are transposed before being handed to the loss op.
    transposed_logits = tf.transpose(Z3)
    transposed_labels = tf.transpose(Y)

    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=transposed_labels, logits=transposed_logits)
    return tf.reduce_mean(per_example_loss)

# Rebuild the graph from scratch and print the symbolic cost tensor.
tf.reset_default_graph()
with tf.Session() as sess:
    X,Y = create_placeholders(12288,6)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3,Y)
    print('cost =' +str(cost))

cost =Tensor("Mean:0", shape=(), dtype=float32)

2.5 - 反向传播&更新参数

得益于编程框架，所有反向传播和参数更新都在1行代码中处理。计算成本函数后，需要创建一个“optimizer”对象。在tf.Session中运行时，必须将此对象与成本函数一起调用；被调用时，它将使用所选择的方法和学习速率对给定成本进行优化。

# NOTE(review): illustrative one-liner. `learning_rate` has no module-level
# definition in the visible source (it appears only as a parameter of model()),
# so it must be defined before this statement can run on its own.
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)

# Unpack the training-set shape: n_x input features, m examples.
(n_x, m) = X_train.shape
print(n_x)
print(m)

12288
1080

2.6 - 构建模型

def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500,minibatch_size=32,print_cost=True, is_plot=True):
    """
    实现一个三层的TensorFlow神经网络：LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX

    参数：
        X_train - 训练集，维度为（输入大小（输入节点数量） = 12288, 样本数量 = 1080）
        Y_train - 训练集分类数量，维度为（输出大小(输出节点数量) = 6, 样本数量 = 1080）
        X_test - 测试集，维度为（输入大小（输入节点数量） = 12288, 样本数量 = 120）
        Y_test - 测试集分类数量，维度为（输出大小(输出节点数量) = 6, 样本数量 = 120）
        learning_rate - 学习速率
        num_epochs - 整个训练集的遍历次数
        mini_batch_size - 每个小批量数据集的大小
        print_cost - 是否打印成本，每100代打印一次
        is_plot - 是否绘制曲线图

    返回：
        parameters - 学习后的参数

    """
    ops.reset_default_graph() #能够重新运行模型而不覆盖tf变量
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = X_train.shape  #获取输入节点数量和样本数
    n_y = Y_train.shape[0]
    costs = []                #成本集
    
    
    #给X和Y创建placeholder
    X,Y = create_placeholders(n_x, n_y)
    
    #初始化参数
    parameters = initialize_parameters()
    
    #前向传播
    Z3 = forward_propagation(X, parameters)
    
    #计算成本
    cost = compute_cost(Z3,Y)
    
    #反向传播，使用Adam优化
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    #初始化所有的变量
    init = tf.global_variables_initializer()
    
    #开始会话并计算
    with tf.Session() as sess:
        #初始化
        sess.run(init)
        
        #正常训练的循环
        for epoch in range(num_epochs):
            epoch_cost = 0        #每代的成本
            num_minibatches = int(m / minibatch_size)  #minibatch的总数量
            seed = seed +1
            minibatches = tf_utils.random_mini_batches(X_train, Y_train)
            
            for minibatch in minibatches:
                
                #选择一个minibatch
                (minibatch_X, minibatch_Y) = minibatch
                
                #数据已经准备好了，开始运行session
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X:minibatch_X,Y:minibatch_Y})
                
                #计算这个minibatch在这一代中所占的误差
                epoch_cost = epoch_cost + minibatch_cost / num_minibatches
            
            #记录并打印成本
            ## 记录成本
            if epoch % 5 == 0:
                costs.append(epoch_cost)
                if print_cost and epoch % 100 ==0:
                    print("epoch = " + str(epoch) + "    epoch_cost = " + str(epoch_cost))
         
        #是否绘制图谱
        if is_plot:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per tens)')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()
            
        parameters = sess.run(parameters)
        print('参数已经保存到session。')
        #计算当前的预测结果
        correct_prediction = tf.equal(tf.argmax(Z3),tf.argmax(Y))
         #计算准确率
        accuracy = tf.reduce_mean(tf.cast(correct_prediction,"float"))
        
        print("训练集的准确率：", accuracy