I've recently been working through Andrew Ng's Deep Learning course and have run into a problem I'd like to ask about. I'm on Course 1, Week 4, and I rewrote the programming assignment myself, following Professor Ng's code. With the two-layer network my results match his exactly, but with the L-layer network the value of the cost function converges at about 0.64. I've spent two days checking and still can't find the mistake, so I'm asking here; I'd be very grateful if someone could tell me where I went wrong.
The original materials can be found at the link below:
https://github.com/robbertliu/deeplearning.ai-andrewNG
Here is my Python program:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from dnn_app_utils_v2 import *
def Sigmoid(z):
    return 1 / (1 + np.exp(- z))
def ReLU(z):
    return np.maximum(0, z)
def SigmoidBackward(z):
    # derivative of the sigmoid with respect to z: a * (1 - a)
    a = 1 / (1 + np.exp(- z))
    dz = a * (1 - a)
    return dz
def ReLUBackward(z):
    # derivative of ReLU with respect to z: a 0/1 mask (1 where z >= 0, else 0)
    dz = np.ones(z.shape)
    dz[z < 0] = 0
    return dz
def InitializeParameters(dims):
    np.random.seed(1)
    parameters = {}
    L = len(dims) - 1
    for l in range(1, L + 1):
        # weights drawn from a standard normal and scaled by 0.01; biases start at zero
        parameters['W' + str(l)] = np.random.randn(dims[l], dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((dims[l], 1))
    return parameters
def LinearForward(A_pre,W,b):
    Z = W @ A_pre + b
    return Z
def ActivationForward(A_pre,W,b,activation):
    Z = LinearForward(A_pre, W, b)
    if activation == 'sigmoid':
        A = Sigmoid(Z)
    elif activation == 'relu':
        A = ReLU(Z)
    return Z,A
def ForwardPropagation(X,parameters):
    cache = {}
    cache['Z0'], cache['A0'] = np.zeros(X.shape), X
    L = len(parameters) // 2
    for l in range(1,L+1):
        # hidden layers use ReLU; the output layer (l == L) uses sigmoid
        if l == L:
            cache['Z' + str(l)], cache['A' + str(l)] = ActivationForward(
                cache['A' + str(l - 1)], parameters['W' + str(l)], parameters['b' + str(l)], activation = 'sigmoid')
        else:
            cache['Z' + str(l)], cache['A' + str(l)] = ActivationForward(
                cache['A' + str(l - 1)], parameters['W' + str(l)], parameters['b' + str(l)], activation = 'relu')
    #print(cache.keys())
    return cache
def ComputeCost(Y, cache):
    m = Y.shape[1]
    L = len(cache) // 2 - 1
    # cross-entropy cost; Y and A_L are 1 x m, so each product below is a 1 x 1 array
    J = - Y @ np.log(cache['A' + str(L)].T) - (1 - Y) @ np.log(1 - cache['A' + str(L)].T)
    return J / m
def LinearBackward(A_pre,dZ,W,b):
    m = A_pre.shape[1]
    dW = dZ @ A_pre.T / m
    db = np.sum(dZ, axis = 1, keepdims = True) / m
    dA_pre = W.T @ dZ
    return dA_pre,dW,db
def ActivationBackward(Z,A_pre,dA,W,b,activation):
    if activation == 'sigmoid':
        dZ = dA * SigmoidBackward(Z)
    elif activation == 'relu':
        dZ = dA * ReLUBackward(Z)
    dA_pre, dW, db = LinearBackward(A_pre, dZ, W, b)
    return dZ,dW,db,dA_pre
def BackwardPropagation(Y,cache,parameters):
    L = len(parameters) // 2
    m = Y.shape[1]
    grads = {}
    grads['dA' + str(L)] = - Y / cache['A' + str(L)] + (1 - Y) / (1 - cache['A' + str(L)])
    for l in range(L,0,-1):
        if l == L:
            grads['dZ' + str(l)], grads['dW' + str(l)], grads['db' + str(l)], grads['dA' + str(l - 1)] = ActivationBackward(
                Z = cache['Z' + str(l)],
                A_pre = cache['A' + str(l - 1)],
                dA = grads['dA' + str(l)],
                W = parameters['W' + str(l)],
                b = parameters['b' + str(l)],
                activation = 'sigmoid')
        else:
            grads['dZ' + str(l)], grads['dW' + str(l)], grads['db' + str(l)], grads['dA' + str(l - 1)] = ActivationBackward(
                Z = cache['Z' + str(l)],
                A_pre = cache['A' + str(l - 1)],
                dA = grads['dA' + str(l)],
                W = parameters['W' + str(l)],
                b = parameters['b' + str(l)],
                activation = 'relu')
    # dummy layer-0 entries; layer 0 is the input and has no parameters to update
    grads['dZ0'] = np.array([0]).reshape(1,1)
    grads['dW0'] = np.array([0]).reshape(1,1)
    grads['db0'] = np.array([0]).reshape(1,1)
    #print(grads.keys())
    return grads
def UpdateParameters(parameters,grads,learning_rate):
    L = len(parameters) // 2
    for l in range(1,L+1):
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters
def Model(X,Y,dims,learning_rate,iteration_num):
    parameters = InitializeParameters(dims)
    costs = []
    for i in range(iteration_num):
        cache = ForwardPropagation(X, parameters)
        if i % 100 == 0:
            costs.append(np.squeeze(ComputeCost(Y, cache)))
            print('time:',i,'cost:',costs[len(costs) - 1])
        grads = BackwardPropagation(Y, cache, parameters)
        parameters = UpdateParameters(parameters, grads, learning_rate)
    return parameters,costs
def Prediction(parameters,X,Y):
    L = len(parameters) // 2
    cache = ForwardPropagation(X, parameters)
    A = cache['A' + str(L)]
    p = np.zeros(Y.shape)
    m = A.shape[1]
    for i in range(m):
        if A[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0
    print("Accuracy: " + str(np.sum((p == Y) / m)))
    return p
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255
n_x = train_x.shape[0]
n_y = train_y.shape[0]
dims = (n_x,20,7,5,n_y)
parameters,costs = Model(train_x, train_y, dims, learning_rate = 0.0075,iteration_num = 2500)
print('train set:')
Prediction(parameters,train_x,train_y)
print('test set:')
Prediction(parameters,test_x,test_y)
plt.plot(list(range(25)),costs)
plt.show()

2 Answers
def ReLUBackward(z):
    dz = np.ones(z.shape)
    dz[z < 0] = 0
    return dz

In the line dz = np.ones(z.shape): the dz here should be dA.
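To make that suggestion concrete, here is a minimal sketch of a ReLU backward step that builds dZ from dA rather than from a matrix of ones, in the style of the relu_backward helper shipped with dnn_app_utils_v2 (the function name and signature below are my own illustration, not a quote from that file):

import numpy as np

def relu_backward_from_dA(dA, Z):
    # Copy the upstream gradient dA and zero it wherever the cached
    # pre-activation Z was non-positive: ReLU only passes the gradient
    # through where Z > 0.
    dZ = np.array(dA, copy=True)
    dZ[Z <= 0] = 0
    return dZ

Since the question's code multiplies the mask by dA anyway (dZ = dA * ReLUBackward(Z)), both forms give essentially the same dZ, differing only at Z == 0. If the cost still sits near 0.64 after this change, it may also be worth comparing InitializeParameters with the assignment's initialize_parameters_deep, which, as far as I recall, scales each weight matrix by 1/np.sqrt(dims[l-1]) rather than by 0.01 for the L-layer model.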