1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
| import torch import matplotlib.pyplot as plt from lr_utils import load_dataset
def __init__(dim): ''' initialization dim - dimension w - (dim, 1) b - is initialized to zero tensor ''' w = torch.zeros((dim,1), requires_grad = True) b = torch.zeros(1, requires_grad = True) assert w.shape == (dim, 1), "The dimension of w is incorrect, it should be (dim, 1)" assert b.shape == (1,), "Parameter b is incorrect, it should be (1,)"
return (w, b)
def propagate_torch(w, b, X, Y):
    """Forward pass: compute the logistic-regression cost.

    Gradients are NOT computed or returned here (the original docstring
    wrongly claimed dw/db were returned); callers obtain them by calling
    cost.backward(), which fills w.grad and b.grad via autograd.

    Parameters
    ----------
    w : torch.Tensor, shape (num_px * num_px * 3, 1)
        Weight column vector.
    b : torch.Tensor
        Scalar bias.
    X : torch.Tensor, shape (num_px * num_px * 3, m)
        Input matrix, one training example per column.
    Y : torch.Tensor, shape (1, m)
        True labels (0 or 1).

    Returns
    -------
    cost : torch.Tensor
        Scalar negative log-likelihood (cross-entropy) cost averaged
        over the m examples.
    """
    m = X.shape[1]
    # Linear scores, then element-wise sigmoid activation.
    Z = w.T @ X + b
    A = torch.sigmoid(Z)
    # Binary cross-entropy averaged over the batch.
    cost = (-1 / m) * torch.sum(Y * torch.log(A) + (1 - Y) * torch.log(1 - A))
    return cost
def optimize(w, b, X, Y, iter, lr, print_cost=False):
    """Run plain gradient descent on (w, b) to minimize the logistic cost.

    Parameters
    ----------
    w, b : torch.Tensor (requires_grad=True)
        Parameters; updated in place.
    X : torch.Tensor, shape (n_features, m)
        Training inputs, one example per column.
    Y : torch.Tensor, shape (1, m)
        Labels (0 or 1).
    iter : int
        Number of gradient-descent iterations.
    lr : float
        Learning rate.
    print_cost : bool
        If True, print the cost every 100 iterations.

    Returns
    -------
    (params, costs)
        params is {"w": w, "b": b}; costs holds a detached scalar cost
        tensor sampled every 100 iterations.
    """
    costs = []
    for i in range(iter):
        # Clear gradients accumulated by the previous backward pass.
        w.grad = None
        b.grad = None
        cost = propagate_torch(w=w, b=b, X=X, Y=Y)
        cost.backward()
        # The parameter update itself must not be tracked by autograd.
        with torch.no_grad():
            w -= lr * w.grad
            b -= lr * b.grad
        if i % 100 == 0:
            # detach() so the stored cost does not keep the whole
            # computation graph for that iteration alive in memory.
            costs.append(cost.detach())
            if print_cost:
                # Print the plain float (the original printed a tensor
                # repr followed by a spurious '%').
                print(f"number of iterations: {i}, error value: {cost.item():.6f}")
    params = {"w": w, "b": b}
    return (params, costs)
def predict(w, b, X):
    """Predict binary labels (0 or 1) using learned logistic parameters.

    Parameters
    ----------
    w : torch.Tensor
        Weights; any layout with X.shape[0] elements (reshaped internally
        to (n_features, 1)).
    b : torch.Tensor or float
        Bias.
    X : torch.Tensor, shape (n_features, m)
        Inputs, one example per column.

    Returns
    -------
    Y_prediction : torch.Tensor, shape (1, m), dtype float32
        1.0 where the predicted probability exceeds 0.5, else 0.0.
    """
    m = X.shape[1]
    w = w.reshape(X.shape[0], 1)
    # Predicted probability for each column of X.
    A = torch.sigmoid(w.T @ X + b)
    # Vectorized thresholding replaces the original per-element Python
    # loop: identical values (1.0 / 0.0) and identical float32 dtype.
    Y_prediction = (A > 0.5).to(torch.float32)
    assert Y_prediction.shape == (1, m)
    return Y_prediction
def cat_model(X_train, Y_train, X_test, Y_test, iter=2000, lr=0.5, print_cost=False):
    """Train and evaluate a logistic-regression cat classifier.

    Parameters
    ----------
    X_train : torch.Tensor, shape (num_px * num_px * 3, m_train)
        Training inputs.
    Y_train : torch.Tensor, shape (1, m_train)
        Training labels.
    X_test : torch.Tensor, shape (num_px * num_px * 3, m_test)
        Test inputs.
    Y_test : torch.Tensor, shape (1, m_test)
        Test labels.
    iter : int
        Number of optimization iterations.
    lr : float
        Learning rate.
    print_cost : bool
        If True, print the cost every 100 iterations.

    Returns
    -------
    d : dict
        Sampled costs, train/test predictions, learned parameters,
        learning rate, and iteration count.
    """
    # Zero-initialize parameters sized to the number of input features.
    w, b = __init__(X_train.shape[0])

    # Fit the parameters by gradient descent.
    params, costs = optimize(w=w, b=b, X=X_train, Y=Y_train,
                             iter=iter, lr=lr, print_cost=print_cost)
    w = params["w"]
    b = params["b"]

    # Predict on both splits with the trained parameters.
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    # Accuracy = 100% minus the mean absolute prediction error (in %).
    print(f"Training set accuracy: {100 - torch.mean(torch.abs(Y_prediction_train - Y_train)) * 100}%")
    print(f"Test set accuracy: {100 - torch.mean(torch.abs(Y_prediction_test - Y_test)) * 100}%")

    return {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "w": w,
        "b": b,
        "lr": lr,
        "iter": iter,
    }
if __name__ == "__main__":
    # load_dataset (from lr_utils) is assumed to return array-like data:
    #   train_set_x_orig: (m_train, num_px, num_px, 3) raw images
    #   train_set_y:      (1, m_train) labels
    # TODO(review): confirm exact return types against lr_utils.
    train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

    # cat_model expects torch tensors of shape (num_px * num_px * 3, m):
    # flatten each image into a column and scale pixel values to [0, 1].
    # The original passed the raw (m, num_px, num_px, 3) arrays straight
    # through, which does not match propagate_torch's expected layout.
    train_x = torch.as_tensor(
        train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    ).float() / 255.0
    test_x = torch.as_tensor(
        test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
    ).float() / 255.0
    train_y = torch.as_tensor(train_set_y).float()
    test_y = torch.as_tensor(test_set_y).float()

    d = cat_model(
        X_train=train_x,
        Y_train=train_y,
        X_test=test_x,
        Y_test=test_y,
        print_cost=True,
        lr=0.01,
    )

    # Plot the sampled costs (one sample per 100 iterations).
    costs = [c.detach().item() for c in d["costs"]]
    plt.plot(costs)
    # The original swapped these labels: the x-axis is the iteration
    # index (in hundreds) and the y-axis is the cost value.
    plt.xlabel("iterations (every hundreds)")
    plt.ylabel("cost")
    # Single quotes inside the f-string: double quotes nested in a
    # double-quoted f-string are a SyntaxError on Python < 3.12.
    plt.title(f"Learning rate = {d['lr']}")
    plt.show()
|