import struct
import numpy as np
import gzip

try:
    from simple_ml_ext import *
except ImportError:
    pass

def add(x, y):
    """ A trivial 'add' function you should implement to get used to the
    autograder and submission system.  The solution to this problem is in
    the homework notebook.

    Args:
        x (Python number or numpy array)
        y (Python number or numpy array)

    Returns:
        Sum of x + y
    """
    return x + y

def parse_mnist(image_filename, label_filename):
    """ Read an images file and a labels file in MNIST format.  See this page:
    http://yann.lecun.com/exdb/mnist/ for a description of the file format.

    Args:
        image_filename (str): name of gzipped images file in MNIST format
        label_filename (str): name of gzipped labels file in MNIST format

    Returns:
        Tuple (X, y):
            X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
                data.  The dimensionality of the data should be
                (num_examples x input_dim) where 'input_dim' is the full
                dimension of the data, e.g., since MNIST images are 28x28, it
                will be 784.  Values should be of type np.float32, and the data
                should be normalized to have a minimum value of 0.0 and a
                maximum value of 1.0 (i.e., scale original values of 0 to 0.0
                and 255 to 1.0).

            y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
                labels of the examples.  Values should be of type np.uint8 and
                for MNIST will contain the values 0-9.
    """
    with gzip.open(image_filename, 'rb') as f:
        # The image header is 16 bytes: magic number, image count, rows, and
        # columns, each stored as a big-endian 32-bit integer.
        f.seek(4)
        num_images = int.from_bytes(f.read(4), byteorder="big", signed=True)
        f.seek(16)
        image_bytes = f.read()
    X = np.frombuffer(image_bytes, dtype=np.uint8).astype(np.float32)
    X = X.reshape(num_images, 28 * 28) / 255

    with gzip.open(label_filename, 'rb') as f:
        # The label header is 8 bytes: a magic number and the label count;
        # skip it and let frombuffer infer the number of labels.
        f.seek(8)
        label_bytes = f.read()
    y = np.frombuffer(label_bytes, dtype=np.uint8)

    return X, y
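
# A minimal sanity check for parse_mnist, kept as a comment so it does not run
# on import.  It assumes the standard gzipped MNIST training files are present
# under data/ (the same paths used in __main__ below); the expected shapes and
# value ranges come from the docstring above.
#
#   X, y = parse_mnist("data/train-images-idx3-ubyte.gz",
#                      "data/train-labels-idx1-ubyte.gz")
#   assert X.shape == (60000, 784) and X.dtype == np.float32
#   assert y.shape == (60000,) and y.dtype == np.uint8
#   assert X.min() == 0.0 and X.max() == 1.0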

def softmax_loss(Z, y):
    """ Return softmax loss.  Note that for the purposes of this assignment,
    you don't need to worry about "nicely" scaling the numerical properties
    of the log-sum-exp computation, but can just compute this directly.

    Args:
        Z (np.ndarray[np.float32]): 2D numpy array of shape
            (batch_size, num_classes), containing the logit predictions for
            each class.
        y (np.ndarray[np.uint8]): 1D numpy array of shape (batch_size, )
            containing the true label of each example.

    Returns:
        Average softmax loss over the sample.
    """
    # Per-example loss is log(sum_j exp(Z[i, j])) - Z[i, y[i]]; return the mean.
    log_sum_exp = np.log(np.sum(np.exp(Z), axis=1))
    Z_y = Z[np.arange(Z.shape[0]), y]
    return np.mean(log_sum_exp - Z_y)
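
# An optional, numerically stable variant of the loss above.  The docstring
# notes that the direct computation is fine for this assignment, so this is
# only a reference sketch; the helper name is not part of the assignment
# interface.
def softmax_loss_stable(Z, y):
    """ Average softmax loss computed with a max-shifted log-sum-exp."""
    # Subtracting the per-row max leaves the loss unchanged but keeps exp()
    # from overflowing on large logits.
    Z_shift = Z - Z.max(axis=1, keepdims=True)
    log_sum_exp = np.log(np.sum(np.exp(Z_shift), axis=1))
    return np.mean(log_sum_exp - Z_shift[np.arange(Z.shape[0]), y])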

def softmax_regression_epoch(X, y, theta, lr=0.1, batch=100):
    """ Run a single epoch of SGD for softmax regression on the data, using
    the step size lr and specified batch size.  This function should modify
    the theta matrix in place, and you should iterate through batches in X
    _without_ randomizing the order.

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        theta (np.ndarray[np.float32]): 2D array of softmax regression
            parameters, of shape (input_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD minibatch

    Returns:
        None
    """
    for i in range(0, X.shape[0], batch):
        end = min(i + batch, X.shape[0])
        X_batch = X[i:end, :]
        # Softmax probabilities for the minibatch logits.
        Z = X_batch @ theta
        Z_exp = np.exp(Z)
        Z_norm = Z_exp / np.sum(Z_exp, axis=1)[:, np.newaxis]
        # One-hot encoding of the minibatch labels.
        I_y = np.zeros((end - i, theta.shape[1]), dtype=int)
        for j in range(end - i):
            I_y[j, y[i + j]] = 1
        # Gradient of the average softmax loss, then an in-place SGD step.
        grad = X_batch.T @ (Z_norm - I_y) / (end - i)
        theta[:] = theta - lr * grad
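
# Note: the per-row loops that build the one-hot matrix I_y (here and in
# nn_epoch below) can be replaced by a single vectorized assignment; a sketch
# under the same shapes:
#
#   I_y = np.zeros((end - i, theta.shape[1]), dtype=np.float32)
#   I_y[np.arange(end - i), y[i:end]] = 1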

def nn_epoch(X, y, W1, W2, lr=0.1, batch=100):
    """ Run a single epoch of SGD for a two-layer neural network defined by the
    weights W1 and W2 (with no bias terms):
        logits = ReLU(X * W1) * W2
    The function should use the step size lr, and the specified batch size
    (and again, without randomizing the order of X).  It should modify the
    W1 and W2 matrices in place.

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W1 (np.ndarray[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W2 (np.ndarray[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD minibatch

    Returns:
        None
    """
    for i in range(0, X.shape[0], batch):
        end = min(i + batch, X.shape[0])
        X_batch = X[i:end, :]
        y_batch = y[i:end]
        # One-hot encoding of the minibatch labels.
        I_y = np.zeros((len(y_batch), W2.shape[1]), dtype=int)
        for j in range(len(y_batch)):
            I_y[j, y_batch[j]] = 1
        # Forward pass: ReLU hidden activations, then the softmax residual G_2.
        Z_1 = np.maximum(X_batch @ W1, 0)
        logits_exp = np.exp(Z_1 @ W2)
        G_2 = logits_exp / np.sum(logits_exp, axis=1)[:, np.newaxis] - I_y
        # Backward pass: push G_2 through W2 and mask by the ReLU derivative.
        G_1 = (Z_1 > 0).astype(int) * (G_2 @ W2.T)
        # Average the gradients over the minibatch and update in place.
        delta_W_1 = X_batch.T @ G_1 / (end - i)
        delta_W_2 = Z_1.T @ G_2 / (end - i)
        W1 -= lr * delta_W_1
        W2 -= lr * delta_W_2
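
# A small finite-difference check of the W2 gradient used in nn_epoch, meant
# for debugging only.  The function name, the probed entry (0, 0), and the
# epsilon are illustrative choices, not part of the assignment interface.
def _check_w2_grad(X, y, W1, W2, eps=1e-4):
    """ Return (analytic, numeric) estimates of dLoss/dW2[0, 0] on one batch."""
    Z_1 = np.maximum(X @ W1, 0)
    probs = np.exp(Z_1 @ W2) / np.sum(np.exp(Z_1 @ W2), axis=1)[:, np.newaxis]
    probs[np.arange(y.shape[0]), y] -= 1
    # Analytic gradient of the average softmax loss with respect to W2.
    analytic = (Z_1.T @ probs / X.shape[0])[0, 0]
    # Central difference on the same entry; Z_1 does not depend on W2.
    W2_plus, W2_minus = W2.copy(), W2.copy()
    W2_plus[0, 0] += eps
    W2_minus[0, 0] -= eps
    numeric = (softmax_loss(Z_1 @ W2_plus, y)
               - softmax_loss(Z_1 @ W2_minus, y)) / (2 * eps)
    return analytic, numeric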

def loss_err(h, y):
    """ Helper function to compute both loss and error"""
    return softmax_loss(h, y), np.mean(h.argmax(axis=1) != y)

def train_softmax(X_tr, y_tr, X_te, y_te, epochs=10, lr=0.5, batch=100, cpp=False):
    """ Example function to fully train a softmax regression classifier """
    theta = np.zeros((X_tr.shape[1], y_tr.max() + 1), dtype=np.float32)
    print("| Epoch | Train Loss | Train Err | Test Loss | Test Err |")
    for epoch in range(epochs):
        if not cpp:
            softmax_regression_epoch(X_tr, y_tr, theta, lr=lr, batch=batch)
        else:
            # Provided by the optional C++ extension imported from simple_ml_ext.
            softmax_regression_epoch_cpp(X_tr, y_tr, theta, lr=lr, batch=batch)
        train_loss, train_err = loss_err(X_tr @ theta, y_tr)
        test_loss, test_err = loss_err(X_te @ theta, y_te)
        print("| {:>4} | {:.5f} | {:.5f} | {:.5f} | {:.5f} |"
              .format(epoch, train_loss, train_err, test_loss, test_err))
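
# Example usage (a sketch; assumes X_tr/y_tr/X_te/y_te were loaded with
# parse_mnist, and that the compiled simple_ml_ext module is importable when
# cpp=True):
#
#   train_softmax(X_tr, y_tr, X_te, y_te, epochs=10, lr=0.1)
#   train_softmax(X_tr, y_tr, X_te, y_te, epochs=10, lr=0.1, cpp=True)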

def train_nn(X_tr, y_tr, X_te, y_te, hidden_dim=500, epochs=10, lr=0.5, batch=100):
    """ Example function to train a two-layer neural network """
    n, k = X_tr.shape[1], y_tr.max() + 1
    np.random.seed(0)
    W1 = np.random.randn(n, hidden_dim).astype(np.float32) / np.sqrt(hidden_dim)
    W2 = np.random.randn(hidden_dim, k).astype(np.float32) / np.sqrt(k)

    print("| Epoch | Train Loss | Train Err | Test Loss | Test Err |")
    for epoch in range(epochs):
        nn_epoch(X_tr, y_tr, W1, W2, lr=lr, batch=batch)
        train_loss, train_err = loss_err(np.maximum(X_tr @ W1, 0) @ W2, y_tr)
        test_loss, test_err = loss_err(np.maximum(X_te @ W1, 0) @ W2, y_te)
        print("| {:>4} | {:.5f} | {:.5f} | {:.5f} | {:.5f} |"
              .format(epoch, train_loss, train_err, test_loss, test_err))

if __name__ == "__main__":
    X_tr, y_tr = parse_mnist("data/train-images-idx3-ubyte.gz",
                             "data/train-labels-idx1-ubyte.gz")
    X_te, y_te = parse_mnist("data/t10k-images-idx3-ubyte.gz",
                             "data/t10k-labels-idx1-ubyte.gz")

    print("Training softmax regression")
    train_softmax(X_tr, y_tr, X_te, y_te, epochs=10, lr=0.1)

    print("\nTraining two layer neural network w/ 100 hidden units")
    train_nn(X_tr, y_tr, X_te, y_te, hidden_dim=100, epochs=20, lr=0.2)