Ich habe vor kurzem angefangen, über Neuronale Netze zu lernen und beschloss, meine eigenen einfachen 2-Schicht-KNN zu kodieren und es mit dem MNIST-Datensatz zu benchmarken. Ich habe versucht, es mit Batch-SGD zu programmieren, wo die Batch-Größe vom Benutzer bereitgestellt wird. Mein Code ist wie folgt:Optimieren von selbstkodierten 2-Schicht Künstliche Neuronale Netze
class NeuralNetwork:
def __init__(self, inodes, hnodes, outnodes, activation_func, learning_rate):
self.inodes = inodes
self.hnodes = hnodes
self.onodes = outnodes
self.activation_function = activation_func
self.lr = learning_rate
self.wih = np.random.randn(self.hnodes, self.inodes)/pow(self.inodes, 0.5)
self.who = np.random.randn(self.onodes, self.hnodes)/pow(self.hnodes, 0.5)
def train(self, training_data, target_labels, batch=1, l2_penalty=0, verbose=False):
batch_size = len(training_data)/batch
print "Starting to train........"
for i in range(batch):
train_data_batch = training_data[batch_size*i : batch_size*(i+1)]
label_batch = target_labels[batch_size*i : batch_size*(i+1)]
batch_error = self.train_batch(train_data_batch, label_batch, l2_penalty)
if verbose:
print "Batch : " + str(i+1) + " ; Error : " + str(batch_error)
print "..........Finished!"
def train_batch(self, training_data, target_labels, l2_penalty=0):
train = np.array(training_data, ndmin=2).T
label = np.array(target_labels, ndmin=2).T
inputs = train # IxN
hidden_input = np.dot(self.wih, inputs) # (HxI).(IxN) = HxN
hidden_ouputs = self.activation_function(hidden_input) # (HxN) -> (HxN)
final_input = np.dot(self.who, hidden_ouputs) # (OxH).(HxN) -> OxN
final_outputs = self.activation_function(final_input) # OxN -> OxN
final_outputs = np.exp(final_outputs) # OxN
for f in range(len(final_outputs)):
final_outputs[f] = final_outputs[f]/sum(final_outputs[f])
final_error_wrt_out = label - final_outputs # OxN
hidden_error_wrt_out = np.dot(self.who.T, final_outputs) # HxN
final_in_wrt_out = self.activation_function(final_input, der=True) # OxN
hidden_in_wrt_out = self.activation_function(hidden_input, der=True) # HxN
grad_who = np.dot(final_error_wrt_out * final_in_wrt_out, hidden_ouputs.T) # (OxN).(NxH) -> OxH
grad_wih = np.dot(hidden_error_wrt_out * hidden_in_wrt_out, inputs.T) # (HxN).(NxI) -> HxI
self.who = self.who - self.lr * (grad_who + l2_penalty*(self.who))
self.wih = self.wih - self.lr * (grad_wih + l2_penalty*(self.wih))
return np.sum(final_error_wrt_out * final_error_wrt_out)/(2*len(training_data))
def query(self, inputs):
if len(inputs) != self.inodes:
print "Invalid input size"
return
inputs = np.array(inputs)
hidden_input = np.dot(self.wih, inputs)
hidden_ouputs = self.activation_function(hidden_input)
final_input = np.dot(self.who, hidden_ouputs)
final_outputs = self.activation_function(final_input)
final_outputs = np.exp(final_outputs)
total = sum(final_outputs)
probs = final_outputs/total
return probs
ich einen ähnlichen Code von Tariq Rashid auf github gefunden, die etwa 95% Genauigkeit gibt. Mein Code andererseits gibt nur 10%.
Ich habe versucht, den Code mehrmals mit Bezug auf verschiedene Tutorials auf Backpropogation zu debuggen, aber nicht in der Lage, meine Genauigkeit zu verbessern. Ich würde mich über jeden Einblick in das Thema freuen.
Bearbeiten 1: Dies folgt der Antwort von mattdeak.
Ich hatte zuvor MSE statt Negative Log Likelihood Fehler für die Softmax-Ebene verwendet, ein Fehler meinerseits. Nach der Antwort habe ich die Zugfunktion wie folgt geändert:
def train_batch(self, training_data, target_labels, l2_penalty=0):
train = np.array(training_data, ndmin=2).T
label = np.array(target_labels, ndmin=2).T
inputs = train # IxN
hidden_input = np.dot(self.wih, inputs) # (HxI).(IxN) = HxN
hidden_ouputs = self.activation_function(hidden_input) # (HxN) -> (HxN)
final_input = np.dot(self.who, hidden_ouputs) # (OxH).(HxN) -> OxN
final_outputs = self.activation_function(final_input) # OxN -> OxN
final_outputs = np.exp(final_outputs) # OxN
for f in range(len(final_outputs)):
final_outputs[f] = final_outputs[f]/sum(final_outputs[f])
error = label - final_outputs
final_error_wrt_out = final_outputs - 1 # OxN
hidden_error_wrt_out = np.dot(self.who.T, -np.log(final_outputs)) # (HxO).(OxN) -> HxN
final_in_wrt_out = self.activation_function(final_input, der=True) # OxN
hidden_in_wrt_out = self.activation_function(hidden_input, der=True) # HxN
grad_who = np.dot(final_error_wrt_out * final_in_wrt_out, hidden_ouputs.T) # (OxN).(NxH) -> OxH
grad_wih = np.dot(hidden_error_wrt_out * hidden_in_wrt_out, inputs.T) # (HxN).(NxI) -> HxI
self.who = self.who - self.lr * (grad_who + l2_penalty*(self.who))
self.wih = self.wih - self.lr * (grad_wih + l2_penalty*(self.wih))
return np.sum(final_error_wrt_out * final_error_wrt_out)/(2*len(training_data))
Dies hat jedoch zu keinem Leistungsgewinn geführt.
@mattdeak es in der Tat softmax Regression ist, und ich bin schließlich np.exp tun (final_outputs) /np.sum (np.exp (final_outputs)). Dieses Ergebnis wird in der Variablen 'probs' mit der Schleife 'for' unmittelbar nach final_outputs = np.exp (final_outputs) gespeichert. Ich fand es einfacher, diese Operation über mehrere Zeilen durchzuführen, da es mir half, das Programm besser zu debuggen. – Chaitanya