
Neural network bad convergence

I've been reading a lot about NNs for the last two weeks; I think I've seen pretty much every "XOR" approach tutorial on the net. But I couldn't get my own to work. I started with a simple "OR" neuron approach, which gave good results. I think my problem is the backpropagation implementation. I went with an object-oriented design, so here are the main parts.

Three classes:

Neuron

public class Neuron { 

/* 
* Attributes 
*/ 

double[] inputs; 
double[] weights; 

double output; 
double error; 

double delta; 
double deltaWeight; 

/* 
* Constructors 
*/ 

public Neuron(int nInputs) 
{ 
    inputs = new double[nInputs + 1]; 
    inputs[inputs.length - 1] = 1; // bias 
    weights = new double[nInputs + 1]; 
} 

/* 
* Methods 
*/ 

/** 
* Reset all weights of the neuron to random values between -0.5 and 0.5 
*/ 
public void reset() 
{  
    Random random = new Random(); 
    for (int i = 0; i < weights.length; i++) 
     weights[i] = random.nextDouble() - 0.5d; // uniform in [-0.5, 0.5) 
} 

/** 
* Compute output for given inputs 
* @param inputs 
*/ 
public void computeOutput(double inputs[]) 
{ 
    setInputs(inputs); 
    output = Sigmoid.activation(getDotProduct()); 
} 

/** 
* Compute error for given ideal 
* @param ideal 
*/ 
public void computeError(double ideal) 
{ 
    error = ideal - output; 
    delta = error; 
} 

/** 
* Compute error for hidden neurons 
* @param nextLayer the layer n+1 (the one closer to the output) 
*/ 
public void computeError(FeedForwardLayer nextLayer, int position) 
{ 
    double sum = 0; 
    for (int i = 0; i < nextLayer.neurons.length; i++) 
     sum += (nextLayer.neurons[i].delta * nextLayer.neurons[i].weights[position]); 

    delta = Sigmoid.derivative(getDotProduct()) * sum; 
    error = delta; 
} 

/** 
* Adjust every weight of the neuron 
* Note: deltaWeight is a single shared field, so with momentum > 0 each 
* weight would reuse the previous weight's delta rather than its own; 
* correct momentum needs a per-weight deltaWeight array. Harmless here, 
* since the network uses momentum = 0. 
*/ 
public void adjustWeights(double lambda, double momentum) 
{ 
    for (int i = 0; i < weights.length; i++) 
    { 
     double lastDeltaWeight = deltaWeight; 
     deltaWeight = lambda * (delta * inputs[i]) + momentum * lastDeltaWeight; 
     weights[i] += deltaWeight; 
    } 
} 

@Override 
public String toString() 
{ 
    String str = ""; 
    for (int i = 0; i < weights.length; i++) 
     str = str.concat(String.format("IN|W --> %.6f | %.6f \n", (float) inputs[i], (float) weights[i])); 

    str = str.concat("Output = " + output + "\n"); 
    str = str.concat("Error = " + error + "\n"); 
    return str; 
} 

/* 
* Getters & Setters 
*/ 

/** 
* @return dot product of weights and inputs (bias handled as the extra input) 
*/ 
public double getDotProduct() 
{ 
    double sum = 0; 
    for (int i = 0; i < inputs.length; i++) 
     sum += (weights[i] * inputs[i]); 

    return sum; 
} 

/** 
* Set inputs (keep bias input) 
* @param inputs 
*/ 
public void setInputs(double[] inputs) 
{ 
    for (int i = 0; i < inputs.length; i++) 
     this.inputs[i] = inputs[i]; 
} 

/** 
* Set every weight to a single value 
* @param weight 
*/ 
public void setWeights(double weight) 
{ 
    for (int i = 0; i < weights.length; i++) 
     this.weights[i] = weight; 
} 
} 
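
The Sigmoid helper referenced by computeOutput and computeError is not shown in the post; a minimal sketch of what it presumably looks like, assuming derivative takes the raw dot product (as computeError passes getDotProduct()):

public class Sigmoid { 

    /** Logistic activation: maps any real x into (0, 1) */ 
    public static double activation(double x) 
    { 
        return 1.0 / (1.0 + Math.exp(-x)); 
    } 

    /** Derivative of the logistic function, expressed in terms of the raw 
     *  net input x (matching how Neuron.computeError calls it) */ 
    public static double derivative(double x) 
    { 
        double s = activation(x); 
        return s * (1 - s); 
    } 
} 

One thing worth noting: computeError(double ideal) sets delta = error without multiplying by the derivative. That is the correct output delta for a cross-entropy loss with a sigmoid output (the derivative term cancels out), but for a plain squared-error loss the output delta is usually error * Sigmoid.derivative(net).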

FeedForwardLayer (which contains the Neurons)

public class FeedForwardLayer { 

/* 
* Attributes 
*/ 

Neuron[] neurons; 
LayerTypes type; 

/* 
* Constructors 
*/ 

/** 
* First layer constructor 
* @param nNeurons 
*/ 
public FeedForwardLayer(int nInputs, int nNeurons, LayerTypes type) 
{ 
    neurons = new Neuron[nNeurons]; 
    for (int i = 0; i < neurons.length; i++) 
     neurons[i] = new Neuron(nInputs); 

    this.type = type; 
} 

/* 
* Methods 
*/ 

/** 
* Reset all weights of the layer's neurons to random values between -1 and 1 
*/ 
public void reset() 
{ 
    for (Neuron neuron : neurons) 
     neuron.reset(); 
} 

/** 
* Compute outputs for the given inputs; for non-input layers, pass the 
* previous layer's outputs 
* @param inputs 
*/ 
public void computeOutputs(double[] inputs) 
{ 
    for (int i = 0; i < neurons.length; i++) 
     neurons[i].computeOutput(inputs); 
} 

/** 
* Compute errors (for the output layer) 
* @param ideals 
*/ 
public void computeErrors(double[] ideals) 
{ 
    for (int i = 0; i < neurons.length; i++) 
     neurons[i].computeError(ideals[i]); 
} 

/** 
* Compute errors (for hidden layers) 
* @param next the layer n+1 
*/ 
public void computeErrors(FeedForwardLayer next) 
{ 
    for (int i = 0; i < neurons.length; i++) 
     neurons[i].computeError(next, i); 
} 

/** 
* Adjust weights for every neuron 
*/ 
public void adjustWeights(double lambda, double momentum) 
{ 
    for (Neuron neuron : neurons) 
     neuron.adjustWeights(lambda, momentum); 
} 

@Override 
public String toString() 
{ 
    String str = ""; 
    for (int i = 0; i < neurons.length; i++) 
     str = str.concat("Neuron " + i + "\n" + neurons[i]); 
    return str; 
} 

/* 
* Getters - Setters 
*/ 

/** 
* @return true if layer is input, false otherwise 
*/ 
public boolean isInput() 
{ 
    return type == LayerTypes.INPUT; 
} 

/** 
* @return true if layer is output, false otherwise 
*/ 
public boolean isOutput() 
{ 
    return type == LayerTypes.OUTPUT; 
} 

/** 
* @return an array of layer's outputs 
*/ 
public double[] getOutputs() 
{ 
    double[] outputs = new double[neurons.length]; 

    for (int i = 0; i < neurons.length; i++) 
     outputs[i] = neurons[i].output; 

    return outputs; 
} 

/** 
* @return array of layer's errors 
*/ 
public double[] getErrors() 
{ 
    double[] errors = new double[neurons.length]; 

    for (int i = 0; i < neurons.length; i++) 
     errors[i] = neurons[i].error; 

    return errors; 
} 

/** 
* Set all the weights of the layer to given weight 
* @param weight 
*/ 
public void setWeights(double weight) 
{ 
    for (int i = 0; i < neurons.length; i++) 
     neurons[i].setWeights(weight); 
} 
} 
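
The LayerTypes enum is not shown either; from its usage (INPUT, OUTPUT, and indexing into values() in FeedForwardNetwork.toString) it is presumably something like the following, with HIDDEN assumed as the middle value:

public enum LayerTypes { 
    INPUT,  // first layer, fed the raw inputs 
    HIDDEN, // middle layer(s) 
    OUTPUT  // last layer, compared against the ideals 
} 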

FeedForwardNetwork (which contains the FeedForwardLayers)

public class FeedForwardNetwork { 

static final double lambda = 0.1; 
static final double momentum = 0; 

/* 
* Attributes 
*/ 

private ArrayList<FeedForwardLayer> layers; 

/* 
* Constructors 
*/ 

public FeedForwardNetwork() 
{ 
    layers = new ArrayList<FeedForwardLayer>(); 
} 

/* 
* Methods 
*/ 

/** 
* Init all the weights to random values 
*/ 
public void reset() 
{  
    for (int i = 0; i < layers.size(); i++) 
     layers.get(i).reset(); 
} 

/** 
* Compute output for all the neurons of all the layers for given inputs 
* @param inputs 
*/ 
public void feedForward(double[] inputs) 
{ 
    //System.err.println("FeedForwardNetwork.feedForward(" + inputs[0] + ", " + inputs[1] +")"); 
    for (int i = 0; i < layers.size(); i++) 
    { 
     //System.err.println("\n*** COMPUTING OUTPUT FOR LAYER " + i + "***\n"); 
     if (layers.get(i).isInput()) 
      layers.get(i).computeOutputs(inputs); 
     else 
      layers.get(i).computeOutputs(layers.get(i - 1).getOutputs()); 
    } 
} 

/** 
* Compute errors for all the neurons of all the layers starting by output layer 
* @param ideals 
*/ 
public void feedBackward(double[] ideals) 
{ 
    //System.err.println("FeedForwardNetwork.feedBackward(" + ideals[0] + ")"); 
    // For each layers starting by output one 
    for (int i = layers.size() - 1; i > 0; i--) 
    { 
     //System.err.println("*** COMPUTING ERROR FOR LAYER " + i + "***"); 
     if (layers.get(i).isOutput()) 
      layers.get(i).computeErrors(ideals); 
     else 
      layers.get(i).computeErrors(layers.get(i + 1)); 
    } 
} 

/** 
* Adjust weights of every layer 
*/ 
public void adjustWeights() 
{ 
    for (FeedForwardLayer feedForwardLayer : layers) 
     feedForwardLayer.adjustWeights(lambda, momentum); 
} 

/** 
* Train the nn with given inputs and outputs 
* @param inputs 
* @param outputs 
*/ 
public void train(double[] inputs, double... outputs) 
{ 
    feedForward(inputs); 
    feedBackward(outputs); 
    adjustWeights(); 
} 

/** 
* Add a layer to the network 
* @param layer 
*/ 
public void addLayer(FeedForwardLayer layer) 
{ 
    layers.add(layer); 
} 

@Override 
public String toString() 
{ 
    String str = ""; 
    for (int i = 0; i < layers.size(); i++) 
     str = str.concat("Layer " + LayerTypes.values()[i] + "\n" + layers.get(i)); 

    str = str.concat("\n"); 
    str = str.concat("OUTPUT = " + getOutputs()[0] + "\n"); 
    str = str.concat("ERROR = " + getError(false) + "\n"); 
    return str; 
} 
/* 
* Getters & Setters 
*/ 

public FeedForwardLayer getInputLayer() 
{ 
    return layers.get(0); 
} 

public FeedForwardLayer getOutputLayer() 
{ 
    return layers.get(layers.size() - 1); 
} 

public FeedForwardLayer getLayer(int index) 
{ 
    return layers.get(index); 
} 

public double getError(boolean abs) 
{ 
    if (abs) 
     return Math.abs(getOutputLayer().neurons[0].error); 

    return getOutputLayer().neurons[0].error; 
} 

public double[] getOutputs() 
{ 
    return getOutputLayer().getOutputs(); 
} 
} 
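
For completeness, a minimal sketch of how these classes would be wired up and trained on the XOR table below. The 2-2-1 topology, epoch count, and the XorTest class name are assumptions for illustration, not part of the original post:

public class XorTest { 
    public static void main(String[] args) 
    { 
        double[][] inputs = { {0, 0}, {0, 1}, {1, 0}, {1, 1} }; 
        double[]   ideals = {     0,      1,      1,      0  }; 

        FeedForwardNetwork nn = new FeedForwardNetwork(); 
        nn.addLayer(new FeedForwardLayer(2, 2, LayerTypes.INPUT)); 
        nn.addLayer(new FeedForwardLayer(2, 2, LayerTypes.HIDDEN)); 
        nn.addLayer(new FeedForwardLayer(2, 1, LayerTypes.OUTPUT)); 
        nn.reset(); // randomize all weights 

        // one "epoch" = one pass over the four XOR patterns 
        for (int epoch = 0; epoch < 10000; epoch++) 
            for (int i = 0; i < inputs.length; i++) 
                nn.train(inputs[i], ideals[i]); 

        for (int i = 0; i < inputs.length; i++) 
        { 
            nn.feedForward(inputs[i]); 
            System.out.printf("%d XOR %d -> %.3f%n", 
                (int) inputs[i][0], (int) inputs[i][1], nn.getOutputs()[0]); 
        } 
    } 
} 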

So I train the network by feeding it epochs of the XOR table:

X | Y | S 
0   0   0 
0   1   1 
1   0   1 
1   1   0 

After thousands of epochs, the network's output settles around 0.5... The interesting fact is: if I replace the training set with an AND table, an OR table, or a NAND table, the nn outputs a constant taken from the S column of the training set (it will output 0.25 for the AND and NAND tables and 0.75 for the OR table).

I just want to know if my implementation is good enough to get this working, ty!


You should try this question on the Programmers group. –

Answer


So, after some research, I realized that my implementation was fine, except that I didn't understand how the input layer works. That was it: the input layer works as In = Out.
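
In other words, the first layer should not push its inputs through weights and a sigmoid at all; it should just forward them unchanged. A minimal sketch of that fix applied in FeedForwardLayer.computeOutputs (this particular implementation is illustrative, assuming the input layer has as many neurons as inputs; it is not the code from the original post):

/** 
* The input layer copies its inputs straight to its outputs (In = Out) 
* instead of computing weighted sigmoids; all other layers behave as before. 
*/ 
public void computeOutputs(double[] inputs) 
{ 
    if (type == LayerTypes.INPUT) 
    { 
        for (int i = 0; i < neurons.length; i++) 
            neurons[i].output = inputs[i]; // pass-through: In = Out 
        return; 
    } 

    for (int i = 0; i < neurons.length; i++) 
        neurons[i].computeOutput(inputs); 
} 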
