I have the following code with which I am hoping to compute a forward pass through a two-layer LSTM network learned by Keras:

""" 
this is a simple numerical example of LSTM forward pass to allow deep understanding 
the LSTM is trying to learn the sin function by learning to predict the next value after a sequence of 3 inputs 
    example 1: {0.583, 0.633, 0.681} --> {0.725}, these values correspond to 
       {sin(35.66), sin(39.27), sin(42.92)} --> {sin(46.47)} 
    example 2: {0.725, 0.767, 0.801} --> {0.849}, these values correspond to 
       {sin(46.47), sin(50.09), sin(53.23)} --> {sin(58.10)} 

example tested: [[['0.725323664'] 
        ['0.7671179'] 
        ['0.805884672']]] 
predicted_instance: [ 0.83467698] 


training example pair: [['0.680666907'] 
['0.725323664'] 
['0.7671179']] 0.805884672 

""" 
import numpy as np 


# linear activation, matrix-wise (also works element-wise) 
def linear(x): 
    return x 


# sigmoid function, matrix-wise (also works element-wise) 
def sigmoid(x): 
    return 1/(1 + np.exp(-x)) 


# hard sigmoid function, element-wise 
def hard_sig(x): 
    # as implemented in Keras, for both the TensorFlow and Theano backends 
    return np.max(np.array([0.0, np.min(np.array([1.0, x * 0.2 + 0.5]))])) 
    # Courbariaux et al. 2016 (Binarized Neural Networks) 
    # return np.max(np.array([0.0, np.min(np.array([1.0, (x + 1.0)/2.0]))])) 


# hard sigmoid function, matrix-wise 
def hard_sigmoid(x, fun=hard_sig): 
    return np.vectorize(fun)(x) 


# hyperbolic tangent function, matrix-wise (also works element-wise); equivalent to np.tanh 
def hyperbolic_tangent(x): 
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x)) 


print(sigmoid(np.array([-100, 0, 100]))) 
print(hard_sigmoid(np.array([-100, 0, 0.1, 100]))) 
print(hyperbolic_tangent(np.array([-100, 0, 100]))) 

parameter_names = ['lstm_1_kernel_0.npy', 
        'lstm_1_recurrent_kernel_0.npy', 
        'lstm_1_bias_0.npy', 
        'lstm_2_kernel_0.npy', 
        'lstm_2_recurrent_kernel_0.npy', 
        'lstm_2_bias_0.npy', 
        'dense_1_kernel_0.npy', 
        'dense_1_bias_0.npy'] 


# LSTM 1 Weights 
lstm_1_kernel_0 = np.load('lstm_1_kernel_0.npy') 
print('lstm_1_kernel_0: ', lstm_1_kernel_0.shape) 
lstm_1_recurrent_kernel_0 = np.load('lstm_1_recurrent_kernel_0.npy') 
print('lstm_1_recurrent_kernel_0: ', lstm_1_recurrent_kernel_0.shape) 
lstm_1_bias_0 = np.load('lstm_1_bias_0.npy') 
print('lstm_1_bias_0: ', lstm_1_bias_0.shape) 

# LSTM 2 Weights 
lstm_2_kernel_0 = np.load('lstm_2_kernel_0.npy') 
print('lstm_2_kernel_0: ', lstm_2_kernel_0.shape) 
lstm_2_recurrent_kernel_0 = np.load('lstm_2_recurrent_kernel_0.npy') 
print('lstm_2_recurrent_kernel_0: ', lstm_2_recurrent_kernel_0.shape) 
lstm_2_bias_0 = np.load('lstm_2_bias_0.npy') 
print('lstm_2_bias_0: ', lstm_2_bias_0.shape) 

# Dense layer 
dense_1_kernel_0 = np.load('dense_1_kernel_0.npy') 
print('dense_1_kernel_0: ', dense_1_kernel_0.shape) 
dense_1_bias_0 = np.load('dense_1_bias_0.npy') 
print('dense_1_bias_0: ', dense_1_bias_0.shape) 

time_seq = [0, 1, 2] 
""" 
input_seq = np.array([[[0.725323664], 
         [0.7671179], 
         [0.805884672]]]) 
""" 
input_seq = np.array([[[0.680666907], 
         [0.725323664], 
         [0.7671179]]]) 
print('input_seq: ', input_seq.shape) 
for time in time_seq: 
    print('input t', time, ':', input_seq[0, time, 0]) 

""" 
# z0 = z[:, :self.units] 
# z1 = z[:, self.units: 2 * self.units] 
# z2 = z[:, 2 * self.units: 3 * self.units] 
# z3 = z[:, 3 * self.units:] 

# i = self.recurrent_activation(z0) 
# f = self.recurrent_activation(z1) 
# c = f * c_tm1 + i * self.activation(z2) 
# o = self.recurrent_activation(z3) 

# activation = 'tanh' 
# recurrent_activation = 'hard_sigmoid' (Keras default; this model was trained with 'sigmoid') 
""" 


# LSTM 1 
x_1_lstm_1 = input_seq[0, 0, 0] 
print('x_1: ', x_1_lstm_1) 
x_2_lstm_1 = input_seq[0, 1, 0] 
print('x_2: ', x_2_lstm_1) 
x_3_lstm_1 = input_seq[0, 2, 0] 
print('x_3: ', x_3_lstm_1) 

c_0_lstm_1 = np.zeros((1, 3)) 
h_0_lstm_1 = np.zeros((1, 3)) 

z_1_lstm_1 = np.dot(x_1_lstm_1, lstm_1_kernel_0) + np.dot(h_0_lstm_1, lstm_1_recurrent_kernel_0) + lstm_1_bias_0 
print(z_1_lstm_1.shape) 
i_1_lstm_1 = sigmoid(z_1_lstm_1[:, 0:3]) 
f_1_lstm_1 = sigmoid(z_1_lstm_1[:, 3:6]) 
input_to_c_1_lstm_1 = z_1_lstm_1[:, 6:9] 
o_1_lstm_1 = sigmoid(z_1_lstm_1[:, 9:12]) 
c_1_lstm_1 = np.multiply(f_1_lstm_1, c_0_lstm_1) + np.multiply(i_1_lstm_1, hyperbolic_tangent(input_to_c_1_lstm_1)) 
h_1_lstm_1 = np.multiply(o_1_lstm_1, hyperbolic_tangent(c_1_lstm_1)) 
print('h_1_lstm_1: ', h_1_lstm_1.shape, h_1_lstm_1) 

z_2_lstm_1 = np.dot(x_2_lstm_1, lstm_1_kernel_0) + np.dot(h_1_lstm_1, lstm_1_recurrent_kernel_0) + lstm_1_bias_0 
print(z_2_lstm_1.shape) 
i_2_lstm_1 = sigmoid(z_2_lstm_1[:, 0:3]) 
f_2_lstm_1 = sigmoid(z_2_lstm_1[:, 3:6]) 
input_to_c_2_lstm_1 = z_2_lstm_1[:, 6:9] 
o_2_lstm_1 = sigmoid(z_2_lstm_1[:, 9:12]) 
c_2_lstm_1 = np.multiply(f_2_lstm_1, c_1_lstm_1) + np.multiply(i_2_lstm_1, hyperbolic_tangent(input_to_c_2_lstm_1)) 
h_2_lstm_1 = np.multiply(o_2_lstm_1, hyperbolic_tangent(c_2_lstm_1)) 
print('h_2_lstm_1: ', h_2_lstm_1.shape, h_2_lstm_1) 

z_3_lstm_1 = np.dot(x_3_lstm_1, lstm_1_kernel_0) + np.dot(h_2_lstm_1, lstm_1_recurrent_kernel_0) + lstm_1_bias_0 
print(z_3_lstm_1.shape) 
i_3_lstm_1 = sigmoid(z_3_lstm_1[:, 0:3]) 
f_3_lstm_1 = sigmoid(z_3_lstm_1[:, 3:6]) 
input_to_c_3_lstm_1 = z_3_lstm_1[:, 6:9] 
o_3_lstm_1 = sigmoid(z_3_lstm_1[:, 9:12]) 
c_3_lstm_1 = np.multiply(f_3_lstm_1, c_2_lstm_1) + np.multiply(i_3_lstm_1, hyperbolic_tangent(input_to_c_3_lstm_1)) 
h_3_lstm_1 = np.multiply(o_3_lstm_1, hyperbolic_tangent(c_3_lstm_1)) 
print('h_3_lstm_1: ', h_3_lstm_1.shape, h_3_lstm_1) 

# LSTM 2 
x_1_lstm_2 = h_1_lstm_1 
x_2_lstm_2 = h_2_lstm_1 
x_3_lstm_2 = h_3_lstm_1 

c_0_lstm_2 = np.zeros((1, 1)) 
h_0_lstm_2 = np.zeros((1, 1)) 

z_1_lstm_2 = np.dot(x_1_lstm_2, lstm_2_kernel_0) + np.dot(h_0_lstm_2, lstm_2_recurrent_kernel_0) + lstm_2_bias_0 
print(z_1_lstm_2.shape) 
i_1_lstm_2 = sigmoid(z_1_lstm_2[:, 0]) 
f_1_lstm_2 = sigmoid(z_1_lstm_2[:, 1]) 
input_to_c_1_lstm_2 = z_1_lstm_2[:, 2] 
o_1_lstm_2 = sigmoid(z_1_lstm_2[:, 3]) 
c_1_lstm_2 = np.multiply(f_1_lstm_2, c_0_lstm_2) + np.multiply(i_1_lstm_2, hyperbolic_tangent(input_to_c_1_lstm_2)) 
h_1_lstm_2 = np.multiply(o_1_lstm_2, hyperbolic_tangent(c_1_lstm_2)) 
print('h_1_lstm_2: ', h_1_lstm_2.shape, h_1_lstm_2) 

z_2_lstm_2 = np.dot(x_2_lstm_2, lstm_2_kernel_0) + np.dot(h_1_lstm_2, lstm_2_recurrent_kernel_0) + lstm_2_bias_0 
print(z_2_lstm_2.shape) 
i_2_lstm_2 = sigmoid(z_2_lstm_2[:, 0]) 
f_2_lstm_2 = sigmoid(z_2_lstm_2[:, 1]) 
input_to_c_2_lstm_2 = z_2_lstm_2[:, 2] 
o_2_lstm_2 = sigmoid(z_2_lstm_2[:, 3]) 
c_2_lstm_2 = np.multiply(f_2_lstm_2, c_1_lstm_2) + np.multiply(i_2_lstm_2, hyperbolic_tangent(input_to_c_2_lstm_2)) 
h_2_lstm_2 = np.multiply(o_2_lstm_2, hyperbolic_tangent(c_2_lstm_2)) 
print('h_2_lstm_2: ', h_2_lstm_2.shape, h_2_lstm_2) 

z_3_lstm_2 = np.dot(x_3_lstm_2, lstm_2_kernel_0) + np.dot(h_2_lstm_2, lstm_2_recurrent_kernel_0) + lstm_2_bias_0 
print(z_3_lstm_2.shape) 
i_3_lstm_2 = sigmoid(z_3_lstm_2[:, 0]) 
f_3_lstm_2 = sigmoid(z_3_lstm_2[:, 1]) 
input_to_c_3_lstm_2 = z_3_lstm_2[:, 2] 
o_3_lstm_2 = sigmoid(z_3_lstm_2[:, 3]) 
c_3_lstm_2 = np.multiply(f_3_lstm_2, c_2_lstm_2) + np.multiply(i_3_lstm_2, hyperbolic_tangent(input_to_c_3_lstm_2)) 
h_3_lstm_2 = np.multiply(o_3_lstm_2, hyperbolic_tangent(c_3_lstm_2)) 
print('h_3_lstm_2: ', h_3_lstm_2.shape, h_3_lstm_2) 

output = np.dot(h_3_lstm_2, dense_1_kernel_0) + dense_1_bias_0 
print('output: ', output) 

The weights were saved to file at training time and can be retrieved at the following link:

LSTM weights

To create the LSTM that fits a sine-wave signal, I used the following code in Keras:

import time

from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation
from keras.utils import plot_model


def build_simple_model(layers): 
    model = Sequential() 

    model.add(LSTM(input_shape=(layers[1], layers[0]), 
        output_dim=layers[1], 
        return_sequences=True, 
        activation='tanh', 
        recurrent_activation='sigmoid')) # 'hard_sigmoid' 
    # model.add(Dropout(0.2)) 
    model.add(LSTM(layers[2], 
        return_sequences=False, 
        activation='tanh', 
        recurrent_activation='sigmoid')) # 'hard_sigmoid' 

    # model.add(Dropout(0.2)) 
    model.add(Dense(output_dim=layers[3])) 
    model.add(Activation("linear")) 

    start = time.time() 
    model.compile(loss="mse", optimizer="rmsprop") 
    print("> Compilation Time : ", time.time() - start) 
    plot_model(model, to_file='lstm_model.png', show_shapes=True, show_layer_names=True) 
    print(model.summary()) 
    return model 

Dies führte zu folgendem Modell:

LSTM Network

ich die Trainingsprozedur verwendet haben, wie folgt:

seq_len = 3
model = lstm.build_simple_model([1, seq_len, 1, 1])

model.fit(X_train,
          y_train,
          batch_size=512,
          nb_epoch=epochs,
          validation_split=0.05)

I would like to understand why my forward pass does not produce the desired output, namely predicting a future sin() signal value from the three preceding consecutive signal values.

The original example on which I am trying to base my forward-pass exercise comes from here. The weights uploaded in .npy format come from a network that is able to predict the next sin() value in a series perfectly.

Answer

I realized what the problem was. I was trying to extract my model weights using a TensorFlow session (after fitting the model) rather than directly via the Keras methods. This resulted in weight matrices that made perfect sense dimension-wise but contained the values from the initialization step rather than the trained ones.
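For reference, here is a minimal sketch of an extraction that does use the Keras methods (an illustration only; it reuses the parameter_names list below and assumes it runs after model.fit()):

# Minimal sketch: save the trained weights via Keras rather than via a TF 
# session. model.get_weights() returns the fitted values, while evaluating 
# the variables in a freshly initialized tf.Session() returns the 
# initializer values instead. 
for name, value in zip(parameter_names, model.get_weights()): 
    np.save(name, value) 

The code below demonstrates the difference between the two approaches: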

model.fit(X_train, 
      y_train, 
      batch_size=batch_size, 
      nb_epoch=epochs, 
      validation_split=0.05, 
      callbacks=callbacks_list) 

print('n_parameters: ', len(model.weights)) 
sess = tf.Session() 
init = tf.global_variables_initializer() 
sess.run(init) 

parameter_names = ['lstm_1_kernel_0', 
        'lstm_1_recurrent_kernel_0', 
        'lstm_1_bias_0', 
        'lstm_2_kernel_0', 
        'lstm_2_recurrent_kernel_0', 
        'lstm_2_bias_0', 
        'dense_1_kernel_0', 
        'dense_1_bias_0'] 

weights = model.get_weights() 
trainable_weights = model.trainable_weights 
for parameter in range(len(model.weights)): 
    print('') 
    # using Keras methods is the correct way 
    print('parameter: ', trainable_weights[parameter]) 
    print('parameter Keras: ', weights[parameter]) 
    # using session with TF is the wrong way 
    print('parameter TF: ', model.weights[parameter].eval(session=sess)) 
    #np.save(parameter_names[parameter], model.weights[parameter].eval(session=sess)) 
    #np.save(parameter_names[parameter], weights[parameter]) 

This prints the following to the screen:

parameter: <tf.Variable 'lstm_1/kernel:0' shape=(1, 12) dtype=float32_ref> 
parameter Keras: [[ 0.02005039 0.59627813 -0.77670902 -0.17643917 0.64905447 -0.49418128 
    0.01204901 0.79791737 -1.58887422 -0.3566488 0.67758918 0.77245694]] 
parameter TF: [[-0.20346385 -0.07166874 -0.58842945 0.03744811 0.46911311 -0.0469712 
    -0.07291448 0.27316415 -0.53298378 0.08367682 0.10194337 0.20933461]] 

parameter: <tf.Variable 'lstm_1/recurrent_kernel:0' shape=(3, 12) dtype=float32_ref> 
parameter Keras: [[ 0.01916649 -0.30881727 -0.07018201 0.28770521 -0.45713434 -0.33738521 
    0.53091544 -0.78456688 0.50647908 0.12326431 -0.18517831 -0.28752103] 
[ 0.44490865 -0.09020164 1.00983524 0.43070397 -0.14646551 -0.53908533 
    1.33833826 0.76106179 -1.28808987 0.71029669 -0.19338571 -0.30499896] 
[ 0.76727188 -0.10291406 0.53285897 0.31021088 0.46876401 0.04961515 
    0.0573149 1.17765784 -0.45716232 0.26181531 0.60458028 -0.6042906 ]] 
parameter TF: [[-0.044281 -0.42013288 -0.06702472 0.16710882 0.07229936 0.20263752 
    0.01935999 -0.65925431 0.21676332 0.02481769 0.50321299 -0.08369029] 
[-0.17725646 -0.14031938 -0.07758044 -0.39292315 0.36675838 -0.20198873 
    0.59491426 -0.12469263 0.14705807 0.39603388 -0.25511321 -0.01221756] 
[ 0.51603764 0.34401873 0.36002275 0.05344227 -0.00293417 -0.36086732 
    0.1636388 -0.24916036 0.09064917 -0.04246153 0.05563453 -0.5006755 ]] 

parameter: <tf.Variable 'lstm_1/bias:0' shape=(12,) dtype=float32_ref> 
parameter Keras: [ 3.91339064e-01 -2.09703773e-01 -4.88098420e-04 1.15376031e+00 
    6.24452651e-01 2.24053934e-01 4.06851530e-01 4.78419960e-01 
    1.77846551e-01 3.19107175e-01 5.16630232e-01 -2.22970009e-01] 
parameter TF: [ 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.] 

parameter: <tf.Variable 'lstm_2/kernel:0' shape=(3, 4) dtype=float32_ref> 
parameter Keras: [[ 2.01334882 1.9168334 1.77633524 -0.90856379] 
[ 1.17618477 1.02978265 -0.06435115 0.66180402] 
[-1.33014703 -0.71629387 -0.87376142 1.35648465]] 
parameter TF: [[ 0.83115911 0.72150767 0.51600969 -0.52725452] 
[ 0.53043616 0.59162521 -0.59219611 0.0951736 ] 
[-0.8030411 -0.00424314 -0.06715947 0.67533839]] 

parameter: <tf.Variable 'lstm_2/recurrent_kernel:0' shape=(1, 4) dtype=float32_ref> 
parameter Keras: [[-0.09348518 -0.7667768 0.24031806 -0.39155772]] 
parameter TF: [[-0.085137 -0.59010917 0.61000961 -0.52193022]] 

parameter: <tf.Variable 'lstm_2/bias:0' shape=(4,) dtype=float32_ref> 
parameter Keras: [ 1.21466994 2.22224903 1.34946632 0.19186479] 
parameter TF: [ 0. 1. 0. 0.] 

parameter: <tf.Variable 'dense_1/kernel:0' shape=(1, 1) dtype=float32_ref> 
parameter Keras: [[ 2.69569159]] 
parameter TF: [[ 1.5422312]] 

parameter: <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32_ref> 
parameter Keras: [ 0.20767514] 
parameter TF: [ 0.] 

The forward-pass code was therefore correct; the weights were wrong. The correct weight .npy files have also been updated at the link in the question mentioned above. This forward pass can be used to illustrate sequence generation with an LSTM by feeding the output back in as the next input.
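As a sketch of that idea (using a hypothetical predict_window() helper that wraps the two-layer forward pass above and maps a (1, 3, 1) input window to a single scalar prediction):

# Closed-loop generation sketch: feed each prediction back in as the newest 
# input. predict_window() is a hypothetical wrapper around the forward pass 
# unrolled in the question. 
window = [0.725323664, 0.7671179, 0.805884672] 
generated = [] 
for _ in range(10): 
    x = np.array(window).reshape(1, 3, 1) 
    y = float(predict_window(x))   # next predicted sin() value 
    generated.append(y) 
    window = window[1:] + [y]      # slide the window forward 
print(generated) 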