0

Ich versuche derzeit eine RNN für die Regression zu implementieren. Ich muss ein neuronales Netzwerk erstellen, das Audio-Samples in einen Vektor der mfcc-Funktion konvertieren kann. Ich weiß bereits, was das Merkmal für jedes Audio-Samples ist, also ist es die Aufgabe selbst ein neuronales Netzwerk zu erstellen, das eine Liste von Audio-Samples in das gewünschte MFCC-Feature konvertieren kann. Das zweite Problem, mit dem ich konfrontiert bin, ist, dass die Liste mit dem Audio-Sample unterschiedliche Länge hat, da die Audiodateien, die ich sampling, unterschiedliche Länge haben, was zu Problemen mit der Anzahl der Eingänge führen würde das neurale Netzwerk. Ich fand this post auf, wie man variable Sequenzlänge behandelt, und versuchte, in meine Implementierung eines RNN einzubauen, aber scheinen nicht fähig zu sein, viele Fehler aus unerklärlichen Gründen zu erhalten.RNN-Regression mit Tensorflow?

Konnte jeder sehen, was falsch geht mit meiner Umsetzung? Hier

ist der Code:

def length(sequence): ##Zero padding to fit the max lenght... Question whether that is a good idea. 
    used = tf.sign(tf.reduce_max(tf.abs(sequence), reduction_indices=2)) 
    length = tf.reduce_sum(used, reduction_indices=1) 
    length = tf.cast(length, tf.int32) 
    return length 

def cost(output, target): 
    # Compute cross entropy for each frame. 
    cross_entropy = target * tf.log(output) 
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2) 
    mask = tf.sign(tf.reduce_max(tf.abs(target), reduction_indices=2)) 
    cross_entropy *= mask 
    # Average over actual sequence lengths. 
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1) 
    cross_entropy /= tf.reduce_sum(mask, reduction_indices=1) 
    return tf.reduce_mean(cross_entropy) 

def last_relevant(output): 
    max_length = int(output.get_shape()[1]) 
    relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1) 
    return relevant 

files_train_path = [dnn_train+f for f in listdir(dnn_train) if isfile(join(dnn_train, f))] 
files_test_path = [dnn_test+f for f in listdir(dnn_test) if isfile(join(dnn_test, f))] 

files_train_name = [f for f in listdir(dnn_train) if isfile(join(dnn_train, f))] 
files_test_name = [f for f in listdir(dnn_test) if isfile(join(dnn_test, f))] 

os.chdir(dnn_train) 

train_name,train_data = generate_list_of_names_data(files_train_path) 
train_data, train_names, train_output_data, train_class_output = load_sound_files(files_train_path,train_name,train_data) 

max_length = 0 ## Used for variable sequence input 

for element in train_data: 
    if element.size > max_length: 
     max_length = element.size 

NUM_EXAMPLES = len(train_data)/2 

test_data = train_data[NUM_EXAMPLES:] 
test_output = train_output_data[NUM_EXAMPLES:] 

train_data = train_data[:NUM_EXAMPLES] 
train_output = train_output_data[:NUM_EXAMPLES] 
print("--- %s seconds ---" % (time.time() - start_time)) 

#----------------------------------------------------------------------# 
#----------------------------Main--------------------------------------# 
### Tensorflow neural network setup 

batch_size = None 
sequence_length_max = max_length 
input_dimension=1 

data = tf.placeholder(tf.float32,[batch_size,sequence_length_max,input_dimension]) 
target = tf.placeholder(tf.float32,[None,14]) 

num_hidden = 24 ## Hidden layer 
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True) ## Long short term memory 

output, state = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32,sequence_length = length(data)) ## Creates the Rnn skeleton 

last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last 

weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])])) 
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]])) 

prediction = tf.nn.softmax(tf.matmul(last, weight) + bias) 

cross_entropy = cost(output,target)# How far am I from correct value? 

optimizer = tf.train.AdamOptimizer() ## TensorflowOptimizer 
minimize = optimizer.minimize(cross_entropy) 

mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1)) 
error = tf.reduce_mean(tf.cast(mistakes, tf.float32)) 

## Training ## 

init_op = tf.initialize_all_variables() 
sess = tf.Session() 
sess.run(init_op) 

batch_size = 1000 
no_of_batches = int(len(train_data)/batch_size) 
epoch = 5000 
for i in range(epoch): 
    ptr = 0 
    for j in range(no_of_batches): 
     inp, out = train_data[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size] 
     ptr+=batch_size 
     sess.run(minimize,{data: inp, target: out}) 
    print "Epoch - ",str(i) 
incorrect = sess.run(error,{data: test_data, target: test_output}) 
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect)) 
sess.close() 

Fehlermeldung:

Traceback (most recent call last): 
    File "tensorflow_test.py", line 177, in <module> 
    last = last_relevant(output)#tf.gather(val, int(val.get_shape()[0]) - 1) ## Appedning as last 
    File "tensorflow_test.py", line 132, in last_relevant 
    relevant = tf.reduce_sum(tf.mul(output, tf.expand_dims(tf.one_hot(length, max_length), -1)), 1) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 2778, in one_hot 
    name) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1413, in _one_hot 
    axis=axis, name=name) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 454, in apply_op 
    as_ref=input_arg.is_ref) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 621, in convert_to_tensor 
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 180, in _constant_tensor_conversion_function 
    return constant(v, dtype=dtype, name=name) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/constant_op.py", line 163, in constant 
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape)) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/tensor_util.py", line 421, in make_tensor_proto 
    tensor_proto.string_val.extend([compat.as_bytes(x) for x in proto_values]) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/util/compat.py", line 45, in as_bytes 
    (bytes_or_text,)) 
TypeError: Expected binary or unicode string, got <function length at 0x7f51a7a3ede8> 

Edit:

Ändern der tf.one_hot (Länge (Ausgang), max_length) gibt mir diese Fehlermeldung:

Traceback (most recent call last): 
    File "tensorflow_test.py", line 184, in <module> 
    cross_entropy = cost(output,target)# How far am I from correct value? 
    File "tensorflow_test.py", line 121, in cost 
    cross_entropy = target * tf.log(output) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 754, in binary_op_wrapper 
    return func(x, y, name=name) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 903, in _mul_dispatch 
    return gen_math_ops.mul(x, y, name=name) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 1427, in mul 
    result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op 
    op_def=op_def) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2312, in create_op 
    set_shapes_for_outputs(ret) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1704, in set_shapes_for_outputs 
    shapes = shape_func(op) 
    File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py", line 1801, in _BroadcastShape 
    % (shape_x, shape_y)) 
ValueError: Incompatible shapes for broadcasting: (?, 14) and (?, 138915, 24) 

Antwort

0
tf.one_hot(length, ...) 

Hier ist Länge eine Funktion, kein Tensor. Probieren Sie stattdessen Länge (etwas).

+0

'last_relevant' soll die relevante Ausgabe extrahieren, was bedeutet, dass das nicht gepolstert ist, was dann Sinn machen würde, es' length (output) 'zu füttern, was mir dann eine neue Fehlermeldung gibt. –