Dieser Code als Seq2Seq Modell gedacht ...Tensorflow Seq2Seq Valueerror
# imports
from utils import get_sorted_buckets
import logging
from six.moves import xrange
import numpy as np
import tensorflow as tf
# classes
class Seq2Seq:
def __init__(self,
input_vocab_size,
output_vocab_size,
buckets,
layer_size=256,
n_layers=3,
max_gradient_norm=5.0,
batch_size=64,
learning_rate=0.5,
learning_rate_decay_factor=0.99,
rnn_cell=tf.contrib.rnn.GRUCell,
n_samples=512,
forward_only=False):
logging.info('initializing Seq2Seq model')
buckets = get_sorted_buckets(buckets)
self.input_vocab_size = input_vocab_size
self.output_vocab_size = output_vocab_size
self.buckets = buckets
self.layer_size = layer_size
self.n_layers = n_layers
self.max_gradient_norm = max_gradient_norm
self.batch_size = batch_size
self.learning_rate = learning_rate
self.learning_rate_decay_factor = learning_rate_decay_factor
self.rnn_cell = rnn_cell
self.n_samples = n_samples
self.forward_only = forward_only
self.learning_rate = tf.Variable(float(self.learning_rate),
trainable=False)
self.learning_rate_decay_operation = self.learning_rate.assign(
self.learning_rate * self.learning_rate_decay_factor)
self.global_step = tf.Variable(0, trainable=False)
self.encoder_inputs = [self._get_val_placeholder('encoder', i)
for i in xrange(buckets[-1][0])]
self.decoder_inputs = [self._get_val_placeholder('decoder', i)
for i in xrange(buckets[-1][1] + 1)]
self.target_weights = [self._get_val_placeholder('weight', i,
dtype=tf.float32)
for i in xrange(buckets[-1][1] + 1)]
logging.debug('getting model')
self.outputs, self.losses = self._get_model_with_buckets()
self.saver = tf.train.Saver(tf.all_variables())
if not forward_only:
logging.debug('setting gradient norms and updates')
out = self._get_gradient_norms_and_updates()
self.gradient_norms, self.updates = out
def _get_softmax_loss_func_and_output_proj(self):
logging.debug('function: _get_softmax_loss_func_and_output_proj')
use_sampled_softmax = self.n_samples > 0 and \
self.n_samples < self.output_vocab_size
if use_sampled_softmax:
w = tf.get_variable('proj_w',
[self.layer_size, self.output_vocab_size],
dtype=tf.int32)
w_t = tf.transpose(w)
b = tf.get_variable('proj_b',
[self.output_vocab_size],
dtype=tf.int32)
def get_sampled_loss(inputs, labels):
labels = tf.reshape(labels, [-1, 1])
print(w_t)
print(b)
print(labels)
print(inputs)
return tf.nn.sampled_softmax_loss(
weights=w_t, biases=b, labels=labels, inputs=inputs,
num_sampled=self.n_samples,
num_classes=self.output_vocab_size)
softmax_loss_function = get_sampled_loss
output_projection = (w, b)
else:
softmax_loss_function = None
output_projection = None
return softmax_loss_function, output_projection
def _get_cell(self):
logging.debug('function: _get_cell')
single_cell = self.rnn_cell(self.layer_size)
# single_cell = self.rnn_cell
if self.n_layers > 1:
cell = tf.contrib.rnn.MultiRNNCell([single_cell] * self.n_layers)
else:
cell = single_cell
return cell
def _get_val_placeholder(self, name, idx, dtype=tf.int32):
return tf.placeholder(dtype, shape=[None], name='{}_{}'.format(name,
idx))
def _get_model_with_buckets(self):
targets = [self.decoder_inputs[i + 1]
for i in xrange(len(self.decoder_inputs) - 1)]
out = self._get_softmax_loss_func_and_output_proj()
softmax_loss_function, output_projection = out
cell = self._get_cell()
def seq2seq_func(encoder_inputs, decoder_inputs, do_decode):
print('seq2seq: {} {} {}'.format(encoder_inputs, decoder_inputs, do_decode))
return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
encoder_inputs, decoder_inputs, cell,
num_encoder_symbols=self.input_vocab_size,
num_decoder_symbols=self.output_vocab_size,
embedding_size=self.layer_size,
output_projection=output_projection,
feed_previous=do_decode)
logging.debug('getting model with buckets')
outputs, losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, self.buckets,
lambda x, y: seq2seq_func(x, y, self.forward_only),
softmax_loss_function=softmax_loss_function)
logging.debug('forward only')
if self.forward_only:
if output_projection is not None:
for i in xrange(len(self.buckets)):
w = output_projection[0]
b = output_projection[1]
new_bucket_outputs = [tf.matmul(output, w) + b
for output in outputs[i]]
self.outputs[i] = new_bucket_outputs
return outputs, losses
def _get_gradient_norms_and_updates(self):
params = tf.trainable_variables()
gradient_norms = []
updates = []
optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
for i in xrange(len(self.buckets)):
gradients = tf.gradients(self.losses[i], params)
clipped_gradients, norm = tf.clip_by_global_norm(
gradients, self.max_gradient_norm)
gradient_norms.append(norm)
updates.append(optimizer.apply_gradients(
zip(clipped_gradients, params), global_step=self.global_step))
return gradient_norms, updates
def step(self, session, encoder_inputs, decoder_inputs, target_weights,
bucket_id, forward_only):
encoder_size, decoder_size = self.buckets[bucket_id]
if len(encoder_inputs) != encoder_size:
raise ValueError('Encoder length must be equal to one in bucket.')
elif len(decoder_inputs) != decoder_size:
raise ValueError('Decoder length must be equal to one in bucket.')
elif len(target_weights) != decoder_size:
raise ValueError('Weights length must be equal to one in bucket.')
input_feed = {}
for i in xrange(encoder_size):
input_feed[self.encoder_inputs[i].name] = encoder_inputs[i]
for i in xrange(decoder_size):
input_feed[self.decoder_inputs[i].name] = decoder_inputs[i]
input_feed[self.target_weights[i].name] = target_weights[i]
last_target = self.decoder_inputs[decoder_size].name
input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)
if forward_only:
output_feed = [self.losses[bucket_id]]
for i in xrange(decoder_size):
output_feed.append(self.outputs[bucket_id][i])
else:
output_feed = [self.updates[bucket_id],
self.gradient_norms[bucket_id],
self.losses[bucket_id]]
outputs = session.run(output_feed, input_feed)
if not forward_only:
return outputs[1], outputs[2], None
else:
return None, outputs[0], outputs[1:]
diesen Fehler generiert ...
INFO:root:initializing Seq2Seq model
DEBUG:root:getting model
DEBUG:root:function: _get_softmax_loss_func_and_output_proj
DEBUG:root:function: _get_cell
DEBUG:root:getting model with buckets
seq2seq: [<tf.Tensor 'encoder_0:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_1:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_2:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_3:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_4:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_5:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_6:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_7:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_8:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_9:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_10:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_11:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_12:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_13:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_14:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_15:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_16:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_17:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_18:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_19:0' shape=(?,) dtype=int32>] [<tf.Tensor 'decoder_0:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_1:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_2:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_3:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_4:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_5:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_6:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_7:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_8:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_9:0' shape=(?,) dtype=int32>] False
Tensor("transpose:0", shape=(20000, 256), dtype=int32)
Tensor("proj_b/read:0", shape=(20000,), dtype=int32)
Tensor("model_with_buckets/sequence_loss/sequence_loss_by_example/Reshape:0", shape=(?, 1), dtype=float32)
Tensor("decoder_1:0", shape=(?,), dtype=int32)
Traceback (most recent call last):
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 670, in _call_cpp_shape_fn_impl
status)
File "/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 2 but is rank 1 for 'model_with_buckets/sequence_loss/sequence_loss_by_example/sampled_softmax_loss/MatMul_1' (op: 'MatMul') with input shapes: [?], [?,256].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train.py", line 87, in <module>
main()
File "train.py", line 82, in main
model = get_model()
File "train.py", line 76, in get_model
get_rnn_cell(), FLAGS.n_samples, FLAGS.forward_only)
File "/Users/edholm/Desktop/Seq2Seq/model.py", line 58, in __init__
self.outputs, self.losses = self._get_model_with_buckets()
File "/Users/edholm/Desktop/Seq2Seq/model.py", line 141, in _get_model_with_buckets
softmax_loss_function=softmax_loss_function)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1195, in model_with_buckets
softmax_loss_function=softmax_loss_function))
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1110, in sequence_loss
softmax_loss_function=softmax_loss_function))
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1067, in sequence_loss_by_example
crossent = softmax_loss_function(target, logit)
File "/Users/edholm/Desktop/Seq2Seq/model.py", line 91, in get_sampled_loss
num_classes=self.output_vocab_size)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 1191, in sampled_softmax_loss
name=name)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 995, in _compute_sampled_logits
inputs, sampled_w, transpose_b=True) + sampled_b
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1855, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1454, in _mat_mul
transpose_b=transpose_b, name=name)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2397, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1757, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1707, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 610, in call_cpp_shape_fn
debug_python_shape_fn, require_shape_fn)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 675, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Shape must be rank 2 but is rank 1 for 'model_with_buckets/sequence_loss/sequence_loss_by_example/sampled_softmax_loss/MatMul_1' (op: 'MatMul') with input shapes: [?], [?,256].
Der Fehler wahrscheinlich mit dem abgetasteten softmax Verlust zu tun hat, ich Es hatte viele Fehler, bevor es mit der neuen Aktualisierung von Tensorflow, der neuen Syntaxreihenfolge von Argumenten und so weiter zu tun hat. Es gibt vielleicht mehr Fehler im Code, aber ich muss diesen Code lösen, bevor ich auf mehr stoße.
Zwei Tage sind vergangen und ich weiß immer noch nicht, was ich tun soll. Welche Änderungen im Code sollte ich vornehmen, damit dies funktioniert?