
Tensorflow Seq2Seq ValueError

This code, intended as a Seq2Seq model ...

# imports
from utils import get_sorted_buckets

import logging
from six.moves import xrange

import numpy as np
import tensorflow as tf


# classes
class Seq2Seq:
    def __init__(self,
                 input_vocab_size,
                 output_vocab_size,
                 buckets,
                 layer_size=256,
                 n_layers=3,
                 max_gradient_norm=5.0,
                 batch_size=64,
                 learning_rate=0.5,
                 learning_rate_decay_factor=0.99,
                 rnn_cell=tf.contrib.rnn.GRUCell,
                 n_samples=512,
                 forward_only=False):
        logging.info('initializing Seq2Seq model')
        buckets = get_sorted_buckets(buckets)

        self.input_vocab_size = input_vocab_size
        self.output_vocab_size = output_vocab_size
        self.buckets = buckets
        self.layer_size = layer_size
        self.n_layers = n_layers
        self.max_gradient_norm = max_gradient_norm
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.learning_rate_decay_factor = learning_rate_decay_factor
        self.rnn_cell = rnn_cell
        self.n_samples = n_samples
        self.forward_only = forward_only

        self.learning_rate = tf.Variable(float(self.learning_rate),
                                         trainable=False)
        self.learning_rate_decay_operation = self.learning_rate.assign(
            self.learning_rate * self.learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        self.encoder_inputs = [self._get_val_placeholder('encoder', i)
                               for i in xrange(buckets[-1][0])]

        self.decoder_inputs = [self._get_val_placeholder('decoder', i)
                               for i in xrange(buckets[-1][1] + 1)]
        self.target_weights = [self._get_val_placeholder('weight', i,
                                                         dtype=tf.float32)
                               for i in xrange(buckets[-1][1] + 1)]

        logging.debug('getting model')
        self.outputs, self.losses = self._get_model_with_buckets()

        self.saver = tf.train.Saver(tf.all_variables())

        if not forward_only:
            logging.debug('setting gradient norms and updates')
            out = self._get_gradient_norms_and_updates()
            self.gradient_norms, self.updates = out

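    # Builds the sampled-softmax loss callback and the output projection
    # (w, b) that maps RNN outputs of size layer_size to vocabulary logits.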
    def _get_softmax_loss_func_and_output_proj(self):
        logging.debug('function: _get_softmax_loss_func_and_output_proj')
        use_sampled_softmax = self.n_samples > 0 and \
            self.n_samples < self.output_vocab_size
        if use_sampled_softmax:
            w = tf.get_variable('proj_w',
                                [self.layer_size, self.output_vocab_size],
                                dtype=tf.int32)
            w_t = tf.transpose(w)
            b = tf.get_variable('proj_b',
                                [self.output_vocab_size],
                                dtype=tf.int32)

            def get_sampled_loss(inputs, labels):
                labels = tf.reshape(labels, [-1, 1])

                print(w_t)
                print(b)
                print(labels)
                print(inputs)

                return tf.nn.sampled_softmax_loss(
                    weights=w_t, biases=b, labels=labels, inputs=inputs,
                    num_sampled=self.n_samples,
                    num_classes=self.output_vocab_size)

            softmax_loss_function = get_sampled_loss
            output_projection = (w, b)
        else:
            softmax_loss_function = None
            output_projection = None

        return softmax_loss_function, output_projection

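    # Builds a single rnn_cell of layer_size units, stacked n_layers deep
    # with MultiRNNCell when more than one layer is requested.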
    def _get_cell(self):
        logging.debug('function: _get_cell')
        single_cell = self.rnn_cell(self.layer_size)
        # single_cell = self.rnn_cell

        if self.n_layers > 1:
            cell = tf.contrib.rnn.MultiRNNCell([single_cell] * self.n_layers)
        else:
            cell = single_cell

        return cell

    def _get_val_placeholder(self, name, idx, dtype=tf.int32):
        return tf.placeholder(dtype, shape=[None],
                              name='{}_{}'.format(name, idx))

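    # Wires the placeholders into legacy_seq2seq's model_with_buckets, which
    # unrolls one model per bucket and calls softmax_loss_function per step.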
    def _get_model_with_buckets(self):
        targets = [self.decoder_inputs[i + 1]
                   for i in xrange(len(self.decoder_inputs) - 1)]

        out = self._get_softmax_loss_func_and_output_proj()
        softmax_loss_function, output_projection = out

        cell = self._get_cell()

        def seq2seq_func(encoder_inputs, decoder_inputs, do_decode):
            print('seq2seq: {} {} {}'.format(encoder_inputs, decoder_inputs,
                                             do_decode))
            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs, decoder_inputs, cell,
                num_encoder_symbols=self.input_vocab_size,
                num_decoder_symbols=self.output_vocab_size,
                embedding_size=self.layer_size,
                output_projection=output_projection,
                feed_previous=do_decode)

        logging.debug('getting model with buckets')
        outputs, losses = tf.contrib.legacy_seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, targets,
            self.target_weights, self.buckets,
            lambda x, y: seq2seq_func(x, y, self.forward_only),
            softmax_loss_function=softmax_loss_function)

        logging.debug('forward only')
        if self.forward_only:
            if output_projection is not None:
                for i in xrange(len(self.buckets)):
                    w = output_projection[0]
                    b = output_projection[1]
                    new_bucket_outputs = [tf.matmul(output, w) + b
                                          for output in outputs[i]]

                    self.outputs[i] = new_bucket_outputs

        return outputs, losses

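    # One clipped-gradient SGD update op and one gradient-norm tensor per
    # bucket, all sharing the same global step.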
    def _get_gradient_norms_and_updates(self):
        params = tf.trainable_variables()

        gradient_norms = []
        updates = []

        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        for i in xrange(len(self.buckets)):
            gradients = tf.gradients(self.losses[i], params)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            gradient_norms.append(norm)
            updates.append(optimizer.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step))

        return gradient_norms, updates

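    # Runs one training or inference step on a single batch for the given
    # bucket, building the feed dict from the bucket-sized input lists.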
    def step(self, session, encoder_inputs, decoder_inputs, target_weights,
             bucket_id, forward_only):
        encoder_size, decoder_size = self.buckets[bucket_id]
        if len(encoder_inputs) != encoder_size:
            raise ValueError('Encoder length must be equal to the one in bucket.')
        elif len(decoder_inputs) != decoder_size:
            raise ValueError('Decoder length must be equal to the one in bucket.')
        elif len(target_weights) != decoder_size:
            raise ValueError('Weights length must be equal to the one in bucket.')

        input_feed = {}
        for i in xrange(encoder_size):
            input_feed[self.encoder_inputs[i].name] = encoder_inputs[i]
        for i in xrange(decoder_size):
            input_feed[self.decoder_inputs[i].name] = decoder_inputs[i]
            input_feed[self.target_weights[i].name] = target_weights[i]

        last_target = self.decoder_inputs[decoder_size].name
        input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)

        if forward_only:
            output_feed = [self.losses[bucket_id]]

            for i in xrange(decoder_size):
                output_feed.append(self.outputs[bucket_id][i])
        else:
            output_feed = [self.updates[bucket_id],
                           self.gradient_norms[bucket_id],
                           self.losses[bucket_id]]

        outputs = session.run(output_feed, input_feed)

        if not forward_only:
            return outputs[1], outputs[2], None
        else:
            return None, outputs[0], outputs[1:]

... generates this error:

INFO:root:initializing Seq2Seq model 
DEBUG:root:getting model 
DEBUG:root:function: _get_softmax_loss_func_and_output_proj 
DEBUG:root:function: _get_cell 
DEBUG:root:getting model with buckets 
seq2seq: [<tf.Tensor 'encoder_0:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_1:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_2:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_3:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_4:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_5:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_6:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_7:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_8:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_9:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_10:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_11:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_12:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_13:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_14:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_15:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_16:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_17:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_18:0' shape=(?,) dtype=int32>, <tf.Tensor 'encoder_19:0' shape=(?,) dtype=int32>] [<tf.Tensor 'decoder_0:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_1:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_2:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_3:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_4:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_5:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_6:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_7:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_8:0' shape=(?,) dtype=int32>, <tf.Tensor 'decoder_9:0' shape=(?,) dtype=int32>] False 
Tensor("transpose:0", shape=(20000, 256), dtype=int32) 
Tensor("proj_b/read:0", shape=(20000,), dtype=int32) 
Tensor("model_with_buckets/sequence_loss/sequence_loss_by_example/Reshape:0", shape=(?, 1), dtype=float32) 
Tensor("decoder_1:0", shape=(?,), dtype=int32) 
Traceback (most recent call last): 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 670, in _call_cpp_shape_fn_impl 
    status) 
    File "/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 89, in __exit__ 
    next(self.gen) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status 
    pywrap_tensorflow.TF_GetCode(status)) 
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 2 but is rank 1 for 'model_with_buckets/sequence_loss/sequence_loss_by_example/sampled_softmax_loss/MatMul_1' (op: 'MatMul') with input shapes: [?], [?,256]. 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "train.py", line 87, in <module> 
    main() 
    File "train.py", line 82, in main 
    model = get_model() 
    File "train.py", line 76, in get_model 
    get_rnn_cell(), FLAGS.n_samples, FLAGS.forward_only) 
    File "/Users/edholm/Desktop/Seq2Seq/model.py", line 58, in __init__ 
    self.outputs, self.losses = self._get_model_with_buckets() 
    File "/Users/edholm/Desktop/Seq2Seq/model.py", line 141, in _get_model_with_buckets 
    softmax_loss_function=softmax_loss_function) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1195, in model_with_buckets 
    softmax_loss_function=softmax_loss_function)) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1110, in sequence_loss 
    softmax_loss_function=softmax_loss_function)) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1067, in sequence_loss_by_example 
    crossent = softmax_loss_function(target, logit) 
    File "/Users/edholm/Desktop/Seq2Seq/model.py", line 91, in get_sampled_loss 
    num_classes=self.output_vocab_size) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 1191, in sampled_softmax_loss 
    name=name) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/nn_impl.py", line 995, in _compute_sampled_logits 
    inputs, sampled_w, transpose_b=True) + sampled_b 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 1855, in matmul 
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1454, in _mat_mul 
    transpose_b=transpose_b, name=name) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op 
    op_def=op_def) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2397, in create_op 
    set_shapes_for_outputs(ret) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1757, in set_shapes_for_outputs 
    shapes = shape_func(op) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1707, in call_with_requiring 
    return call_cpp_shape_fn(op, require_shape_fn=True) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 610, in call_cpp_shape_fn 
    debug_python_shape_fn, require_shape_fn) 
    File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py", line 675, in _call_cpp_shape_fn_impl 
    raise ValueError(err.message) 
ValueError: Shape must be rank 2 but is rank 1 for 'model_with_buckets/sequence_loss/sequence_loss_by_example/sampled_softmax_loss/MatMul_1' (op: 'MatMul') with input shapes: [?], [?,256]. 

The error probably has to do with the sampled softmax loss. I had a lot of errors before this one that were caused by the recent TensorFlow update, the new ordering of arguments, and so on. There may be more bugs in the code, but I have to solve this one before I run into the others.

Two days have passed and I still don't know what to do. What changes should I make to the code to get this working?

Answer


In a recent TensorFlow update, the order of the arguments that legacy_seq2seq passes to the softmax_loss_function callback was reversed: it is now called with the labels first and the inputs second. You can see this in your traceback, where sequence_loss_by_example calls softmax_loss_function(target, logit), so the first argument your function receives is the target (labels) tensor, not the decoder output.

Don't use this:

def get_sampled_loss(inputs, labels): 
    labels = tf.reshape(labels, [-1, 1]) 

    return tf.nn.sampled_softmax_loss(
        weights=w_t, biases=b, labels=labels, inputs=inputs, 
        num_sampled=self.n_samples, 
        num_classes=self.output_vocab_size) 

Use this instead:

def get_sampled_loss(labels, inputs): 
    labels = tf.reshape(labels, [-1, 1]) 

    return tf.nn.sampled_softmax_loss(
        weights=w_t, biases=b, labels=labels, inputs=inputs, 
        num_sampled=self.n_samples, 
        num_classes=self.output_vocab_size) 

The only difference between the two snippets is the parameter order: the first defines get_sampled_loss(inputs, labels), while the second defines get_sampled_loss(labels, inputs), matching the order in which TensorFlow now calls the function.
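To see why the swapped order produces this particular error: with get_sampled_loss(inputs, labels), the rank-1 int32 target tensor gets bound to inputs and ends up in the MatMul inside sampled_softmax_loss, which expects the rank-2 float32 decoder activations there; that is exactly the "Shape must be rank 2 but is rank 1 ... with input shapes: [?], [?,256]" in the traceback. Below is a minimal standalone sketch of the shapes and dtypes tf.nn.sampled_softmax_loss expects (assuming TensorFlow 1.x; the sizes 20000, 256, 64 and 512 simply mirror the values printed in your log, and the variable names are illustrative):

import tensorflow as tf

num_classes, dim, batch, n_samples = 20000, 256, 64, 512

# weights must be [num_classes, dim] (hence the transpose in your code)
# and biases [num_classes]; both should be float32, not int32
w_t = tf.get_variable('proj_w_t', [num_classes, dim], dtype=tf.float32)
b = tf.get_variable('proj_b', [num_classes], dtype=tf.float32)

labels = tf.placeholder(tf.int32, shape=[batch])         # target ids, rank 1
inputs = tf.placeholder(tf.float32, shape=[batch, dim])  # decoder output, rank 2

loss = tf.nn.sampled_softmax_loss(
    weights=w_t, biases=b,
    labels=tf.reshape(labels, [-1, 1]),  # expected shape [batch, 1]
    inputs=inputs,
    num_sampled=n_samples,
    num_classes=num_classes)

Note that your code also declares proj_w and proj_b with dtype=tf.int32 (visible as the int32 tensors printed in the log); once the argument order is fixed, the multiplication inside sampled_softmax_loss will most likely still fail when mixing int32 weights with float32 activations, so those variables should presumably be float32 as in the sketch.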
