Ich versuche ein Regressor-Modell zu trainieren, das 4 skalare Float-Ausgaben vorhersagen kann. Wie es derzeit ist, divergiert das Netzwerk sehr schnell mit einem Anstieg der Verluste auf NaN. Ich kann nicht herausfinden, was vor sich geht.TensorFlow CNN Verlust steigt schnell auf NaN
Im Folgenden finden Sie ein Beispiel mit TensorFlow 1.1.0 unter Windows 10 mit einer NVidia-GPU.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy
import tensorflow as tf
IMAGE_HEIGHT = 320
IMAGE_WIDTH = 160
NUM_CHANNELS = 3
PIXEL_DEPTH = 255
SEED = 66479 # Set to None for random seed.
BATCH_SIZE=5
NUM_OUTPUTS = 4 # the four outputs
def data_type():
return tf.float32
# The variables below hold all the trainable weights. They are passed an
# initial value which will be assigned when we call:
# {tf.global_variables_initializer().run()}
conv1_weights = tf.Variable(
tf.truncated_normal([5, 5, NUM_CHANNELS, 32], # 5x5 filter, depth 32.
stddev=0.1,
seed=SEED, dtype=data_type()))
conv1_biases = tf.Variable(tf.zeros([32], dtype=data_type()))
conv2_weights = tf.Variable(tf.truncated_normal(
[5, 5, 32, 64], stddev=0.1,
seed=SEED, dtype=data_type()))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64], dtype=data_type()))
fc1_weights = tf.Variable( # fully connected, depth 512.
tf.truncated_normal([IMAGE_HEIGHT // 4 * IMAGE_WIDTH // 4 * 64, 512],
stddev=0.1,
seed=SEED,
dtype=data_type()))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512], dtype=data_type()))
fc2_weights = tf.Variable(tf.truncated_normal([512, NUM_OUTPUTS],
stddev=0.1,
seed=SEED,
dtype=data_type()))
fc2_biases = tf.Variable(tf.constant(
0.1, shape=[NUM_OUTPUTS], dtype=data_type()))
# We will replicate the model structure for the training subgraph, as well
# as the evaluation subgraphs, while sharing the trainable parameters.
def model(data, train=False):
"""The Model definition."""
# 2D convolution, with 'SAME' padding (i.e. the output feature map has
# the same size as the input). Note that {strides} is a 4D array whose
# shape matches the data layout: [image index, y, x, depth].
conv = tf.nn.conv2d(data,
conv1_weights,
strides=[1, 1, 1, 1],
padding='SAME')
# Bias and rectified linear non-linearity.
relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
# Max pooling. The kernel size spec {ksize} also follows the layout of
# the data. Here we have a pooling window of 2, and a stride of 2.
pool = tf.nn.max_pool(relu,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME')
conv = tf.nn.conv2d(pool,
conv2_weights,
strides=[1, 1, 1, 1],
padding='SAME')
relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
pool = tf.nn.max_pool(relu,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME')
# Reshape the feature map cuboid into a 2D matrix to feed it to the
# fully connected layers.
pool_shape = pool.get_shape().as_list()
reshape = tf.reshape(
pool,
[pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
# Fully connected layer. Note that the '+' operation automatically
# broadcasts the biases.
hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
# Add a 50% dropout during training only. Dropout also scales
# activations such that no rescaling is needed at evaluation time.
if train:
hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
return tf.matmul(hidden, fc2_weights) + fc2_biases
def main():
train_data_batch = tf.placeholder(tf.float32, shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS))
train_label_batch = tf.placeholder(tf.float32, shape=(BATCH_SIZE, NUM_OUTPUTS))
with tf.name_scope('pred'):
train_pred = model(train_data_batch, train=True)
with tf.name_scope('loss'):
loss = tf.reduce_sum(tf.square(train_pred - train_label_batch))
tf.summary.scalar('loss', loss)
# L2 regularization for the fully connected parameters.
regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
# Add the regularization term to the loss.
loss += 5e-4 * regularizers
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_op = optimizer.minimize(loss)
with tf.Session() as sess:
# The op for initializing the variables.
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
sess.run(init_op)
while True:
predictions, l, _ = sess.run([train_pred, loss, train_op], feed_dict={
train_data_batch: numpy.zeros([BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])+0.2,
train_label_batch: numpy.zeros([BATCH_SIZE, 4])})
print(l)
if __name__ == "__main__":
main()
Ausgang:
9031.0
5.6838e+22
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
Sie könnten auch [this thread] (https://stackoverflow.com/q/33962226/1714410) relevant finden. Es ist markiert [Tag: Caffe], aber es kann für andere Deep Learning-Tools relevant sein, wie [Tag: Tensorflow]. – Shai