How do I define the model in TensorFlow?

I have defined a classification model in TensorFlow in two ways that I thought would have the same effect, but I actually got different results. The first way is to define the model through functions:
def network(x, mode_name):
    conv1 = conv_layer(x, conv_size=[13, 13, 1, 32], stride_size=[1, 1, 1, 1], name=mode_name + "conv1")
    maxp1 = pooling_layer(conv1, ksize=[1, 4, 4, 1], stride_size=[1, 4, 4, 1])
    conv2 = conv_layer(maxp1, conv_size=[7, 7, 32, 64], stride_size=[1, 1, 1, 1], name=mode_name + "conv2")
    maxp2 = pooling_layer(conv2, ksize=[1, 2, 2, 1], stride_size=[1, 2, 2, 1])
    conv3 = conv_layer(maxp2, conv_size=[3, 3, 64, 256], stride_size=[1, 1, 1, 1], name=mode_name + "conv3")
    maxp3 = pooling_layer(conv3, ksize=[1, 2, 2, 1], stride_size=[1, 2, 2, 1])
    shape = maxp3.get_shape().as_list()
    reshape = tf.reshape(maxp3, [shape[0], shape[1] * shape[2] * shape[3]])
    fc = fc_layer(reshape, label_number, name=mode_name + "fc")
    return fc

def fc_layer(prev_layer, n_weight, name):
    n_prev_weight = prev_layer.get_shape()[1]
    initer = tf.truncated_normal_initializer(stddev=0.0001)
    W = tf.get_variable(name + 'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer)
    b = tf.get_variable(name + 'b', dtype=tf.float32,
                        initializer=tf.constant(0.0001, shape=[n_weight], dtype=tf.float32))
    fc = tf.nn.bias_add(tf.matmul(prev_layer, W), b)
    return fc

def conv_layer(prev_layer, conv_size, stride_size, name):
    initer = tf.truncated_normal_initializer(stddev=0.0001)
    W = tf.get_variable(name + 'W', dtype=tf.float32, shape=conv_size,
                        initializer=initer)
    b = tf.get_variable(name + 'b', dtype=tf.float32,
                        initializer=tf.constant(0.0001, shape=[conv_size[3]], dtype=tf.float32))
    return tf.nn.relu(tf.nn.conv2d(prev_layer, W, stride_size, padding='VALID') + b)

def pooling_layer(prev_layer, ksize, stride_size):
    return tf.nn.max_pool(prev_layer, ksize=ksize, strides=stride_size, padding='VALID')
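As an aside: because every weight above is created with tf.get_variable, calling network() a second time with the same names only works inside a scope that permits reuse; otherwise TensorFlow raises a "Variable ... already exists" error. A minimal sketch of that mechanism (TF 1.x, names here are illustrative, not from the question):

import tensorflow as tf

with tf.variable_scope("demo"):
    v1 = tf.get_variable("w", shape=[3], initializer=tf.zeros_initializer())
with tf.variable_scope("demo", reuse=True):
    v2 = tf.get_variable("w", shape=[3])  # returns the variable created above
print(v1 is v2)  # True: both calls yield the same underlying variable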
These functions are then used in main:
graph = tf.Graph()
with graph.as_default():
    # input data
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size, image_size, image_channel))
    tf_train_labels = tf.placeholder(tf.float32,
                                     shape=(batch_size, label_number))
    tf_test_dataset = tf.constant(test_dataset)

    with tf.variable_scope("simple_cnn") as scope:
        logits = network(tf_train_dataset, "simple_cnn")

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    train_prediction = tf.nn.softmax(logits=logits)

    with tf.variable_scope("simple_cnn") as scope:
        scope.reuse_variables()
        test_prediction = tf.nn.softmax(network(tf_test_dataset, "simple_cnn"))
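For reference, minimize(loss) computes gradients for every variable in the graph's trainable collection by default, so one way to sanity-check this version is to print what the optimizer would update (a small sketch; the expected names follow from the scope and the name arguments above):

with graph.as_default():
    # All variables created via tf.get_variable above are trainable by default;
    # expected names look like simple_cnn/simple_cnnconv1W:0, simple_cnn/simple_cnnconv1b:0, ...
    for v in tf.trainable_variables():
        print(v.name, v.shape)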
The other way is to allocate all the variables first and define the model in main:
graph = tf.Graph()
with graph.as_default():
    # input data
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size, image_size, image_channel))
    tf_train_labels = tf.placeholder(tf.float32,
                                     shape=(batch_size, label_number))
    tf_test_dataset = tf.constant(test_dataset)

    initer = tf.truncated_normal_initializer(stddev=0.01)
    conv_w1 = tf.get_variable(name="conv_w1", dtype=tf.float32, shape=[13, 13, 1, 32], initializer=initer)
    conv_b1 = tf.get_variable(name="conv_b1", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[32], dtype=tf.float32))
    conv_w2 = tf.get_variable(name="conv_w2", dtype=tf.float32, shape=[7, 7, 32, 64], initializer=initer)
    conv_b2 = tf.get_variable(name="conv_b2", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[64], dtype=tf.float32))
    conv_w3 = tf.get_variable(name="conv_w3", dtype=tf.float32, shape=[3, 3, 64, 256], initializer=initer)
    conv_b3 = tf.get_variable(name="conv_b3", dtype=tf.float32,
                              initializer=tf.constant(0.01, shape=[256], dtype=tf.float32))
    fc_w = tf.get_variable(name="fc_w", dtype=tf.float32, shape=[2304, label_number], initializer=initer)
    fc_b = tf.get_variable(name="fc_b", dtype=tf.float32,
                           initializer=tf.constant(0.0001, shape=[label_number], dtype=tf.float32))

    def model(x):
        conv1 = tf.nn.conv2d(x, conv_w1, strides=[1, 1, 1, 1], padding='VALID') + conv_b1
        relu1 = tf.nn.relu(conv1)
        maxp1 = tf.nn.max_pool(relu1, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='VALID')
        conv2 = tf.nn.conv2d(maxp1, conv_w2, strides=[1, 1, 1, 1], padding='VALID') + conv_b2
        relu2 = tf.nn.relu(conv2)
        maxp2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        conv3 = tf.nn.conv2d(maxp2, conv_w3, strides=[1, 1, 1, 1], padding='VALID')  # note: conv_b3 is defined above but not added here
        relu3 = tf.nn.relu(conv3)
        maxp3 = tf.nn.max_pool(relu3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
        shape = maxp3.get_shape().as_list()
        reshape = tf.reshape(maxp3, [shape[0], shape[1] * shape[2] * shape[3]])
        fc = tf.nn.bias_add(tf.matmul(reshape, fc_w), fc_b)
        return fc

    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    train_prediction = tf.nn.softmax(logits=logits)
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
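Note that in this version model() simply closes over the variables defined above it, so calling it once for training and once for testing shares the same weights automatically, with no variable_scope/reuse machinery. A tiny illustration of the pattern (names are made up):

import tensorflow as tf

w = tf.get_variable("shared_w", shape=[4, 2],
                    initializer=tf.truncated_normal_initializer(stddev=0.01))

def tiny_model(x):
    return tf.matmul(x, w)  # every call reuses the same `w` via the closure

train_out = tiny_model(tf.ones([1, 4]))
test_out = tiny_model(tf.zeros([1, 4]))  # same weights, no reuse_variables() needed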
I think the two ways define the same model, but when I trained them it turned out that the first way did not reduce the loss at all, while the model defined the second way trained successfully. Never mind the model itself; I just wonder what caused such a difference. The input data are the same in both cases.
Looks like you haven't updated the variables. – velikodniy
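If that is the suspicion, a quick check is to read one weight back before and after a single optimizer step (a sketch against the first graph; fd is assumed to be a feed dict for one training batch, it is not defined in the question). It is also worth noting that the two snippets do not initialize identically: the first uses stddev=0.0001 and learning rate 0.05, the second stddev=0.01 and learning rate 0.1, so the first one's updates can be far smaller:

import numpy as np

with graph.as_default():  # the first graph from the question
    w = [v for v in tf.trainable_variables() if v.name.endswith("conv1W:0")][0]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        before = sess.run(w)
        sess.run(optimizer, feed_dict=fd)  # fd: one batch of inputs and labels (assumed)
        after = sess.run(w)
        print("max |update| =", np.abs(after - before).max())  # ~0 means nothing is learning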