Ich habe ein Projekt, in dem ich ein Faltungsnetzwerk verwenden muss, die ein Bild anstelle von Logit Klasse Prädiktoren ausgeben wird, unternommen. Zu diesem Zweck habe ich Adapter der CNN-Code, den ich heruntergeladen habe https://github.com/aymericdamien/TensorFlow-ExamplesTensorflow Convolutional Network, das ein Bild (keine Logits)
Adapter Meine Daten sind 64x64 Bilder aus einer Binärdatei gelesen. Die Binärdatei besteht aus Aufzeichnungen von zwei 64 × 64-Bildern in Folge. Ich muss eine Kostenfunktion minimieren, die die Differenz zwischen dem zweiten Bild und der 64x64-Ausgabe des Netzwerks ist.
Dies ist das Modul die ich geschrieben habe, die Eingangsdaten zu lesen:
import tensorflow as tf
# various initialization variables
BATCH_SIZE = 128
N_FEATURES = 9
# This function accepts a tensor of size [batch_size, 2 ,record_size]
# and segments in into two tensors of size [batch_size, record] along the second dimension
# IMPORTANT: to be executed within an active session
def segment_batch(batch_p, batch_size, n_input):
batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor
batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input]) # GT data tensor
optical = tf.reshape([batch_xs], [batch_size, n_input])
gt = tf.reshape([batch_ys], [batch_size, n_input])
return [optical, gt]
def batch_generator(filenames, record_size, batch_size):
""" filenames is the list of files you want to read from.
record_bytes: The size of a record in bytes
batch_size: The size a data batch (examples/batch)
"""
filename_queue = tf.train.string_input_producer(filenames)
reader = tf.FixedLengthRecordReader(record_bytes=2*record_size) # record size is double the value given (optical + ground truth images)
_, value = reader.read(filename_queue)
# read in the data (UINT8)
content = tf.decode_raw(value, out_type=tf.uint8)
# The bytes read represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
# read optical data slice
depth_major = tf.reshape(
tf.strided_slice(content, [0],
[record_size]),
[1, 64, 64])
# read GT (ground truth) data slice
depth_major1 = tf.reshape(
tf.strided_slice(content, [record_size],
[2*record_size]),
[1, 64, 64])
# Optical data
# Convert from [depth, height, width] to [height, width, depth].
uint8image = tf.transpose(depth_major, [1, 2, 0])
uint8image = tf.reshape(uint8image, [record_size]) # reshape into a single-dimensional vector
uint8image = tf.cast(uint8image, tf.float32) # cast into a float32
uint8image = uint8image/255 # normalize
# Ground Truth data
# Convert from [depth, height, width] to [height, width, depth].
gt_image = tf.transpose(depth_major1, [1, 2, 0])
gt_image = tf.reshape(gt_image, [record_size]) # reshape into a single-dimensional vector
gt_image = tf.cast(gt_image, tf.float32) # cast into a float32
gt_image = gt_image/255 # normalize
# stack them into a single features tensor
features = tf.stack([uint8image, gt_image])
# minimum number elements in the queue after a dequeue, used to ensure
# that the samples are sufficiently mixed
# I think 10 times the BATCH_SIZE is sufficient
min_after_dequeue = 10 * batch_size
# the maximum number of elements in the queue
capacity = 20 * batch_size
# shuffle the data to generate BATCH_SIZE sample pairs
data_batch = tf.train.shuffle_batch([features], batch_size=batch_size,
capacity=capacity, min_after_dequeue=min_after_dequeue)
return data_batch
Dies ist der Hauptcode meiner Umsetzung:
from __future__ import print_function
# Various initialization variables
DATA_PATH_OPTICAL_TRAIN = 'data/building_ground_truth_for_training.bin'
DATA_PATH_EVAL = 'data/building_ground_truth_for_eval.bin'
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
# custom imports
import data_reader2
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 128
epochs = 10
display_step = 10
rows = 64
cols = 64
# Network Parameters
n_input = 4096 # optical image data (img shape: 64*64)
n_classes = 4096 # output is an image of same resolution as initial image
dropout = 0.75 # Dropout, probability to keep units
# input data parameters
record_size = 64**2
total_bytes_of_optical_binary_file = 893329408 # total size of binary file containing training data ([64z64 optical] [64x64 GT])
# create the data batches (queue)
# Accepts two parameters. The tensor containing the binary files and the size of a record
data_batch = data_reader2.batch_generator([DATA_PATH_OPTICAL_TRAIN],record_size, batch_size) # train set
data_batch_eval = data_reader2.batch_generator([DATA_PATH_EVAL],record_size, batch_size) # train set
##############################################################
######################### FUNCTIONS ##########################
##############################################################
# extract optical array from list
# A helper function. Data returned from segment_batch is a list which contains two arrays.
# The first array contains the optical data while the second contains the ground truth data
def extract_optical_from_list(full_batch):
optical = full_batch[0] # extract array from list
return optical
# extract ground truth array from list
# A helper function. Data returned from segment_batch is a list which contains two arrays.
# The first array contains the optical data while the second contains the ground truth data
def extract_gt_from_list(full_batch):
gt = full_batch[1] # extract array from list
return gt
# This function accepts a tensor of size [batch_size, 2 ,record_size]
# and segments in into two tensors of size [batch_size, record] along the second dimension
# IMPORTANT: to be executed within an active session
def segment_batch(batch_p):
batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor
batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input]) # GT data tensor
optical = tf.reshape([batch_xs], [batch_size, n_input])
gt = tf.reshape([batch_ys], [batch_size, n_input])
return [optical, gt]
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture into 64x64 subimages [rows, rows, cols, channels]
x1 = tf.reshape(x, shape=[-1, rows, cols, 1]) # this is the 4-dimensional that tf.conv2D expects as Input
# Convolution Layer
conv1 = conv2d(x1, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = maxpool2d(conv2, k=2)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
#fc1 = tf.nn.dropout(fc1, dropout)
# Output image (edge), prediction
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
# Add print operation
out = tf.Print(out, [out], message="This is out: ")
return [out, x]
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([16*16*64, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
####################################################################
##################### PLACEHOLDERS #################################
####################################################################
# tf Graph input (only pictures)
X = tf.placeholder_with_default(extract_optical_from_list(segment_batch(data_batch)), [batch_size, n_input])
####################################################################
##################### END OF PLACEHOLDERS ##########################
####################################################################
# tf Graph input
keep_prob = tf.Variable(dropout) #dropout (keep probability)
# Construct model
pred = conv_net(extract_optical_from_list(X), weights, biases, keep_prob) # x[0] is the optical data
y_true = extract_gt_from_list(extract_gt_from_list(X)) # y_true is the ground truth data
# Define loss and optimizer
cost = tf.reduce_mean(tf.pow(y_true - pred[0], 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
print("Optimizing")
sess.run(optimizer)
print("Iter " + str(step*batch_size))
step += 1
print("Optimization Finished!")
Nach viel mit der Form zwicken Den Tensoren gelang es, die Syntaxfehler zu beheben. Unglücklicherweise hängt es einfach in dem Moment, in dem es beginnt, den Optimierungsteil des Graphen auszuführen. Da ich keine Möglichkeit habe, dies zu debuggen (fand sehr knappe Informationen zur Verwendung des Tensorflow-Debuggers), bin ich wirklich ratlos, was falsch gelaufen ist! Wenn jemand mit mehr Erfahrung auf Tensorflow darauf hinweisen kann, was mit diesem Code falsch ist, würde es mir sehr helfen.
Vielen Dank im Voraus
Was meine ich mit "es hängt einfach"? Nichts passiert? Wie lange hast du gewartet? Die Warteschlange muss zuerst initialisiert und mit _capacity_ images gefüllt werden. Aus Gründen des "Debuggens" können Sie versuchen, die Kapazität auf eine niedrigere Zahl zu reduzieren, um zu prüfen, ob dies tatsächlich der Fehler ist. – aseipel