First of all, thank you for reading and for helping. Queue.dequeue hangs in my TensorFlow input pipeline.
I am trying to build an input pipeline for MNIST classification with conv2d in TensorFlow. My dataset is the Kaggle CSV file, where each row has the layout: label, feature1, feature2, feature3, ..., feature784.
Here is the code:
"""
TENSORFLOW CNN IMPLEMENTATION FOR MNIST CLASSIFICATION WITH THE KAGGLE MNIST DATASET
AUTHOR: FERNANDO H. CANTERUCCIO
"""
#Importa dependencias
import numpy as np
import pandas as pd
import time
import tensorflow as tf
from math import ceil
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# TRAINING PARAMETERS
test_state = False        # quick smoke test: one epoch, no submission file
submission_state = True   # train on the full set and produce a Kaggle submission
continue_training = True  # restore the latest checkpoint instead of starting fresh
verbose = False           # print extra shape/debug information
run_n = 1                 # run index used to namespace checkpoints and summaries
if test_state:
    epochs = 1
    submission_state = False
else:
    epochs = 33  # number of training epochs
batch_size = 128     # samples per mini-batch
save_freq = 1000     # checkpoint every N iterations
summaries_freq = 33  # write TensorBoard summaries every N iterations
save_dir = "./save/"
save_name = "cnn.ckpt"
summaries_train_dir = './summaries/train/train{0}'.format(run_n)
summaries_eval_dir = './summaries/eval/eval{0}'.format(run_n)
if submission_state:
    # submission runs keep their own checkpoint and summary locations
    save_dir = "./savesubmission/"
    save_name = "cnnsubmit{0}.ckpt".format(run_n)
    summaries_train_dir = './summaries/train/submit{0}'.format(run_n)
    submit_file = "MNIST_Kaggle_submission_file{0}.csv".format(run_n)
# DEBUG
display_img = False  # when True, show n_display sample digits before training
n_display = 5        # how many sample digits display() shows
##############################################FUNÇOES AUXILIARES########################################################
def get_data(file, verbose=False):
    """Load the Kaggle MNIST training CSV and shape it for training.

    Args:
        file: path or file-like object accepted by pandas.read_csv. Expected
            layout: a header row, then one row per sample with the label in
            the first column and the pixels in the remaining columns.
        verbose: print shape information while loading (replaces the module
            global the original read implicitly).

    Returns:
        dict with keys:
            X: float32 images of shape (n, w, h, 1), pixels scaled to [0, 1]
            Y: one-hot uint8 label matrix of shape (n, n_classes)
            n_samples, n_features (image width after reshape), n_classes
    """
    data = pd.read_csv(file)
    if verbose:
        print('data({0[0]},{0[1]})'.format(data.shape))
    # Bug fix: np.float is plain float (float64) and is removed in modern
    # NumPy; the intent (per the original comment) was float32.
    images = data.iloc[:, 1:].values.astype(np.float32)
    images = np.multiply(images, 1.0 / 255.0)  # scale pixels [0, 255] -> [0.0, 1.0]
    image_size = images.shape[1]
    # images are assumed square, so width == height == sqrt(n_pixels)
    image_width = image_height = np.ceil(np.sqrt(image_size)).astype(np.uint8)
    if verbose:
        print('image_width => {0}\nimage_height => {1}'.format(image_width, image_height))
    images = images.reshape([-1, image_width, image_height, 1])
    # Bug fix: data[[0]] selects a column *labeled* 0, which does not exist
    # when the CSV has a header ('label', 'pixel0', ...) -> KeyError.
    # Use positional indexing for the first (label) column instead.
    labels = data.iloc[:, 0].values.ravel()
    n_classes = np.unique(labels).shape[0]  # number of distinct labels present
    n_labels = labels.shape[0]
    # one-hot encode via flat indexing: row i gets a 1 at column labels[i]
    index_offset = np.arange(n_labels) * n_classes
    labels_one_hot = np.zeros((n_labels, n_classes))
    labels_one_hot.flat[index_offset + labels.ravel()] = 1
    labels_one_hot = labels_one_hot.astype(np.uint8)
    if verbose:
        print("X shape: {0}, Y shape: {1}".format(images.shape, labels_one_hot.shape))
    return dict(
        Y=labels_one_hot,
        X=images,
        n_samples=int(data.shape[0]),
        n_features=int(images.shape[1]),
        n_classes=int(n_classes)
    )
def split_data(dataset, eval_size=0.2, verbose=False):
    """Split a dataset dict into training and evaluation partitions.

    Args:
        dataset: dict with keys X, Y and n_samples (as built by get_data).
        eval_size: fraction of the samples reserved for evaluation; the
            first int(n_samples * eval_size) rows become the eval split.
        verbose: print the split shapes (replaces the module global the
            original read implicitly).

    Returns:
        dict with training X/Y, eval X_eval/Y_eval, and the counts
        n_samples (train), n_eval, n_features, n_classes.
    """
    n_eval = int(dataset['n_samples'] * eval_size)
    eval_images = dataset['X'][:n_eval]
    eval_labels = dataset['Y'][:n_eval]
    train_images = dataset['X'][n_eval:]
    train_labels = dataset['Y'][n_eval:]
    if verbose:
        print('train images shape: {0}'.format(train_images.shape))
        print('eval images shape: {0}'.format(eval_images.shape))
    return dict(
        Y=train_labels,
        X=train_images,
        Y_eval=eval_labels,
        X_eval=eval_images,
        n_samples=int(train_images.shape[0]),
        n_eval=int(eval_images.shape[0]),
        n_features=int(train_images.shape[1]),
        n_classes=int(train_labels.shape[1])
    )
def iterator(data, batch_index, batch_size, shuffle):
    """Yield a single (X, Y) mini-batch from a dataset dict.

    Args:
        data: dict with keys X, Y and n_samples.
        batch_index: which deterministic slice to yield (shuffle=False only).
        batch_size: number of samples per batch.
        shuffle: when True, yield a random batch; when False, yield the
            batch_index-th contiguous slice.

    Yields:
        One (X_batch, Y_batch) tuple, or nothing for the trailing batch
        when n_samples divides evenly by batch_size.
    """
    if shuffle:
        # NOTE(review): np.random.choice samples WITH replacement here, so a
        # "shuffled epoch" may repeat/skip samples — confirm intended.
        index = np.random.choice(data['n_samples'], batch_size)
        yield (data['X'][index], data['Y'][index])
    elif (batch_index + 1) > (data['n_samples'] // batch_size):
        # Final, possibly partial batch: whatever is left after the full batches.
        start = (data['n_samples'] // batch_size) * batch_size
        remainder = data['n_samples'] % batch_size
        # Bug fix: the original yielded an EMPTY batch when the remainder was
        # zero, feeding zero-length arrays into the training step.
        if remainder:
            index = np.arange(start, start + remainder)
            yield (data['X'][index], data['Y'][index])
    else:
        index = np.arange(batch_index * batch_size, (batch_index + 1) * batch_size)
        yield (data['X'][index], data['Y'][index])
def get_epoch(data, epochs, batch_size, shuffle):
    """Yield one iterator() generator per mini-batch, for `epochs` passes.

    Bug fix: the batch count is now ceil(n_samples / batch_size) instead of
    n_samples // batch_size + 1, which requested one extra (empty) batch
    whenever n_samples was an exact multiple of batch_size.
    """
    n_batches = (data['n_samples'] + batch_size - 1) // batch_size
    for _ in range(epochs):
        for batch_index in range(n_batches):
            yield iterator(data, batch_index, batch_size, shuffle)
def display(X, label):
    """Show the first n_display images of X, each titled with the argmax of
    its one-hot label row (i.e. the digit class)."""
    for pos in range(n_display):
        digit = X[pos].reshape(28, 28)
        plt.axis('off')
        plt.imshow(digit, cmap=cm.binary)
        plt.title(str(np.argmax(label[pos])))
        plt.show()
##################################################CNN GRAPH#############################################################
def reset_graph():
    """Close a live session bound to the global name `sess`, if any, and
    clear the default TensorFlow graph."""
    live_session = globals().get('sess')
    if live_session:
        live_session.close()
    tf.reset_default_graph()
def model():
    """Build the CNN graph: three conv/pool/dropout blocks, one fully
    connected layer, and a softmax output over the 10 digit classes.

    Also wires up a loss-plateau-driven learning-rate decay ("AdaMi") and
    all TensorBoard summaries.

    Returns:
        dict of graph handles (placeholders, train op, metrics, summary op
        and a Saver) consumed by the train/eval/predict drivers.
    """
    reset_graph()
    # Inputs: 28x28x1 grayscale images and one-hot labels over 10 digits.
    X = tf.placeholder(tf.float32, [None, 28, 28, 1])
    Y = tf.placeholder(tf.float32, [None, 10])
    dropout = tf.placeholder(tf.float32)  # keep-probability, fed per run
    with tf.name_scope('trainning_time'):
        # NOTE: shadows the `time` module inside this function (summary input only).
        time = tf.placeholder(tf.float32)
    with tf.name_scope('mi_params'):
        window = tf.placeholder(tf.int32)          # averaging window over the loss history
        loss = tf.placeholder(tf.float32, [None])  # recent training-loss history
    p_keep_conv = p_keep_hidden = dropout
    with tf.variable_scope('weights') as scope:
        w1a = init_var('w1a', [3, 3, 1, 8])      # 3x3x1 conv, 8 outputs
        B1a = init_var('B1a', [8])
        w1b = init_var('w1b', [3, 3, 8, 16])     # 3x3x8 conv, 16 outputs
        B1b = init_var('B1b', [16])
        w2a = init_var('w2a', [3, 3, 16, 32])    # 3x3x16 conv, 32 outputs
        B2a = init_var('B2a', [32])
        w3a = init_var('w3a', [3, 3, 32, 64])    # 3x3x32 conv, 64 outputs
        B3a = init_var('B3a', [64])
        w4 = init_var('w4', [64 * 4 * 4, 1024])  # FC: 64*4*4 inputs (28->14->7->4 pooling), 1024 outputs
        B4 = init_var('B4', [1024])
        w_o = init_var('w_o', [1024, 10])        # FC: 1024 inputs, 10 outputs (labels)
        B_o = init_var('B_o', [10])
        # Multiplicative decay state for the learning rate ("AdaMi").
        adami = tf.Variable(tf.constant(0.999999, dtype=tf.float32), 'adami')
    with tf.variable_scope('cells') as scope:
        l1a = tf.nn.elu(tf.add(tf.nn.conv2d(X, w1a,
                                            strides=[1, 1, 1, 1], padding='SAME'), B1a))
        l1b = tf.nn.elu(tf.add(tf.nn.conv2d(l1a, w1b,
                                            strides=[1, 1, 1, 1], padding='SAME'), B1b))
        l1 = tf.nn.max_pool(l1b, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
        l1 = tf.nn.dropout(l1, p_keep_conv)
        l2a = tf.nn.elu(tf.add(tf.nn.conv2d(l1, w2a,
                                            strides=[1, 1, 1, 1], padding='SAME'), B2a))
        l2 = tf.nn.max_pool(l2a, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
        l2 = tf.nn.dropout(l2, p_keep_conv)
        l3a = tf.nn.elu(tf.add(tf.nn.conv2d(l2, w3a,
                                            strides=[1, 1, 1, 1], padding='SAME'), B3a))
        l3 = tf.nn.max_pool(l3a, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
        l3 = tf.nn.dropout(l3, p_keep_conv)
        # flatten the last conv block to the fully-connected input width
        l3_plain = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]])
        l4 = tf.nn.elu(tf.add(tf.matmul(l3_plain, w4), B4))
        l4 = tf.nn.dropout(l4, p_keep_hidden)
    logits = tf.add(tf.matmul(l4, w_o), B_o)
    prediction = tf.argmax(tf.nn.softmax(logits), 1)
    with tf.name_scope('total_loss'):
        # Bug fix: from TF 1.0 on, softmax_cross_entropy_with_logits only
        # accepts keyword arguments (labels=..., logits=...); the positional
        # call silently swapped meaning on older versions.
        total_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))
    with tf.name_scope('learning_rate'):
        learning_rate = tf.add(tf.nn.relu(tf.multiply(7e-4, adami)), 1e-12)

    def adami_down(adami, learning_rate):
        # Decay the adami variable in place and return its updated value.
        # Bug fix: tf.sub was removed in TF 1.0 — use tf.subtract.
        new = tf.add(tf.nn.relu(tf.subtract(adami, tf.multiply(adami, learning_rate))), 1e-6)
        with tf.control_dependencies([tf.assign(adami, new)]):
            return tf.identity(adami)

    # Decay the learning rate whenever the recent loss window stops improving.
    # NOTE(review): slicing `loss` with the placeholder-valued `window` relies
    # on strided-slice accepting tensor indices — confirm on the TF version in use.
    adami = tf.cond(tf.reduce_mean(loss[-window:]) >= tf.reduce_mean(loss[-(2 * window):-(window)]),
                    lambda: adami_down(adami, learning_rate), lambda: tf.identity(adami))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    tf.add_to_collection('train_step', train_step)
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.add_to_collection('accuracy', accuracy)
    # TensorBoard summaries: weights, activations and training scalars.
    tf.summary.histogram('w1a', w1a)
    tf.summary.histogram('B1a', B1a)
    tf.summary.histogram('w1b', w1b)
    tf.summary.histogram('B1b', B1b)
    tf.summary.histogram('w2a', w2a)
    tf.summary.histogram('B2a', B2a)
    tf.summary.histogram('w3a', w3a)
    tf.summary.histogram('B3a', B3a)
    tf.summary.histogram('w_o', w_o)
    tf.summary.histogram('B_o', B_o)
    tf.summary.histogram('l1a', l1a)
    tf.summary.histogram('l1b', l1b)
    tf.summary.histogram('l1_pool', l1)
    tf.summary.histogram('l2a', l2a)
    tf.summary.histogram('l2_pool', l2)
    tf.summary.histogram('l3a', l3a)
    tf.summary.histogram('l3_pool', l3)
    tf.summary.histogram('l4_fully', l4)
    elapsed_time = tf.summary.scalar('trainning_time', time)
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('learning_rate', learning_rate)
    tf.summary.scalar('adami', adami)
    tf.summary.scalar('dropout_keep_probability', dropout)
    summary_op = tf.summary.merge_all()
    return dict(
        x=X,
        y=Y,
        learning_rate=learning_rate,
        pred=prediction,
        train_step=train_step,
        total_loss=total_loss,
        accuracy=accuracy,
        adami=adami,
        window=window,
        loss=loss,
        dropout=dropout,
        time=time,
        elapsed_time=elapsed_time,
        summaries=summary_op,
        saver=tf.train.Saver()
    )
def init_var(name, shape):
    """Create (or fetch, under variable reuse) a variable of the given shape
    with Xavier/Glorot initialization."""
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape, initializer=xavier)
################################################# TRAINING #############################################################
def train_network(data, g, epochs, batch_size, save, verbose=True):
    """Train the CNN, checkpointing and logging summaries along the way.

    Args:
        data: dataset dict with X, Y and n_samples.
        g: graph-handle dict returned by model().
        epochs, batch_size: training schedule.
        save: checkpoint path (string) passed to the Saver.
        verbose: print per-iteration progress lines.

    Returns:
        [training_losses, accu]: loss history (pre-seeded with 2.5 entries
        for the decay window) and per-iteration accuracy history.
    """
    with tf.Session(graph=tf.get_default_graph()) as sess:
        # Bug fix: tf.train.SummaryWriter is the pre-1.0 API and is gone in
        # the TF versions that provide tf.summary.* (used elsewhere here);
        # tf.summary.FileWriter is its replacement.
        train_writer = tf.summary.FileWriter(summaries_train_dir,
                                             sess.graph)
        if continue_training == False:
            sess.run(tf.global_variables_initializer())
            print("Model created.")
            if isinstance(save, str):
                g['saver'].save(sess, save)
                print('Model Saved.')
        else:
            # NOTE(review): this imports the checkpoint's meta-graph on top of
            # the freshly built graph in `g`, duplicating every node — the
            # restored weights do not flow into the ops run below. Confirm.
            new_saver = tf.train.import_meta_graph('{0}.meta'.format(tf.train.latest_checkpoint('{0}'.format(save_dir))))
            new_saver.restore(sess, tf.train.latest_checkpoint('{0}'.format(save_dir)))
            print("Model Restored.")
            print("Resuming Training.")
        sess.graph.finalize()
        training_losses = []
        accu = []
        window = int(42)
        # seed the loss history so the decay comparison has 2*window entries
        for k in range(2 * window):
            training_losses.append(2.5)
        # each `epoch` here is a generator yielding a single mini-batch
        for idx, epoch in enumerate(get_epoch(data, epochs, batch_size, shuffle=True)):
            t = time.time()
            training_loss = 0
            accuracy = 0
            steps = 0
            for X, Y in epoch:
                feed_dict = {g['x']: X,
                             g['y']: Y,
                             g['window']: window,
                             g['loss']: training_losses,
                             g['dropout']: 0.72,
                             g['time']: time.time() - t,
                             }
                training_loss_, _, accuracy_, adami, learning_rate = sess.run([g['total_loss'],
                                                                               g['train_step'],
                                                                               g['accuracy'],
                                                                               g['adami'],
                                                                               g['learning_rate']],
                                                                              feed_dict)
                training_loss += training_loss_
                accuracy += accuracy_
                elapsed_time = sess.run(g['elapsed_time'], feed_dict={g['time']: time.time() - t})
                train_writer.add_summary(elapsed_time, global_step=int((idx * batch_size) + steps))
                if idx % summaries_freq == 0:
                    summary = sess.run(g['summaries'], feed_dict)
                    train_writer.add_summary(summary, global_step=int((idx * batch_size) + steps))
                steps += 1
            training_losses.append(training_loss / steps)
            accu.append(accuracy / steps)
            if idx % save_freq == 0 and idx != 0:
                if isinstance(save, str):
                    g['saver'].save(sess, save, global_step=idx)
                    print('Model Saved.')
            n_iterations = ((epochs * data['n_samples']) // batch_size)
            if verbose:
                print("Training loss for iteration {0}/{1}: {2}, Learning Rate:{6}, AdaMi: {8},"
                      " Accuracy: {7} %, ETC: {3}:{4}:{5}".format(idx,
                                                                  n_iterations,
                                                                  training_loss / steps,
                                                                  int(((time.time() - t) * (n_iterations - idx + 1)) // 3600),
                                                                  int((((time.time() - t) * (n_iterations - idx + 1)) % 3600) // 60),
                                                                  int((((time.time() - t) * (n_iterations - idx + 1)) % 3600) % 60),
                                                                  learning_rate,
                                                                  (accuracy / steps) * 100,
                                                                  adami))
                print("It took {0} seconds to train this epoch.".format(time.time() - t))
        g['saver'].save(sess, save)
        print('Model Saved.')
        return [training_losses, accu]
# ---- training driver ----
g = model()
t = time.time()
mnist = get_data(file='train.csv')  # load the Kaggle training CSV
if verbose == True:
    # label matrix shape, for checking
    print('labels shape: ({0} samples, {1} classes)'.format(mnist['n_samples'], mnist['n_classes']))
    # image tensor shape, for checking
    print(
        'images shape: ({0} samples, {1[1]} x {1[2]} x {1[3]} pixels)'.format(mnist['n_samples'], mnist['X'].shape))
# split into train/eval, unless the whole set is used for a submission run
if not submission_state:
    data = split_data(mnist)
if submission_state:
    data = mnist
# Bug fix: this display block originally ran BEFORE `data` was assigned
# (NameError whenever display_img was True) and wrapped display() in a
# range(n_display) loop even though display() already shows n_display images.
if display_img == True:
    display(data['X'], data['Y'])
print("Starting Trainning")
losses, accuracy = train_network(data, g, epochs, batch_size, save="{0}{1}".format(save_dir, save_name))
print("It took {0} seconds to train for {1} epochs.".format(time.time() - t, epochs))
print("The average loss on the final epoch was:", np.mean(losses[-1]))
##################################################### EVAL #############################################################
def eval_network(data, g, batch_size, verbose):
    """Evaluate the trained CNN on `data` for one pass, without dropout.

    Args:
        data: dataset dict with X, Y and n_samples.
        g: graph-handle dict returned by model().
        batch_size: evaluation batch size.
        verbose: print per-batch loss/accuracy.

    Returns:
        [eval_losses, accu]: loss history (pre-seeded with zeros for the
        decay window) and per-batch accuracy history.
    """
    with tf.Session(graph=tf.get_default_graph()) as sess:
        # Bug fix: tf.train.SummaryWriter is the pre-1.0 API; the replacement
        # in the tf.summary module (already used here) is FileWriter.
        eval_writer = tf.summary.FileWriter(summaries_eval_dir)
        new_saver = tf.train.import_meta_graph('{0}.meta'.format(tf.train.latest_checkpoint('{0}'.format(save_dir))))
        new_saver.restore(sess, tf.train.latest_checkpoint('{0}'.format(save_dir)))
        print("Model Restored.")
        sess.graph.finalize()
        eval_losses = []
        accu = []
        window = int(7)
        # seed the loss history fed into the decay-window placeholder
        for k in range(2 * window):
            eval_losses.append(0)
        for idx, epoch in enumerate(get_epoch(data, 1, batch_size, shuffle=False)):
            eval_loss = 0
            steps = 0
            accuracy = 0
            t = time.time()
            for X, Y in epoch:
                feed_dict = {g['x']: X,
                             g['y']: Y,
                             g['window']: window,
                             g['loss']: eval_losses,
                             g['dropout']: 1.0,  # no dropout at eval time
                             g['time']: time.time() - t,
                             }
                eval_loss_, pred, accuracy_ = sess.run([g['total_loss'],
                                                        g['pred'],
                                                        g['accuracy']],
                                                       feed_dict)
                if idx % summaries_freq == 0:
                    summary = sess.run(g['summaries'], feed_dict)
                    eval_writer.add_summary(summary, global_step=int((idx * batch_size) + steps))
                eval_loss += eval_loss_
                accuracy += accuracy_
                steps += 1
            if verbose:
                print("Average eval loss: {0}, Accuracy: {1} % ".format(eval_loss / steps, (accuracy / steps) * 100))
                print("It took", time.time() - t, "seconds to eval this epoch.")
            eval_losses.append(eval_loss / steps)
            accu.append(accuracy / steps)
        return [eval_losses, accu]
# ---- evaluation driver ----
t = time.time()
# NOTE(review): this builds a fresh graph while eval_network() then also
# imports the checkpoint's meta-graph on top of it — confirm this
# duplication is intended.
g = model()
if not submission_state:
    # evaluate on the held-out split produced by split_data()
    data = dict(X = data['X_eval'], Y = data['Y_eval'], n_samples = data['n_eval'])
# batch count is driven by the data size and the batch size
print("Evaluating NN")
losses, accuracy = eval_network(data, g, batch_size, verbose)
print("It took {0} seconds to eval".format(time.time() - t,))
print("The average loss was: {0}, and the accuracy was: {1} %".format(np.mean(losses), np.mean(accuracy) * 100))
########################################## PREDICT AND SAVE FOR SUBMISSION #############################################
def get_pred_data(file, verbose=False):
    """Load the unlabeled Kaggle test CSV and shape it for prediction.

    Args:
        file: path or file-like object accepted by pandas.read_csv; rows
            contain pixels only (no label column).
        verbose: print shape information while loading (replaces the module
            global the original read implicitly).

    Returns:
        dict with keys:
            X: float32 images of shape (n, w, h, 1), pixels scaled to [0, 1]
            Y: all-zero placeholder label matrix of shape (n, 10)
            n_samples, n_features (image width after reshape)
    """
    data = pd.read_csv(file)
    if verbose:
        print('data({0[0]},{0[1]})'.format(data.shape))
    # Bug fix: np.float is plain float (float64) and is removed in modern
    # NumPy; the intent (per the original comment) was float32.
    images = data.values.astype(np.float32)
    images = np.multiply(images, 1.0 / 255.0)  # scale pixels [0, 255] -> [0.0, 1.0]
    image_size = images.shape[1]
    # images are assumed square, so width == height == sqrt(n_pixels)
    image_width = image_height = np.ceil(np.sqrt(image_size)).astype(np.uint8)
    if verbose:
        print('image_width => {0}\nimage_height => {1}'.format(image_width, image_height))
    images = images.reshape([-1, image_width, image_height, 1])
    # dummy all-zero labels so the prediction path can reuse the same feeds
    labels = np.zeros((int(data.shape[0]), 10))
    if verbose:
        print("X shape: {0}".format(images.shape))
    return dict(
        X=images,
        Y=labels,
        n_samples=int(data.shape[0]),
        n_features=int(images.shape[1]),
    )
def predict_logits(data, g, batch_size, verbose):
    """Restore the latest checkpoint and predict a label for every sample,
    returning a one-column DataFrame indexed from 1 (Kaggle ImageId order)."""
    with tf.Session(graph=tf.get_default_graph()) as sess:
        checkpoint = tf.train.latest_checkpoint('{0}'.format(save_dir))
        new_saver = tf.train.import_meta_graph('{0}.meta'.format(checkpoint))
        new_saver.restore(sess, checkpoint)
        print("Model Restored.")
        sess.graph.finalize()
        predictions = []
        index = []
        # one deterministic pass over the data; each `epoch` yields one batch
        for idx, epoch in enumerate(get_epoch(data, 1, batch_size, shuffle=False)):
            steps = 0
            t = time.time()
            for X, Y in epoch:
                batch_pred = sess.run(g['pred'], {g['x']: X,
                                                  g['dropout']: 1.0,  # no dropout at inference
                                                  })
                predictions.extend(batch_pred.ravel())
                steps += 1
            if verbose:
                print("It took", time.time() - t, "seconds to make the predictions.")
        # Kaggle submissions are 1-indexed by ImageId
        answer = []
        for position, label in enumerate(predictions):
            index.append(position + 1)
            answer.append(label)
        predictions = pd.DataFrame(answer, index=index)
        print("Predictions:\n Index, Label\n", predictions)
        return predictions
# ---- prediction / submission driver ----
if submission_state or test_state:
    mnist = get_pred_data(file='test.csv')  # load the unlabeled Kaggle test CSV
    if verbose == True:
        # label matrix shape, for checking
        print('labels shape: ({0} samples)'.format(mnist['n_samples']))
        # image tensor shape, for checking
        print('images shape: ({0} samples, {1[1]} x {1[2]} x {1[3]} pixels)'.format(mnist['n_samples'], mnist['X'].shape))
    t = time.time()
    g = model()
    print("Making Predictions")
    predictions = predict_logits(mnist, g, batch_size, verbose)
    print("It took {0} seconds to predict logits".format(time.time() - t,))
    print("Saving predictions to csv file.")
    predictions.to_csv(submit_file, index_label=['ImageId'], header=['Label'])
    print("File saved.")
# final cleanup: close any open session and clear the default graph.
# NOTE(review): indentation was lost in the paste; reset_graph() is placed at
# top level so cleanup always runs — confirm it was not meant to be inside the if.
reset_graph()
What happens is that it hangs after this point.
I can see in TensorBoard that my queues never get filled, but I cannot find out why.
Any help will be appreciated!!!
Yeah... this must be the worst question ever... thanks for pointing that out. I fixed that part, but now I get the following error: TypeError: 'FIFOQueue' object is not iterable. What am I getting wrong? –
I think you want `X, Y = tf.train.batch([features, labels], ...)` rather than creating a `tf.train.slice_input_producer()`. – mrry
I tried what you said, but got: OutOfRangeError (see above for traceback): FIFOQueue '_1_batch/fifo_queue' is closed and has insufficient elements (requested 128, current size 0) [[Node: batch = QueueDequeueMany[_class=["loc:@batch/fifo_queue"], component_types=[DT_FLOAT, DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batch/fifo_queue, batch/n)]] –