TensorFlow GPU version error: CNN/cuDNN on Windows 10

I am trying to train a CNN on CIFAR-100 with Python and TensorFlow, but the errors CUDA_ERROR_OUT_OF_MEMORY, CUDNN_STATUS_NOT_INITIALIZED, and CUDNN_STATUS_BAD_PARAM keep getting in my way. I am running TensorFlow inside an Anaconda virtual environment on my machine; the Python version is Anaconda Python 3.5 (virtual environment) and the TensorFlow version is 1.1.0. Here is my code:

tf_cifar_learning.py:
# Set working directory
import os
dir_model = "c:/tf_model_cifar100"
# Modules needed
import numpy as np
import tensorflow as tf
import pandas as pd
from mlxtend.preprocessing import one_hot
# Load CIFAR Data
from batch import next_batch
from read import unpickle
import time
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.995)
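# Note: per_process_gpu_memory_fraction=0.995 makes TensorFlow try to grab
# ~99.5% of the card's memory in one up-front allocation; 0.995 * 2 GB is the
# 1.99G request seen failing in the log below, since the desktop already
# occupies part of the 2 GB.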
# Prepare test data
testdata = unpickle('test')[b'data']
testdata1 = testdata.astype('float')
del testdata
testdata = testdata1[0:5000, :]
testlabel = unpickle('test')[b'coarse_labels'][0:5000]
testlabel = one_hot(testlabel, 100)
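# Note: b'coarse_labels' holds the 20 superclass labels (values 0-19), while
# batch.py trains on b'fine_labels' (values 0-99); one_hot(..., 100) still
# runs, but the test and training labels live in different label spaces.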
for i in range(testdata.shape[0]):
    for j in range(3072):
        testdata[i][j] = float(testdata[i][j])/255.0
    if(i % 1000 == 0):
        print("%d of 5000 test datasets processed" % i)
# Parameters
learning_rate = 0.001
training_iters = 1000000
batch_size = 10 # 128
display_step = 2
# Network Parameters
n_input = 1024*3 # CIFAR data input (img shape: 32*32*3 = 3072)
n_classes = 100 # CIFAR total classes
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    o = tf.nn.relu(x)
    return o
def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    o = tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')
    return o
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 32, 32, 3])
    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling)
    conv1 = maxpool2d(conv1, k=2)
    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    conv2 = maxpool2d(conv2, k=2)
    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)
    # Output, class prediction
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out
# Store layers weight & bias
weights = {
    # 5x5 conv, 3 input channels, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # fully connected, 8*8*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.random_normal([8*8*64, 1024])),
    # 1024 inputs, 100 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
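# Note: the 8*8*64 input size of 'wd1' follows from the two k=2 max-pools
# halving the 32x32 input twice (32 -> 16 -> 8), with 64 channels from conv2.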
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    sess.run(init)
    step = 1
    # Time measuring
    t1 = time.time()
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        # Prepare training batch
        batch_x, batch_y = next_batch(batch_size)
        batch_x1 = np.zeros([batch_size, 3072], dtype="float32")
        for i in range(batch_size):
            for j in range(3072):
                batch_x1[i][j] = batch_x[i][j]/255.0
            #if(i % 200 == 0):
            #    print("%d of %d training batch images processed" % (i, batch_size))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x1, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x1, y: batch_y, keep_prob: 1.})
            # Calculate accuracy for all test samples
            acc = accuracy.eval({x: testdata, y: testlabel, keep_prob: 1.})
            # Time measuring
            t2 = time.time()
            tmp = t2 - t1
            sec = tmp % 60
            m = int(tmp/60)
            print("Iter# %8d" % (step*batch_size) +
                  ", Minibatch Loss= %16.10f" % (loss) +
                  ", Testing Accuracy= %8.6f" % (acc) +
                  ", Training currently elapsed " +
                  "{:d} mins {:f} secs".format(m, sec))
        step += 1
    print("Optimization Finished!")
    # Save the model after learning
    model_saver = tf.train.Saver()
    model_saver.save(sess, dir_model + "/CIFAR-100_cnn_model.chkp")
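As a side note on the session setup above: here is a minimal sketch of a more conservative GPU configuration, assuming the TensorFlow 1.x ConfigProto API (the variable name config is mine), which lets TensorFlow grow its allocation on demand instead of pre-allocating 99.5% of the card:

import tensorflow as tf

# Sketch: allocate GPU memory lazily rather than reserving ~99.5% up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Alternatively, cap the fraction well below 1.0:
# config.gpu_options.per_process_gpu_memory_fraction = 0.6

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # ... training loop as above ...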
batch.py:
def next_batch(batch_size, onehot=True):
    class a:
        try:
            temp = current_batch
        except NameError:
            current_batch = 0
    import numpy as np
    from read import unpickle
    import tensorflow as tf
    #from mlxtend.preprocessing import one_hot
    dict_data = unpickle('train')
    label = np.array(dict_data[b'fine_labels'][a.current_batch:a.current_batch+batch_size])
    a1 = dict_data[b'data']
    a2 = a1[a.current_batch:a.current_batch+batch_size, :]
    a.current_batch += batch_size
    a2 = np.reshape(a2, (batch_size, 3072))
    with tf.device('/cpu:0'):
        if(onehot==True):
            label = tf.Session().run(tf.one_hot(label, 100))
    return a2, label
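A note on the class a: construct above: the class body is re-executed on every call, and since no module-level current_batch exists, the NameError branch resets the counter to 0 each time, so (if I read the scoping right) every call returns the same first batch_size samples. A minimal sketch of a counter that actually persists across calls, using a function attribute (the name next_batch2 and the attribute _offset are mine):

import numpy as np
from read import unpickle

def next_batch2(batch_size):
    # Keep the read position on the function object so it survives calls.
    offset = getattr(next_batch2, '_offset', 0)
    dict_data = unpickle('train')  # note: re-reads the file every call, as above
    label = np.array(dict_data[b'fine_labels'][offset:offset+batch_size])
    data = dict_data[b'data'][offset:offset+batch_size, :]
    next_batch2._offset = offset + batch_size
    return data, label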
read.py:
def unpickle(file):
    import pickle
    with open(file, 'rb') as a:
        dict = pickle.load(a, encoding='bytes')
    return dict
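For reference, a quick usage sketch (assuming the standard CIFAR-100 python pickle files, which use byte-string keys):

d = unpickle('train')
print(sorted(d.keys()))   # includes b'coarse_labels', b'data', b'fine_labels'
print(d[b'data'].shape)   # (50000, 3072): 50000 images, 32*32*3 flattened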
From the Windows CMD prompt, python tf_cifar_learning.py gives this
output:
(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>python tf_cifar_learning.py
0 of 5000 test datasets processed
1000 of 5000 test datasets processed
2000 of 5000 test datasets processed
3000 of 5000 test datasets processed
4000 of 5000 test datasets processed
2017-05-02 17:48:46.635855: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.635975: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.637256: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.638434: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.638939: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.639456: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.641753: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.641909: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
2017-05-02 17:48:46.994154: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:887] Found device 0 with properties:
name: GeForce GT 730
major: 3 minor: 5 memoryClockRate (GHz) 0.9015
pciBusID 0000:01:00.0
Total memory: 2.00GiB
Free memory: 1.66GiB
2017-05-02 17:48:46.994318: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:908] DMA: 0
2017-05-02 17:48:46.997080: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:918] 0: Y
2017-05-02 17:48:46.997985: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0)
2017-05-02 17:48:46.999359: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.99G (2136745984 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2017-05-02 17:48:46.999434: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.79G (1923071488 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY
2017-05-02 17:48:47.766766: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0)
2017-05-02 17:48:48.334298: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:359] could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2017-05-02 17:48:48.334466: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:366] error retrieving driver version: Unimplemented: kernel reported driver version not implemented on Windows
2017-05-02 17:48:48.343454: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:326] could not destroy cudnn handle: CUDNN_STATUS_BAD_PARAM
2017-05-02 17:48:48.343558: F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\kernels\conv_ops.cc:659] Check failed: stream->parent()->GetConvolveAlgorithms(&algorithms)
(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>
Then Windows 10 says that Python has stopped working and kills it immediately. Can somebody please tell me what the problem is, and tell me (or maybe give me an example of) how to fix it?
Please post the output of 'nvidia-smi'. –
What is nvidia-smi? – Cro
It's a command-line tool that is installed with the NVIDIA drivers. It tells you about the GPU: which processes are running, memory allocation, utilization, driver versions, etc. I keep it running in a loop whenever I work with TensorFlow, which is most days; it's a pretty basic piece of the puzzle when working on GPUs. Google it and you'll find plenty of information. –
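For example, to refresh its report once per second (a standard flag of the NVIDIA driver CLI), run it from any prompt:

nvidia-smi -l 1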