
TensorFlow GPU version error with CNN / cuDNN on Windows 10

I am trying to train a CNN on CIFAR-100 with Python and TensorFlow, but the errors CUDA_ERROR_OUT_OF_MEMORY, CUDNN_STATUS_NOT_INITIALIZED and CUDNN_STATUS_BAD_PARAM keep getting in my way. I am using an Anaconda virtual environment for TensorFlow on my machine; the Python version is Anaconda Python 3.5 (in the virtual environment) and the TensorFlow version is 1.1.0. Here is my code:

tf_cifar_learning.py:

# Set working directory 

import os 
dir_model = "c:/tf_model_cifar100" 

# Modules needed 

import numpy as np 
import tensorflow as tf 
import pandas as pd 
from mlxtend.preprocessing import one_hot 


# Load CIFAR Data 
from batch import next_batch 
from read import unpickle 
import time 
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.995) 


# Prepare test data 

testdata = unpickle('test')[b'data'] 
testdata1 = testdata.astype('float') 
del testdata 
testdata = testdata1[0:5000, :] 
testlabel = unpickle('test')[b'coarse_labels'][0:5000] 
testlabel = one_hot(testlabel, 100) 
for i in range(testdata.shape[0]):
    for j in range(3072):
        testdata[i][j] = float(testdata[i][j]) / 255.0
    if i % 1000 == 0:
        print("%d of 5000 test datasets processed" % i)

# Parameters 
learning_rate = 0.001 
training_iters = 1000000 
batch_size = 10 # 128 
display_step = 2 

# Network Parameters 
n_input = 1024*3 # CIFAR data input (img shape: 32*32*3)
n_classes = 100 # CIFAR total classes 
dropout = 0.75 # Dropout, probability to keep units 

# tf Graph input 
x = tf.placeholder(tf.float32, [None, n_input]) 
y = tf.placeholder(tf.float32, [None, n_classes]) 
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) 

# Create some wrappers for simplicity 
def conv2d(x, W, b, strides=1): 
    # Conv2D wrapper, with bias and relu activation 
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 
    x = tf.nn.bias_add(x, b) 
    o = tf.nn.relu(x) 
    return o 

def maxpool2d(x, k=2): 
    # MaxPool2D wrapper 
    o = tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME') 
    return o 

# Create model 
def conv_net(x, weights, biases, dropout): 
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 32, 32, 3]) 
    # Convolution Layer 
    conv1 = conv2d(x, weights['wc1'], biases['bc1']) 
    # Max Pooling (down-sampling) 
    conv1 = maxpool2d(conv1, k=2) 

    # Convolution Layer 
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) 
    # Max Pooling (down-sampling) 
    conv2 = maxpool2d(conv2, k=2) 

    # Fully connected layer 
    # Reshape conv2 output to fit fully connected layer input 
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]]) 
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) 
    fc1 = tf.nn.relu(fc1) 
    # Apply Dropout 
    fc1 = tf.nn.dropout(fc1, dropout) 

    # Output, class prediction 
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) 
    return out 

# Store layers weight & bias 
weights = { 
    # 5x5 conv, 1 input, 32 outputs 
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 32])), 
    # 5x5 conv, 32 inputs, 64 outputs 
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 
    # fully connected, 8*8*64 inputs, 1024 outputs 
    'wd1': tf.Variable(tf.random_normal([8*8*64, 1024])), 
    # 1024 inputs, 100 outputs (class prediction) 
    'out': tf.Variable(tf.random_normal([1024, n_classes])) 
} 

biases = { 
    'bc1': tf.Variable(tf.random_normal([32])), 
    'bc2': tf.Variable(tf.random_normal([64])), 
    'bd1': tf.Variable(tf.random_normal([1024])), 
    'out': tf.Variable(tf.random_normal([n_classes])) 
} 

# Construct model 
pred = conv_net(x, weights, biases, keep_prob) 

# Define loss and optimizer 
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) 
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 

# Evaluate model 
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 

# Initializing the variables 
init = tf.global_variables_initializer() 

# Launch the graph 
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 
    sess.run(init) 
    step = 1 
    # Time measuring 
    t1 = time.time() 
    # Keep training until reach max iterations 
    while step * batch_size < training_iters:
        # Prepare training batch
        batch_x, batch_y = next_batch(batch_size)
        batch_x1 = np.zeros([batch_size, 3072], dtype="float32")
        for i in range(batch_size):
            for j in range(3072):
                batch_x1[i][j] = batch_x[i][j] / 255.0
            # if i % 200 == 0:
            #     print("%d of %d training batch images processed" % (i, batch_size))
        # Run optimization op (backprop)
        sess.run(optimizer, feed_dict={x: batch_x1, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x1, y: batch_y, keep_prob: 1.})
            # Calculate accuracy for all test samples
            acc = accuracy.eval({x: testdata, y: testlabel, keep_prob: 1.})
            # Time measuring
            t2 = time.time()
            tmp = t2 - t1
            sec = tmp % 60
            m = int(tmp / 60)
            print("Iter# %8d" % (step * batch_size) +
                  ", Minibatch Loss= %16.10f" % loss +
                  ", Testing Accuracy= %8.6f" % acc +
                  ", Training currently elapsed " +
                  "{:d} mins {:f} secs".format(m, sec))
        step += 1
    print("Optimization Finished!") 
    # Save the model after learning 
    model_saver = tf.train.Saver() 
    model_saver.save(sess, dir_model + "/CIFAR-100_cnn_model.chkp") 

batch.py:

def next_batch(batch_size, onehot=True):
    class a:
        try:
            temp = current_batch
        except NameError:
            current_batch = 0
    import numpy as np
    from read import unpickle
    import tensorflow as tf
    # from mlxtend.preprocessing import one_hot
    dict_data = unpickle('train')
    label = np.array(dict_data[b'fine_labels'][a.current_batch:a.current_batch + batch_size])
    a1 = dict_data[b'data']
    a2 = a1[a.current_batch:a.current_batch + batch_size, :]
    a.current_batch += batch_size
    a2 = np.reshape(a2, (batch_size, 3072))
    with tf.device('/cpu:0'):
        if onehot == True:
            label = tf.Session().run(tf.one_hot(label, 100))
    return a2, label

read.py:

def unpickle(file):
    import pickle
    with open(file, 'rb') as a:
        dict = pickle.load(a, encoding='bytes')
        return dict

Output of python tf_cifar_learning.py in the Windows CMD:

(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python>python tf_cifar_learning.py 
0 of 5000 test datasets processed 
1000 of 5000 test datasets processed 
2000 of 5000 test datasets processed 
3000 of 5000 test datasets processed 
4000 of 5000 test datasets processed 
2017-05-02 17:48:46.635855: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The  TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.635975: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.637256: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.638434: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.638939: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.639456: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.641753: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.641909: W c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-02 17:48:46.994154: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:887] Found device 0 with properties: 
name: GeForce GT 730 
major: 3 minor: 5 memoryClockRate (GHz) 0.9015 
pciBusID 0000:01:00.0 
Total memory: 2.00GiB 
Free memory: 1.66GiB 
2017-05-02 17:48:46.994318: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:908] DMA: 0 
2017-05-02 17:48:46.997080: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:918] 0: Y 
2017-05-02 17:48:46.997985: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0) 
2017-05-02 17:48:46.999359: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.99G (2136745984 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY 
2017-05-02 17:48:46.999434: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_driver.cc:893] failed to allocate 1.79G (1923071488 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY 
2017-05-02 17:48:47.766766: I c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\common_runtime\gpu\gpu_device.cc:977] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GT 730, pci bus id: 0000:01:00.0) 
2017-05-02 17:48:48.334298: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:359] could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED 
2017-05-02 17:48:48.334466: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:366] error retrieving driver version: Unimplemented: kernel reported driver version not implemented on Windows 
2017-05-02 17:48:48.343454: E c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\stream_executor\cuda\cuda_dnn.cc:326] could not destroy cudnn handle: CUDNN_STATUS_BAD_PARAM 
2017-05-02 17:48:48.343558: F c:\tf_jenkins\home\workspace\release-win\device\gpu\os\windows\tensorflow\core\kernels\conv_ops.cc:659] Check failed: stream->parent()->GetConvolveAlgorithms(&algorithms) 

(tensorflow) C:\Users\Administrator\learn_tensorflow\cifar-100-python> 

Then Windows 10 says that Python has stopped working and the process is killed immediately. Can someone please tell me what the problem is and show me (maybe with an example) how to fix it?


Please post the output of 'nvidia-smi' –


What is nvidia-smi? – Cro


A command-line tool that is installed along with the NVIDIA drivers. It tells you about the GPU, which processes are running on it, memory allocation, utilization, driver versions, etc. I keep it running in a loop whenever I work with TensorFlow, which is most days; it is a pretty fundamental piece of the puzzle when working on GPUs. Google it and you will find plenty of information. –

Answer


The problem most likely has something to do with your environment.

You only have one GPU, and you are probably also using it for the display. That is why TensorFlow cannot allocate all the memory it asks for. You can control how much GPU memory TensorFlow uses via per_process_gpu_memory_fraction:

https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/test_util.py#L388
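For illustration, here is a minimal sketch (not taken from the linked file) of how that option is wired into the session config; the 0.6 fraction is just an example value:

import tensorflow as tf

# Cap TensorFlow at roughly 60% of the card's memory, leaving room for the display.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
config = tf.ConfigProto(gpu_options=gpu_options)

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    # ... run the training loop as before ...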

As for cuDNN, it looks as if the cuDNN library itself cannot initialize ("CUDNN_STATUS_NOT_INITIALIZED"). Are you sure you can run other CUDA and cuDNN samples in this environment?
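If you do not have the CUDA/cuDNN samples handy, a TensorFlow-only sanity check along these lines (my own sketch, not part of the original answer) exercises the same cuDNN initialization path through a single convolution:

import tensorflow as tf

with tf.device('/gpu:0'):
    img = tf.random_normal([1, 32, 32, 3])   # one dummy 32x32 RGB image
    kern = tf.random_normal([5, 5, 3, 8])    # a 5x5 convolution kernel bank
    conv = tf.nn.conv2d(img, kern, strides=[1, 1, 1, 1], padding='SAME')

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    print(sess.run(conv).shape)              # expect (1, 32, 32, 8) if cuDNN comes up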


Take a look at tf_cifar_learning.py line 18 – Cro


Reinstalling the BIOS does the job; maybe the card misbehaves from time to time. – Cro


Try changing per_process_gpu_memory_fraction=0.995 to a smaller value such as 0.7 or 0.6.
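In the posted script that is a one-line change; allow_growth is an additional option I am adding here for illustration, not something this answer mentions:

# Ask for ~60% of the card instead of 99.5%.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)

# Alternative: grow the allocation on demand instead of reserving a fixed fraction.
# gpu_options = tf.GPUOptions(allow_growth=True)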
