
TensorFlow Wide & Deep Learning: Shape (0,) must have rank 2

I followed the TensorFlow tutorial for Wide & Deep Learning and took the same code, just to try it on my own data. So I basically only changed the columns, and I got these errors:

  • ValueError: Shapes (0,) and (?, ?) must have the same rank
  • ValueError: Shapes (0,) and (?, ?) are incompatible
  • ValueError: Shape (0,) must have rank 2

Here is the code:

from __future__ import absolute_import 
from __future__ import division 
from __future__ import print_function 

import argparse 
import sys 
import tempfile 

from six.moves import urllib 

import pandas as pd 
import tensorflow as tf 


COLUMNS = ["idPCE", "typeObj", "heure", "typeG", "pas", 
      "qualite", "valeur"] 
LABEL_COLUMN = "label" 
CATEGORICAL_COLUMNS = ["idPCE", "typeObj", "typeG", "pas", "qualite"] 
CONTINUOUS_COLUMNS = ["heure"] 


def maybe_download(train_data, test_data):
    """Maybe downloads training data and returns train and test file names."""
    if train_data:
        train_file_name = train_data
    else:
        train_file = tempfile.NamedTemporaryFile(delete=False)
        urllib.request.urlretrieve("http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/adult.data", train_file.name)  # pylint: disable=line-too-long
        train_file_name = train_file.name
        train_file.close()
        print("Training data is downloaded to %s" % train_file_name)

    if test_data:
        test_file_name = test_data
    else:
        test_file = tempfile.NamedTemporaryFile(delete=False)
        urllib.request.urlretrieve("http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/adult.test", test_file.name)  # pylint: disable=line-too-long
        test_file_name = test_file.name
        test_file.close()
        print("Test data is downloaded to %s" % test_file_name)

    return train_file_name, test_file_name


def build_estimator(model_dir, model_type): 
    """Build an estimator.""" 
    # Sparse base columns. 
    idPCE = tf.contrib.layers.sparse_column_with_hash_bucket("idPCE", hash_bucket_size=1000) 

    typeG = tf.contrib.layers.sparse_column_with_keys(column_name="typeG", 
                keys=["DENMOY","ENETER","ETHMOY","METMOY","PCSMOY","PREMOY","TEMMOY","VOLBAL","VOLBCP","VOLBCR","VOLCAL","VOLCCU","VOLTER"]) 
    pas = tf.contrib.layers.sparse_column_with_keys(column_name="pas", 
                keys=["H","J"]) 
    qualite = tf.contrib.layers.sparse_column_with_keys(column_name="qualite", 
                keys=["A","AA","AD","AF","CS","M"]) 
    # Continuous base columns. 
    heure = tf.contrib.layers.real_valued_column("heure") 



    # Transformations. 
    heure_buckets = tf.contrib.layers.bucketized_column(heure, 
                boundaries=[ 
                 6, 12, 18 
                ]) 

    # Wide columns and deep columns. 
    wide_columns = [idPCE, typeG, pas, 
        qualite, heure_buckets, 
        tf.contrib.layers.crossed_column([typeG, qualite], 
                hash_bucket_size=int(1e4)), 
        tf.contrib.layers.crossed_column(
         [heure_buckets, idPCE, pas], 
         hash_bucket_size=int(1e6)), 
        tf.contrib.layers.crossed_column([heure_buckets, qualite], 
                hash_bucket_size=int(1e4))] 

    deep_columns = [ 
     tf.contrib.layers.embedding_column(qualite, dimension=3), 
     tf.contrib.layers.embedding_column(pas, dimension=1), 
     tf.contrib.layers.embedding_column(typeG, dimension=4), 
     tf.contrib.layers.embedding_column(idPCE, 
             dimension=9), 
     heure 
    ] 

    if model_type == "wide":
        m = tf.contrib.learn.LinearClassifier(model_dir=model_dir,
                                              feature_columns=wide_columns)
    elif model_type == "deep":
        m = tf.contrib.learn.DNNClassifier(model_dir=model_dir,
                                           feature_columns=deep_columns,
                                           hidden_units=[100, 50])
    else:
        m = tf.contrib.learn.DNNLinearCombinedClassifier(
            model_dir=model_dir,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[100, 50],
            fix_global_step_increment_bug=True)
    return m


def input_fn(df): 
    """Input builder function.""" 
    # Creates a dictionary mapping from each continuous feature column name (k) to 
    # the values of that column stored in a constant Tensor. 
    continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS} 
    # Creates a dictionary mapping from each categorical feature column name (k) 
    # to the values of that column stored in a tf.SparseTensor. 
    categorical_cols = { 
     k: tf.SparseTensor(
      indices=[[i, 0] for i in range(df[k].size)], 
      values=df[k].values, 
      dense_shape=[df[k].size, 1]) 
     for k in CATEGORICAL_COLUMNS} 
    # Merges the two dictionaries into one. 
    feature_cols = dict(continuous_cols) 
    feature_cols.update(categorical_cols) 
    # Converts the label column into a constant Tensor. 
    label = tf.constant(df[LABEL_COLUMN].values) 
    # Returns the feature columns and the label. 
    return feature_cols, label 


def train_and_eval(model_dir, model_type, train_steps, train_data, test_data): 
    """Train and evaluate the model.""" 
    train_file_name, test_file_name = maybe_download(train_data, test_data) 
    df_train = pd.read_csv(
     tf.gfile.Open(train_file_name), 
     names=COLUMNS, 
     skipinitialspace=True, 
     engine="python") 
    df_test = pd.read_csv(
     tf.gfile.Open(test_file_name), 
     names=COLUMNS, 
     skipinitialspace=True, 
     skiprows=1, 
     engine="python") 

    # remove NaN elements 
    df_train = df_train.dropna(how='any', axis=0) 
    df_test = df_test.dropna(how='any', axis=0) 

    df_train[LABEL_COLUMN] = (
     df_train["valeur"].apply(lambda x: x in x)).astype(float) 
    df_test[LABEL_COLUMN] = (
     df_test["valeur"].apply(lambda x: x in x)).astype(float) 

    model_dir = tempfile.mkdtemp() if not model_dir else model_dir 
    print("model directory = %s" % model_dir) 

    m = build_estimator(model_dir, model_type) 
    m.fit(input_fn=lambda: input_fn(df_train), steps=train_steps) 
    results = m.evaluate(input_fn=lambda: input_fn(df_test), steps=1) 
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))


FLAGS = None 


def main(_): 
    train_and_eval(FLAGS.model_dir, FLAGS.model_type, FLAGS.train_steps, 
       FLAGS.train_data, FLAGS.test_data) 


if __name__ == "__main__": 
    parser = argparse.ArgumentParser() 
    parser.register("type", "bool", lambda v: v.lower() == "true") 
    parser.add_argument(
     "--model_dir", 
     type=str, 
     default="", 
     help="Base directory for output models." 
) 
    parser.add_argument(
     "--model_type", 
     type=str, 
     default="wide_n_deep", 
     help="Valid model types: {'wide', 'deep', 'wide_n_deep'}." 
) 
    parser.add_argument(
     "--train_steps", 
     type=int, 
     default=200, 
     help="Number of training steps." 
) 
    parser.add_argument(
     "--train_data", 
     type=str, 
     default="", 
     help="Path to the training data." 
) 
    parser.add_argument(
     "--test_data", 
     type=str, 
     default="", 
     help="Path to the test data." 
) 
    FLAGS, unparsed = parser.parse_known_args() 
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 

Answer


The default separator in the pandas read_csv() function is ',', but my inputs were separated by ';'. I fixed that (a minimal sketch of the fix is shown a bit further below) and now it runs perfectly, but I always get an accuracy of 1.0, which is impossible. I suspect the label part:

df_train[LABEL_COLUMN] = (
    df_train["valeur"].apply(lambda x: x in x)).astype(float)
df_test[LABEL_COLUMN] = (
    df_test["valeur"].apply(lambda x: x in x)).astype(float)

Should my target always be binary like in the TensorFlow tutorials? Because mine are random float values that I want to predict. Thanks!!
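
Thinking about it some more: "x in x" is always True for a string, so the label column built above is constantly 1.0, which by itself would explain the accuracy of 1.0. Since my target is the continuous column valeur, a regressor rather than a classifier seems to be what is needed. Here is a minimal sketch, reusing the wide_columns and deep_columns from build_estimator() above (this is only an assumption on my part, not something I have verified on my data):

import tensorflow as tf

def build_regressor(model_dir, wide_columns, deep_columns):
    # Regressor counterpart of DNNLinearCombinedClassifier; accepts float targets.
    return tf.contrib.learn.DNNLinearCombinedRegressor(
        model_dir=model_dir,
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=[100, 50])

# The label would then simply be the raw float column:
# df_train[LABEL_COLUMN] = df_train["valeur"].astype(float)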
