Ich wiederverwendet TensorFlow-Code für multivariable lineare Regression und versucht, die Kosten zu reduzieren, aber das Problem ist, dass nach einigen Iterationen die Kosten sowie die Werte von W und b Inf und schnell Nan wird. Kann mir bitte jemand sagen, wo das Problem liegt. Ich habe ungefähr 100.000 Werte. Ich habe es auf 10.000 Werte zum Testen getrimmt. Der Datensatz ist hereMultivariable lineare Regression mit Tensorflow
Hier ist der Code
import numpy as np
import tensorflow as tf
def computeX():
all_xs = np.loadtxt("test.csv", delimiter=',', skiprows=1, usecols=range(4,260)) #reads the columns except first one
timestamps = np.loadtxt("test.csv", delimiter=',', skiprows=1, usecols=(0),dtype =str)
symbols = np.loadtxt("test.csv", delimiter=',', skiprows=1, usecols=(1),dtype =float)
categories = np.loadtxt("test.csv", delimiter=',', skiprows=1, usecols=(2),dtype =str)
tempList = []
BOW = {"M1": 1.0, "M5": 2.0, "M15": 3.0, "M30": 4.0, "H1": 5.0, "H4": 6.0, "D1": 7.0}
#explode dates and make them features.. 2016/11/1 01:54 becomes [2016, 11, 1, 01, 54]
for i, v in enumerate(timestamps):
splitted = v.split()
dateVal = splitted[0]
timeVal = splitted[1]
ar = dateVal.split("/")
splittedTime = timeVal.split(":")
ar = ar + splittedTime
Features = np.asarray(ar)
Features = Features.astype(float)
# append symbols
Features = np.append(Features,symbols[i])
#append categories from BOW
Features = np.append(Features, BOW[categories[i]])
row = np.append(Features,all_xs[i])
row = row.tolist()
tempList.append(row)
all_xs = np.array(tempList)
del tempList[:]
return all_xs
if __name__ == "__main__":
print ("Starting....")
learn_rate = 0.5
all_ys = np.loadtxt("test.csv", delimiter=',', skiprows=1, usecols=3)
#reads only first column
all_xs = computeX()
datapoint_size= int(all_xs.shape[0])
print(datapoint_size)
x = tf.placeholder(tf.float32, [None, 263], name="x")
W = tf.Variable(tf.ones([263,1]), name="W")
b = tf.Variable(tf.ones([1]), name="b")
product = tf.matmul(x,W)
y = product + b
y_ = tf.placeholder(tf.float32, [datapoint_size])
cost = tf.reduce_mean(tf.square(y_-y))/ (2*datapoint_size)
train_step = tf.train.GradientDescentOptimizer(learn_rate).minimize(cost)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
batch_size = 10000
steps =10
for i in range(steps):
print("Entering Loop")
if datapoint_size == batch_size:
batch_start_idx = 0
elif datapoint_size < batch_size:
raise ValueError("datapoint_size: %d, must be greater than batch_size: %d" % (datapoint_size, batch_size))
else:
batch_start_idx = (i * batch_size) % (datapoint_size - batch_size)
batch_end_idx = batch_start_idx + batch_size
batch_xs = all_xs[batch_start_idx:batch_end_idx]
batch_ys = all_ys[batch_start_idx:batch_end_idx]
xs = np.array(batch_xs)
ys = np.array(batch_ys)
feed = { x: xs, y_: ys }
sess.run(train_step, feed_dict=feed)
print("W: %s" % sess.run(W))
print("b: %f" % sess.run(b))
print("cost: %f" % sess.run(cost, feed_dict=feed))