Ich habe es mit dem Street View House Number Erkennungsproblem zu tun. Ich versuche ein CNN mit Keras zu trainieren.keras CNN mit niedrigen und konstanten Genauigkeiten
Hier ist, wie ich die Eingabe vorbereitet
from PIL import Image
from PIL import ImageFilter
train_folders = 'sv_train/train'
test_folders = 'test'
extra_folders = 'extra'
SV_IMG_SIZE = 28
SV_CHANNELS = 1
train_imsize = np.ndarray([len(train_data),2])
k = 500
sv_images = []
max_images = 20000#len(train_data)
max_digits = 5
sv_labels = np.ones([max_images, max_digits], dtype=int) * 10 # init to 10 cause it would be no digit
nboxes = [[] for i in range(max_images)]
print ("%d to load" % len(train_data))
def getBBox(i,perc):
'''
Given i, the desired i.png, returns
x_min, y_min, x_max, y_max,
the four numbers which define the small rectangular bounding
box that contains all individual character bounding boxes
'''
boxes = train_data[i]['boxes']
x_min=9990
y_min=9990
x_max=0
y_max=0
for bid,b in enumerate(boxes):
x_min = b['left'] if b['left'] <= x_min else x_min
y_min = b['top'] if b['top'] <= y_min else y_min
x_max = b['left']+b['width'] if b['left']+b['width'] >= x_max else x_max
y_max = b['top']+b['height'] if b['top']+b['height'] >= y_max else y_max
dy = y_max-y_min
dx = x_max-x_min
dpy = dy*perc
dpx = dx*perc
nboxes[i]=[dpx,dpy,dx,dy]
return x_min-dpx, y_min-dpy, x_max+dpx, y_max+dpy
for i in range(max_images):
print (" \r%d" % i ,end="")
filename = train_data[i]['filename']
fullname = os.path.join(train_folders, filename)
boxes = train_data[i]['boxes']
label = [10,10,10,10,10]
lb = len(boxes)
if lb <= max_digits:
im = Image.open(fullname)
x_min, y_min, x_max, y_max = getBBox(i,0.3)
im = im.crop([x_min,y_min,x_max,y_max])
owidth, oheight = im.size
wr = SV_IMG_SIZE/float(owidth)
hr = SV_IMG_SIZE/float(oheight)
for bid,box in enumerate(boxes):
sv_labels[i][max_digits-lb+bid] = int(box['label'])
box = nboxes[i]
box[0]*=wr
box[1]*=wr
box[2]*=hr
box[3]*=hr
im = im.resize((SV_IMG_SIZE,SV_IMG_SIZE),Image.ANTIALIAS)
img = img - np.mean(img)
im = im.filter(ImageFilter.EDGE_ENHANCE)
img = img - np.mean(img)
array = np.asarray(im)
array = array.reshape((SV_IMG_SIZE,SV_IMG_SIZE,3)).astype(np.float32)
na = np.zeros([SV_IMG_SIZE,SV_IMG_SIZE],dtype=int)
for x in range (array.shape[0]):
for y in range (array.shape[1]):
na[x][y]=np.average(array[x][y][:])
na = na.reshape(SV_IMG_SIZE,SV_IMG_SIZE,1)
#print(array.shape)
sv_images.append(na.astype(np.float32))
sv_train, sv_validation, svt_labels, svv_labels = train_test_split(sv_images, sv_labels, test_size=0.33, random_state=42)
Und hier ist, wie ich erstellt und trainiert das Modell
model = Sequential()
x = Input((28, 28,1))
y = Convolution2D(16, 3, 3, border_mode="same")(x)
#y = MaxPooling2D(pool_size = (2, 2), strides = (2, 2)) (y)
#y = Dropout(0.25)(y)
y = Convolution2D(32, 4, 4, border_mode="same")(y)
y = MaxPooling2D(pool_size = (3, 3)) (y)
#y = Dropout(0.25)(y)
y = Convolution2D(64, 5, 5, border_mode="same", activation="relu")(y)
y = MaxPooling2D((2, 2))(y)
#y = Dropout(0.25)(y)
y = Convolution2D(128, 5, 5, border_mode="same", activation="relu")(y)
y = MaxPooling2D((2, 2))(y)
#y = Dropout(0.25)(y)
y = Flatten()(y)
y = Dense(1024, activation="relu")(y)
digit1 = Dense(11, activation="softmax")(y)
digit2 = Dense(11, activation="softmax")(y)
digit3 = Dense(11, activation="softmax")(y)
digit4 = Dense(11, activation="softmax")(y)
digit5 = Dense(11, activation="softmax")(y)
model = Model(input=x, output=[digit1, digit2, digit3,digit4,digit5])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
print(model.layers[0].output_shape)
print(model.layers[2].output_shape)
print(model.layers[4].output_shape)
print(model.layers[6].output_shape)
print(model.layers[8].output_shape)
sv_train_labels = [svt_labels[:,0],svt_labels[:,1],svt_labels[:,2],svt_labels[:,3],svt_labels[:,4]]
sv_validation_labels = [svv_labels[:,0],svv_labels[:,1],svv_labels[:,2],svv_labels[:,3],svv_labels[:,4]]
model.fit(sv_train, sv_train_labels, nb_epoch=10, batch_size=64,validation_data=(sv_validation, sv_validation_labels))
Das Problem ist, ich sehr geringen Genauigkeiten erhalten, die mit dem gleichen Wert bleibt bei jede Epoche
Zug auf 13.400 Proben, validieren auf 6600 Proben Epoch 1/10 13400/13400 [====================== ========] - 78s - Verlust: 34.7407 - dichter_740_verlust: 0.1161 - dichter_741_verlust: 0.6879 - dichter_742_verlust: 4.7988 - dichter_743_verlust: 14.7893 - dichter_744_verlust: 14.3486 - dichter_740_acc: 0.9902 - dichter_741_acc: 0.9542 0,0810 - dense_744_acc: 0,1055 - val_loss: 34,7760 - val_dense_740_loss: 0,0049 - val_dense_741_loss: 0,7131 - val_dense_742_loss: 4,8721 - val_dense_743_loss: 14,8091 - val_dense_744_loss: 14,3769 - val_dense_740_acc: 0,9997 - val_dense_741_acc: 0,9558 - val_dense_742_acc: 0,6977 - val_dense_743_acc: 0,0812 - val_dense_744_acc: 0,1080 Epoch 2/10 13400/13400 [=============================] - 70s - Verlust: 34.7032 - dichter_740_verlust: 0.0036 - dichte_741_verlust: 0.6760 - dichte_742_verlust: 4.7861 - dichte_743_verlust: 14.8118 - dichte_744_verlust: 14.4257 - dichte_740_acc: 0.9998 - dichte_741_acc: 0.9581 - dichte_742_acc: 0.7031 - dichte_743_acc: 0.0810 - dichte_744_acc: 0.1050 - val_verlust: 34.7760 - val _dense_740_loss: 0,0049 - val_dense_741_loss: 0,7131 - val_dense_742_loss: 4,8721 - val_dense_743_loss: 14,8091 - val_dense_744_loss: 14,3769 - val_dense_740_acc: 0,9997 - val_dense_741_acc: 0,9558 - val_dense_742_acc: 0,6977 - val_dense_743_acc: 0,0812 - val_dense_744_acc: 0,1080