python - TensorFlow MNIST example: accuracy does not increase


I'm following this tutorial to learn TensorFlow and TensorBoard. My code is below. The accuracy stays stuck around random (about 10%), and I couldn't figure out what is wrong.

Can someone point out where the bug is? I'd also like to know how one should debug something like this in TensorFlow. Thanks.


imports

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('mnist_data', one_hot=True)
import tensorflow as tf

define conv layer

def conv_layer(input, size_in, size_out, name="conv"):
    # 5x5 convolution + ReLU, followed by a 2x2 max-pool that halves the spatial size
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([5, 5, size_in, size_out], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="b")
        conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
        act = tf.nn.relu(conv + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
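A side note on shapes (my sketch, not part of the original post): padding="SAME" preserves the 28x28 input and each 2x2 max-pool halves it, so two of these blocks take 28 -> 14 -> 7, which is where the 7 * 7 * 64 flatten size in the model below comes from. A quick probe, assuming the conv_layer above is already defined:

probe = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
h1 = conv_layer(probe, 1, 32)   # SAME conv keeps 28x28; pool -> 14x14x32
h2 = conv_layer(h1, 32, 64)     # second block: 14x14 -> 7x7x64
print(h2.shape)                 # (?, 7, 7, 64)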

define fc layer

def fc_layer(input, size_in, size_out, name="fc"):
    # fully-connected layer with a ReLU activation
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="b")
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

define model

def mnist_model(learning_rate, path):
    tf.reset_default_graph()
    sess = tf.Session()

    x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    tf.summary.image('input', x_image, 3)
    y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv_out = conv_layer(conv1, 32, 64, "conv2")

    # two 2x2 max-pools have reduced 28x28 to 7x7
    flattened = tf.reshape(conv_out, [-1, 7 * 7 * 64])

    fc1 = fc_layer(flattened, 7 * 7 * 64, 1024, "fc1")
    logits = fc_layer(fc1, 1024, 10, "fc2")

    with tf.name_scope("xent"):
        xent = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=y), name="xent")
        tf.summary.scalar("xent", xent)

    with tf.name_scope("train"):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    summ = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(path)
    writer.add_graph(sess.graph)

    for i in range(2000):
        batch = mnist.train.next_batch(100)
        if i % 50 == 0:
            [train_accuracy, s] = sess.run([accuracy, summ],
                                           feed_dict={x: batch[0], y: batch[1]})
            print(train_accuracy)
            writer.add_summary(s, i)
        sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})

run

mnist_model(1e-3, path="/tmp/mnist_demo/10")

output

0.09 0.08 0.04 0.07 0.12 0.12 0.09 0.12 0.08 0.1 0.11 0.14 0.11 0.11 0.13 0.11 0.19 0.06 

The problem is that you apply a ReLU activation on the last layer, so the logits are thresholded at zero. The softmax cross-entropy expects unconstrained logits; when every negative pre-activation is clipped to 0, most classes end up tied and no gradient flows through the clipped units, so the network never learns to separate them.
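To see this concretely, here is a minimal NumPy sketch (my illustration, not part of the original code): once the raw scores for several classes are negative, the ReLU maps them all to 0, the softmax assigns them identical probabilities, and the gradient through the clipped units is exactly zero.

import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

raw = np.array([2.0, -1.0, -3.0, -0.5])   # unconstrained logits
clipped = np.maximum(raw, 0.0)            # what tf.nn.relu does: [2., 0., 0., 0.]

print(softmax(raw))      # four distinct class probabilities
print(softmax(clipped))  # the three clipped classes collapse to the same value

Incidentally, this is also visible in TensorBoard with the summaries you already write: the fc2/activations histogram never has any mass below zero.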

solution:

change

def fc_layer(input, size_in, size_out, name="fc"):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="b")
        act = tf.nn.relu(tf.matmul(input, w) + b)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

to

def fc_layer(input, size_in, size_out, name="fc", activation=tf.nn.relu):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="b")
        act = tf.matmul(input, w) + b
        if activation is not None:  # skip the non-linearity when activation=None
            act = activation(act)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("activations", act)
        return act

and pass activation=None for the last fully-connected layer:

logits = fc_layer(fc1, 1024, 10, "fc2", activation=None)
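As a quick sanity check after the fix (a hypothetical probe of mine, assuming you run it inside mnist_model where sess, x and logits are in scope), the logits should now be able to go negative:

batch = mnist.train.next_batch(100)
vals = sess.run(logits, feed_dict={x: batch[0]})
print(vals.min(), vals.max())  # min drops below zero once the ReLU is removed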
