"""Train a small fully connected network on MNIST using the operator API.

The script builds four nets out of raw operators (parameter initialization,
forward, backward and optimization), runs them batch by batch on the MNIST
training set and periodically reports cost and error rate on the test set.
"""
# Source file: python/paddle/v2/framework/tests/mnist.py
# Registered as a test in python/paddle/v2/framework/tests/CMakeLists.txt via:
#     py_test(mnist SRCS mnist.py)

import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import numpy
import paddle.v2 as paddle

BATCH_SIZE = 100

scope = core.Scope()
place = core.CPUPlace()
# if you want to test GPU training, you can use gpu place
# place = core.GPUPlace(0)
dev_ctx = core.DeviceContext.create(place)

init_net = core.Net.create()
forward_net = core.Net.create()
# Built from forward_net once the forward net is complete (see below).
backward_net = None
optimize_net = core.Net.create()


def atomic_id():
    """Yield 0, 1, 2, ... indefinitely."""
    next_id = 0
    while True:
        yield next_id
        next_id += 1


_id_generator = atomic_id()


def uniq_id():
    """Return the next unused integer id (used to build layer names)."""
    # The builtin next() works on both Python 2 and Python 3, unlike the
    # Python 2-only generator method `.next`.
    return next(_id_generator)


def data_layer(name, dims):
    """Create variable `name` in the global scope and set its tensor dims.

    :param name: variable name.
    :type name: str
    :param dims: dimensions to set on the tensor.
    :return: the variable name.
    """
    var = scope.new_var(name)
    tensor = var.get_tensor()
    tensor.set_dims(dims)
    return name


def feed_data(name, data):
    """Copy a numpy array into the tensor of the existing variable `name`.

    :param name: name of a variable that already exists in the scope.
    :param data: array to copy; only int32 and float32 are supported.
    :type data: numpy.ndarray
    :raises TypeError: if `data` is not a numpy.ndarray.
    :raises ValueError: if the dtype of `data` is not supported.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not isinstance(data, numpy.ndarray):
        raise TypeError("data should be a numpy.ndarray")
    tensor = scope.find_var(name).get_tensor()
    tensor.set_dims(data.shape)
    if data.dtype == numpy.dtype('int32'):
        tensor.alloc_int(place)
    elif data.dtype == numpy.dtype('float32'):
        tensor.alloc_float(place)
    else:
        raise ValueError("data type not supported")
    tensor.set(data, place)


def grad_var_name(var_name):
    """Return the name of the gradient variable that belongs to `var_name`."""
    return var_name + "@GRAD"


def sgd_optimizer(net, param_name, learning_rate=0.005):
    """Append an "sgd" operator that updates `param_name` in place to `net`.

    :param net: net the operator is appended to.
    :param param_name: name of the parameter variable; its gradient is read
        from `grad_var_name(param_name)`.
    :param learning_rate: learning rate passed to the operator.
    """
    grad_name = grad_var_name(param_name)
    optimize_op = Operator(
        "sgd",
        param=param_name,
        grad=grad_name,
        param_out=param_name,
        learning_rate=learning_rate)
    net.append_op(optimize_op)


# should use operator and add these to the init_network
def init_param(net, param_name, dims):
    """Create variable `param_name` and append its random initializer to `net`.

    The initializer is a "uniform_random" operator with range [-0.5, 0.5] and
    a fixed seed of 10.
    """
    scope.new_var(param_name)
    op = Operator(
        "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10)
    op.infer_shape(scope)
    net.append_op(op)


# fc_layer
def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
    """
    Add a fc layer to net

    Besides appending the forward operators to `net`, this registers the
    parameter initializers on the module-level `init_net` and the SGD update
    operators on the module-level `optimize_net` (learning rate 0.01 for the
    weight and 0.001 for the bias).

    :param net: net the forward operators are appended to.
    :param input: input variable name.
    :type input: str
    :param size: fully connected layer size.
    :param act: activation name, used as the operator type.
    :param bias: whether to add a bias. False will not have a bias.
    :type bias: bool
    :param param: name of the weight variable. If not set, `name + ".w"` is
        used.
    :param name: the name of fc layer. If not set, model will generate a
        readable name
    :return: output variable name.
    :raises ValueError: if `name` is not a string.
    """
    if name is None:
        name = 'fc_%d' % uniq_id()
    if not isinstance(name, str):
        raise ValueError("name should be string")

    input_dims = scope.find_var(input).get_tensor().get_dims()

    w_name = param or name + ".w"
    init_param(net=init_net, param_name=w_name, dims=[input_dims[1], size])
    sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01)

    pre_activation = name + ".mul.out"
    scope.new_var(pre_activation)
    mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation)
    net.append_op(mul_op)

    # create bias variable if needed
    if bias:
        bias_name = name + ".b"
        init_param(net=init_net, param_name=bias_name, dims=[size])
        sgd_optimizer(
            net=optimize_net, param_name=bias_name, learning_rate=0.001)
        bias_out = name + ".rowwise_add.out"
        scope.new_var(bias_out)
        rowwise_append_op = Operator(
            "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out)
        net.append_op(rowwise_append_op)
        pre_activation = bias_out

    activation_op = Operator(act, X=pre_activation, Y=name)
    net.append_op(activation_op)
    scope.new_var(name)
    net.infer_shape(scope)
    return name


def cross_entropy_layer(net, input, label):
    """Append an "onehot_cross_entropy" operator to `net`.

    :param net: net the operator is appended to.
    :param input: name of the prediction variable.
    :param label: name of the label variable.
    :return: name of the newly created cost variable.
    """
    cost_name = 'cross_entropy_%d' % uniq_id()
    cross_entropy_op = Operator(
        "onehot_cross_entropy", X=input, label=label, Y=cost_name)
    net.append_op(cross_entropy_op)
    scope.new_var(cost_name)
    net.infer_shape(scope)
    return cost_name


def create_backward_net(forward_net):
    """Build the backward net of `forward_net` and create its variables.

    :param forward_net: the completed forward net.
    :return: the backward net.
    """
    # NOTE(review): the empty set is presumably the set of variables that
    # need no gradient -- confirm against core.Operator.backward.
    net = core.Operator.backward(forward_net, set())
    # Make sure every variable the backward net touches exists in the scope
    # and holds a tensor.
    for var_name in net.inputs()["all"]:
        scope.new_var(var_name).get_tensor()
    for var_name in net.outputs()["all"]:
        scope.new_var(var_name).get_tensor()
    return net


def _print_var_dims(var_names):
    """Print each variable name followed by the dims of its tensor."""
    for var_name in var_names:
        dims = scope.find_var(var_name).get_tensor().get_dims()
        # Single-argument print(...) yields the same output on Python 2 and 3.
        print("%s %s" % (var_name, dims))


def debug_print_op(op):
    """Print the type of `op` and the dims of all its inputs and outputs."""
    print("===============" + op.type() + "==============")
    print("***inputs:***")
    _print_var_dims(op.inputs()["all"])
    print("\n***outputs:***")
    _print_var_dims(op.outputs()["all"])
    print("")
    print("")


def set_cost(cost):
    """Fill the gradient of `cost` with ones of the same shape as `cost`.

    :param cost: name of the cost variable.
    """
    cost_shape = numpy.array(scope.find_var(cost).get_tensor()).shape
    cost_grad = scope.find_var(grad_var_name(cost)).get_tensor()
    cost_grad.set_dims(cost_shape)
    cost_grad.alloc_float(place)
    cost_grad.set(numpy.ones(cost_shape).astype("float32"), place)


def get_cost_mean(cost):
    """Return the sum of the cost tensor divided by its first dimension."""
    cost_data = numpy.array(scope.find_var(cost).get_tensor())
    return cost_data.sum() / len(cost_data)


def error_rate(predict, label):
    """Return the fraction of samples whose arg-max prediction != label.

    :param predict: name of the prediction variable.
    :param label: name of the label variable.
    """
    predicted = numpy.array(scope.find_var(predict).get_tensor()).argmax(
        axis=1)
    expected = numpy.array(scope.find_var(label).get_tensor())
    error_num = numpy.sum(predicted != expected)
    return error_num / float(len(expected))


images = data_layer(name='pixel', dims=[BATCH_SIZE, 784])
labels = data_layer(name='label', dims=[BATCH_SIZE])
fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid")
fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid")
# NOTE(review): the output layer has 100 units although MNIST presumably has
# only 10 classes; size=10 looks like the intended value -- confirm.
predict = fc_layer(net=forward_net, input=fc2, size=100, act="softmax")
cost = cross_entropy_layer(net=forward_net, input=predict, label=labels)

init_net.complete_add_op(True)
forward_net.complete_add_op(True)
backward_net = create_backward_net(forward_net)
optimize_net.complete_add_op(True)

print(init_net)
print(forward_net)
print(backward_net)
print(optimize_net)

debug_print_op(forward_net)
debug_print_op(backward_net)
debug_print_op(optimize_net)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)


def _feed_batch(batch):
    """Feed one batch of (image, label) samples into the data layers."""
    # List comprehensions instead of map(): on Python 3 map() returns an
    # iterator, which numpy.array would not expand into an array of samples.
    image_data = numpy.array([sample[0] for sample in batch]).astype("float32")
    label_data = numpy.array([sample[1] for sample in batch]).astype("int32")
    feed_data(images, image_data)
    feed_data(labels, label_data)


def test(cost_name):
    """Run the forward net over the MNIST test set and print the averages.

    :param cost_name: name of the cost variable to average.
    """
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
    costs = []
    errors = []
    for data in test_reader():
        _feed_batch(data)

        forward_net.infer_shape(scope)
        forward_net.run(scope, dev_ctx)
        costs.append(get_cost_mean(cost_name))
        errors.append(error_rate(predict, labels))
    print("cost=" + str(sum(costs) / float(len(costs))) + " error_rate=" + str(
        sum(errors) / float(len(errors))))


PASS_NUM = 1

init_net.run(scope, dev_ctx)
for pass_id in range(PASS_NUM):
    for batch_id, data in enumerate(train_reader()):
        _feed_batch(data)

        forward_net.infer_shape(scope)
        forward_net.run(scope, dev_ctx)
        # Seed back-propagation with d(cost)/d(cost) = 1.
        set_cost(cost)
        backward_net.infer_shape(scope)
        backward_net.run(scope, dev_ctx)

        optimize_net.run(scope, dev_ctx)
        if batch_id % 100 == 0:
            print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]")
            test(cost)