diff --git a/demo/mnist/.gitignore b/demo/mnist/.gitignore index 810910fd5ca56f0cfd7051f3392a9f7ea010d7f0..8bd9837523ccf98e6e72d5b82934b7b104816217 100644 --- a/demo/mnist/.gitignore +++ b/demo/mnist/.gitignore @@ -4,3 +4,4 @@ mnist_vgg_model plot.png train.log *pyc +.ipynb_checkpoints diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py new file mode 100644 index 0000000000000000000000000000000000000000..f301da382ff8a5bc16d9c18b956f78566ed4894f --- /dev/null +++ b/demo/mnist/api_train.py @@ -0,0 +1,205 @@ +""" +A very basic example of how to use the current raw SWIG API to train an MNIST network. + +The current implementation uses raw SWIG, which means each API call is passed \ +directly to the C++ side of Paddle. + +The user-facing API could be made simpler and more carefully designed. +""" +import py_paddle.swig_paddle as api +from py_paddle import DataProviderConverter +import paddle.trainer.PyDataProvider2 as dp +import numpy as np +import random +from mnist_util import read_from_mnist +from paddle.trainer_config_helpers import * + + +def optimizer_config(): + settings( + learning_rate=1e-4, + learning_method=AdamOptimizer(), + batch_size=1000, + model_average=ModelAverage(average_window=0.5), + regularization=L2Regularization(rate=0.5)) + + +def network_config(): + imgs = data_layer(name='pixel', size=784) + hidden1 = fc_layer(input=imgs, size=200) + hidden2 = fc_layer(input=hidden1, size=200) + inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) + cost = classification_cost( + input=inference, label=data_layer( + name='label', size=10)) + outputs(cost) + + +def init_parameter(network): + assert isinstance(network, api.GradientMachine) + for each_param in network.getParameters(): + assert isinstance(each_param, api.Parameter) + array_size = len(each_param) + array = np.random.uniform(-1.0, 1.0, array_size).astype('float32') + each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array) + + +def generator_to_batch(generator, batch_size): + ret_val = list() + for each_item in generator: + ret_val.append(each_item) + if len(ret_val) == batch_size: + yield ret_val + ret_val = list() + if len(ret_val) != 0: + yield ret_val + + +class BatchPool(object): + def __init__(self, generator, batch_size): + self.data = list(generator) + self.batch_size = batch_size + + def __call__(self): + random.shuffle(self.data) + for offset in xrange(0, len(self.data), self.batch_size): + limit = min(offset + self.batch_size, len(self.data)) + yield self.data[offset:limit] + + +def input_order_converter(generator): + for each_item in generator: + yield each_item['pixel'], each_item['label'] + + +def main(): + api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 CPU cores + + # Get enable_types for the optimizer. + # enable_types = [value, gradient, momentum, etc.] + # For each optimizer (SGD, Adam, ...), the GradientMachine should enable a + # different set of parameter buffers. + opt_config_proto = parse_optimizer_config(optimizer_config) + opt_config = api.OptimizationConfig.createFromProto(opt_config_proto) + _temp_optimizer_ = api.ParameterOptimizer.create(opt_config) + enable_types = _temp_optimizer_.getParameterTypes() + + # Create Simple Gradient Machine. + model_config = parse_network_config(network_config) + m = api.GradientMachine.createFromConfigProto( + model_config, api.CREATE_MODE_NORMAL, enable_types) + + # This type check is not strictly necessary; it only enables type hints in + # IDEs such as PyCharm. + assert isinstance(m, api.GradientMachine) + + # Initialize parameters with numpy arrays.
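+    # Instead of random initialization, parameter values could also be restored
+    # from numpy arrays saved earlier, e.g. (sketch, assuming one .npy file per
+    # parameter, named after the parameter):
+    #
+    #   for each_param in m.getParameters():
+    #       saved = np.load(each_param.getName() + '.npy').astype('float32')
+    #       each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(saved)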
+ init_parameter(network=m) + + # Create a local updater. Local means it does not run on a cluster. + # For cluster training, this could be changed to createRemoteUpdater + # in the future. + updater = api.ParameterUpdater.createLocalUpdater(opt_config) + assert isinstance(updater, api.ParameterUpdater) + + # Initialize ParameterUpdater. + updater.init(m) + + # DataProviderConverter is a utility that converts Python objects to Paddle + # C++ input. The input format is the same as Paddle's DataProvider. + converter = DataProviderConverter( + input_types=[dp.dense_vector(784), dp.integer_value(10)]) + + train_file = './data/raw_data/train' + test_file = './data/raw_data/t10k' + + # Start the gradient machine. + # The gradient machine must be started before invoking forward/backward, + # and not just for training, but also for inference. + m.start() + + # An evaluator can print the error rate, etc. It is a C++ class. + batch_evaluator = m.makeEvaluator() + test_evaluator = m.makeEvaluator() + + # Get training data. + # The training data is stored in a data pool. The current implementation does + # not care about memory or speed; it is just a very naive implementation. + train_data_generator = input_order_converter(read_from_mnist(train_file)) + train_data = BatchPool(train_data_generator, 512) + + # outArgs holds the neural network's forward results. They are not used here, + # just passed to gradient_machine.forward. + outArgs = api.Arguments.createArguments(0) + + for pass_id in xrange(2): # we train 2 passes. + updater.startPass() + + for batch_id, data_batch in enumerate(train_data()): + # data_batch contains the input images. + # For online learning, data_batch could be received from the network instead. + + # Start updating one batch. + pass_type = updater.startBatch(len(data_batch)) + + # Start the batch evaluator. + # batch_evaluator can be used between start() and finish(). + batch_evaluator.start() + + # forwardBackward is a shortcut for forward and backward. + # It is sometimes faster than invoking forward/backward separately, + # because inside GradientMachine it may run asynchronously. + m.forwardBackward(converter(data_batch), outArgs, pass_type) + + for each_param in m.getParameters(): + updater.update(each_param) + + # Get the cost. We use numpy to calculate the total cost of this batch. + cost_vec = outArgs.getSlotValue(0) + cost_vec = cost_vec.copyToNumpyMat() + cost = cost_vec.sum() / len(data_batch) + + # Let the evaluator accumulate statistics for this batch. + m.eval(batch_evaluator) + + # Print logs. + print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \ + cost, batch_evaluator + + batch_evaluator.finish() + # Finish the batch. + # * It clears the gradients. + # * It ensures all parameter values are updated. + updater.finishBatch(cost) + + # Testing stage: use the test data set to evaluate the current network. + updater.apply() + test_evaluator.start() + test_data_generator = input_order_converter(read_from_mnist(test_file)) + for data_batch in generator_to_batch(test_data_generator, 512): + # In the testing stage, only forward is needed.
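+            # After the forward call below, the per-sample cost of the test batch
+            # could also be read back from outArgs, just as in the training loop
+            # above (sketch):
+            #
+            #   test_cost = outArgs.getSlotValue(0).copyToNumpyMat().sum() / len(data_batch)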
+ m.forward(converter(data_batch), outArgs, api.PASS_TEST) + m.eval(test_evaluator) + + # print error rate for test data set + print 'Pass', pass_id, ' test evaluator: ', test_evaluator + test_evaluator.finish() + updater.restore() + + updater.catchUpWith() + params = m.getParameters() + for each_param in params: + assert isinstance(each_param, api.Parameter) + value = each_param.getBuf(api.PARAMETER_VALUE) + value = value.copyToNumpyArray() + + # Here, we could save parameter to every where you want + print each_param.getName(), value + + updater.finishPass() + + m.finish() + + +if __name__ == '__main__': + main() diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py index 4635833d36b9f21c992d96910f3ac9094ccefd2c..888cfef1e7e3e1b4f556756c003eeb23e741cabe 100644 --- a/demo/mnist/mnist_provider.py +++ b/demo/mnist/mnist_provider.py @@ -1,5 +1,5 @@ from paddle.trainer.PyDataProvider2 import * -import numpy +from mnist_util import read_from_mnist # Define a py data provider @@ -8,27 +8,5 @@ import numpy 'label': integer_value(10)}, cache=CacheType.CACHE_PASS_IN_MEM) def process(settings, filename): # settings is not used currently. - imgf = filename + "-images-idx3-ubyte" - labelf = filename + "-labels-idx1-ubyte" - f = open(imgf, "rb") - l = open(labelf, "rb") - - f.read(16) - l.read(8) - - # Define number of samples for train/test - if "train" in filename: - n = 60000 - else: - n = 10000 - - images = numpy.fromfile( - f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') - images = images / 255.0 * 2.0 - 1.0 - labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") - - for i in xrange(n): - yield {"pixel": images[i, :], 'label': labels[i]} - - f.close() - l.close() + for each in read_from_mnist(filename): + yield each diff --git a/demo/mnist/mnist_util.py b/demo/mnist/mnist_util.py new file mode 100644 index 0000000000000000000000000000000000000000..3fd88ae7edc821296ca0accbf6dedc083e411744 --- /dev/null +++ b/demo/mnist/mnist_util.py @@ -0,0 +1,30 @@ +import numpy + +__all__ = ['read_from_mnist'] + + +def read_from_mnist(filename): + imgf = filename + "-images-idx3-ubyte" + labelf = filename + "-labels-idx1-ubyte" + f = open(imgf, "rb") + l = open(labelf, "rb") + + f.read(16) + l.read(8) + + # Define number of samples for train/test + if "train" in filename: + n = 60000 + else: + n = 10000 + + images = numpy.fromfile( + f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') + images = images / 255.0 * 2.0 - 1.0 + labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") + + for i in xrange(n): + yield {"pixel": images[i, :], 'label': labels[i]} + + f.close() + l.close() diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index ed69bd764f30ac4047895ff539a0b70dfa5aac61..da6dad10cd807654f9ddd03beeb29cef69fc8de0 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -1,10 +1,12 @@ set(API_SOURCES Arguments.cpp ConfigParser.cpp + Evaluator.cpp GradientMachine.cpp Matrix.cpp Parameter.cpp ParameterOptimizer.cpp + ParameterUpdater.cpp SequenceGenerator.cpp Trainer.cpp Util.cpp @@ -63,6 +65,15 @@ install(DIRECTORY ${PROJ_ROOT}/paddle/dist/ add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/dist/.timestamp) +add_dependencies(python_api_wheel python_swig_sources + paddle_parameter + paddle_math + paddle_utils + paddle_gserver + paddle_pserver + paddle_trainer + paddle_api + paddle_cuda) if(WITH_TESTING) add_subdirectory(test) diff --git a/paddle/api/Evaluator.cpp b/paddle/api/Evaluator.cpp new 
file mode 100644 index 0000000000000000000000000000000000000000..c30e09876397e37ef9ed4ec3200d1aa372ceb609 --- /dev/null +++ b/paddle/api/Evaluator.cpp @@ -0,0 +1,29 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include +#include "PaddleAPI.h" +#include "PaddleAPIPrivate.h" + +Evaluator::Evaluator() : m(new EvaluatorPrivate()) {} +Evaluator::~Evaluator() { delete m; } + +void Evaluator::start() { m->rawPtr->start(); } + +void Evaluator::finish() { m->rawPtr->finish(); } + +std::string Evaluator::toString() { + std::ostringstream sout; + m->rawPtr->printStats(sout); + return sout.str(); +} diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index ced2293376cae51eb5ac9cd27133f13174f61e3c..66115f8293b905809639afff779abfdb2bb3a54e 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -64,6 +64,10 @@ GradientMachine* GradientMachine::createByModelConfig( return GradientMachine::createFromPaddleModelPtr(confPtr, mode, types); } +void GradientMachine::start() { m->machine->start(); } + +void GradientMachine::finish() { m->machine->finish(); } + void GradientMachine::onPassEnd() { m->machine->onPassEnd(); } void GradientMachine::prefetch(const Arguments& inArgs) { @@ -166,3 +170,13 @@ SequenceGenerator* GradientMachine::asSequenceGenerator( r->setBeamSize(beam_size); return r; } + +Evaluator* GradientMachine::makeEvaluator() { + auto ev = new Evaluator(); + ev->m->rawPtr = m->machine->makeEvaluator(); + return ev; +} + +void GradientMachine::eval(Evaluator* evaluator) { + m->machine->eval(evaluator->m->rawPtr); +} diff --git a/paddle/api/Paddle.swig b/paddle/api/Paddle.swig index 9194a6371be9e00c037967464ee2b63c1e4f6192..3365927f9b59936244230bed439808fa7ead2c61 100644 --- a/paddle/api/Paddle.swig +++ b/paddle/api/Paddle.swig @@ -96,7 +96,9 @@ namespace std { %rename(__getitem__) Vector::get; %rename(__setitem__) Vector::set; %rename(__len__) Vector::getSize; +%rename(__len__) Parameter::getSize; %rename(__call__) ParameterTraverseCallback::apply; +%rename(__repr__) Evaluator::toString; %apply (float* INPLACE_ARRAY2, int DIM1, int DIM2) { (float* data, int dim1, int dim2) @@ -167,6 +169,7 @@ namespace std { %newobject GradientMachine::asSequenceGenerator; %newobject GradientMachine::getParameter; %newobject GradientMachine::getLayerOutput; +%newobject GradientMachine::makeEvaluator; %newobject TrainerConfig::createFromTrainerConfigFile; %newobject TrainerConfig::getModelConfig; %newobject TrainerConfig::getOptimizationConfig; @@ -174,6 +177,7 @@ namespace std { %newobject Parameter::getConfig; %newobject ParameterOptimizer::create; %newobject ParameterOptimizer::needSpecialTraversal; +%newobject ParameterUpdater::createLocalUpdater; %feature("director") UpdateCallback; %feature("autodoc", 1); // To generate method stub, for code hint in ide @@ -193,4 +197,4 @@ namespace std { %ignore OptimizationConfigPrivate; %ignore ParameterTraverseCallbackPrivate; %include "utils/GlobalConstants.h" -%include 
"api/PaddleAPI.h" \ No newline at end of file +%include "api/PaddleAPI.h" diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index 841942ddae54fdd87796289ec131d79e3299c367..09c891871a5ca8571216d211203fe8643fc3a63f 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -515,6 +515,7 @@ private: friend class TrainerConfig; friend class ParameterOptimizer; + friend class ParameterUpdater; friend class Trainer; }; @@ -545,6 +546,8 @@ public: ParameterConfig* getConfig(); void setValueUpdated(); + size_t getSize() const; + private: static Parameter* createFromRawPtr(void* ptr); static Parameter* createFromSharedPtr(void* ptr); @@ -553,6 +556,7 @@ private: ParameterPrivate* m; friend class UpdateCallbackWrapper; friend class GradientMachine; + friend class ParameterUpdater; }; struct ModelConfigPrivate; @@ -679,7 +683,7 @@ private: }; class SequenceGenerator; - +class Evaluator; struct GradientMachinePrivate; class GradientMachine { private: @@ -710,6 +714,13 @@ public: GradientMatchineCreateMode mode = CREATE_MODE_NORMAL, const std::vector& parameterTypes = defaultParamTypes); + /** + * @brief finish + */ + void finish(); + + void start(); + /** * Prefetch row ids of sparse parameter. */ @@ -767,6 +778,10 @@ public: size_t max_length = 100UL, size_t beam_size = -1UL); + Evaluator* makeEvaluator(); + + void eval(Evaluator* evaluator); + private: GradientMachinePrivate* m; @@ -778,6 +793,109 @@ private: // Not to use c++ 11 init-list, so we use static var as function default arg. static std::vector defaultParamTypes; friend class Trainer; + friend class ParameterUpdater; +}; + +struct ParameterUpdaterPrivate; +class ParameterUpdater { +private: + ParameterUpdater(); + +public: + static ParameterUpdater* createLocalUpdater(OptimizationConfig* config); + ~ParameterUpdater(); + + /** + * @brief initialize Parameter Updater by GradientMachine. + * @param gm + */ + void init(const GradientMachine& gm); + + /** + * @brief begin of a training/testing of one pass. + */ + void startPass(); + + /** + * @brief end of a traning/testing of one pass. + */ + void finishPass(); + + /** + * @brief begin of a training/testing of one batch. + * @param data batch's size + * @return PassType, mostly will be training. + */ + PassType startBatch(size_t batchSize); + + /** + * @brief end of a traning/testing of one batch + * @param cost current batch cost. + */ + void finishBatch(float cost); + + /** + * @brief update a parameter (by local optimizer or by cluster pserver) + * @param param + */ + void update(Parameter* param); + + /** + * @brief restore the average parameter. + * @note It is only used in AverageOptimizer. Restore will get the current + * PARAMETER_VALUE back. + */ + void restore(); + + /** + * @brief apply. Store the average parameter. + * @note It is only used in AverageOptimizer. Apply will store the current + * PARAMETER_VALUE to buffer, calcaualte current Average Parameter, and save + * it to PARAMETER_VALUE. + */ + void apply(); + + /** + * @brief catchUpWith The Regularization will be delayed in many situations( + * pserver, local sparse). Catch Up means catch the regularization up, apply + * regularization to all params. + */ + void catchUpWith(); + +private: + ParameterUpdaterPrivate* m; +}; + +struct EvaluatorPrivate; +class Evaluator { +private: + Evaluator(); + DISABLE_COPY(Evaluator); + +public: + ~Evaluator(); + + /** + * @brief begin an evaluate stage. + */ + void start(); + + /** + * @brief end an evaluate stage. 
+ */ + void finish(); + + /** + * @brief toString returns the evaluation result. + * + * It is mapped to the __repr__ method in Python. + */ + std::string toString(); + +private: + EvaluatorPrivate* m; + + friend class GradientMachine; }; struct TrainerPrivate; diff --git a/paddle/api/PaddleAPIPrivate.h b/paddle/api/PaddleAPIPrivate.h index d2b56fc41c8aadb136ad6812f848e764e031073c..f41352bfec7c3333bde9509957aba8c5f373b9f2 100644 --- a/paddle/api/PaddleAPIPrivate.h +++ b/paddle/api/PaddleAPIPrivate.h @@ -11,12 +11,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - +#pragma once +#include <memory> +#include "PaddleAPI.h" +#include "paddle/gserver/evaluators/Evaluator.h" #include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/parameter/ParameterUpdaterBase.h" #include "paddle/trainer/TrainerConfigHelper.h" -#pragma once - struct GradientMachinePrivate { std::shared_ptr<paddle::GradientMachine> machine; @@ -65,3 +67,31 @@ struct ArgumentsPrivate { return *(std::shared_ptr<T>*)(rawPtr); } }; + +struct ParameterUpdaterPrivate { + std::unique_ptr<paddle::ParameterUpdater> updater; +}; + +struct ParameterPrivate { + std::shared_ptr<paddle::Parameter> sharedPtr; + paddle::Parameter* rawPtr; // rawPtr is only used in ParameterUpdater; + // in other situations sharedPtr should + // contain the value. + + ParameterPrivate() : sharedPtr(nullptr), rawPtr(nullptr) {} + + paddle::Parameter* getPtr() { + if (sharedPtr) { + return sharedPtr.get(); + } else { + return rawPtr; + } + } +}; + +struct EvaluatorPrivate { + paddle::Evaluator* rawPtr; + + EvaluatorPrivate() : rawPtr(nullptr) {} + ~EvaluatorPrivate() { delete rawPtr; } +}; diff --git a/paddle/api/Parameter.cpp b/paddle/api/Parameter.cpp index 4eed00a84a695f2c48ff93b33419ae2b3dd03768..ddc00d8d1af4c58d7e2233423bea916408bee92b 100644 --- a/paddle/api/Parameter.cpp +++ b/paddle/api/Parameter.cpp @@ -14,21 +14,7 @@ limitations under the License. */ #include "paddle/parameter/Parameter.h" #include "PaddleAPI.h" - -struct ParameterPrivate { - std::shared_ptr<paddle::Parameter> sharedPtr; - paddle::Parameter* rawPtr; - - ParameterPrivate() : sharedPtr(nullptr), rawPtr(nullptr) {} - - paddle::Parameter* getPtr() { - if (sharedPtr) { - return sharedPtr.get(); - } else { - return rawPtr; - } - } -}; +#include "PaddleAPIPrivate.h" Parameter::Parameter() : m(new ParameterPrivate()) {} @@ -70,3 +56,5 @@ ParameterConfig* Parameter::getConfig() { size_t Parameter::getID() const { return m->getPtr()->getID(); } void Parameter::setValueUpdated() { m->getPtr()->setValueUpdated(); } + +size_t Parameter::getSize() const { return m->getPtr()->getSize(); } diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7cd8ed7e3907489a60f37090df6f51492def2612 --- /dev/null +++ b/paddle/api/ParameterUpdater.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "PaddleAPI.h" + +#include "PaddleAPIPrivate.h" +#include "paddle/trainer/ThreadParameterUpdater.h" + +ParameterUpdater::ParameterUpdater() : m(new ParameterUpdaterPrivate()) {} + +ParameterUpdater *ParameterUpdater::createLocalUpdater( + OptimizationConfig *config) { + auto param = new ParameterUpdater(); + param->m->updater.reset(new paddle::SgdThreadUpdater(config->m->getConfig())); + return param; +} + +ParameterUpdater::~ParameterUpdater() { delete m; } + +void ParameterUpdater::init(const GradientMachine &gm) { + m->updater->init(gm.m->machine->getNonStaticParameters()); +} + +void ParameterUpdater::startPass() { m->updater->startPass(); } + +void ParameterUpdater::finishPass() { m->updater->finishPass(); } + +PassType ParameterUpdater::startBatch(size_t batchSize) { + return m->updater->startBatch((int64_t)batchSize); +} + +void ParameterUpdater::finishBatch(float cost) { + m->updater->finishBatch(cost); +} + +void ParameterUpdater::update(Parameter *param) { + auto paddleParam = param->m->getPtr(); + m->updater->update(paddleParam); +} + +void ParameterUpdater::restore() { m->updater->restore(); } + +void ParameterUpdater::apply() { m->updater->apply(); } + +void ParameterUpdater::catchUpWith() { m->updater->catchUpWith(); } diff --git a/paddle/api/Vector.cpp b/paddle/api/Vector.cpp index 874f2fd044e9e86b44f8ca69f08bdfd3287d4749..db8f005929d90f718fc1ad42c60b68108ff55005 100644 --- a/paddle/api/Vector.cpp +++ b/paddle/api/Vector.cpp @@ -253,7 +253,7 @@ void Vector::copyToNumpyArray(float** view_m_data, int* dim1) { *view_m_data = new float[*dim1]; if (auto cpuVec = dynamic_cast(m->vec.get())) { std::memcpy(*view_m_data, cpuVec->getData(), sizeof(float) * (*dim1)); - } else if (auto gpuVec = dynamic_cast(m->vec.get())) { + } else if (auto gpuVec = dynamic_cast(m->vec.get())) { hl_memcpy_device2host( *view_m_data, gpuVec->getData(), sizeof(float) * (*dim1)); } else { diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index edcefba6a854df518fd2eb8c1fea5b72c5f5d6a8..981d10afda2671be9e8f0da1a4bee755f7aa9d61 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -15,6 +15,7 @@ import paddle.trainer.PyDataProvider2 as dp2 import collections import swig_paddle +import numpy __all__ = ['DataProviderConverter'] @@ -35,18 +36,18 @@ class IScanner(object): class DenseScanner(IScanner): def __init__(self, input_type, pos): IScanner.__init__(self, input_type, pos) - self.__mat__ = [] - self.__height__ = 0 + self.__mat__ = None def scan(self, dat): - self.__mat__.extend(dat) - self.__height__ += 1 + if self.__mat__ is None: + self.__mat__ = numpy.array([dat], dtype='float32') + else: + self.__mat__ = numpy.append(self.__mat__, [dat], axis=0) def finish_scan(self, argument): assert isinstance(argument, swig_paddle.Arguments) assert isinstance(self.input_type, dp2.InputType) - m = swig_paddle.Matrix.createDense(self.__mat__, self.__height__, - self.input_type.dim, False) + m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False) argument.setSlotValue(self.pos, m) diff --git a/paddle/utils/common.h b/paddle/utils/common.h index 3ff0b869478832d023956d190ddeb89a69acf58b..202a9d980d8350c230daaf473dd34d4069479e5f 100644 --- a/paddle/utils/common.h +++ b/paddle/utils/common.h @@ -14,8 +14,6 @@ limitations under the License. 
*/ #pragma once -namespace paddle { - /** * Disable copy macro. */ @@ -24,6 +22,8 @@ namespace paddle { class_name(const class_name &other) = delete; \ class_name &operator=(const class_name &other) = delete +namespace paddle { + #ifdef PADDLE_TYPE_DOUBLE using real = double; #else diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 2eb7b17a0b40eb42d98b6df02ae26559ee2d8a7e..674b5ac58b6febd914cb36c75356d8aa70a908b1 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3416,8 +3416,35 @@ def register_parse_config_hook(f): _parse_config_hooks.add(f) -def parse_config(config_file, config_arg_str): +def update_g_config(): ''' + Update g_config after execute config_file or config_functions. + ''' + for k, v in settings.iteritems(): + if v is None: + continue + g_config.opt_config.__setattr__(k, v) + + for k, v in trainer_settings.iteritems(): + if v is None: + continue + g_config.__setattr__(k, v) + + for name in g_config.model_config.input_layer_names: + assert name in g_layer_map, \ + 'input name "%s" does not correspond to a layer name' % name + assert (g_layer_map[name].type == "data" or g_layer_map[name].type == "data_trim"), \ + 'The type of input layer "%s" is not "data"' % name + for name in g_config.model_config.output_layer_names: + assert name in g_layer_map, \ + 'input name "%s" does not correspond to a layer name' % name + return g_config + + +def parse_config(trainer_config, config_arg_str): + ''' + @param trainer_config: can be a string of config file name or a function name + with config logic @param config_arg_str: a string of the form var1=val1,var2=val2. It will be passed to config script as a dictionary CONFIG_ARGS ''' @@ -3451,45 +3478,20 @@ def parse_config(config_file, config_arg_str): g_root_submodel.is_recurrent_layer_group = False g_current_submodel = g_root_submodel - # for paddle on spark, need support non-file config. - # you can use parse_config like below: - # - # from paddle.trainer.config_parser import parse_config - # def configs(): - # #your paddle config code, which is same as config file. - # - # config = parse_config(configs, "is_predict=1") - # # then you get config proto object. 
- if hasattr(config_file, '__call__'): - config_file.func_globals.update( + if hasattr(trainer_config, '__call__'): + trainer_config.func_globals.update( make_config_environment("", config_args)) - config_file() + trainer_config() else: - execfile(config_file, make_config_environment(config_file, config_args)) - for k, v in settings.iteritems(): - if v is None: - continue - g_config.opt_config.__setattr__(k, v) - - for k, v in trainer_settings.iteritems(): - if v is None: - continue - g_config.__setattr__(k, v) + execfile(trainer_config, + make_config_environment(trainer_config, config_args)) - for name in g_config.model_config.input_layer_names: - assert name in g_layer_map, \ - 'input name "%s" does not correspond to a layer name' % name - assert (g_layer_map[name].type == "data" or g_layer_map[name].type == "data_trim"), \ - 'The type of input layer "%s" is not "data"' % name - for name in g_config.model_config.output_layer_names: - assert name in g_layer_map, \ - 'input name "%s" does not correspond to a layer name' % name - return g_config + return update_g_config() -def parse_config_and_serialize(config_file, config_arg_str): +def parse_config_and_serialize(trainer_config, config_arg_str): try: - config = parse_config(config_file, config_arg_str) + config = parse_config(trainer_config, config_arg_str) #logger.info(config) return config.SerializeToString() except: diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py index 0ff5edf825e6d3ba96328ed925fefb8c773b2a89..13155ebddbb49c502d9d4110704ab09f49825be2 100644 --- a/python/paddle/trainer_config_helpers/__init__.py +++ b/python/paddle/trainer_config_helpers/__init__.py @@ -20,4 +20,6 @@ from layers import * from networks import * from optimizers import * from attrs import * +from config_parser_utils import * +# This will enable operator overloading for LayerOutput import layer_math diff --git a/python/paddle/trainer_config_helpers/config_parser.py b/python/paddle/trainer_config_helpers/config_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..4b91b8d2824cd89ac0d6da696492bd9289b6e5f4 --- /dev/null +++ b/python/paddle/trainer_config_helpers/config_parser.py @@ -0,0 +1,38 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.trainer.config_parser as config_parser +''' +This file is a wrapper around the formal config_parser. The main idea of this file +is to separate different config logic into different functions, such as network +configuration and optimizer configuration.
+''' + +__all__ = [ + "parse_trainer_config", "parse_network_config", "parse_optimizer_config" +] + + +def parse_trainer_config(trainer_conf, config_arg_str): + return config_parser.parse_config(trainer_conf, config_arg_str) + + +def parse_network_config(network_conf): + config = config_parser.parse_config(network_conf, '') + return config.model_config + + +def parse_optimizer_config(optimizer_conf): + config = config_parser.parse_config(optimizer_conf, '') + return config.opt_config diff --git a/python/paddle/trainer_config_helpers/config_parser_utils.py b/python/paddle/trainer_config_helpers/config_parser_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..681b177a55f48d02a8ff792945dd7cc3b05cd976 --- /dev/null +++ b/python/paddle/trainer_config_helpers/config_parser_utils.py @@ -0,0 +1,38 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.trainer.config_parser as config_parser +''' +This file is a wrapper around the formal config_parser. The main idea of this file +is to separate different config logic into different functions, such as network +configuration and optimizer configuration. +''' + +__all__ = [ + "parse_trainer_config", "parse_network_config", "parse_optimizer_config" +] + + +def parse_trainer_config(trainer_conf, config_arg_str): + return config_parser.parse_config(trainer_conf, config_arg_str) + + +def parse_network_config(network_conf, config_arg_str=''): + config = config_parser.parse_config(network_conf, config_arg_str) + return config.model_config + + +def parse_optimizer_config(optimizer_conf, config_arg_str=''): + config = config_parser.parse_config(optimizer_conf, config_arg_str) + return config.opt_config
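The config_parser_utils helpers introduced above are what demo/mnist/api_train.py uses to turn function-based configs into protobuf messages. A minimal usage sketch, assuming a network function written with paddle.trainer_config_helpers (the function body and the file name trainer_config.conf below are only illustrative):

    from paddle.trainer_config_helpers import *

    def my_network():
        img = data_layer(name='pixel', size=784)
        prediction = fc_layer(input=img, size=10, act=SoftmaxActivation())
        outputs(prediction)

    # A function-based config yields just the ModelConfig proto ...
    model_proto = parse_network_config(my_network)

    # ... while a file-based config goes through parse_trainer_config and
    # yields the full TrainerConfig proto.
    trainer_proto = parse_trainer_config('trainer_config.conf', 'batch_size=128')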