diff --git a/CMakeLists.txt b/CMakeLists.txt
index b85709f807bc3734cdd162faae3f849278244713..4613155f7700b25b2a8d7c250832722085b332fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
 set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b1)
+set(PADDLE_PATCH_VERSION 0b2)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
 
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
diff --git a/cmake/util.cmake b/cmake/util.cmake
index d776c3ae499526ef52e24c0aeea18ccab71a242b..0fa36f070cc11be543efe9573b93173ec771b9be 100644
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -184,3 +184,20 @@ macro(add_paddle_culib TARGET_NAME)
     cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
     set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
 endmacro()
+
+
+# create_resources(res_file output): embed res_file in the C source `output`.
+# Defines `const unsigned char <name>[]`, NUL-terminated so that it can be
+# passed to C-string APIs, and `const unsigned <name>_size`, the byte count of
+# res_file without the terminator. <name> is the file name of res_file with
+# '.', ' ' and '-' replaced by '_'. Runs at configure time.
+function(create_resources res_file output)
+    # Derive a valid C identifier from the file name.
+    get_filename_component(filename "${res_file}" NAME)
+    string(REGEX REPLACE "\\.| |-" "_" filename "${filename}")
+    # Hex-dump the file and turn every byte into "0xNN,". The input is quoted
+    # so that an empty file does not drop the argument and break string().
+    file(READ "${res_file}" filedata HEX)
+    string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata "${filedata}")
+    file(WRITE "${output}" "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename}) - 1;\n")
+endfunction()
diff --git a/demo/mnist/data/get_mnist_data.sh b/demo/mnist/data/get_mnist_data.sh
old mode 100644
new mode 100755
diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py
index 0f14ded2dce933d1b790e947a03402940cbc2a6b..32af29730a7365df1a98fe54a2edf8850ee93e8d 100644
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@@ -2,10 +2,10 @@ from paddle.trainer.PyDataProvider2 import *
 
 
 # Define a py data provider
-@provider(input_types=[
-    dense_vector(28 * 28),
-    integer_value(10)
-])
+@provider(input_types={
+    'pixel': dense_vector(28 * 28),
+    'label': integer_value(10)
+})
 def process(settings, filename):  # settings is not used currently.
     imgf = filename + "-images-idx3-ubyte"
     labelf = filename + "-labels-idx1-ubyte"
@@ -14,20 +14,19 @@ def process(settings, filename):  # settings is not used currently.
 
     f.read(16)
     l.read(8)
-    
+
     # Define number of samples for train/test
     if "train" in filename:
         n = 60000
     else:
         n = 10000
-    
+
     for i in range(n):
         label = ord(l.read(1))
         pixels = []
-        for j in range(28*28):
+        for j in range(28 * 28):
             pixels.append(float(ord(f.read(1))) / 255.0)
-        yield  { "pixel": pixels, 'label': label }
-        
+        yield {"pixel": pixels, 'label': label}
+
     f.close()
     l.close()
-    
\ No newline at end of file
diff --git a/demo/mnist/vgg_16_mnist.py b/demo/mnist/vgg_16_mnist.py
index ad0a4de3215ca75c9693815fb94c5099b74dc1fe..45a45bb061aa781231a944bb82ebfbc6b0dc9618 100644
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@@ -47,6 +47,7 @@ predict = small_vgg(input_image=img,
 
 if not is_predict:
     lbl = data_layer(name="label", size=label_size)
+    inputs(img, lbl)
     outputs(classification_cost(input=predict, label=lbl))
 else:
     outputs(predict)
diff --git a/doc_cn/ui/data_provider/mnist_provider.dict.py b/doc_cn/ui/data_provider/mnist_provider.dict.py
index 4eab5b1fd3b50a67a9cfee92883cce71ee1a2c87..bf13b56372b56a1e810fad159cd51371ef46c468 100644
--- a/doc_cn/ui/data_provider/mnist_provider.dict.py
+++ b/doc_cn/ui/data_provider/mnist_provider.dict.py
@@ -2,10 +2,10 @@ from paddle.trainer.PyDataProvider2 import *
 
 
 # Define a py data provider
-@provider(input_types=[
-    dense_vector(28 * 28),
-    integer_value(10)
-])
+@provider(input_types={
+    'pixel': dense_vector(28 * 28),
+    'label': integer_value(10)
+})
 def process(settings, filename):  # settings is not used currently.
     f = open(filename, 'r')  # open one of training file
 
@@ -20,6 +20,6 @@ def process(settings, filename):  # settings is not used currently.
             pixels_float.append(float(each_pixel_str))
 
         # give data to paddle.
-        yield { "pixel": pixels_float, 'label': int(label) }
+        yield {"pixel": pixels_float, 'label': int(label)}
 
     f.close()  # close file
diff --git a/doc_cn/ui/data_provider/pydataprovider2.rst b/doc_cn/ui/data_provider/pydataprovider2.rst
index 9e1d8c531f5ba2101d0f4d9506361e058b168181..80b40084d8f5037a76df0b3e01ed5742d8476bd0 100644
--- a/doc_cn/ui/data_provider/pydataprovider2.rst
+++ b/doc_cn/ui/data_provider/pydataprovider2.rst
@@ -141,8 +141,6 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数:
    是一个batch size，但是有时为了计算均衡性，可以将一条数据设置成多个batch size
 *  cache 是数据缓存的策略，参考 `cache`_
 *  init_hook 是初始化时调用的函数，参考 `init_hook`_
-*  use_dynamic_order 如果是true的话，可以返回一个dict，key是data_layer的名字，value是特征值。同时，也可以
-   返回一个list或者tuple。如果是false的话，只能够返回list或者tuple
 *  check 设置成true的话，会根据input_types检查数据的合法性。
 *  check_fail_continue 如果设置成true的话，即使在check中数据不合法，也会扔到这条数据，继续训练。 如果
    check是false的话，没有作用。
diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp
index 2f9a1223c6e45481063ff1312123a96093b0fe50..e3e472ac166c271940308e1a8efa917c286b962a 100644
--- a/paddle/gserver/dataproviders/PyDataProvider2.cpp
+++ b/paddle/gserver/dataproviders/PyDataProvider2.cpp
@@ -246,8 +246,7 @@ private:
                        PyObjectPtr && kwargs) {
     LOG(INFO) << "loading dataprovider " << model <<"::" << className;
 
-    PyObjectPtr module(PyImport_ImportModule(model.c_str()));
-    CHECK_PY(module) << "Cannot imort module " << model.c_str();
+    PyObjectPtr module = py::import(model);
     PyObjectPtr moduleDict(PyModule_GetDict(module.get()));
     CHECK_PY(moduleDict) << "Invoke module.__dict__ error";
     PyObjectPtr cls(PyDict_GetItemString(moduleDict.get(),
diff --git a/paddle/gserver/tests/test_PyDataProvider2.cpp b/paddle/gserver/tests/test_PyDataProvider2.cpp
index e75e53ab7f431a34798e8a79985f30441005098c..6bf1e329251219fcbf68b95f2d80a3235cb7037f 100644
--- a/paddle/gserver/tests/test_PyDataProvider2.cpp
+++ b/paddle/gserver/tests/test_PyDataProvider2.cpp
@@ -117,7 +117,7 @@ TEST(PyDataProvider2, index_no_seq) {
 }
 
 TEST(PyDataProvider2, init_hook) {
-  paddle::PyObjectPtr pickle(PyImport_ImportModule("pickle"));
+  paddle::PyObjectPtr pickle = paddle::py::import("pickle");
   paddle::PyObjectPtr globals(
       PyModule_GetDict(PyImport_AddModule("__main__")));
   PyDict_SetItemString(globals.get(), "pickle", pickle.get());
diff --git a/paddle/gserver/tests/test_PyDataProvider2.py b/paddle/gserver/tests/test_PyDataProvider2.py
index 145fe85cff7d88e73233068f956489a0c2259abe..71c3335231e52132e6c7e9aaf0cb92d0db2e20df 100644
--- a/paddle/gserver/tests/test_PyDataProvider2.py
+++ b/paddle/gserver/tests/test_PyDataProvider2.py
@@ -86,7 +86,7 @@ def test_can_over_batch_size(setting, filename):
         yield [random.randint(0, 100 - 1) for _ in xrange(seq_len)]
 
 
-@provider(input_types=[index_slot(10), index_slot(10)])
+@provider(input_types={'input1':index_slot(10), 'input2': index_slot(10)})
 def test_input_order(setting, filename):
     for _ in xrange(1000):
         yield {
diff --git a/paddle/utils/.gitignore b/paddle/utils/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..f2cfd7409412de68f4183daebcb48e7a3ae37672
--- /dev/null
+++ b/paddle/utils/.gitignore
@@ -0,0 +1 @@
+enable_virtualenv.c
diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt
index 0557b01e36f078bebebbcb65af95357c96369514..45240b5002aa18be4a9b7e3ec3b754eb83ca0e09 100644
--- a/paddle/utils/CMakeLists.txt
+++ b/paddle/utils/CMakeLists.txt
@@ -2,6 +2,9 @@
 
 file(GLOB UTIL_HEADERS . *.h)
 file(GLOB UTIL_SOURCES . *.cpp)
+create_resources(enable_virtualenv.py enable_virtualenv.c)
+set(UTIL_RES enable_virtualenv.c)
+
 if(APPLE)
     file(GLOB UTIL_ARCH_SOURCES . arch/osx/*.cpp)
 else()
@@ -9,7 +12,8 @@ else()
 endif()
 add_library(paddle_utils STATIC
         ${UTIL_SOURCES}
-        ${UTIL_ARCH_SOURCES})
+        ${UTIL_ARCH_SOURCES}
+        ${UTIL_RES})
 add_style_check_target(paddle_utils ${UTIL_HEADERS})
 add_style_check_target(paddle_utils ${UTIL_SOURCES}
     ${UTIL_ARCH_SOURCES})
diff --git a/paddle/utils/PythonUtil.cpp b/paddle/utils/PythonUtil.cpp
index 78c3a80674f9c108d48d90325b9dd92ff4aad7af..90e5093f96ea4e892b7f2b1f2baa1bf1d6c85c05 100644
--- a/paddle/utils/PythonUtil.cpp
+++ b/paddle/utils/PythonUtil.cpp
@@ -77,11 +77,18 @@ static std::recursive_mutex g_pyMutex;
 PyGuard::PyGuard() : guard_(g_pyMutex) {}
 
 
-static void printPyErrorStack(std::ostream& os, bool withEndl = false) {
+static void printPyErrorStack(std::ostream& os, bool withEndl = false,
+                              bool withPyPath = true) {
   PyObject * ptype, *pvalue, *ptraceback;
   PyErr_Fetch(&ptype, &pvalue, &ptraceback);
   PyErr_NormalizeException(&ptype, &pvalue, &ptraceback);
   PyErr_Clear();
+  if (withPyPath) {
+    os << "Current PYTHONPATH: " << py::repr(PySys_GetObject(strdup("path")));
+    if (withEndl) {
+      os << std::endl;
+    }
+  }
   PyTracebackObject* obj = (PyTracebackObject*)ptraceback;
 
   os << "Python Error: " << PyString_AsString(PyObject_Str(ptype))
@@ -114,10 +121,7 @@ PyObjectPtr callPythonFuncRetPyObj(const std::string& moduleName,
                                    const std::string& funcName,
                                    const std::vector<std::string>& args) {
   PyGuard guard;
-  PyObjectPtr pyModuleName(PyString_FromString(moduleName.c_str()));
-  CHECK_PY(pyModuleName) << "Import PyModule failed" << moduleName;
-  PyObjectPtr pyModule(PyImport_Import(pyModuleName.get()));
-  CHECK_PY(pyModule) << "Import Python Module"<< moduleName << " failed.";
+  PyObjectPtr pyModule = py::import(moduleName);
   PyObjectPtr pyFunc(PyObject_GetAttrString(pyModule.get(), funcName.c_str()));
   CHECK_PY(pyFunc) << "GetAttrString failed.";
   PyObjectPtr pyArgs(PyTuple_New(args.size()));
@@ -143,7 +147,7 @@ PyObjectPtr createPythonClass(
     const std::vector<std::string>& args,
     const std::map<std::string, std::string>& kwargs) {
   PyGuard guard;
-  PyObjectPtr pyModule(PyImport_ImportModule(moduleName.c_str()));
+  PyObjectPtr pyModule = py::import(moduleName);
   LOG(INFO) << "createPythonClass moduleName.c_str:" << moduleName.c_str();
   CHECK_PY(pyModule) << "Import module " << moduleName << " failed.";
   PyObjectPtr pyDict(PyModule_GetDict(pyModule.get()));
@@ -181,18 +185,29 @@ std::string getPyCallStack() {
   printPyErrorStack(os, true);
   return os.str();
 }
+
+// Imports Python module `moduleName`; CHECK_PY fails if it cannot be loaded.
+PyObjectPtr import(const std::string &moduleName) {
+  auto module = PyImport_ImportModule(moduleName.c_str());
+  CHECK_PY(module) << "Import " << moduleName << " Error";
+  return PyObjectPtr(module);
+}
 }  // namespace py
 
 #endif
-
+extern "C" {
+extern const char enable_virtualenv_py[];
+}
 void initPython(int argc, char** argv) {
 #ifndef PADDLE_NO_PYTHON
   Py_SetProgramName(argv[0]);
   Py_Initialize();
   PySys_SetArgv(argc, argv);
-
   // python blocks SIGINT. Need to enable it.
   signal(SIGINT, SIG_DFL);
+
+  // Activate the user's virtualenv; the script array must be NUL-terminated.
+  PyRun_SimpleString(enable_virtualenv_py);
 #endif
 }
 
diff --git a/paddle/utils/PythonUtil.h b/paddle/utils/PythonUtil.h
index db02d1252b4057dbfdcc7c894b4a23bc5561732b..00fc177022ac343a5760e57bcbcabf18f697bd4d 100644
--- a/paddle/utils/PythonUtil.h
+++ b/paddle/utils/PythonUtil.h
@@ -87,6 +87,8 @@ PyObjectPtr createPythonClass(const std::string& moduleName,
   CHECK((x) != nullptr) << ::paddle::py::getPyCallStack()
 
 namespace py {
+PyObjectPtr import(const std::string& moduleName);
+
 /**
  * Cast a PyLong or PyInt to int type T.
  * @tparam T return type.
diff --git a/paddle/utils/enable_virtualenv.py b/paddle/utils/enable_virtualenv.py
new file mode 100644
index 0000000000000000000000000000000000000000..99d822a4145cca3f5ae35c4cf144210f35460827
--- /dev/null
+++ b/paddle/utils/enable_virtualenv.py
@@ -0,0 +1,10 @@
+import os
+
+def __activate_virtual_env__():
+  """Activate $VIRTUAL_ENV via its bin/activate_this.py, if that file exists."""
+  __path__ = os.getenv('VIRTUAL_ENV')
+  __script__ = os.path.join(__path__ or '', 'bin', 'activate_this.py')
+  if __path__ and os.path.isfile(__script__):
+    execfile(__script__, {'__file__': __script__})
+
+__activate_virtual_env__()
diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py
index 34f5dd41b7e683bbfa71e8a3e23ff3f542b39591..53409b746d811a3d73188a613c6b121e71955552 100644
--- a/python/paddle/trainer/PyDataProvider2.py
+++ b/python/paddle/trainer/PyDataProvider2.py
@@ -208,7 +208,6 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
              calc_batch_size=None,
              cache=CacheType.NO_CACHE,
              check=False, check_fail_continue=False,
-             use_dynamic_order=True,
              init_hook=None, **kwargs):
     """
     Provider decorator. Use it to make a function into PyDataProvider2 object.
@@ -228,9 +227,15 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
     The configuration of data provider should be setup by\:
 
     :param input_types: Specify the input types, can also be set in init_hook.
-                        It is a list of InputType object. For example, input_types= \
-                        [dense_vector(9), integer_value(2)].
-    :type input_types: list|tuple
+                        It can be a list of InputType objects, for example
+                        input_types=[dense_vector(9), integer_value(2)], or a
+                        dict of InputType objects whose keys are data_layer
+                        names, for example input_types=\
+                        {'img': img_features, 'label': label}. When a dict is
+                        used, the provider may yield a dict of feature values
+                        keyed by the same data_layer names.
+
+    :type input_types: list|tuple|dict
 
     :param should_shuffle: True if data should shuffle. Pass None means shuffle
                            when is training and not to shuffle when is testing.
@@ -281,12 +286,6 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
                                 drop the wrong format data when it is True. Has
                                 no effect when check set to False.
     :type check_fail_continue: bool
-
-    :param use_dynamic_order: Allow provider to yield a dictionary object, whose
-                              key is a input data layer name, and value is the
-                              feature value. The tuples are still allowed when
-                              use_dynmaic_order is True.
-    :type use_dynamic_order: bool
     """
 
     def __wrapper__(generator):
@@ -340,6 +339,11 @@ def provider(input_types=None, should_shuffle=None, pool_size=-1,
                 assert self.slots is not None
                 assert self.generator is not None
 
+                use_dynamic_order = False
+                if isinstance(self.slots, dict):  # reorder input_types
+                    self.slots = [self.slots[ipt] for ipt in self.input_order]
+                    use_dynamic_order = True
+
                 if len(self.slots) == 1:
                     self.generator = SingleSlotWrapper(self.generator)
 
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 18f0b1b4e497ea7841106b975b5adabcaf6415fd..c1e74c7a2d8f7448429edcdbc2ec7c32f6cedd57 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -216,6 +216,10 @@ def Inputs(*args):
         if g_current_submodel is g_root_submodel:
             g_config.model_config.input_layer_names.append(name)
 
+@config_func
+def HasInputsSet():
+    """Return True if model_config already lists at least one input layer."""
+    return len(g_config.model_config.input_layer_names) != 0
 
 # Define the name of the output layers of the NeuralNetwork.
 # Usually the output is simply the cost layer.
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index c54ec3096989cd7220d42c3cb71633c59dedc9a0..d8f96195020b42e4aae6cd13520de9558d5622fa 100644
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -30,7 +30,7 @@ __all__ = ['sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
            'lstmemory_unit', 'small_vgg', 'img_conv_group', 'vgg_16_network',
            'gru_unit', 'gru_group', 'simple_gru', 'simple_attention',
            'text_conv_pool',
-           'bidirectional_lstm', 'outputs']
+           'bidirectional_lstm', 'inputs', 'outputs']
 
 
 ######################################################
@@ -372,8 +372,8 @@ def small_vgg(input_image, num_channels, num_classes):
     tmp = __vgg__(tmp, 128, 2, [0.4, 0])
     tmp = __vgg__(tmp, 256, 3, [0.4, 0.4, 0])
     tmp = __vgg__(tmp, 512, 3, [0.4, 0.4, 0])
-    tmp = img_pool_layer(input = tmp, stride = 2,
-                         pool_size = 2, pool_type = MaxPooling())
+    tmp = img_pool_layer(input=tmp, stride=2,
+                         pool_size=2, pool_type=MaxPooling())
     tmp = dropout_layer(input=tmp, dropout_rate=0.5)
     tmp = fc_layer(input=tmp, size=512, layer_attr=ExtraAttr(drop_rate=0.5),
                    act=LinearActivation())
@@ -745,7 +745,6 @@ def gru_group(input,
               gru_bias_attr=None,
               act=None, gate_act=None,
               gru_layer_attr=None):
-
     """
     gru_group is a recurrent layer group version Gated Recurrent Unit. It
     does exactly the same calculation as the grumemory layer does. A promising
@@ -919,12 +918,12 @@ def bidirectional_lstm(input, size, name=None, return_seq=False,
 
     fw = simple_lstm(name='%s_fw' % name, input=input, size=size,
                      **dict((k[len('fwd_'):], v) for k, v in args.iteritems()
-                        if k.startswith('fwd_')))
+                            if k.startswith('fwd_')))
 
     bw = simple_lstm(name="%s_bw" % name, input=input, size=size,
                      reverse=True,
                      **dict((k[len('bwd_'):], v) for k, v in args.iteritems()
-                        if k.startswith('bwd_')))
+                            if k.startswith('bwd_')))
 
     if return_seq:
         return concat_layer(name=name, input=[fw, bw], layer_attr=concat_attr,
@@ -1052,14 +1051,30 @@ def dropout_layer(input, dropout_rate, name=None):
                        layer_attr=ExtraAttr(drop_rate=dropout_rate))
 
 
-def outputs(layers, *args):
+def inputs(layers, *args):
+    """
+    Declare the network's inputs, in the same order as the data provider
+    returns them. More layers may be given as extra positional arguments.
+
+    :param layers: Input layer(s), or input layer name(s).
+    :type layers: list|tuple|LayerOutput|basestring
+    :return: None
     """
-    Declare the end of network. Currently it will only calculate the
-    input/output order of network. It will calculate the predict network or
-    train network's output automatically.
 
+    if isinstance(layers, (LayerOutput, basestring)):
+        layers = [layers]
+    # Copy: a tuple has no extend(), and the caller's list must not be mutated.
+    layers = list(layers) + list(args)
 
-    :param layers:
+    Inputs(*[l if isinstance(l, basestring) else l.name for l in layers])
+
+
+def outputs(layers, *args):
+    """
+    Declare the outputs of the network. If the inputs have not been declared,
+    this method will calculate the input order by a depth-first traversal.
+
+    :param layers: Output layers.
     :type layers: list|tuple|LayerOutput
     :return:
     """
@@ -1093,6 +1108,11 @@ def outputs(layers, *args):
         layers.extend(args)
 
     assert len(layers) > 0
+
+    if HasInputsSet():  # input already set
+        Outputs(*[l.name for l in layers])
+        return  # just return outputs.
+
     if len(layers) != 1:
         logger.warning("`outputs` routine try to calculate network's"
                        " inputs and outputs order. It might not work well."