diff --git a/CMakeLists.txt b/CMakeLists.txt
index bbf3acb8ad26f59e4ec99db4c350e060c8368975..b4a700f974405094cbc78fb8326a8c8e40b7b181 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,7 +49,6 @@ option(WITH_MKL         "Compile PaddlePaddle with MKL support."        ${AVX_FO
 option(WITH_NGRAPH      "Compile PaddlePaddle with nGraph support."     OFF)
 option(WITH_DSO         "Compile PaddlePaddle with dynamic linked CUDA" ON)
 option(WITH_TESTING     "Compile PaddlePaddle with unit testing"        OFF)
-option(WITH_SWIG_PY     "Compile PaddlePaddle with inference api"       ON)
 option(WITH_PYTHON      "Compile PaddlePaddle with python interpreter"  ON)
 option(WITH_DOUBLE      "Compile PaddlePaddle with double precision"    OFF)
 option(WITH_RDMA        "Compile PaddlePaddle with RDMA support"        OFF)
@@ -176,7 +175,6 @@ include(external/python)    # download, build, install python
 include(external/openblas)  # download, build, install openblas
 include(external/mkldnn)    # download, build, install mkldnn
 include(external/ngraph)    # download, build, install nGraph
-include(external/swig)      # download, build, install swig
 include(external/boost)     # download boost
 include(external/any)       # download libn::any
 include(external/eigen)     # download eigen3
diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake
deleted file mode 100644
index de07703695eb14e76eedd3758d55cb98edd1e02b..0000000000000000000000000000000000000000
--- a/cmake/external/swig.cmake
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-IF(NOT WITH_SWIG_PY)
-    return()
-ENDIF()
-
-FIND_PACKAGE(SWIG)
-
-IF(NOT SWIG_FOUND)
-    # build swig as an external project
-    INCLUDE(ExternalProject)
-
-    SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
-    SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
-    SET(SWIG_TARGET_VERSION "3.0.2")
-    SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
-    SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
-
-    IF(WIN32)
-        # swig.exe available as pre-built binary on Windows:
-        ExternalProject_Add(swig
-            URL                 http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip
-            URL_MD5             ${SWIG_DOWNLOAD_WIN_MD5}
-            SOURCE_DIR          ${SWIG_SOURCES_DIR}
-            CONFIGURE_COMMAND   ""
-            BUILD_COMMAND       ""
-            INSTALL_COMMAND     ""
-            UPDATE_COMMAND      ""
-        )
-        SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
-        SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe  CACHE FILEPATH "SWIG Executable" FORCE)
-    ELSE(WIN32)
-        # swig uses bison find it by cmake and pass it down
-        FIND_PACKAGE(BISON)
-
-        # From SWIG configure
-        ExternalProject_Add(swig
-            GIT_REPOSITORY      https://github.com/swig/swig.git
-            GIT_TAG             rel-3.0.10
-            PREFIX              ${SWIG_SOURCES_DIR}
-            CONFIGURE_COMMAND   cd <SOURCE_DIR> && ./autogen.sh && ./configure
-                                --prefix=${SWIG_INSTALL_DIR} --without-pcre
-            BUILD_COMMAND       cd <SOURCE_DIR> && make
-            INSTALL_COMMAND     cd <SOURCE_DIR> && make install
-            UPDATE_COMMAND      ""
-        )
-
-        SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
-        SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig)
-    ENDIF(WIN32)
-
-    LIST(APPEND external_project_dependencies swig)
-ENDIF(NOT SWIG_FOUND)
diff --git a/paddle/py_paddle/.gitignore b/paddle/py_paddle/.gitignore
deleted file mode 100644
index 80d1f76fbc05627e21e334af55d63a4a534434c6..0000000000000000000000000000000000000000
--- a/paddle/py_paddle/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-swig_paddle.py
-_swig_paddle.so
diff --git a/paddle/py_paddle/__init__.py b/paddle/py_paddle/__init__.py
deleted file mode 100644
index 5504d1d50c523315036bfaaf6641c5216269a5e5..0000000000000000000000000000000000000000
--- a/paddle/py_paddle/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from util import DataProviderWrapperConverter
-from dataprovider_converter import DataProviderConverter
-
-__all__ = [
-    'paddle',
-    'DataProviderConverter',
-    'DataProviderWrapperConverter',  # for deprecated usage.
-    'loadParameterFile'
-]
-util.monkeypatches()
diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py
deleted file mode 100644
index 43614b9779d21795f1f274589ea93639e923ce75..0000000000000000000000000000000000000000
--- a/paddle/py_paddle/dataprovider_converter.py
+++ /dev/null
@@ -1,309 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle.trainer.PyDataProvider2 as dp2
-import collections
-import swig_paddle
-import numpy
-import itertools
-from functools import reduce
-
-__all__ = ['DataProviderConverter']
-
-
-class IScanner(object):
-    """
-    The scanner will scan Python object two passes, then convert it to Paddle's
-    argument.
-
-    In the first pass, `pre_scan` will be invoked by every data instance, and
-    then invoke `finish_pre_scan` to arguments. And the second pass do the same
-    thing except the functions changed to `scan`, `finish_scan`.
-
-    During the first pass, a scanner may count the shape of input matrix and
-    allocate memory for this argument. Then fill the data into this  argument
-    in second pass.
-    """
-
-    def __init__(self, input_type, pos):
-        self.input_type = input_type
-        if not isinstance(self.input_type, dp2.InputType):
-            raise ValueError("input type should be dataprovider2.InputType")
-        self.pos = pos
-        # data_in_gpu is used to indicate whether to create argument on GPU
-        # or not in GPU mode. Now if using one thread (trainer_count=1),
-        # trainer uses NeuralNetwork which needs to create argument on GPU
-        # before calling forward function. So, set data_in_gpu to True.
-        # Otherwise, trainer uses MultiGradientMachine which will transfer
-        # data from CPU to GPU in the forward function, set data_in_gpu to
-        # False in this case.
-        self.data_in_gpu = swig_paddle.isUsingGpu(
-        ) and swig_paddle.getTrainerCount() == 1
-
-    def pre_scan(self, dat):
-        """
-        First pass scan method. During this method, the scanner could count the
-        data number, and get the total memory size this batch would use.
-
-        :param dat: The python object.
-        """
-        pass
-
-    def finish_pre_scan(self, argument):
-        """
-        Finish first scan pass. Allocate the memory.
-
-        :param argument: Output arguments object.
-        :type argument: swig_paddle.Arguments
-        :param dat: Output arguments object.
-        :type dat: The Python object, numpy.array or List.
-        :return:
-        """
-        pass
-
-    def scan(self, dat):
-        """
-        Second pass scan method. Copy the data to arguments.
-
-        :param dat: The python object.
-        """
-        pass
-
-    def finish_scan(self, argument):
-        """
-        Finish second pass. Finalize the resources, etc.
-
-        :param argument: Output arguments object.
-        :type argument: swig_paddle.Arguments
-        """
-        pass
-
-
-class DenseScanner(IScanner):
-    """
-    :type __mat__: numpy.ndarray
-    """
-
-    def __init__(self, input_type, pos):
-        IScanner.__init__(self, input_type, pos)
-        self.__mat__ = None
-        self.__shape__ = None
-        self.__height__ = 0
-        self.__dim__ = 0
-
-    def pre_scan(self, dat):
-        self.__height__ += 1
-        if self.__shape__ is None:
-            self.__shape__ = numpy.array(dat).shape
-            if len(self.__shape__) > 3:
-                raise ValueError(
-                    "The dimension of input cannot be greater than 3.")
-            if len(self.__shape__) == 0:
-                raise ValueError(
-                    "The input should be a vector, please check your input data."
-                )
-            self.__dim__ = reduce(lambda x, y: x * y, self.__shape__)
-            if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim:
-                raise ValueError(
-                    "The data size must be equal to it in data layer.")
-        else:
-            if self.__shape__ != numpy.array(dat).shape:
-                raise ValueError(
-                    "The data shape must be same in one mini-batch.")
-
-    def finish_pre_scan(self, argument):
-        self.__mat__ = numpy.ndarray(
-            shape=(self.__height__, self.__dim__), dtype=numpy.float32)
-        self.__height__ = 0
-
-    def scan(self, dat):
-        # It's better to use NumPy array for speed.
-        dat = numpy.array(dat)
-        dat = dat.flatten()
-        self.__mat__[self.__height__] = dat
-        self.__height__ += 1
-
-    def finish_scan(self, argument):
-        assert isinstance(argument, swig_paddle.Arguments)
-        if self.__mat__.dtype != numpy.float32:
-            self.__mat__ = self.__mat__.astype(numpy.float32)
-        m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
-                                                    self.data_in_gpu)
-        argument.setSlotValue(self.pos, m)
-        if len(self.__shape__) > 1:
-            # The last-two dimenstions are the frame height and width.
-            # For example, the layout is CHW for 3-D feature of image.
-            # The H and W are the frame height and width.
-            h, w = self.__shape__[-2:]
-            argument.setSlotFrameHeight(self.pos, h)
-            argument.setSlotFrameWidth(self.pos, w)
-        self.__shape__ = None
-
-
-class SparseBinaryScanner(IScanner):
-    def __init__(self, input_type, pos):
-        IScanner.__init__(self, input_type, pos)
-        self.__rows__ = [0]
-        self.__cols__ = []
-        self.__height__ = 0
-        self.__value__ = []
-
-    def scan(self, dat):
-        self.extend_cols(dat)
-        self.__rows__.append(len(self.__cols__))
-        self.__height__ += 1
-
-    def extend_cols(self, dat):
-        self.__cols__.extend(dat)
-
-    def finish_scan(self, argument):
-        assert isinstance(argument, swig_paddle.Arguments)
-        m = swig_paddle.Matrix.createSparse(
-            self.__height__,
-            self.input_type.dim,
-            len(self.__cols__),
-            len(self.__value__) == 0,
-            False,  # trans
-            False)  # TODO supoort GPU
-        assert isinstance(m, swig_paddle.Matrix)
-        m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
-        argument.setSlotValue(self.pos, m)
-
-
-class SparseFloatScanner(SparseBinaryScanner):
-    def __init__(self, input_type, pos):
-        SparseBinaryScanner.__init__(self, input_type, pos)
-
-    def extend_cols(self, dat):
-        self.__cols__.extend((x[0] for x in dat))
-        self.__value__.extend((x[1] for x in dat))
-
-
-class IndexScanner(IScanner):
-    def __init__(self, input_type, pos):
-        IScanner.__init__(self, input_type, pos)
-        self.__ids__ = None
-        self.__idx__ = 0
-
-    def pre_scan(self, dat):
-        self.__idx__ += 1
-
-    def finish_pre_scan(self, argument):
-        self.__ids__ = [0] * self.__idx__
-        self.__idx__ = 0
-
-    def scan(self, dat):
-        self.__ids__[self.__idx__] = dat
-        self.__idx__ += 1
-
-    def finish_scan(self, argument):
-        ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
-        assert isinstance(argument, swig_paddle.Arguments)
-        argument.setSlotIds(self.pos, ids)
-
-
-class SequenceScanner(IScanner):
-    def __init__(self, input_type, pos, inner_scanner, setter):
-        IScanner.__init__(self, input_type, pos)
-        self.__seq__ = [0]
-        self.__inner_scanner__ = inner_scanner
-        self.__setter__ = setter
-
-    def pre_scan(self, dat):
-        for each in dat:
-            self.__inner_scanner__.pre_scan(each)
-
-    def finish_pre_scan(self, argument):
-        self.__inner_scanner__.finish_pre_scan(argument)
-
-    def scan(self, dat):
-        self.__seq__.append(self.__seq__[-1] + self.get_size(dat))
-        for each in dat:
-            self.__inner_scanner__.scan(each)
-
-    def finish_scan(self, argument):
-        seq = swig_paddle.IVector.create(self.__seq__, False)
-        self.__setter__(argument, self.pos, seq)
-        self.__inner_scanner__.finish_scan(argument)
-
-    def get_size(self, dat):
-        if isinstance(self.__inner_scanner__, SequenceScanner):
-            return sum(self.__inner_scanner__.get_size(item) for item in dat)
-        else:
-            return len(dat)
-
-
-class DataProviderConverter(object):
-    def __init__(self, input_types):
-        self.input_types = input_types
-        assert isinstance(self.input_types, collections.Sequence)
-        for each in self.input_types:
-            assert isinstance(each, dp2.InputType)
-
-    def convert(self, dat, argument=None):
-        if argument is None:
-            argument = swig_paddle.Arguments.createArguments(0)
-        assert isinstance(argument, swig_paddle.Arguments)
-        argument.resize(len(self.input_types))
-
-        scanners = [
-            DataProviderConverter.create_scanner(i, each_type)
-            for i, each_type in enumerate(self.input_types)
-        ]
-
-        for each_sample in dat:
-            for each_step, scanner in itertools.izip(each_sample, scanners):
-                scanner.pre_scan(each_step)
-
-        for scanner in scanners:
-            scanner.finish_pre_scan(argument)
-
-        for each_sample in dat:
-            for each_step, scanner in itertools.izip(each_sample, scanners):
-                scanner.scan(each_step)
-
-        for scanner in scanners:
-            scanner.finish_scan(argument)
-
-        return argument
-
-    def __call__(self, dat, argument=None):
-        return self.convert(dat, argument)
-
-    @staticmethod
-    def create_scanner(i, each):
-        assert isinstance(each, dp2.InputType)
-        retv = None
-        if each.type == dp2.DataType.Dense:
-            retv = DenseScanner(each, i)
-        elif each.type == dp2.DataType.Index:
-            retv = IndexScanner(each, i)
-        elif each.type == dp2.DataType.SparseNonValue:
-            retv = SparseBinaryScanner(each, i)
-        elif each.type == dp2.DataType.SparseValue:
-            retv = SparseFloatScanner(each, i)
-        assert retv is not None
-
-        if each.seq_type == dp2.SequenceType.SUB_SEQUENCE:
-            retv = SequenceScanner(
-                each, i, retv,
-                lambda a, p, seq: a.setSlotSubSequenceStartPositions(p, seq))
-
-        if each.seq_type in [
-                dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE
-        ]:
-            retv = SequenceScanner(
-                each, i, retv,
-                lambda a, p, seq: a.setSlotSequenceStartPositions(p, seq))
-        return retv
diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py
deleted file mode 100644
index 3ae8dbf964c68c6f01ba30cb3ac69fb6c2f08c30..0000000000000000000000000000000000000000
--- a/paddle/py_paddle/util.py
+++ /dev/null
@@ -1,578 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Some Useful method for py_paddle.
-"""
-
-import swig_paddle
-import os
-import paddle.trainer.PyDataProviderWrapper
-import paddle.proto.ParameterConfig_pb2
-import paddle.proto.ModelConfig_pb2
-import paddle.proto.TrainerConfig_pb2
-import weakref
-import numpy
-import struct
-import sys
-import copy
-
-
-def initializePaddle(*args):
-    """
-    To initialize paddle process.
-    :param args: Command line options, such as --use_gpu=0, etc.
-    :return: Nothing.
-    """
-    old_argv = copy.deepcopy(sys.argv)
-    old_pypath = os.getenv("PYTHONPATH")
-    pypath = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
-    if old_pypath is not None:
-        pypath = os.pathsep.join([pypath, old_pypath])
-        os.putenv("PYTHONPATH", pypath)
-    args = [""] + list(args)  # argv[0] is command name, it is not important.
-    swig_paddle.__initPaddle__(args)
-    sys.argv = old_argv
-
-
-def __monkeypatch_init_paddle__():
-    swig_paddle.__initPaddle__ = swig_paddle.initPaddle
-    swig_paddle.initPaddle = initializePaddle
-
-
-class __ParameterCallbackWrapper__(swig_paddle.UpdateCallback):
-    """
-    Wrap the python callable object to paddle.UpdateCallback.
-
-    INTERNAL USE ONLY.
-    """
-
-    def __init__(self, callback):
-        swig_paddle.UpdateCallback.__init__(self)
-        self.callback = callback
-
-    def apply(self, param):
-        self.callback(param)
-
-    @staticmethod
-    def wrap(callback):
-        """
-        Cast the python callable object/paddle.UpdateCallback to
-        swig_paddle.UpdateCallback.__disown__
-        :param callback: callable or swig_paddle.UpdateCallback object.
-        """
-        if isinstance(callback, swig_paddle.UpdateCallback):
-            return callback.__disown__()
-        elif isinstance(callback, weakref.ProxyType):
-            raise RuntimeError("Should not pass __disown__ object")
-        else:
-            return __ParameterCallbackWrapper__(callback).__disown__()
-
-
-def __arguments_to_numpy__(i, arg):
-    assert isinstance(arg, swig_paddle.Arguments)
-    value = arg.getSlotValue(i)
-    ids = arg.getSlotIds(i)
-    prob = arg.getSlotIn(i)
-    if value is not None:
-        assert isinstance(value, swig_paddle.Matrix)
-        value = value.copyToNumpyMat()
-    if ids is not None:
-        assert isinstance(ids, swig_paddle.IVector)
-        ids = ids.copyToNumpyArray()
-    if prob is not None:
-        assert isinstance(prob, swig_paddle.Matrix)
-        prob = prob.copyToNumpyMat()
-    return {"value": value, "id": ids, "prob": prob}
-
-
-def __monkeypatch_gradient_machine__():
-    """
-    Add some class methods to GradientMachine.
-    This method should be only used internally.
-    """
-    swig_paddle.GradientMachine.loadFromConfigFile = \
-        staticmethod(loadGradientMachine)
-
-    def __matrix_to_numpy__(m):
-        if isinstance(m, swig_paddle.Matrix):
-            return m.copyToNumpyMat()
-        elif isinstance(m, swig_paddle.IVector):
-            return m.copyToNumpyArra()
-        else:
-            raise RuntimeError("Input arg should be matrix or vecotr.")
-
-    def createFromConfigProto(protoObj,
-                              createMode=swig_paddle.CREATE_MODE_NORMAL,
-                              paramTypes=[
-                                  swig_paddle.PARAMETER_VALUE,
-                                  swig_paddle.PARAMETER_GRADIENT,
-                                  swig_paddle.PARAMETER_MOMENTUM
-                              ]):
-        """
-        Create Gradient Machine From Proto object.
-        :param protoObj: Model config
-        :type protoObj: proto.ModelConfig_pb2.ModelConfig
-        :param createMode: Create Mode, default is normal.
-        :type createMode: int
-        :param paramTypes: the gradient machine parameter type.
-        :type paramTypes: list of int
-        :return: paddle.GradientMachine
-        """
-        assert isinstance(protoObj, paddle.proto.ModelConfig)
-        return swig_paddle.GradientMachine.createByConfigProtoStr(
-            protoObj.SerializeToString(), createMode, paramTypes)
-
-    swig_paddle.GradientMachine.createFromConfigProto = \
-        staticmethod(createFromConfigProto)
-
-    def forwardTest(self, inArgs):
-        """
-        forwardTest. forward gradient machine in test mode, and return a numpy
-        matrix dict.
-
-        :param inArgs: The input arguments
-        :type inArgs: paddle.Arguments
-        :return: A dictionary with keys ['id', 'value'], each value is a
-                 numpy.ndarray.
-        """
-        outArgs = swig_paddle.Arguments.createArguments(0)
-        self.forward(inArgs, outArgs, swig_paddle.PASS_TEST)
-        return [
-            __arguments_to_numpy__(i, outArgs)
-            for i in xrange(outArgs.getSlotNum())
-        ]
-
-    swig_paddle.GradientMachine.forwardTest = forwardTest
-
-    # Monkey patching backward
-    swig_paddle.GradientMachine.__backward__ = swig_paddle.GradientMachine.backward
-
-    def backward(self, callback):
-        """
-        GradientMachine Backward
-        :param callback: a callback which parameter is (paddle.Parameter) or
-                         a paddle.UpdateCallback object.
-        """
-        self.__backward__(__ParameterCallbackWrapper__.wrap(callback))
-
-    swig_paddle.GradientMachine.backward = backward
-
-    # Monkey patching forwardBackward.
-    swig_paddle.GradientMachine.__forwardBackward__ = \
-        swig_paddle.GradientMachine.forwardBackward
-
-    def forwardBackward(self,
-                        inArgs,
-                        outArgs,
-                        passType,
-                        callback=swig_paddle.UpdateCallback()):
-        """
-        GradientMachine forward backward.
-        :param inArgs: Input Arguments for GradientMachine.
-        :type inArgs: paddle.Arguments
-        :param outArgs: Output Arguments for GradientMachine.
-        :type outArgs: paddle.Arguments
-        :param passType: gradient machine's pass type.
-        :type passType: paddle.PassType
-        :param callback: a callable object with arguments (paddle.Parameter) or
-                         a paddle.UpdateCallback it will be called when
-                         backward
-        """
-        self.__forwardBackward__(inArgs, outArgs, passType,
-                                 __ParameterCallbackWrapper__.wrap(callback))
-
-    swig_paddle.GradientMachine.forwardBackward = forwardBackward
-
-    def getParameters(self):
-        return (self.getParameter(i) for i in xrange(self.getParameterSize()))
-
-    swig_paddle.GradientMachine.getParameters = getParameters
-
-    def getNonStaticParameters(self):
-        return (self.getNonStaticParameter(i)
-                for i in xrange(self.getNonStaticParameterSize()))
-
-    swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters
-
-    def getLayerOutputs(self, layerNames):
-        """
-        getLayerOutputs. get outputs of layers and return a numpy matrix dict.
-        :param layerNames: layer names.
-        :type layerNames: string or list.
-        """
-        if isinstance(layerNames, basestring):
-            layerNames = [layerNames]
-        elif not isinstance(layerNames, list):
-            raise RuntimeError("Input args shuld be string or a sting list.")
-
-        output = dict()
-        for name in layerNames:
-            output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name))
-        return output
-
-    swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs
-
-
-def loadGradientMachine(config_filename, model_dir=None):
-    """
-    Load a gradient machine from config file name/path.
-    :param config_filename: The trainer config file name/path
-    :param model_dir: The model parameter directory. None if same as the
-    directory of config_filename
-    :return: GradientMachine with some enhance methods.
-    :rtype: paddle.GradientMachine
-    """
-    trainer_config = swig_paddle.TrainerConfig.createFromTrainerConfigFile(
-        config_filename)
-    assert isinstance(trainer_config, swig_paddle.TrainerConfig)
-    model_conf = trainer_config.getModelConfig()
-    network = swig_paddle.GradientMachine.createByModelConfig(model_conf)
-    assert isinstance(network, swig_paddle.GradientMachine)
-    if model_dir is None:
-        model_dir = os.path.dirname(config_filename)
-    network.loadParameters(model_dir)
-    return network
-
-
-def loadParameterFile(fn):
-    """
-    Load Paddle Parameter file to numpy.ndarray
-    :param fn: file name or file like object.
-    :type fn: str or file like object.
-    :return: numpy array
-    :rtype: numpy.ndarray
-    :raise: paddle.UnsupportError when parameter format is wrong.
-    """
-    if isinstance(fn, str):
-        with open(fn, 'rb') as f:
-            return loadParameterFile(f)
-    elif hasattr(fn, 'read'):  # File like object
-        version, = struct.unpack('i', fn.read(4))
-        if version != 0:
-            raise swig_paddle.UnsupportError()
-        value_length, = struct.unpack("I", fn.read(4))
-        if value_length != 4 and value_length != 8:
-            raise swig_paddle.UnsupportError()
-        dtype = 'float32' if value_length == 4 else 'float64'
-        param_size, = struct.unpack("L", fn.read(8))
-        value = numpy.fromfile(fn, dtype)
-        if len(value) != param_size:
-            raise swig_paddle.UnsupportError()
-        return value
-    else:
-        raise swig_paddle.UnsupportError()
-
-
-class DataProviderWrapperConverter(object):
-    """
-    A class convert DataFormat from PyDataProvider Wrapper to
-    py_paddle.paddle.Arguemnts.
-    """
-
-    class DenseValueConverter(object):
-        """
-        Internal class
-        """
-
-        def __init__(self, header_def):
-            self.__dim__ = header_def.dim
-            self.buf = []
-
-        def append(self, other):
-            assert len(other) == self.__dim__
-            self.buf += other
-
-        def __call__(self, slot_idx, arg):
-            mat = swig_paddle.Matrix.createDense(self.buf,
-                                                 len(self.buf) / self.__dim__,
-                                                 self.__dim__)
-            arg.setSlotValue(slot_idx, mat)
-
-    class IdValueConverter(object):
-        """
-        Internal class
-        """
-
-        def __init__(self, *args):
-            self.buf = []
-
-        def append(self, other):
-            assert isinstance(other, int)
-            self.buf.append(other)
-
-        def __call__(self, slot_idx, arg):
-            arg.setSlotIds(slot_idx, swig_paddle.IVector.create(self.buf))
-
-    class SparseNonValueConverter(object):
-        """
-        Internal class
-        """
-
-        def __init__(self, slot_def):
-            self.indices = [0]
-            self.cols = []
-            self.dim = slot_def.dim
-
-        def append(self, other):
-            self.indices.append(self.indices[-1] + len(other))
-            self.cols += other
-
-        def __call__(self, slot_idx, arg):
-            mat = swig_paddle.Matrix.createSparse(
-                len(self.indices) - 1, self.dim, len(self.cols), True)
-            assert isinstance(mat, swig_paddle.Matrix)
-            mat.sparseCopyFrom(self.indices, self.cols)
-            self.putIntoArg(slot_idx, arg, mat)
-
-        def putIntoArg(self, slot_idx, arg, mat):
-            arg.setSlotValue(slot_idx, mat)
-
-    class SparseValueConverter(SparseNonValueConverter):
-        """
-        Internal class
-        """
-
-        def __init__(self, slot_def):
-            super(DataProviderWrapperConverter.SparseValueConverter,
-                  self).__init__(slot_def)
-            self.values = []
-
-        def append(self, other):
-            super(DataProviderWrapperConverter.SparseValueConverter,
-                  self).append(map(lambda x: x[0], other))
-            self.values += map(lambda x: x[1], other)
-
-        def __call__(self, slot_idx, arg):
-            mat = swig_paddle.Matrix.createSparse(
-                len(self.indices) - 1, self.dim, len(self.cols), False)
-            assert isinstance(mat, swig_paddle.Matrix)
-            mat.sparseCopyFrom(self.indices, self.cols, self.values)
-            self.putIntoArg(slot_idx, arg, mat)
-
-    __SLOT_VALUE_CONVERTER_MAP__ = {
-        paddle.trainer.PyDataProviderWrapper.DenseSlot: DenseValueConverter,
-        paddle.trainer.PyDataProviderWrapper.IndexSlot: IdValueConverter,
-        paddle.trainer.PyDataProviderWrapper.SparseNonValueSlot:
-        SparseNonValueConverter,
-        paddle.trainer.PyDataProviderWrapper.SparseValueSlot:
-        SparseValueConverter
-    }
-
-    def __init__(self, use_seq, header):
-        """
-        Ctor
-        :param use_seq: True if use sequence.
-        :param header:  List of slots type,
-                       trainer.PyDataProviderWrapper.SlotType
-        """
-        self.__use_seq__ = use_seq
-        self.__header__ = header
-
-    def convert(self, wrapper_data, argument=None):
-        """
-        Convert PyDataProviderWrapper format to paddle.Argument
-        :param wrapper_data: PyDataProviderWrapper yield's data list.
-        :param argument: The output paddle.Arguments.
-                        If it is not None, it will assign data in this
-                        arguments, else it will create new arguments.
-        :return: arguments that contains data.
-        :rtype: paddle.Arguments
-        """
-        if argument is None:
-            argument = swig_paddle.Arguments.createArguments(0)
-        assert isinstance(argument, swig_paddle.Arguments)
-        argument.resize(len(self.__header__))
-
-        values = map(
-            lambda x: DataProviderWrapperConverter.__SLOT_VALUE_CONVERTER_MAP__[x.__class__](x),
-            self.__header__)
-
-        if self.__use_seq__:
-            seq_dim = [[] for _ in xrange(self.__header__.__len__())]
-            seq_start_pos = [[0] for _ in xrange(self.__header__.__len__())]
-
-            for each_sample in wrapper_data:
-                for slot_idx, sequence in enumerate(each_sample):
-                    for raw_data in sequence:
-                        values[slot_idx].append(raw_data)
-                    seq_start_pos[slot_idx].append(seq_start_pos[slot_idx][-1] +
-                                                   len(sequence))
-                    seq_dim[slot_idx].append(len(sequence))
-
-            for slot_idx in xrange(len(self.__header__)):
-                argument.setSlotSequenceDim(
-                    slot_idx, swig_paddle.IVector.create(seq_dim[slot_idx]))
-                argument.setSlotSequenceStartPositions(
-                    slot_idx,
-                    swig_paddle.IVector.create(seq_start_pos[slot_idx]))
-        else:
-            for each_sample in wrapper_data:
-                for raw_data, value in zip(each_sample, values):
-                    value.append(raw_data)
-
-        for i, v in enumerate(values):
-            v(i, argument)
-
-        return argument
-
-    def __call__(self, wrapper_data, argument=None):
-        """
-        Invoke self.convert. See documents in self.convert.
-        """
-        return self.convert(wrapper_data, argument)
-
-
-def __monkey_patch_protobuf_objects__():
-    def ParameterConfig_toProto(self):
-        """
-        Convert paddle.ParameterConfig to
-        proto.ParameterConfig_pb2.ParameterConfig
-
-        :return: proto.ParameterConfig_pb2.ParameterConfig object.
-        """
-        param_conf = paddle.proto.ParameterConfig_pb2.ParameterConfig()
-        param_conf.ParseFromString(self.toProtoString())
-        return param_conf
-
-    swig_paddle.ParameterConfig.toProto = ParameterConfig_toProto
-
-    def OptimizationConfig_toProto(self):
-        """
-        Convert paddle.OptimizationConfig to
-        proto.TrainerConfig_pb2.OptimizationConfig
-
-        :return: proto.TrainerConfig_pb2.OptimizationConfig
-        """
-        opt_conf = proto.TrainerConfig_pb2.OptimizationConfig()
-        opt_conf.ParseFromString(self.toProtoString())
-        return opt_conf
-
-    swig_paddle.OptimizationConfig.toProto = OptimizationConfig_toProto
-
-    def OptimizationConfig_createFromProto(protoObj):
-        """
-        Create a new paddle.OptimizationConfig from
-        proto.TrainerConfig_pb2.OptimizationConfig
-
-        :param protoObj: proto.TrainerConfig_pb2.OptimizationConfig
-        :return: paddle.OptimizationConfig
-        """
-
-        assert isinstance(protoObj, paddle.proto.OptimizationConfig)
-        return swig_paddle.OptimizationConfig.createFromProtoString(
-            protoObj.SerializeToString())
-
-    swig_paddle.OptimizationConfig.createFromProto = staticmethod(
-        OptimizationConfig_createFromProto)
-
-    def TrainerConfig_createFromProto(protoObj):
-        """
-        Create a new paddle.TrainerConfig from
-        proto.OptimizationConfig
-
-        :param protoObj: proto.TrainerConfig
-        :return: paddle.TrainerConfig
-        """
-        assert isinstance(protoObj, paddle.proto.TrainerConfig)
-        return swig_paddle.TrainerConfig.createFromProtoString(
-            protoObj.SerializeToString())
-
-    swig_paddle.TrainerConfig.createFromProto = staticmethod(
-        TrainerConfig_createFromProto)
-
-
-def __monkey_patch_parameter__():
-    def getBufs(self):
-        """
-        get all parameter vectors.
-        NOTE: the return value is a generator. Maybe you need to cast to
-        list or tuple or something else.
-
-        :return: generator of all parameter vectors.
-        :rtype: generator
-        """
-        return (self.getBuf(i) for i in xrange(swig_paddle.NUM_PARAMETER_TYPES))
-
-    swig_paddle.Parameter.getBufs = getBufs
-
-
-def __monkey_patch_trainer__():
-    swig_paddle.Trainer.__create__ = staticmethod(swig_paddle.Trainer.create)
-
-    def Trainer_create(config, model=None):
-        """
-        Create a trainer for model with TrainerCOnfig trainer_config
-        trainer_config.model_config will be ignored when model is supplied.
-        Trainer.trainOneBatch() and Trainer.forwardOneBatch() can be used only
-        when trainer_config.data_config is set.
-
-        A typical usage for Trainer is:
-        .. code-block:: python
-           trainer = Trainer.create(trainer_config, model)
-           for p in xrange(num_passes)
-               while True:
-                   data = get_next_batch(batch_size)
-                   if not data:
-                       break
-                   trainer.trainOneDataBatch(batch_size, data)
-               trainer.finishTrainPass()
-           trainer.finishTrain()
-
-        The trainer will take care of logging, model saving, distributed
-        training, etc.
-
-        :param config: trainer configuration
-        :type config: paddle.proto.TrainerConfig
-        :param model: the model to be trained
-        :type model: swig_paddle.GradientMachine
-        :return: a trainer
-        :rtype swig_paddle.Trainer
-
-        """
-        assert isinstance(config, paddle.proto.TrainerConfig)
-        if model is not None:
-            assert isinstance(model, swig_paddle.GradientMachine)
-        return swig_paddle.Trainer.__create__(
-            swig_paddle.TrainerConfig.createFromProto(config), model)
-
-    swig_paddle.Trainer.create = staticmethod(Trainer_create)
-
-    swig_paddle.Trainer.__getForwardOutput__ = \
-        swig_paddle.Trainer.getForwardOutput
-
-    def getForwardOutput(self):
-        """
-        Get the netword outputs from the previous trainOneBatch(),
-        trainOneDataBatch(), testOneDataPatch(), or forwardOneBatch() call.
-
-        :return: list of dictionary with keys ['id', 'value'], each value is a
-                 numpy.ndarray.
-        """
-        outArgs = self.__getForwardOutput__()
-        return [
-            __arguments_to_numpy__(i, outArgs)
-            for i in xrange(outArgs.getSlotNum())
-        ]
-
-    swig_paddle.Trainer.getForwardOutput = getForwardOutput
-
-
-def monkeypatches():
-    patches = [
-        __monkeypatch_init_paddle__, __monkeypatch_gradient_machine__,
-        __monkey_patch_protobuf_objects__, __monkey_patch_parameter__,
-        __monkey_patch_trainer__
-    ]
-    for patch in patches:
-        patch()
diff --git a/paddle/scripts/README.md b/paddle/scripts/README.md
index 277222450648e349cfff33a96fd657f03a02e597..dd3242f62baa152a8977ac8da049323a0f1abcfd 100644
--- a/paddle/scripts/README.md
+++ b/paddle/scripts/README.md
@@ -68,7 +68,6 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
 | `WITH_TESTING` | OFF | Build unit tests binaries. |
 | `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. |
 | `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. |
-| `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
 | `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. |
 | `WITH_STYLE_CHECK` | ON | Check the code style when building. |
 | `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu |
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index cbd39d7a5d9686be71bde6c9fd7b6d03eef73e9a..a06952782b39fdf35e23b039fb21fed75213b354 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -180,7 +180,6 @@ function cmake_gen() {
         -DWITH_GOLANG=${WITH_GOLANG:-OFF}
         -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
         -DWITH_PYTHON=${WITH_PYTHON:-ON}
-        -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
         -DCUDNN_ROOT=/usr/
         -DWITH_TESTING=${WITH_TESTING:-ON}
         -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
@@ -214,7 +213,6 @@ EOF
         -DWITH_AVX=${WITH_AVX:-OFF} \
         -DWITH_GOLANG=${WITH_GOLANG:-OFF} \
         -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-        -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
         -DWITH_PYTHON=${WITH_PYTHON:-ON} \
         -DCUDNN_ROOT=/usr/ \
         -DWITH_TESTING=${WITH_TESTING:-ON} \
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 37ad77549c88166186d9bf354d6bfd4c6bef152c..59e695e6fcb66cbaed1bcc9e861df81b5f73c1ed 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -39,7 +39,6 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE})
 IF(WIN32)
     add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
             COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/
-            COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
             COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
             COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
             COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
@@ -48,7 +47,6 @@ ELSE(WIN32)
 	add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
 		COMMAND touch stub.cc
 		COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
-		COMMAND cp -r ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
 		COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
 		COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
 		COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python