diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.cc b/paddle/fluid/operators/ngraph/ngraph_bridge.cc
index 36a2efc0ce113e7ca47243d7a1dee1ca998edd03..4bfcba6c3ce312e21e281e32fe1cb92ef45fda6f 100644
--- a/paddle/fluid/operators/ngraph/ngraph_bridge.cc
+++ b/paddle/fluid/operators/ngraph/ngraph_bridge.cc
@@ -43,6 +43,7 @@ std::map<std::string,
+        {"momentum", NG_OPS::BuildMomentumNode},
diff --git a/paddle/fluid/operators/ngraph/ops/momentum_op.h b/paddle/fluid/operators/ngraph/ops/momentum_op.h
new file mode 100644
--- /dev/null
+++ b/paddle/fluid/operators/ngraph/ops/momentum_op.h
+#include <string>
+#include "ngraph/ngraph.hpp"
+#include "paddle/fluid/platform/ngraph_helper.h"
+
+namespace paddle {
+namespace operators {
+namespace ngraphs {
+
+void BuildMomentumNode(
+    const std::shared_ptr<paddle::framework::OperatorBase>& op,
+    std::shared_ptr<
+        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
+        ngb_node_map) {
+  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
+  auto param = paddle::platform::GetInputNode(op, "Param", ngb_node_map);
+  auto grad = paddle::platform::GetInputNode(op, "Grad", ngb_node_map);
+  auto velocity = paddle::platform::GetInputNode(op, "Velocity", ngb_node_map);
+  auto learning_rate =
+      paddle::platform::GetInputNode(op, "LearningRate", ngb_node_map);
+
+  auto mu = op_attrs.Get<float>("mu");
+  bool use_nesterov = op_attrs.Get<bool>("use_nesterov");
+
+  auto param_shape = param->get_shape();
+  auto velocity_shape = velocity->get_shape();
+  auto grad_shape = grad->get_shape();
+  auto lr_shape = learning_rate->get_shape();
+
+  auto shape_velocity = ngraph::Shape{velocity_shape};
+  auto mu_create =
+      ngraph::op::Constant::create(ngraph::element::f32, shape_velocity, {mu});
+
+  auto vel_mul = std::make_shared<ngraph::op::Multiply>(velocity, mu_create);
+  auto vel_out = std::make_shared<ngraph::op::Add>(vel_mul, grad);
+
+  ngraph::NodeVector result;
+  if (use_nesterov) {
+    auto mul_res = std::make_shared<ngraph::op::Multiply>(vel_out, mu_create);
+    auto add_res = std::make_shared<ngraph::op::Add>(grad, mul_res);
+
+    auto add_2d = paddle::platform::FlattenTo2d(add_res->get_shape(), 0);
+    auto vel_reshape = paddle::platform::NgReshaper(vel_out, add_2d);
+
+    auto lr_bcast = std::make_shared<ngraph::op::Broadcast>(
+        learning_rate, vel_reshape->get_shape(),
+        ngraph::AxisSet{vel_reshape->get_shape().size() - 1});
+
+    auto lr_1d = paddle::platform::FlattenTo1d(lr_bcast->get_shape(), 0);
+    auto lr_reshape = std::make_shared<ngraph::op::Reshape>(
+        lr_bcast, ngraph::AxisVector{0, 1}, lr_1d);
+
+    lr_reshape = std::make_shared<ngraph::op::Reshape>(
+        lr_reshape, ngraph::AxisVector{0}, param->get_shape());
+
+    auto mul_res1 = std::make_shared<ngraph::op::Multiply>(add_res, lr_reshape);
+    auto res = std::make_shared<ngraph::op::Subtract>(param, mul_res1);
+    paddle::platform::SetOutputNode(op, "ParamOut", res, ngb_node_map);
+  } else {
+    auto vel_2d = paddle::platform::FlattenTo2d(vel_out->get_shape(), 0);
+    auto vel_reshape = paddle::platform::NgReshaper(vel_out, vel_2d);
+
+    auto lr_bcast = std::make_shared<ngraph::op::Broadcast>(
+        learning_rate, vel_reshape->get_shape(),
+        ngraph::AxisSet{vel_reshape->get_shape().size() - 1});
+
+    auto lr_1d = paddle::platform::FlattenTo1d(lr_bcast->get_shape(), 0);
+    auto lr_reshape = std::make_shared<ngraph::op::Reshape>(
+        lr_bcast, ngraph::AxisVector{0, 1}, lr_1d);
+
+    lr_reshape = std::make_shared<ngraph::op::Reshape>(
+        lr_reshape, ngraph::AxisVector{0}, param->get_shape());
+
+    auto mul_result =
+        std::make_shared<ngraph::op::Multiply>(lr_reshape, vel_out);
+
+    auto res = std::make_shared<ngraph::op::Subtract>(param, mul_result);
+    paddle::platform::SetOutputNode(op, "ParamOut", res, ngb_node_map);
+  }
+  paddle::platform::SetOutputNode(op, "VelocityOut", vel_out, ngb_node_map);
+}
+
+}  // namespace ngraphs
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/platform/ngraph_helper.h b/paddle/fluid/platform/ngraph_helper.h
index 5ee985ea719f8cb28bf8be23823eb6c96f4af1a3..e74f57a79a66ea8fe8c9b972a9a2ec9d722731eb 100644
--- a/paddle/fluid/platform/ngraph_helper.h
+++ b/paddle/fluid/platform/ngraph_helper.h
@@ -43,6 +43,13 @@ std::shared_ptr<ngraph::Node> Nchw2Nhwc(std::shared_ptr<ngraph::Node> in) {
   return std::make_shared<ngraph::op::Reshape>(in, axis_vec, in_shape);
 }
 
+ngraph::Shape FlattenTo1d(ngraph::Shape sh, int num) {
+  auto x1 = std::accumulate(std::begin(sh), std::end(sh) + num, 1,
+                            std::multiplies<size_t>());
+  size_t x1_l = (size_t)x1;
+  return ngraph::Shape{x1_l};
+}
+
 ngraph::Shape FlattenTo2d(ngraph::Shape sh, int num) {
   auto x1 = std::accumulate(std::begin(sh), std::begin(sh) + num, 1,
                             std::multiplies<size_t>());
diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
index 9a185eb97ca6ad4d7987f1a422073d3c8db0d8df..3057218a1d80deffe7eb3164c2350143fc38007d 100644
--- a/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
+++ b/python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,261 +15,7 @@ from __future__ import print_function
 
 import unittest
-import numpy as np
-import paddle.fluid.core as core
-from paddle.fluid.tests.unittests.op_test import OpTest, randomize_probability
-
-
-class TestCrossEntropyOp(OpTest):
-    """Test cross-entropy with discrete one-hot labels.
-    """
-
-    def setUp(self):
-        self.op_type = "cross_entropy"
-        self.soft_label = False
-        self.ignore_index = -100
-        self.dtype = np.float64
-        self.batch_size = 30
-        self.class_num = 10
-        self._cpu_only = True
-
-        self.init_dtype_type()
-        self.init_attr_type()
-        self.init_bs_class_num()
-        self.init_x()
-        self.init_label()
-        self.get_cross_entropy()
-
-        self.inputs = {"X": self.x, "Label": self.label}
-        self.outputs = {"Y": self.cross_entropy}
-        self.attrs = {
-            "soft_label": self.soft_label,
-            "ignore_index": self.ignore_index
-        }
-
-    def init_x(self):
-        self.x = randomize_probability(
-            self.batch_size, self.class_num, dtype=self.dtype)
-
-    def init_label(self):
-        self.label = np.random.randint(
-            0, self.class_num, (self.batch_size, 1), dtype="int64")
-
-    def get_cross_entropy(self):
-        self.cross_entropy = np.asmatrix(
-            [[-np.log(self.x[i][self.label[i][0]])]
-             for i in range(self.x.shape[0])],
-            dtype="float64")
-
-    def init_attr_type(self):
-        pass
-
-    def init_dtype_type(self):
-        pass
-
-    def init_bs_class_num(self):
-        pass
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Y", numeric_grad_delta=0.001)
-
-
-class TestCrossEntropyOp2(TestCrossEntropyOp):
-    """Test cross-entropy with vectorized soft labels.
-    """
-
-    def init_label(self):
-        self.label = np.random.uniform(
-            0.1, 1.0, [self.batch_size, self.class_num]).astype(self.dtype)
-        self.label /= self.label.sum(axis=1, keepdims=True)
-
-    def get_cross_entropy(self):
-        self.cross_entropy = (-self.label * np.log(self.x)).sum(
-            axis=1, keepdims=True).astype(self.dtype)
-
-    def init_attr_type(self):
-        self.soft_label = True
-
-    def init_dtype_type(self):
-        self.dtype = np.float32
-
-    def init_bs_class_num(self):
-        self.batch_size = 5
-        self.class_num = 37
-
-    def test_check_grad(self):
-        self.check_grad(
-            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
-
-
-class TestCrossEntropyOp3(TestCrossEntropyOp):
-    """Test cross-entropy with vectorized one-hot representation of labels.
- """ - - def init_label(self): - self.label_index = np.random.randint(0, self.class_num, - (self.batch_size)) - self.label = np.zeros(self.x.shape).astype(self.dtype) - self.label[np.arange(self.batch_size), self.label_index] = 1 - - def get_cross_entropy(self): - self.cross_entropy = np.asmatrix( - [[-np.log(self.x[i][self.label_index[i]])] - for i in range(self.x.shape[0])]).astype(self.dtype) - - def init_attr_type(self): - self.soft_label = True - - def init_dtype_type(self): - self.dtype = np.float32 - - def init_bs_class_num(self): - self.batch_size = 5 - self.class_num = 17 - - def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) - - -class TestCrossEntropyOp4(TestCrossEntropyOp): - """Test high rank tensor cross-entropy with discrete one-hot labels. - """ - - def init_x(self): - self.shape = [10, 2, 4] - self.ins_num = np.prod(np.array(self.shape)) - self.X_2d = randomize_probability(self.ins_num, - self.class_num).astype(self.dtype) - self.x = self.X_2d.reshape(self.shape + [self.class_num]) - - def init_label(self): - self.label_2d = np.random.randint( - 0, self.class_num, (self.ins_num, 1), dtype="int64") - self.label = self.label_2d.reshape(self.shape + [1]) - - def get_cross_entropy(self): - cross_entropy_2d = np.asmatrix( - [[-np.log(self.X_2d[i][self.label_2d[i][0]])] - for i in range(self.X_2d.shape[0])]).astype(self.dtype) - self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape + - [1]) - - def init_attr_type(self): - self.soft_label = False - - def init_dtype_type(self): - self.dtype = np.float64 - - def init_bs_class_num(self): - self.class_num = 10 - - -class TestCrossEntropyOp5(TestCrossEntropyOp): - """Test high rank tensor cross-entropy with vectorized soft labels. - """ - - def init_x(self): - self.shape = [4, 3] - self.ins_num = np.prod(np.array(self.shape)) - self.X_2d = randomize_probability(self.ins_num, - self.class_num).astype(self.dtype) - self.x = self.X_2d.reshape(self.shape + [self.class_num]) - - def init_label(self): - self.label_2d = np.random.uniform( - 0.1, 1.0, [self.ins_num, self.class_num]).astype(self.dtype) - self.label_2d /= self.label_2d.sum(axis=1, keepdims=True) - self.label = self.label_2d.reshape(self.shape + [self.class_num]) - - def get_cross_entropy(self): - cross_entropy_2d = (-self.label_2d * np.log(self.X_2d)).sum( - axis=1, keepdims=True).astype(self.dtype) - self.cross_entropy = np.array(cross_entropy_2d).reshape(self.shape + - [1]) - - def init_attr_type(self): - self.soft_label = True - - def init_dtype_type(self): - self.dtype = np.float32 - - def init_bs_class_num(self): - self.class_num = 37 - - def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) - - -class TestCrossEntropyOp6(TestCrossEntropyOp): - """Test high rank tensor cross-entropy with vectorized one-hot representation of labels. 
- """ - - def init_x(self): - self.shape = [4, 3, 2] - self.ins_num = np.prod(np.array(self.shape)) - self.X_2d = randomize_probability(self.ins_num, - self.class_num).astype(self.dtype) - self.x = self.X_2d.reshape(self.shape + [self.class_num]) - - def init_label(self): - self.label_index_2d = np.random.randint( - 0, self.class_num, (self.ins_num), dtype="int64") - label_2d = np.zeros(self.X_2d.shape) - label_2d[np.arange(self.ins_num), self.label_index_2d] = 1 - self.label = label_2d.reshape(self.shape + [self.class_num]).astype( - self.dtype) - - def get_cross_entropy(self): - cross_entropy_2d = np.asmatrix( - [[-np.log(self.X_2d[i][self.label_index_2d[i]])] - for i in range(self.X_2d.shape[0])]) - self.cross_entropy = np.array(cross_entropy_2d).reshape( - self.shape + [1]).astype(self.dtype) - - def init_attr_type(self): - self.soft_label = True - - def init_dtype_type(self): - self.dtype = np.float32 - - def init_bs_class_num(self): - self.class_num = 17 - - def test_check_grad(self): - self.check_grad( - ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001) - - -class TestCrossEntropyOp7(TestCrossEntropyOp): - """Test cross-entropy with ignore index. - """ - - def init_label(self): - self.label = np.random.randint( - 0, self.class_num, (self.batch_size, 1), dtype="int64") - - def get_cross_entropy(self): - self.cross_entropy = np.asmatrix( - [[-np.log(self.x[i][self.label[i][0]])] - if self.label[i][0] != self.ignore_index else [0] - for i in range(self.x.shape[0])]).astype(self.dtype) - - def init_attr_type(self): - self.soft_label = False - self.ignore_index = 3 - - def init_dtype_type(self): - self.dtype = np.float64 - - def init_bs_class_num(self): - self.batch_size = 30 - self.class_num = 10 - +from paddle.fluid.tests.unittests.test_cross_entropy_op import TestCrossEntropyOp, TestCrossEntropyOp2, TestCrossEntropyOp3, TestCrossEntropyOp4, TestCrossEntropyOp5, TestCrossEntropyOp6, TestCrossEntropyOp7 if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_momentum_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_momentum_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3549d907f5f67abc0cbd448a492d95b8ae6c32 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_momentum_ngraph_op.py @@ -0,0 +1,21 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_momentum_op import TestMomentumOp1, TestMomentumOp2, TestLarsMomentumOp, TestSparseMomentumOp, TestSparseMomentumOp2 + +if __name__ == '__main__': + unittest.main()