diff --git a/paddle/function/context_projection_op.cpp b/paddle/function/ContextProjectionOp.cpp
similarity index 99%
rename from paddle/function/context_projection_op.cpp
rename to paddle/function/ContextProjectionOp.cpp
index a6a85fb6a46f765fcacb9406010ada14f9f6335a..3ada0b727bc6de2762586c0de8857d39ea28b004 100644
--- a/paddle/function/context_projection_op.cpp
+++ b/paddle/function/ContextProjectionOp.cpp
@@ -12,7 +12,7 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/math/Vector.h"
 
diff --git a/paddle/function/context_projection_op.h b/paddle/function/ContextProjectionOp.h
similarity index 100%
rename from paddle/function/context_projection_op.h
rename to paddle/function/ContextProjectionOp.h
diff --git a/paddle/function/context_projection_op_gpu.cu b/paddle/function/ContextProjectionOpGpu.cu
similarity index 99%
rename from paddle/function/context_projection_op_gpu.cu
rename to paddle/function/ContextProjectionOpGpu.cu
index fdea433d07e9c44361a632dc189f89f245498f9c..438826299891ce0e8257d3b49e30d8697e7b095a 100644
--- a/paddle/function/context_projection_op_gpu.cu
+++ b/paddle/function/ContextProjectionOpGpu.cu
@@ -13,7 +13,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "hl_base.h"
-#include "context_projection_op.h"
+#include "ContextProjectionOp.h"
 
 namespace paddle {
 
@@ -327,7 +327,7 @@ void ContextProjectionBackwardWeight(Tensor& out_grad,
                                      int context_start,
                                      size_t total_pad,
                                      size_t begin_pad) {
-  CHECK(w_grad.getData() && out_grad.getData());
+  CHECK(w_grad.getData() && out_grad.getData() && sequence.getData());
   CHECK_EQ(out_grad.dims_.size(), 2);
   CHECK_EQ(w_grad.dims_.size(), 2);
   CHECK_EQ(sequence.dims_.size(), 1);
diff --git a/paddle/function/context_projection_op_test.cpp b/paddle/function/ContextProjectionOpTest.cpp
similarity index 100%
rename from paddle/function/context_projection_op_test.cpp
rename to paddle/function/ContextProjectionOpTest.cpp
diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp
index 215b3dbd837687eefea631cc79b107a65d5b33ed..6f82a8d053bc203eed44bd0d8d4c47d23a15268d 100644
--- a/paddle/function/Function.cpp
+++ b/paddle/function/Function.cpp
@@ -60,14 +60,14 @@ FuncConfig& FuncConfig::set(const std::string& key, real v) {
 
 template <>
 FuncConfig& FuncConfig::set(const std::string& key, int v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
   valueMap_[key].i = v;
   return *this;
 }
 
 template <>
 FuncConfig& FuncConfig::set(const std::string& key, bool v) {
-  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  CHECK_EQ(valueMap_.count(key), 0) << "Duplicated value: " << key;
   valueMap_[key].b = v;
   return *this;
 }
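The `CHECK` to `CHECK_EQ` change above follows the glog convention: on failure, `CHECK_EQ` logs both operands (e.g. "1 vs. 0"), so a duplicated key reports the actual count rather than just the failed expression text. Below is a minimal, self-contained sketch of the duplicate-key-checked, chainable setter pattern; `MiniFuncConfig` is a hypothetical stand-in for illustration, not Paddle's `FuncConfig`:

```cpp
#include <glog/logging.h>
#include <map>
#include <string>

// Hypothetical stand-in for paddle::FuncConfig, illustrating the pattern:
// each set() rejects duplicate keys and returns *this so calls can chain.
class MiniFuncConfig {
public:
  MiniFuncConfig& set(const std::string& key, int v) {
    // CHECK_EQ logs both sides on failure, e.g. "(1 vs. 0)",
    // which is why the diff prefers it over CHECK(count == 0).
    CHECK_EQ(values_.count(key), 0U) << "Duplicated value: " << key;
    values_[key] = v;
    return *this;
  }
  int get(const std::string& key) const {
    auto it = values_.find(key);
    CHECK(it != values_.end()) << "Unknown key: " << key;
    return it->second;
  }

private:
  std::map<std::string, int> values_;
};

// Usage mirrors the diff:
//   MiniFuncConfig().set("context_length", 5).set("context_start", -2);
```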
diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp
index 51c0ae5cc9523debffa4bdfe44fe0df0c56839c2..d6af71824cfe5975449a55aa94a2b7e4db405e0c 100644
--- a/paddle/gserver/layers/ContextProjection.cpp
+++ b/paddle/gserver/layers/ContextProjection.cpp
@@ -38,6 +38,53 @@ ContextProjection::ContextProjection(const ProjectionConfig& config,
     CHECK_EQ(inputDim * totalPad, parameter->getSize());
     weight_.reset(new Weight(totalPad, inputDim, parameter));
   }
+  // init forward_ and backward_ functions
+  init();
+}
+
+bool ContextProjection::init() {
+  size_t context_length = config_.context_length();
+  int context_start = config_.context_start();
+  bool is_padding = config_.trainable_padding();
+  size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
+  if (!useGpu_) {  // CPU functions
+    createFunction(forward_,
+                   "ContextProjectionForward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackward-CPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+  } else {  // GPU functions
+    createFunction(forward_,
+                   "ContextProjectionForward-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("is_padding", is_padding));
+    createFunction(backward_,
+                   "ContextProjectionBackwardData-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start));
+
+    createFunction(backward_,
+                   "ContextProjectionBackwardWeight-GPU",
+                   FuncConfig()
+                       .set("context_length", context_length)
+                       .set("context_start", context_start)
+                       .set("begin_pad", beginPad_)
+                       .set("total_pad", total_pad));
+  }
+  return true;
 }
 
 void ContextProjection::resetState() {
@@ -78,25 +125,35 @@ LayerStatePtr ContextProjection::getState() {
 }
 
 void ContextProjection::forward() {
-  CHECK(in_->value);
+  CHECK(in_->value && out_->value);
   CHECK(in_->sequenceStartPositions);
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
-
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
 
   REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
-  out_->value->contextProjectionForward(
-      *(in_->value),
-      state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
-      *startPositions,
-      config_.context_length(),
-      config_.context_start(),
-      beginPad_,
-      state_ ? true : isPadding);
+  bool is_padding = config_.trainable_padding();
+  /// prefer state_; otherwise use weight_ (w is nullptr when padding is off)
+  auto w_ptr =
+      state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
+  auto start_pos = in_->sequenceStartPositions;
+  /// when state_ serves as the weight, w_ptr already owns memory, so padding is true
+  forward_[0]->init(FuncConfig()
+                        .set("context_length", config_.context_length())
+                        .set("context_start", config_.context_start())
+                        .set("begin_pad", beginPad_)
+                        .set("is_padding", state_ ? true : is_padding));
+  forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}),
+                     Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                            Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                     Tensor(reinterpret_cast<real*>(
+                                const_cast<int*>(start_pos->getData(useGpu_))),
+                            Dims{start_pos->getSize()})},
+                    {Tensor(out_->value->getData(), Dims{batch_size, dim})},
+                    {});
 
   if (state_ && config_.context_start() < 0) {
     CHECK_EQ(1, in_->getNumSequences());
@@ -118,39 +175,46 @@ void ContextProjection::forward() {
 }
 
 void ContextProjection::backward(const UpdateCallback& callback) {
-  CHECK(in_->value);
-  int64_t inputDim = in_->value->getWidth();
-  int64_t dim = out_->value->getWidth();
-  CHECK_EQ(dim, inputDim * config_.context_length());
-  auto startPositions = in_->sequenceStartPositions->getVector(useGpu_);
+  CHECK(in_->value && out_->value && out_->grad);
+  size_t input_dim = in_->value->getWidth();
+  size_t dim = out_->value->getWidth();
+  CHECK_EQ(dim, input_dim * config_.context_length());
+  size_t batch_size = in_->value->getHeight();
+  CHECK_EQ(batch_size, out_->value->getHeight());
 
   REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
-  bool isPadding = config_.trainable_padding();
+  bool is_padding = config_.trainable_padding();
+  auto start_pos = in_->sequenceStartPositions;
 
   if (!out_->grad->useGpu()) {
-    out_->grad->contextProjectionBackward(
-        in_->grad.get(),
-        isPadding ? weight_->getWGrad().get() : nullptr,
-        *startPositions,
-        config_.context_length(),
-        config_.context_start(),
-        beginPad_,
-        isPadding);
+    auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+    backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
+                               Dims{batch_size, input_dim}),
+                        Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                               Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                        Tensor(reinterpret_cast<real*>(const_cast<int*>(
+                                   start_pos->getData(useGpu_))),
+                               Dims{start_pos->getSize()})},
+                       {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+                       {});
  } else {
    if (in_->grad) {
-      out_->grad->contextProjectionBackwardData(*(in_->grad),
-                                                *startPositions,
-                                                config_.context_length(),
-                                                config_.context_start());
+      backward_[0]->calc(
+          {Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
    }
-
-    if (isPadding && weight_->getWGrad()) {
-      out_->grad->contextProjectionBackwardWeight(
-          *(weight_->getWGrad()),
-          *startPositions,
-          config_.context_length(),
-          config_.context_start(),
-          weight_->getWGrad()->getHeight(),
-          beginPad_);
+    if (is_padding && weight_->getWGrad()) {
+      backward_[1]->calc(
+          {Tensor(weight_->getWGrad()->getData(),
+                  Dims{weight_->getWGrad()->getHeight(), input_dim}),
+           Tensor(reinterpret_cast<real*>(
+                      const_cast<int*>(start_pos->getData(useGpu_))),
+                  Dims{start_pos->getSize()})},
+          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+          {});
    }
  }
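The rewritten forward()/backward() above pass every argument through the Tensor view type: real-valued matrices hand over their buffers directly, while the int32 sequence-start vector is squeezed through the same real*-typed constructor with reinterpret_cast and recovered inside the function. Below is a self-contained sketch of that round trip; MiniTensor, wrapSequence, and numSequences are hypothetical illustrations, not Paddle APIs:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

using real = float;  // Paddle's `real` is float or double depending on build

// Hypothetical minimal stand-in for the Tensor view used in the diff:
// an untyped buffer pointer plus dimensions, no ownership.
struct MiniTensor {
  real* data;
  std::vector<size_t> dims;
};

// Caller side (cf. ContextProjection::forward): the int sequence-start
// array is passed through the real*-typed view via reinterpret_cast.
MiniTensor wrapSequence(const int* seq_starts, size_t n) {
  return MiniTensor{reinterpret_cast<real*>(const_cast<int*>(seq_starts)),
                    {n}};
}

// Callee side (cf. ContextProjectionForward): the function knows this
// argument really holds ints and casts back before indexing.
size_t numSequences(const MiniTensor& seq) {
  const int* starts = reinterpret_cast<const int*>(seq.data);
  (void)starts;             // a real kernel would index into this
  return seq.dims[0] - 1;   // start positions hold numSequences + 1 entries
}

int main() {
  std::vector<int> starts = {0, 4, 9};  // two sequences: [0,4) and [4,9)
  MiniTensor t = wrapSequence(starts.data(), starts.size());
  assert(numSequences(t) == 2);
  return 0;
}
```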
diff --git a/paddle/gserver/layers/ContextProjection.h b/paddle/gserver/layers/ContextProjection.h
index 2df43bd04fec868924b5d45f9def231a48ee7f04..c87d6ed1d6d46b391ccf8722f6d110614be1fe78 100644
--- a/paddle/gserver/layers/ContextProjection.h
+++ b/paddle/gserver/layers/ContextProjection.h
@@ -61,6 +61,8 @@ public:
 
   virtual LayerStatePtr getState();
 
+  virtual bool init();
+
 protected:
   std::unique_ptr<Weight> weight_;
   /// number of extra timesteps added at the beginning
diff --git a/paddle/gserver/layers/Projection.h b/paddle/gserver/layers/Projection.h
index 8cd8042479eafdbd6b8dac03b63b344fcf9526b1..1e9f60706f89cba01ad0d1bb557cecbefabe43b9 100644
--- a/paddle/gserver/layers/Projection.h
+++ b/paddle/gserver/layers/Projection.h
@@ -88,11 +88,31 @@ public:
    */
   virtual LayerStatePtr getState() { return nullptr; }
 
+  /**
+   * init forward_ and backward_ functions
+   */
+  virtual bool init() { return true; }
+
   /**
    * Get output size of projection.
    */
   size_t getOutputSize() const { return config_.output_size(); }
 
+protected:
+  /**
+   * Create layer function. The function is called in forward or backward.
+   * \param function, Layer::forward_ or Layer::backward_
+   * \param name, function name, including the -GPU or -CPU suffix
+   * \param config, initialization configuration for the function
+   */
+  void createFunction(std::vector<std::shared_ptr<FunctionBase>>& function,
+                      const std::string& name,
+                      const FuncConfig& config) {
+    function.emplace_back(FunctionBase::funcRegistrar_.createByType(name));
+    auto& func = function.back();
+    func->init(config);
+  }
+
 protected:
   /// Config of projection
   ProjectionConfig config_;
@@ -106,5 +126,9 @@ protected:
   const Argument* out_;
   /// Store `passType` passed to forward()
   PassType passType_;
+  /// Layer forward function
+  std::vector<std::shared_ptr<FunctionBase>> forward_;
+  /// Layer backward function
+  std::vector<std::shared_ptr<FunctionBase>> backward_;
 };
 
 }  // namespace paddle
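createFunction above resolves a concrete implementation by its registered name (the -CPU or -GPU suffix selects the device variant) and appends it, already initialized, to forward_ or backward_. Below is a minimal sketch of such a name-to-factory registry; MiniRegistrar and MiniFunction are hypothetical stand-ins for illustration, not Paddle's ClassRegistrar and FunctionBase:

```cpp
#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical minimal function interface; init(int) stands in for
// FunctionBase::init(const FuncConfig&).
struct MiniFunction {
  virtual ~MiniFunction() = default;
  virtual void init(int config) {}
};

// Name-to-factory registry, mirroring funcRegistrar_.createByType(name).
class MiniRegistrar {
public:
  using Creator = std::function<std::shared_ptr<MiniFunction>()>;

  void registerClass(const std::string& name, Creator c) {
    creators_[name] = std::move(c);
  }
  std::shared_ptr<MiniFunction> createByType(const std::string& name) const {
    auto it = creators_.find(name);
    if (it == creators_.end()) throw std::runtime_error("unknown: " + name);
    return it->second();
  }

private:
  std::map<std::string, Creator> creators_;
};

// Mirrors Projection::createFunction: look up by name, init, append.
void createFunction(std::vector<std::shared_ptr<MiniFunction>>& funcs,
                    const std::string& name,
                    int config,
                    const MiniRegistrar& registrar) {
  funcs.emplace_back(registrar.createByType(name));
  funcs.back()->init(config);
}

// Usage, mirroring ContextProjection::init():
//   registrar.registerClass("ContextProjectionForward-CPU",
//                           [] { return std::make_shared<MiniFunction>(); });
//   createFunction(forward_, "ContextProjectionForward-CPU", cfg, registrar);
```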