diff --git a/paddle/fluid/operators/benchmark/op_tester.cc b/paddle/fluid/operators/benchmark/op_tester.cc index fec091255f6391b77cd2858905f3aa2e5dd8baff..ac487223d09b1b5be2cb889fb7fb7f60c0093397 100644 --- a/paddle/fluid/operators/benchmark/op_tester.cc +++ b/paddle/fluid/operators/benchmark/op_tester.cc @@ -257,7 +257,8 @@ framework::VarDesc *OpTester::Var(const std::string &name) { template <typename T> void OpTester::SetupTensor(framework::LoDTensor *tensor, const std::vector<int64_t> &shape, T lower, T upper, - const std::string &initializer) { + const std::string &initializer, + const std::string &filename) { static unsigned int seed = 100; std::mt19937 rng(seed++); std::uniform_real_distribution<double> uniform_dist(0, 1); @@ -280,12 +281,20 @@ void OpTester::SetupTensor(framework::LoDTensor *tensor, } } else if (initializer == "natural") { for (int i = 0; i < cpu_tensor.numel(); ++i) { - cpu_ptr[i] = lower + i; + cpu_ptr[i] = static_cast<T>(lower + i); } } else if (initializer == "zeros") { for (int i = 0; i < cpu_tensor.numel(); ++i) { - cpu_ptr[i] = 0; + cpu_ptr[i] = static_cast<T>(0); } + } else if (initializer == "file") { + std::ifstream is(filename); + for (size_t i = 0; i < cpu_tensor.numel(); ++i) { + T value; + is >> value; + cpu_ptr[i] = static_cast<T>(value); + } + is.close(); } else { PADDLE_THROW("Unsupported initializer %s.", initializer.c_str()); } @@ -325,15 +334,19 @@ void OpTester::CreateVariables(framework::Scope *scope) { auto *tensor = var->GetMutable<framework::LoDTensor>(); const auto &data_type = var_desc->GetDataType(); if (data_type == framework::proto::VarType::INT32) { - SetupTensor<int>(tensor, shape, 0, 1, item.second.initializer); + SetupTensor<int>(tensor, shape, 0, 1, item.second.initializer, + item.second.filename); } else if (data_type == framework::proto::VarType::INT64) { - SetupTensor<int64_t>(tensor, shape, 0, 1, item.second.initializer); + SetupTensor<int64_t>(tensor, shape, 0, 1, item.second.initializer, + item.second.filename); } else if (data_type == framework::proto::VarType::FP32) {
SetupTensor<float>(tensor, shape, static_cast<float>(0.0), - static_cast<float>(1.0), item.second.initializer); + static_cast<float>(1.0), item.second.initializer, + item.second.filename); } else if (data_type == framework::proto::VarType::FP64) { SetupTensor<double>(tensor, shape, static_cast<double>(0.0), - static_cast<double>(1.0), item.second.initializer); + static_cast<double>(1.0), item.second.initializer, + item.second.filename); } else { PADDLE_THROW("Unsupported dtype %d.", data_type); } diff --git a/paddle/fluid/operators/benchmark/op_tester.h b/paddle/fluid/operators/benchmark/op_tester.h index 328389293c4b71a2f1fefbc3bf26fd46b79ec6e2..a6d21573a05166a5cb98e78d4993f9304882d2e1 100644 --- a/paddle/fluid/operators/benchmark/op_tester.h +++ b/paddle/fluid/operators/benchmark/op_tester.h @@ -55,7 +55,7 @@ class OpTester { template <typename T> void SetupTensor(framework::LoDTensor *input, const std::vector<int64_t> &shape, T lower, T upper, - const std::string &initializer); + const std::string &initializer, const std::string &filename); void RunImpl(); diff --git a/paddle/fluid/operators/benchmark/op_tester_config.cc b/paddle/fluid/operators/benchmark/op_tester_config.cc index b4878ab04244cf6b54d323943fc1fbf4e3882660..818e5f64edc2c1d213659c48d282df75625676ca 100644 --- a/paddle/fluid/operators/benchmark/op_tester_config.cc +++ b/paddle/fluid/operators/benchmark/op_tester_config.cc @@ -56,6 +56,9 @@ OpInputConfig::OpInputConfig(std::istream& is) { ParseDims(is); } else if (sep == "lod" || sep == "lod:") { ParseLoD(is); + } else if (sep == "filename") { + is >> filename; + EraseEndSep(&filename); } } } @@ -86,7 +89,7 @@ void OpInputConfig::ParseInitializer(std::istream& is) { EraseEndSep(&initializer_str); const std::vector<std::string> supported_initializers = {"random", "natural", - "zeros"}; + "zeros", "file"}; if (!Has(supported_initializers, initializer_str)) { PADDLE_THROW("Unsupported initializer %s", initializer_str.c_str()); } diff --git a/paddle/fluid/operators/benchmark/op_tester_config.h b/paddle/fluid/operators/benchmark/op_tester_config.h
index 5803f82ac28867a481875c2af607290c5d366146..3956bc0a8b1080e14cb773c9664f821dc7e40abd 100644 --- a/paddle/fluid/operators/benchmark/op_tester_config.h +++ b/paddle/fluid/operators/benchmark/op_tester_config.h @@ -35,7 +35,8 @@ struct OpInputConfig { std::string name; std::string dtype{"fp32"}; // int32/int, int64/long, fp32/float, fp64/double - std::string initializer{"random"}; // random, natural + std::string initializer{"random"}; // random, natural, zeros, file + std::string filename{""}; std::vector<int64_t> dims; std::vector<std::vector<size_t>> lod; }; diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu index fed12785f47e1b8eea3f053712830901bee3bdc9..fc38653ce1132ec9e05074fb739335970f6b9256 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu @@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/ + #include "paddle/fluid/operators/elementwise/elementwise_add_op.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index 69f640ab6649df673f07ac0cef81bf80d16eb98d..ba8ca1ad4f71732921e9ef3fe0d0dce69e27f733 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -40,25 +40,26 @@ template <typename DeviceContext, typename T> typename std::enable_if< std::is_floating_point<T>::value && std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type -elementwise_add(const framework::ExecutionContext &ctx, - const framework::Tensor *x, const framework::Tensor *y, - framework::Tensor *z) { - auto eigen_x = framework::EigenVector<T>::Flatten(*x); - auto eigen_y = framework::EigenVector<T>::Flatten(*y); - auto eigen_z = framework::EigenVector<T>::Flatten(*z); - +elementwise_add_same_dims(const framework::ExecutionContext &ctx, + const framework::Tensor *x, + const framework::Tensor *y, framework::Tensor *z) { auto blas = math::GetBlas<DeviceContext, T>(ctx); - blas.VADD(x->numel(), eigen_x.data(), eigen_y.data(), eigen_z.data()); + blas.VADD(x->numel(), x->data<T>(), y->data<T>(), z->data<T>()); } template <typename DeviceContext, typename T> typename std::enable_if< !std::is_floating_point<T>::value || !std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type -elementwise_add(const framework::ExecutionContext &ctx, - const framework::Tensor *x, const framework::Tensor *y, - framework::Tensor *z) { - default_elementwise_add<DeviceContext, T>(ctx, x, y, z); +elementwise_add_same_dims(const framework::ExecutionContext &ctx, + const framework::Tensor *x, + const framework::Tensor *y, framework::Tensor *z) { + auto eigen_x = framework::EigenVector<T>::Flatten(*x); + auto eigen_y = framework::EigenVector<T>::Flatten(*y); + auto eigen_z = framework::EigenVector<T>::Flatten(*z); + + auto &place = *ctx.template device_context<DeviceContext>().eigen_device(); + eigen_z.device(place) = eigen_x + eigen_y; } template <typename DeviceContext, typename T> @@ -73,7 +74,7 @@ class ElementwiseAddKernel : public framework::OpKernel<T> { auto
dims_equal = x->dims() == y->dims(); if (dims_equal) { - elementwise_add<DeviceContext, T>(ctx, x, y, z); + elementwise_add_same_dims<DeviceContext, T>(ctx, x, y, z); } else { default_elementwise_add<DeviceContext, T>(ctx, x, y, z); } diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu index 303070bd19a47040ef89aeac3544a02b57ebeeb5..d18c7e66f10a0a7c4e63fdb2262228727591daee 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu @@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ + #include "paddle/fluid/operators/elementwise/elementwise_mul_op.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index f67c55f31022bc8c1866be76bb5bbfb9d63e687c..105707b803e205cf5718ed7305d2e6882c76973e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -38,22 +38,26 @@ template <typename DeviceContext, typename T> typename std::enable_if< std::is_floating_point<T>::value && std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type -elementwise_mul(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - framework::Tensor* z) { +elementwise_mul_same_dims(const framework::ExecutionContext& ctx, + const framework::Tensor* x, + const framework::Tensor* y, framework::Tensor* z) { auto blas = math::GetBlas<DeviceContext, T>(ctx); - blas.VMUL(x->numel(), x->data<T>(), y->data<T>(), - z->mutable_data<T>(ctx.GetPlace())); + blas.VMUL(x->numel(), x->data<T>(), y->data<T>(), z->data<T>()); } template <typename DeviceContext, typename T> typename std::enable_if< !std::is_floating_point<T>::value || !std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type -elementwise_mul(const framework::ExecutionContext& ctx, - const
framework::Tensor* x, const framework::Tensor* y, - framework::Tensor* z) { - default_elementwise_mul<DeviceContext, T>(ctx, x, y, z); +elementwise_mul_same_dims(const framework::ExecutionContext& ctx, + const framework::Tensor* x, + const framework::Tensor* y, framework::Tensor* z) { + auto eigen_x = framework::EigenVector<T>::Flatten(*x); + auto eigen_y = framework::EigenVector<T>::Flatten(*y); + auto eigen_z = framework::EigenVector<T>::Flatten(*z); + + auto& place = *ctx.template device_context<DeviceContext>().eigen_device(); + eigen_z.device(place) = eigen_x * eigen_y; } template <typename DeviceContext, typename T> @@ -88,7 +92,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> { z->mutable_data<T>(ctx.GetPlace()); if (x.numel() == y->numel()) { - elementwise_mul<DeviceContext, T>(ctx, &x, y, z); + elementwise_mul_same_dims<DeviceContext, T>(ctx, &x, y, z); } else { default_elementwise_mul<DeviceContext, T>(ctx, &x, y, z); }