Merge branch 'implement_basic_OpKernel' of https://github.com/QiJune/Paddle...

Merge branch 'implement_basic_OpKernel' of https://github.com/QiJune/Paddle into implement_basic_OpKernel

Merge branch 'implement_basic_OpKernel' of https://github.com/QiJune/Paddle...
Merge branch 'implement_basic_OpKernel' of https://github.com/QiJune/Paddle into implement_basic_OpKernel
3014662d · qijun · 736d078c · 14cfb8c2 · 3014662d · 3014662d
4 changed file
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -124,6 +124,7 @@ set(GPU_COMMON_FLAGS
    -Wno-error=literal-suffix
    -Wno-error=unused-local-typedefs
    -Wno-error=unused-function  # Warnings in Numpy Header.
+    -Wno-error=array-bounds # Warnings in Eigen::array
 )

 if (APPLE)

--- a/paddle/operators/mul_op.h
+++ b/paddle/operators/mul_op.h
@@ -25,9 +25,10 @@ template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
 public:
  void Compute(const framework::KernelContext& context) const override {
-    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
-    dim_pair[0].first = 1;
-    dim_pair[0].second = 0;
+    Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = {
+        Eigen::IndexPair<Eigen::DenseIndex>(1, 0)};
+    // dim_pair[0].first = 1;
+    // dim_pair[0].second = 0;

    auto input0 = context.Input(0)->Get<framework::Tensor>();
    auto input1 = context.Input(1)->Get<framework::Tensor>();

--- a/paddle/operators/rowwise_add_op.h
+++ b/paddle/operators/rowwise_add_op.h
@@ -27,6 +27,7 @@ public:
    auto in0 = context.Input(0)->Get<framework::Tensor>();
    auto in1 = context.Input(1)->Get<framework::Tensor>();
    auto* out = context.Output(0)->GetMutable<framework::Tensor>();
+    out->mutable_data<T>(context.GetPlace());

    auto input = framework::EigenMatrix<T>::From(in0);
    auto bias = framework::EigenVector<T>::From(in1);

--- a/paddle/operators/softmax_op.h
+++ b/paddle/operators/softmax_op.h
@@ -27,6 +27,7 @@ public:
  void Compute(const framework::KernelContext& context) const override {
    auto input = context.Input(0)->Get<framework::Tensor>();
    auto* output = context.Output(0)->GetMutable<framework::Tensor>();
+    output->mutable_data<T>(context.GetPlace());

    auto logits = framework::EigenMatrix<T>::From(input);
    auto softmax = framework::EigenMatrix<T>::From(*output);
@@ -41,7 +42,8 @@ public:
    Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
    Eigen::DSizes<int, 2> one_by_class(1, num_classes);

-    auto shifted_logits = (logits - logits.maximum(along_class)
+    auto shifted_logits = (logits -
+                           logits.maximum(along_class)
                               .eval()
                               .reshape(batch_by_one)
                               .broadcast(one_by_class));
@@ -49,7 +51,8 @@ public:
    softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp();

    softmax.device(*(context.GetEigenDevice<Place>())) =
-        (softmax * softmax.sum(along_class)
+        (softmax *
+         softmax.sum(along_class)
             .inverse()
             .eval()
             .reshape(batch_by_one)