From dcda20233cedcc700a7556ec3fb7dbf689da6c15 Mon Sep 17 00:00:00 2001
From: Yiqun Liu
Date: Mon, 13 May 2019 16:44:42 +0800
Subject: [PATCH] Optimize the elementwise op using eigen (#15494)

* Optimize the elementwise op with CUDA kernels. test=develop

* Support setting of attr in op config file. test=develop

* Add support for setting dtype and initializer in config. test=develop

* Save workspace.

* Add initializer "zeros". test=develop

* Fix compiling error.

* Support the use of an existing file to initialize a tensor in op_tester.

* Use eigen to optimize elementwise_add/mul for the case that x and y have
  the same dims. test=develop
---
 paddle/fluid/operators/benchmark/op_tester.cc | 27 ++++++++++++++-----
 paddle/fluid/operators/benchmark/op_tester.h  |  2 +-
 .../operators/benchmark/op_tester_config.cc   |  5 +++-
 .../operators/benchmark/op_tester_config.h    |  3 ++-
 .../elementwise/elementwise_add_op.cu         |  1 +
 .../elementwise/elementwise_add_op.h          | 27 ++++++++++---------
 .../elementwise/elementwise_mul_op.cu         |  1 +
 .../elementwise/elementwise_mul_op.h          | 24 ++++++++++-------
 8 files changed, 57 insertions(+), 33 deletions(-)

diff --git a/paddle/fluid/operators/benchmark/op_tester.cc b/paddle/fluid/operators/benchmark/op_tester.cc
index fec091255f6..ac487223d09 100644
--- a/paddle/fluid/operators/benchmark/op_tester.cc
+++ b/paddle/fluid/operators/benchmark/op_tester.cc
@@ -257,7 +257,8 @@ framework::VarDesc *OpTester::Var(const std::string &name) {
 template <typename T>
 void OpTester::SetupTensor(framework::LoDTensor *tensor,
                            const std::vector<int64_t> &shape, T lower, T upper,
-                           const std::string &initializer) {
+                           const std::string &initializer,
+                           const std::string &filename) {
   static unsigned int seed = 100;
   std::mt19937 rng(seed++);
   std::uniform_real_distribution<double> uniform_dist(0, 1);
@@ -280,12 +281,20 @@ void OpTester::SetupTensor(framework::LoDTensor *tensor,
     }
   } else if (initializer == "natural") {
     for (int i = 0; i < cpu_tensor.numel(); ++i) {
-      cpu_ptr[i] = lower + i;
+      cpu_ptr[i] = static_cast<T>(lower + i);
     }
   } else if (initializer == "zeros") {
     for (int i = 0; i < cpu_tensor.numel(); ++i) {
-      cpu_ptr[i] = 0;
+      cpu_ptr[i] = static_cast<T>(0);
     }
+  } else if (initializer == "file") {
+    std::ifstream is(filename);
+    for (size_t i = 0; i < cpu_tensor.numel(); ++i) {
+      T value;
+      is >> value;
+      cpu_ptr[i] = static_cast<T>(value);
+    }
+    is.close();
   } else {
     PADDLE_THROW("Unsupported initializer %s.", initializer.c_str());
   }
@@ -325,15 +334,19 @@ void OpTester::CreateVariables(framework::Scope *scope) {
     auto *tensor = var->GetMutable<framework::LoDTensor>();
     const auto &data_type = var_desc->GetDataType();
     if (data_type == framework::proto::VarType::INT32) {
-      SetupTensor<int>(tensor, shape, 0, 1, item.second.initializer);
+      SetupTensor<int>(tensor, shape, 0, 1, item.second.initializer,
+                       item.second.filename);
     } else if (data_type == framework::proto::VarType::INT64) {
-      SetupTensor<int64_t>(tensor, shape, 0, 1, item.second.initializer);
+      SetupTensor<int64_t>(tensor, shape, 0, 1, item.second.initializer,
+                           item.second.filename);
     } else if (data_type == framework::proto::VarType::FP32) {
       SetupTensor<float>(tensor, shape, static_cast<float>(0.0),
-                         static_cast<float>(1.0), item.second.initializer);
+                         static_cast<float>(1.0), item.second.initializer,
+                         item.second.filename);
     } else if (data_type == framework::proto::VarType::FP64) {
       SetupTensor<double>(tensor, shape, static_cast<double>(0.0),
-                          static_cast<double>(1.0), item.second.initializer);
+                          static_cast<double>(1.0), item.second.initializer,
+                          item.second.filename);
     } else {
       PADDLE_THROW("Unsupported dtype %d.", data_type);
     }
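A note on the new "file" initializer above: SetupTensor reads the data back
with `is >> value`, so the file only needs whitespace-separated numbers, one
per tensor element in row-major order. A minimal sketch of generating such a
file follows; the file name and element count are illustrative, not anything
this patch prescribes:

    // Writes numel whitespace-separated values, matching what the
    // "file" initializer reads back with operator>>.
    #include <fstream>

    int main() {
      std::ofstream os("x_data.txt");  // illustrative name
      const int numel = 32 * 32;       // must cover the tensor's numel
      for (int i = 0; i < numel; ++i) {
        os << 0.5f * i << "\n";        // one value per line; any whitespace works
      }
      return 0;
    }

One small caveat visible in the hunk itself: the read loop compares a size_t
index against cpu_tensor.numel(), which returns a signed type, so compilers
may warn about the signed/unsigned comparison.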
diff --git a/paddle/fluid/operators/benchmark/op_tester.h b/paddle/fluid/operators/benchmark/op_tester.h
index 328389293c4..a6d21573a05 100644
--- a/paddle/fluid/operators/benchmark/op_tester.h
+++ b/paddle/fluid/operators/benchmark/op_tester.h
@@ -55,7 +55,7 @@ class OpTester {
   template <typename T>
   void SetupTensor(framework::LoDTensor *input,
                    const std::vector<int64_t> &shape, T lower, T upper,
-                   const std::string &initializer);
+                   const std::string &initializer, const std::string &filename);

   void RunImpl();

diff --git a/paddle/fluid/operators/benchmark/op_tester_config.cc b/paddle/fluid/operators/benchmark/op_tester_config.cc
index b4878ab0424..818e5f64edc 100644
--- a/paddle/fluid/operators/benchmark/op_tester_config.cc
+++ b/paddle/fluid/operators/benchmark/op_tester_config.cc
@@ -56,6 +56,9 @@ OpInputConfig::OpInputConfig(std::istream& is) {
       ParseDims(is);
     } else if (sep == "lod" || sep == "lod:") {
       ParseLoD(is);
+    } else if (sep == "filename") {
+      is >> filename;
+      EraseEndSep(&filename);
     }
   }
 }
@@ -86,7 +89,7 @@ void OpInputConfig::ParseInitializer(std::istream& is) {
   EraseEndSep(&initializer_str);

   const std::vector<std::string> supported_initializers = {"random", "natural",
-                                                           "zeros"};
+                                                           "zeros", "file"};
   if (!Has(supported_initializers, initializer_str)) {
     PADDLE_THROW("Unsupported initializer %s", initializer_str.c_str());
   }

diff --git a/paddle/fluid/operators/benchmark/op_tester_config.h b/paddle/fluid/operators/benchmark/op_tester_config.h
index 5803f82ac28..3956bc0a8b1 100644
--- a/paddle/fluid/operators/benchmark/op_tester_config.h
+++ b/paddle/fluid/operators/benchmark/op_tester_config.h
@@ -35,7 +35,8 @@ struct OpInputConfig {
   std::string name;
   std::string dtype{"fp32"};  // int32/int, int64/long, fp32/float, fp64/double
-  std::string initializer{"random"};  // random, natural
+  std::string initializer{"random"};  // random, natural, zeros, file
+  std::string filename{""};
   std::vector<int64_t> dims;
   std::vector<std::vector<size_t>> lod;
 };
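A note on usage: OpInputConfig consumes whitespace-separated key/value tokens,
so an input section exercising the new keys might look like the sketch below.
The exact stanza layout is an assumption inferred from the parser's token
names ("name", "dtype", "dims", "initializer", "filename"), not a syntax
confirmed by this patch:

    input {
      name: X,
      dtype: fp32,
      dims: [32, 32],
      initializer: file,
      filename: x_data.txt
    }

Trailing separator characters on values should be harmless if EraseEndSep
strips them, as its name suggests.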
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu
index fed12785f47..fc38653ce11 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+
 #include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
 #include "paddle/fluid/platform/float16.h"

diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index 69f640ab664..ba8ca1ad4f7 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -40,25 +40,26 @@ template <typename DeviceContext, typename T>
 typename std::enable_if<
     std::is_floating_point<T>::value &&
     std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type
-elementwise_add(const framework::ExecutionContext &ctx,
-                const framework::Tensor *x, const framework::Tensor *y,
-                framework::Tensor *z) {
-  auto eigen_x = framework::EigenVector<T>::Flatten(*x);
-  auto eigen_y = framework::EigenVector<T>::Flatten(*y);
-  auto eigen_z = framework::EigenVector<T>::Flatten(*z);
-
+elementwise_add_same_dims(const framework::ExecutionContext &ctx,
+                          const framework::Tensor *x,
+                          const framework::Tensor *y, framework::Tensor *z) {
   auto blas = math::GetBlas<DeviceContext, T>(ctx);
-  blas.VADD(x->numel(), eigen_x.data(), eigen_y.data(), eigen_z.data());
+  blas.VADD(x->numel(), x->data<T>(), y->data<T>(), z->data<T>());
 }

 template <typename DeviceContext, typename T>
 typename std::enable_if<
     !std::is_floating_point<T>::value ||
     !std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type
-elementwise_add(const framework::ExecutionContext &ctx,
-                const framework::Tensor *x, const framework::Tensor *y,
-                framework::Tensor *z) {
-  default_elementwise_add<DeviceContext, T>(ctx, x, y, z);
+elementwise_add_same_dims(const framework::ExecutionContext &ctx,
+                          const framework::Tensor *x,
+                          const framework::Tensor *y, framework::Tensor *z) {
+  auto eigen_x = framework::EigenVector<T>::Flatten(*x);
+  auto eigen_y = framework::EigenVector<T>::Flatten(*y);
+  auto eigen_z = framework::EigenVector<T>::Flatten(*z);
+
+  auto &place = *ctx.template device_context<DeviceContext>().eigen_device();
+  eigen_z.device(place) = eigen_x + eigen_y;
 }
@@ -73,7 +74,7 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {

     auto dims_equal = x->dims() == y->dims();
     if (dims_equal) {
-      elementwise_add<DeviceContext, T>(ctx, x, y, z);
+      elementwise_add_same_dims<DeviceContext, T>(ctx, x, y, z);
     } else {
       default_elementwise_add<DeviceContext, T>(ctx, x, y, z);
     }
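The non-CPU/non-float overload above uses the standard Eigen Tensor idiom:
flatten each tensor to a 1-D view and assign the elementwise expression
through .device(). A self-contained sketch in plain Eigen, with Paddle's
types removed; it assumes Eigen's unsupported Tensor module is on the
include path:

    // Same-dims elementwise add via Eigen expressions.
    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 1> x(4), y(4), z(4);
      x.setConstant(1.0f);
      y.setConstant(2.0f);

      // In the kernel, `place` comes from the DeviceContext's eigen_device();
      // on CUDA it is an Eigen::GpuDevice, so the same expression generates a
      // GPU kernel. DefaultDevice evaluates on the current thread.
      Eigen::DefaultDevice dev;
      z.device(dev) = x + y;

      std::cout << z(0) << std::endl;  // prints 3
      return 0;
    }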
diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
index 303070bd19a..d18c7e66f10 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+
 #include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
 #include "paddle/fluid/platform/float16.h"

diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h
index f67c55f3102..105707b803e 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h
@@ -38,22 +38,26 @@ template <typename DeviceContext, typename T>
 typename std::enable_if<
     std::is_floating_point<T>::value &&
     std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type
-elementwise_mul(const framework::ExecutionContext& ctx,
-                const framework::Tensor* x, const framework::Tensor* y,
-                framework::Tensor* z) {
+elementwise_mul_same_dims(const framework::ExecutionContext& ctx,
+                          const framework::Tensor* x,
+                          const framework::Tensor* y, framework::Tensor* z) {
   auto blas = math::GetBlas<DeviceContext, T>(ctx);
-  blas.VMUL(x->numel(), x->data<T>(), y->data<T>(),
-            z->mutable_data<T>(ctx.GetPlace()));
+  blas.VMUL(x->numel(), x->data<T>(), y->data<T>(), z->data<T>());
 }

 template <typename DeviceContext, typename T>
 typename std::enable_if<
     !std::is_floating_point<T>::value ||
     !std::is_same<DeviceContext, platform::CPUDeviceContext>::value>::type
-elementwise_mul(const framework::ExecutionContext& ctx,
-                const framework::Tensor* x, const framework::Tensor* y,
-                framework::Tensor* z) {
-  default_elementwise_mul<DeviceContext, T>(ctx, x, y, z);
+elementwise_mul_same_dims(const framework::ExecutionContext& ctx,
+                          const framework::Tensor* x,
+                          const framework::Tensor* y, framework::Tensor* z) {
+  auto eigen_x = framework::EigenVector<T>::Flatten(*x);
+  auto eigen_y = framework::EigenVector<T>::Flatten(*y);
+  auto eigen_z = framework::EigenVector<T>::Flatten(*z);
+
+  auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
+  eigen_z.device(place) = eigen_x * eigen_y;
 }
@@ -88,7 +92,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
     z->mutable_data<T>(ctx.GetPlace());

     if (x.numel() == y->numel()) {
-      elementwise_mul<DeviceContext, T>(ctx, &x, y, z);
+      elementwise_mul_same_dims<DeviceContext, T>(ctx, &x, y, z);
     } else {
       default_elementwise_mul<DeviceContext, T>(ctx, &x, y, z);
     }
--
GitLab
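Both files rely on the same compile-time dispatch: std::enable_if makes
exactly one of the two overloads viable for any (DeviceContext, T) pair, so
float/double on CPU hit the Blas VADD/VMUL path while everything else (GPU,
or integer types) hits the Eigen path. A minimal sketch of the mechanism with
stand-in device types; the real overloads take Paddle's ExecutionContext and
Tensors:

    #include <iostream>
    #include <type_traits>

    struct CPUDeviceContext {};   // stand-ins for Paddle's device contexts
    struct CUDADeviceContext {};

    // Viable only for floating-point T on CPU: the Blas-backed path.
    template <typename DeviceContext, typename T>
    typename std::enable_if<
        std::is_floating_point<T>::value &&
        std::is_same<DeviceContext, CPUDeviceContext>::value>::type
    elementwise_same_dims() {
      std::cout << "Blas path" << std::endl;
    }

    // Viable in every other case: the Eigen-backed path.
    template <typename DeviceContext, typename T>
    typename std::enable_if<
        !std::is_floating_point<T>::value ||
        !std::is_same<DeviceContext, CPUDeviceContext>::value>::type
    elementwise_same_dims() {
      std::cout << "Eigen path" << std::endl;
    }

    int main() {
      elementwise_same_dims<CPUDeviceContext, float>();   // Blas path
      elementwise_same_dims<CUDADeviceContext, float>();  // Eigen path
      elementwise_same_dims<CPUDeviceContext, int>();     // Eigen path
      return 0;
    }

Because the two conditions are exact complements, removing either overload
would make some instantiation ill-formed rather than silently falling back,
which keeps the fast path and the generic path mutually exclusive by
construction.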