diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h new file mode 100644 index 0000000000000000000000000000000000000000..55e3931f870d62dcaddc6c067f66999c59e2a262 --- /dev/null +++ b/paddle/framework/data_type.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include "paddle/framework/framework.pb.h" + +namespace paddle { +namespace framework { + +inline DataType ToDataType(std::type_index type) { + if (typeid(float).hash_code() == type.hash_code()) { + return DataType::FP32; + } else if (typeid(double).hash_code() == type.hash_code()) { + return DataType::FP64; + } else if (typeid(int).hash_code() == type.hash_code()) { + return DataType::INT32; + } else { + PADDLE_THROW("Not supported"); + return static_cast(-1); + } +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 90077d0192421f3678a049a723972fcb1e8d67af..4db38badaea8ae22d9ad47951f4941f3bdeb401a 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -100,13 +100,39 @@ class OpRegistrar : public Registrar { } }; -template +template +struct OpKernelRegistrarFunctor; + +template +struct OpKernelRegistrarFunctor { + using KERNEL_TYPE = + typename std::tuple_element>::type; + + void operator()(const char* op_type) const { + using T = typename KERNEL_TYPE::ELEMENT_TYPE; + OperatorWithKernel::OpKernelKey key(ToDataType(std::type_index(typeid(T))), + PlaceType()); + OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); + + constexpr auto size = std::tuple_size>::value; + OpKernelRegistrarFunctor + func; + func(op_type); + } +}; + +template +struct OpKernelRegistrarFunctor { + void operator()(const char* op_type) const {} +}; + +// User can register many kernel in one place. The data type could be different. +template class OpKernelRegistrar : public Registrar { public: explicit OpKernelRegistrar(const char* op_type) { - OperatorWithKernel::OpKernelKey key; - key.place_ = PlaceType(); - OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KernelType); + OpKernelRegistrarFunctor func; + func(op_type); } }; diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 79bda2e2f9173ab632307bc52167d7d8c17d4418..ba697a43e9ebdd1837720098d74b95e2dbad77d3 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -22,6 +22,7 @@ limitations under the License. */ #include "op_info.h" #include "paddle/framework/attribute.h" +#include "paddle/framework/data_type.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/scope.h" @@ -403,7 +404,7 @@ class RuntimeInferShapeContext : public InferShapeContextBase { const Scope& scope_; }; -class OpKernel { +class OpKernelBase { public: /** * ExecutionContext is the only parameter of Kernel Run function. @@ -414,33 +415,47 @@ class OpKernel { virtual void Compute(const ExecutionContext& context) const = 0; - virtual ~OpKernel() {} + virtual ~OpKernelBase() = default; +}; + +template +class OpKernel : public OpKernelBase { + public: + using ELEMENT_TYPE = T; }; class OperatorWithKernel : public OperatorBase { public: struct OpKernelKey { platform::Place place_; + DataType data_type_; - OpKernelKey() = default; - explicit OpKernelKey(const platform::DeviceContext& dev_ctx) { - place_ = dev_ctx.GetPlace(); - } + OpKernelKey(DataType data_type, platform::Place place) + : place_(place), data_type_(data_type) {} + + OpKernelKey(DataType data_type, const platform::DeviceContext& dev_ctx) + : place_(dev_ctx.GetPlace()), data_type_(data_type) {} bool operator==(const OpKernelKey& o) const { - return platform::places_are_same_class(place_, o.place_); + return platform::places_are_same_class(place_, o.place_) && + data_type_ == o.data_type_; } }; struct OpKernelHash { - std::hash hash_; + std::hash hash_; size_t operator()(const OpKernelKey& key) const { - return hash_(platform::is_gpu_place(key.place_)); + int place = key.place_.which(); + int data_type = static_cast(key.data_type_); + int pre_hash = data_type << NUM_PLACE_TYPE_LIMIT_IN_BIT | + (place & ((1 << NUM_PLACE_TYPE_LIMIT_IN_BIT) - 1)); + return hash_(pre_hash); } }; using OpKernelMap = - std::unordered_map, OpKernelHash>; + std::unordered_map, + OpKernelHash>; OperatorWithKernel(const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap& attrs) @@ -451,8 +466,10 @@ class OperatorWithKernel : public OperatorBase { RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); - auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); - opKernel->Compute(ExecutionContext(*this, scope, dev_ctx)); + ExecutionContext ctx(*this, scope, dev_ctx); + auto& opKernel = AllOpKernels().at(type_).at( + OpKernelKey(IndicateDataType(ctx), dev_ctx)); + opKernel->Compute(ctx); } static std::unordered_map& @@ -462,13 +479,43 @@ class OperatorWithKernel : public OperatorBase { } bool SupportGPU() const override { - OperatorWithKernel::OpKernelKey key; - key.place_ = platform::GPUPlace(); - return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0; + auto& op_kernels = OperatorWithKernel::AllOpKernels().at(type_); + return std::any_of(op_kernels.begin(), op_kernels.end(), + [](OpKernelMap::const_reference kern_pair) { + return platform::is_gpu_place(kern_pair.first.place_); + }); } protected: virtual void InferShape(InferShapeContextBase* ctx) const = 0; + + // indicate kernel DataType by input data. Defaultly all input data must be + // same. + virtual DataType IndicateDataType(const ExecutionContext& ctx) const { + auto& scope = ctx.scope(); + int data_type = -1; + for (auto& input : this->inputs_) { + for (auto& ipt_name : input.second) { + auto* var = scope.FindVar(ipt_name); + if (var != nullptr) { + const Tensor* t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t != nullptr) { + int tmp = static_cast(ToDataType(t->type())); + PADDLE_ENFORCE(tmp == data_type || data_type == -1, + "DataType of Paddle Op must be same."); + data_type = tmp; + } + } + } + } + PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input"); + return static_cast(data_type); + } }; } // namespace framework diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index e1d8f040b837a6ad598351dae0427cc7c231e79f..a0c17b41f27d9ec9a0f8e80576a052617919b000 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -114,10 +114,13 @@ class OpWithKernelTest : public OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase* ctx) const override {} + DataType IndicateDataType(const ExecutionContext& ctx) const override { + return DataType::FP32; + } }; template -class CPUKernelTest : public OpKernel { +class CPUKernelTest : public OpKernel { public: void Compute(const ExecutionContext& ctx) const { std::cout << "this is cpu kernel" << std::endl; @@ -144,7 +147,7 @@ class OpKernelTestMultiInputsProtoAndCheckerMaker } }; -class CPUKernalMultiInputsTest : public OpKernel { +class CPUKernalMultiInputsTest : public OpKernel { public: void Compute(const ExecutionContext& ctx) const { auto xs = ctx.op().Inputs("xs"); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index f040c09c089ec75c9773d752685be5e232e8f4b7..80a3f0a3935ef6809ebd6f3bfb849d4e87d76d1b 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -29,20 +29,10 @@ limitations under the License. */ namespace paddle { -namespace pybind { -namespace details { -template -struct CastToPyBufferImpl; -} -} // namespace pybind - namespace framework { class Tensor { public: - template - friend struct pybind::details::CastToPyBufferImpl; - template friend struct EigenTensor; @@ -119,6 +109,8 @@ class Tensor { return holder_->place(); } + std::type_index type() const { return holder_->type(); } + private: template inline void check_memory_size() const; diff --git a/paddle/operators/accuracy_op.cu b/paddle/operators/accuracy_op.cu index 75e8a989036f0b818687e1fec3e600bb90e86b22..0ca9ef941d4cb15619caea2b6baed197e4b15e5a 100644 --- a/paddle/operators/accuracy_op.cu +++ b/paddle/operators/accuracy_op.cu @@ -47,7 +47,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D, const int* Xdata, } template -class AccuracyOpCUDAKernel : public framework::OpKernel { +class AccuracyOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), diff --git a/paddle/operators/accuracy_op.h b/paddle/operators/accuracy_op.h index fe704efe1c979f4fc6a5a37184e51b416f5e517f..12c6b9aac8819caedbc02017cee81b37322bb72a 100644 --- a/paddle/operators/accuracy_op.h +++ b/paddle/operators/accuracy_op.h @@ -35,7 +35,7 @@ template ; template -class AccuracyKernel : public framework::OpKernel { +class AccuracyKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* inference = ctx.Input("Inference"); diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 15f8afb4ba45cc989fe7576b82b8bf853b1df7de..e400992ae29686d81a5ea32f9c50e05424246707 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { template -class ActivationKernel : public framework::OpKernel { +class ActivationKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -36,7 +36,7 @@ class ActivationKernel : public framework::OpKernel { }; template -class ActivationGradKernel : public framework::OpKernel { +class ActivationGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -202,7 +202,7 @@ struct SquareGradFunctor { }; template -class BReluKernel : public framework::OpKernel { +class BReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -219,7 +219,7 @@ class BReluKernel : public framework::OpKernel { }; template -class BReluGradKernel : public framework::OpKernel { +class BReluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -239,7 +239,7 @@ class BReluGradKernel : public framework::OpKernel { }; template -class SoftReluKernel : public framework::OpKernel { +class SoftReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -256,7 +256,7 @@ class SoftReluKernel : public framework::OpKernel { }; template -class SoftReluGradKernel : public framework::OpKernel { +class SoftReluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -277,7 +277,7 @@ class SoftReluGradKernel : public framework::OpKernel { }; template -class PowKernel : public framework::OpKernel { +class PowKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -293,7 +293,7 @@ class PowKernel : public framework::OpKernel { }; template -class PowGradKernel : public framework::OpKernel { +class PowGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -312,7 +312,7 @@ class PowGradKernel : public framework::OpKernel { }; template -class STanhKernel : public framework::OpKernel { +class STanhKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); @@ -329,7 +329,7 @@ class STanhKernel : public framework::OpKernel { }; template -class STanhGradKernel : public framework::OpKernel { +class STanhGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); diff --git a/paddle/operators/add_op.h b/paddle/operators/add_op.h index a7307b6818aa3d10ff215d06281e2b53196fd101..75163032a1ff11a1f18cfd0a4ff7289ff0cb66bf 100644 --- a/paddle/operators/add_op.h +++ b/paddle/operators/add_op.h @@ -25,7 +25,7 @@ template ; template -class AddKernel : public framework::OpKernel { +class AddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* input0 = context.Input("X"); diff --git a/paddle/operators/clip_op.h b/paddle/operators/clip_op.h index ce1d4e1f460414e6e4acee4fa3207f309c55d86b..ac702e9935201ba5263a80ebeb1ab22fa0bd1340 100644 --- a/paddle/operators/clip_op.h +++ b/paddle/operators/clip_op.h @@ -56,7 +56,7 @@ class ClipGradFunctor { }; template -class ClipKernel : public framework::OpKernel { +class ClipKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto max = context.Attr("max"); @@ -73,7 +73,7 @@ class ClipKernel : public framework::OpKernel { }; template -class ClipGradKernel : public framework::OpKernel { +class ClipGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto max = context.Attr("max"); diff --git a/paddle/operators/concat_op.h b/paddle/operators/concat_op.h index b37063261123bce1f22c39ab021e88f2faf58e9f..c113f19fb5cf806709bff845ee0f1078b34014bb 100644 --- a/paddle/operators/concat_op.h +++ b/paddle/operators/concat_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { template -class ConcatKernel : public framework::OpKernel { +class ConcatKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto ins = ctx.MultiInput("X"); @@ -44,7 +44,7 @@ class ConcatKernel : public framework::OpKernel { }; template -class ConcatGradKernel : public framework::OpKernel { +class ConcatGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* in = ctx.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index bcf6f758cae561a2e22f5be6c7a242647ef1c144..68c56f531f941e1b8f66ac7ba6bf318881642c4f 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -28,7 +28,7 @@ template ; template -class CosSimKernel : public framework::OpKernel { +class CosSimKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { // get Tensor @@ -67,7 +67,7 @@ class CosSimKernel : public framework::OpKernel { }; template -class CosSimGradKernel : public framework::OpKernel { +class CosSimGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { // get Tensor diff --git a/paddle/operators/crop_op.h b/paddle/operators/crop_op.h index ac3aeaf41e206c1deb74c7022c36f02c4777a84b..2e72583d68d0acf0e2f5044637dba55de3b57209 100644 --- a/paddle/operators/crop_op.h +++ b/paddle/operators/crop_op.h @@ -27,7 +27,7 @@ using EigenTensor = framework::EigenTensor; using framework::Tensor; template -class CropKernel : public framework::OpKernel { +class CropKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* x = context.Input("X"); @@ -69,7 +69,7 @@ void CropGradFunction(const framework::ExecutionContext& context) { } template -class CropGradKernel : public framework::OpKernel { +class CropGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { size_t rank = diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 26fc9b51c44d21d92851030449e116538f937846..4b67887f3638f32a89d1a4fd1316c0596b444629 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -47,6 +47,12 @@ class CrossEntropyOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Y", {x_dims[0], 1}); ctx->ShareLoD("X", /*->*/ "Y"); } + + // CrossEntropy's data type just determined by "X" + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("X")->type()); + } }; class CrossEntropyGradientOp : public framework::OperatorWithKernel { @@ -87,6 +93,12 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { } ctx->SetOutputDim(framework::GradVarName("X"), x_dims); } + + // CrossEntropy's data type just determined by "X" + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("X")->type()); + } }; class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 1cfeb7a53b047541322ac53c5b7249e660039d5c..76d63f77adccb0e7059b5dbe0bbfde1653dae6df 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -53,7 +53,7 @@ __global__ void SoftCrossEntropyGradientKernel(T* dX, const T* dY, const T* X, } // namespace template -class CrossEntropyOpCUDAKernel : public framework::OpKernel { +class CrossEntropyOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -69,7 +69,7 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel { }; template -class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel { +class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index 1f67461d3fadb1a979832ad049d4e0098256b834..fa81d3b4310a889dc0b21f6969ab39dddf053186 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -26,7 +26,7 @@ template ; template -class CrossEntropyOpKernel : public framework::OpKernel { +class CrossEntropyOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), @@ -42,7 +42,7 @@ class CrossEntropyOpKernel : public framework::OpKernel { }; template -class CrossEntropyGradientOpKernel : public framework::OpKernel { +class CrossEntropyGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index a04e4a22cc09d4e8106a528e490ccf8e90681c08..30c769000f2b98c69eaa78a4c139630dd0956386 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -47,7 +47,7 @@ struct MaskGenerator { // Use std::random and thrust::random(thrust is a std library in CUDA) to // implement uniform random. template -class GPUDropoutKernel : public framework::OpKernel { +class GPUDropoutKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* x = context.Input("X"); diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index d57f64afcb3558aeea6aed23fae06866e9af874a..745525fe81dadb22cbb64d66203f5a75608d3718 100644 --- a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -26,7 +26,7 @@ template ; template -class CPUDropoutKernel : public framework::OpKernel { +class CPUDropoutKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* x = context.Input("X"); @@ -62,7 +62,7 @@ class CPUDropoutKernel : public framework::OpKernel { }; template -class DropoutGradKernel : public framework::OpKernel { +class DropoutGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { PADDLE_ENFORCE(context.Attr("is_training"), diff --git a/paddle/operators/elementwise_add_op.h b/paddle/operators/elementwise_add_op.h index e9f78ef26e05878053d968c35f17b456c128827a..f04fe3ec6069ab1bf227be6a3a5c10ee908e4824 100644 --- a/paddle/operators/elementwise_add_op.h +++ b/paddle/operators/elementwise_add_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { template -class ElementwiseAddKernel : public framework::OpKernel { +class ElementwiseAddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseCompute(ctx); @@ -101,7 +101,7 @@ struct ElementwiseAddBroadCast2GradFunctor { }; template -class ElementwiseAddGradKernel : public framework::OpKernel { +class ElementwiseAddGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseGradCompute, diff --git a/paddle/operators/elementwise_div_op.h b/paddle/operators/elementwise_div_op.h index 99b6d9c1991edfb0018f8a459dfa373948cec434..8946ff3d25c2aff3dc3aa69368f0083371cd2fef 100644 --- a/paddle/operators/elementwise_div_op.h +++ b/paddle/operators/elementwise_div_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { template -class ElementwiseDivKernel : public framework::OpKernel { +class ElementwiseDivKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseCompute(ctx); @@ -103,7 +103,7 @@ struct ElementwiseDivBroadCast2GradFunctor { }; template -class ElementwiseDivGradKernel : public framework::OpKernel { +class ElementwiseDivGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseGradCompute, diff --git a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc index bda5dfe03e974740fe4a07191ae6b68ebfcd5d3a..da7765aa6a7a81c9e0b4f462022cad54c16aec47 100644 --- a/paddle/operators/elementwise_mul_op.cc +++ b/paddle/operators/elementwise_mul_op.cc @@ -36,7 +36,9 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker, elementwise_mul_grad, ops::ElementwiseOpGrad); REGISTER_OP_CPU_KERNEL( elementwise_mul, - ops::ElementwiseMulKernel); + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel); REGISTER_OP_CPU_KERNEL( elementwise_mul_grad, - ops::ElementwiseMulGradKernel); + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel); diff --git a/paddle/operators/elementwise_mul_op.cu b/paddle/operators/elementwise_mul_op.cu index da08a75596c4d3b89dc8892bd4405464fec96389..056f081d3e6ac349978ff00689700c035bed8e39 100644 --- a/paddle/operators/elementwise_mul_op.cu +++ b/paddle/operators/elementwise_mul_op.cu @@ -19,7 +19,9 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( elementwise_mul, - ops::ElementwiseMulKernel); + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel); REGISTER_OP_GPU_KERNEL( elementwise_mul_grad, - ops::ElementwiseMulGradKernel); + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel); diff --git a/paddle/operators/elementwise_mul_op.h b/paddle/operators/elementwise_mul_op.h index 6ab642378bb0af8593ca0677014aede3c03cff8e..4469b07eaa08a3b011a88e58f1d645dd30b10ced 100644 --- a/paddle/operators/elementwise_mul_op.h +++ b/paddle/operators/elementwise_mul_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { template -class ElementwiseMulKernel : public framework::OpKernel { +class ElementwiseMulKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseCompute(ctx); @@ -102,7 +102,7 @@ struct ElementwiseMulBroadCast2GradFunctor { }; template -class ElementwiseMulGradKernel : public framework::OpKernel { +class ElementwiseMulGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseGradCompute, diff --git a/paddle/operators/elementwise_sub_op.h b/paddle/operators/elementwise_sub_op.h index 3ca1376c73b3332b76a5973e201f9e4fba77cd21..3f40c1c5bcea5e8473765b039de4ee2a16054f0c 100644 --- a/paddle/operators/elementwise_sub_op.h +++ b/paddle/operators/elementwise_sub_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { template -class ElementwiseSubKernel : public framework::OpKernel { +class ElementwiseSubKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseCompute(ctx); @@ -102,7 +102,7 @@ struct ElementwiseSubBroadCast2GradFunctor { }; template -class ElementwiseSubGradKernel : public framework::OpKernel { +class ElementwiseSubGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElementwiseGradCompute, diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index 4474581784531faee1741f0b143743e31cc3788f..cdf56a723b117fe7b08ef2749aa2c2978c923d44 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { template -class FillZerosLikeKernel : public framework::OpKernel { +class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* output = context.Output("Y"); diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 0e3cd174adee1e50d0a63861286a26d325484efb..da22bd0c52c27d7decd10e2e2b34fa38d0620da8 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -37,6 +37,11 @@ class GatherOp : public framework::OperatorWithKernel { output_dims[0] = batch_size; ctx->SetOutputDim("Out", output_dims); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("X")->type()); + } }; class GatherGradOp : public framework::OperatorWithKernel { @@ -47,6 +52,11 @@ class GatherGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContextBase* ctx) const override { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("X")->type()); + } }; class GatherOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/operators/gather_op.h b/paddle/operators/gather_op.h index 381854f301870beadb72d9e9b4eb17ff199960fb..073e566e8f6962d62cc1b738672843421dcb4ee5 100644 --- a/paddle/operators/gather_op.h +++ b/paddle/operators/gather_op.h @@ -24,7 +24,7 @@ namespace operators { using Tensor = framework::Tensor; template -class GatherOpKernel : public framework::OpKernel { +class GatherOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto *X = ctx.Input("X"); @@ -37,7 +37,7 @@ class GatherOpKernel : public framework::OpKernel { }; template -class GatherGradientOpKernel : public framework::OpKernel { +class GatherGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto *Index = ctx.Input("Index"); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 05120a6e7bcfdb8641c722731f462c89e4223339..5cd2c7d2c066cd31e2d38a3c0d682f02339b4d59 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -16,7 +16,7 @@ namespace paddle { namespace operators { template -class CPUGaussianRandomKernel : public framework::OpKernel { +class CPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { float mean = context.Attr("mean"); @@ -56,6 +56,11 @@ class GaussianRandomOp : public framework::OperatorWithKernel { "dims can be one int or array. dims must be set."); ctx->SetOutputDim("Out", framework::make_ddim(temp)); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return static_cast(Attr("data_type")); + } }; class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { @@ -76,6 +81,8 @@ Use to initialize tensor with gaussian random generator. "Random seed of generator." "0 means use system wide seed") .SetDefault(0); + AddAttr("data_type", "output data type") + .SetDefault(framework::DataType::FP32); } }; diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 2d63b3049988cfc3135a87a57dad56b970df3eab..315560bf1ba8a66b9a3b7d79510d202885e845d6 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -37,7 +37,7 @@ struct GaussianGenerator { }; template -class GPUGaussianRandomKernel : public framework::OpKernel { +class GPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); diff --git a/paddle/operators/gemm_conv2d_op.h b/paddle/operators/gemm_conv2d_op.h index 5c9e81732aa72211c2021382cf9a907880c53c17..323e3f7c3bd506c6b63bf4d1152384649f5da575 100644 --- a/paddle/operators/gemm_conv2d_op.h +++ b/paddle/operators/gemm_conv2d_op.h @@ -25,7 +25,7 @@ namespace operators { using Tensor = framework::Tensor; template -class GemmConv2DKernel : public framework::OpKernel { +class GemmConv2DKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("Input"); @@ -98,7 +98,7 @@ class GemmConv2DKernel : public framework::OpKernel { }; template -class GemmConvGrad2DKernel : public framework::OpKernel { +class GemmConvGrad2DKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("Input"); diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 9b1314bfbade8551d98b0fbabb7c2968d7600db5..929008fbcbe03bd6591b0a02252b343c46d00b8f 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -36,6 +36,11 @@ class LookupTableOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", {ids_dims[0], table_dims[1]}); ctx->ShareLoD("Ids", /*->*/ "Out"); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("W")->type()); + } }; class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { @@ -69,6 +74,11 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { auto table_dims = ctx->GetInputDim("W"); ctx->SetOutputDim(framework::GradVarName("W"), table_dims); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("W")->type()); + } }; } // namespace operators diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 62f63b4f3c876e084e2468001e8bcb9310d16a82..c3808fa9a8de031fcae3ac0417e8c4330b2f5aad 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -61,7 +61,7 @@ __global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, } template -class LookupTableCUDAKernel : public framework::OpKernel { +class LookupTableCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto table_t = context.Input("W"); @@ -85,7 +85,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { }; template -class LookupTableGradCUDAKernel : public framework::OpKernel { +class LookupTableGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto ids_t = context.Input("Ids"); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index a1298906dd4b4209644fe06584f70169519de01c..dfead2fc5b25b9be26bb19cd74a3a94daf62cca6 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -23,7 +23,7 @@ namespace operators { using Tensor = framework::Tensor; template -class LookupTableKernel : public framework::OpKernel { +class LookupTableKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto table_t = context.Input("W"); // float tensor @@ -44,7 +44,7 @@ class LookupTableKernel : public framework::OpKernel { }; template -class LookupTableGradKernel : public framework::OpKernel { +class LookupTableGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto ids_t = context.Input("Ids"); diff --git a/paddle/operators/lstm_unit_op.cu b/paddle/operators/lstm_unit_op.cu index 6e5e4978994c281416a65af5f8ffdec688768d63..b1db0d53227148de53b04587b943945f8563346e 100644 --- a/paddle/operators/lstm_unit_op.cu +++ b/paddle/operators/lstm_unit_op.cu @@ -90,7 +90,7 @@ __global__ void LSTMUnitGradientKernel(const int nthreads, const int dim, } template -class LstmUnitOpCUDAKernel : public framework::OpKernel { +class LstmUnitOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -121,7 +121,7 @@ class LstmUnitOpCUDAKernel : public framework::OpKernel { }; template -class LstmUnitGradOpCUDAKernel : public framework::OpKernel { +class LstmUnitGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), diff --git a/paddle/operators/lstm_unit_op.h b/paddle/operators/lstm_unit_op.h index 683034fe15df8cabfdff5e856adb5c0467055064..0dc9a7d9a7aae2e16bc4488731f572f43778baf8 100644 --- a/paddle/operators/lstm_unit_op.h +++ b/paddle/operators/lstm_unit_op.h @@ -33,7 +33,7 @@ inline T tanh(T x) { } template -class LstmUnitKernel : public framework::OpKernel { +class LstmUnitKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), @@ -76,7 +76,7 @@ class LstmUnitKernel : public framework::OpKernel { }; template -class LstmUnitGradKernel : public framework::OpKernel { +class LstmUnitGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index ce31e178d8e375dc59be80a6c05133201308da70..c99286a5b928f1edcd845b01b21b95654c25db07 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -28,7 +28,7 @@ template ; template -class MeanKernel : public framework::OpKernel { +class MeanKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* input = context.Input("X"); @@ -45,7 +45,7 @@ class MeanKernel : public framework::OpKernel { }; template -class MeanGradKernel : public framework::OpKernel { +class MeanGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto OG = context.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/minus_op.h b/paddle/operators/minus_op.h index 6310a4fd5141516cff4fc7acbe1d17913a1b5506..bd9a2790aa2b208c2d3dfc792031283eb6c42397 100644 --- a/paddle/operators/minus_op.h +++ b/paddle/operators/minus_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { template -class MinusKernel : public framework::OpKernel { +class MinusKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* left_tensor = context.Input("X"); diff --git a/paddle/operators/modified_huber_loss_op.cu b/paddle/operators/modified_huber_loss_op.cu index bce760f95e72cfec05b07591e0fa1250168b112f..8854e166cd99ce914d7f9f9bcead3234b0649506 100644 --- a/paddle/operators/modified_huber_loss_op.cu +++ b/paddle/operators/modified_huber_loss_op.cu @@ -39,7 +39,7 @@ struct ModifiedHuberLossBackward { }; template -class ModifiedHuberLossGradGPUKernel : public framework::OpKernel { +class ModifiedHuberLossGradGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("Y"); diff --git a/paddle/operators/modified_huber_loss_op.h b/paddle/operators/modified_huber_loss_op.h index cb51007749e3c59572d4852959f4119ac377decc..aba75efad9c19e3e113b4f09bc1fbd4732f4e187 100644 --- a/paddle/operators/modified_huber_loss_op.h +++ b/paddle/operators/modified_huber_loss_op.h @@ -47,7 +47,7 @@ struct ModifiedHuberLossForward { }; template -class ModifiedHuberLossKernel : public framework::OpKernel { +class ModifiedHuberLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("X"); @@ -73,7 +73,7 @@ class ModifiedHuberLossKernel : public framework::OpKernel { // CPU backward kernel template -class ModifiedHuberLossGradCPUKernel : public framework::OpKernel { +class ModifiedHuberLossGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("Y"); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index ac7136a76933d1f3ead86518c65d589747227631..684b1ea0c0c8ddabc9809cc05ed985e0cc250955 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -28,7 +28,7 @@ template ; template -class MulKernel : public framework::OpKernel { +class MulKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* x = context.Input("X"); @@ -52,7 +52,7 @@ class MulKernel : public framework::OpKernel { }; template -class MulGradKernel : public framework::OpKernel { +class MulGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { int x_num_col_dims = ctx.template Attr("x_num_col_dims"); diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc index 9896d269ccc86d8fdc3bf6375e44ef5bf3e6b9c7..a069127a19a1d0ba4eaa2b3450a1c46262ace3ed 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -50,6 +50,11 @@ class MultiplexOp : public framework::OperatorWithKernel { } ctx->SetOutputDim("Out", in_dim); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.MultiInput("X")[0]->type()); + } }; class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker { @@ -99,6 +104,11 @@ class MultiplexGradOp : public framework::OperatorWithKernel { } ctx->SetOutputsDim(framework::GradVarName("X"), d_ins); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.MultiInput("X")[0]->type()); + } }; } // namespace operators diff --git a/paddle/operators/multiplex_op.cu b/paddle/operators/multiplex_op.cu index 505776612e7119e568493506b113661a839e5bd1..72b1f96eafde37976b4b067b534112b17e02b807 100644 --- a/paddle/operators/multiplex_op.cu +++ b/paddle/operators/multiplex_op.cu @@ -21,7 +21,7 @@ namespace operators { using Tensor = framework::Tensor; template -class MultiplexGPUKernel : public framework::OpKernel { +class MultiplexGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto ins = ctx.MultiInput("X"); @@ -51,7 +51,7 @@ class MultiplexGPUKernel : public framework::OpKernel { }; template -class MultiplexGradGPUKernel : public framework::OpKernel { +class MultiplexGradGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* d_out = ctx.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/multiplex_op.h b/paddle/operators/multiplex_op.h index 637c63a34af394f5f54997c46c00a9ff00577476..ab3cafaa324a29d6f249cf1f73db92e1364eebc8 100644 --- a/paddle/operators/multiplex_op.h +++ b/paddle/operators/multiplex_op.h @@ -23,7 +23,7 @@ namespace paddle { namespace operators { template -class MultiplexCPUKernel : public framework::OpKernel { +class MultiplexCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto ins = ctx.MultiInput("X"); @@ -48,7 +48,7 @@ class MultiplexCPUKernel : public framework::OpKernel { }; template -class MultiplexGradCPUKernel : public framework::OpKernel { +class MultiplexGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* d_out = ctx.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/pad_op.h b/paddle/operators/pad_op.h index 2cc3b945ae5b2e2e93d8531c7f99e4c215d1d806..9534dbf54529e3b9ae2b6640d51fe291e9521927 100644 --- a/paddle/operators/pad_op.h +++ b/paddle/operators/pad_op.h @@ -47,7 +47,7 @@ void PadFunction(const framework::ExecutionContext& context) { } template -class PadKernel : public framework::OpKernel { +class PadKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { int rank = context.Input("X")->dims().size(); @@ -97,7 +97,7 @@ void PadGradFunction(const framework::ExecutionContext& context) { } template -class PadGradKernel : public framework::OpKernel { +class PadGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { size_t rank = diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h index 6b78ed295cbac060d816fb3dd27a4b80145cb1ce..5ad31c2203ae6c9bf6f48bb9ecf9a714597e7da8 100644 --- a/paddle/operators/prelu_op.h +++ b/paddle/operators/prelu_op.h @@ -40,7 +40,7 @@ class PReluFunctor { }; template -class PReluKernel : public framework::OpKernel { +class PReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* x = context.Input("X"); @@ -77,7 +77,7 @@ class PReluGradFunctor { }; template -class PReluGradKernel : public framework::OpKernel { +class PReluGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* dx = context.Output(framework::GradVarName("X")); diff --git a/paddle/operators/rank_loss_op.h b/paddle/operators/rank_loss_op.h index 7df195ff47ecfd79388385eed4bd37b8c9b45979..f184d6efcb496a1d7f38540712b6c431f816482e 100644 --- a/paddle/operators/rank_loss_op.h +++ b/paddle/operators/rank_loss_op.h @@ -21,7 +21,7 @@ namespace paddle { namespace operators { template -class RankLossKernel : public framework::OpKernel { +class RankLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* out_t = ctx.Output("Out"); @@ -42,7 +42,7 @@ class RankLossKernel : public framework::OpKernel { }; template -class RankLossGradKernel : public framework::OpKernel { +class RankLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* d_left_t = diff --git a/paddle/operators/reduce_op.h b/paddle/operators/reduce_op.h index 2fbf94e34f3961a9b3140fb682a7c479f3b71f4d..ba3f3db81dc6251a063d27e597fd7e486e7b6c14 100644 --- a/paddle/operators/reduce_op.h +++ b/paddle/operators/reduce_op.h @@ -87,7 +87,7 @@ struct MaxOrMinGradFunctor { }; template -class ReduceKernel : public framework::OpKernel { +class ReduceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { int rank = context.Input("X")->dims().size(); @@ -141,7 +141,7 @@ class ReduceKernel : public framework::OpKernel { }; template -class ReduceGradKernel : public framework::OpKernel { +class ReduceGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { int rank = context.Input("X")->dims().size(); diff --git a/paddle/operators/reshape_op.h b/paddle/operators/reshape_op.h index 873acf30782d390cdca5e7e864c76e1f743f9a7c..628dfe4c0fadcfeec188d8ae5049a994e3281bc1 100644 --- a/paddle/operators/reshape_op.h +++ b/paddle/operators/reshape_op.h @@ -21,7 +21,7 @@ namespace paddle { namespace operators { template -class ReshapeKernel : public framework::OpKernel { +class ReshapeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* out = ctx.Output("Out"); @@ -39,7 +39,7 @@ class ReshapeKernel : public framework::OpKernel { }; template -class ReshapeGradKernel : public framework::OpKernel { +class ReshapeGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* d_out = ctx.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 35774b940926f77167b8f19597027e74d3477e5b..b43e5d868b38350a74ca1a94880990da6d7da0bc 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -28,7 +28,7 @@ template ; template -class RowwiseAddKernel : public framework::OpKernel { +class RowwiseAddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto out = context.Output("Out"); @@ -50,7 +50,7 @@ class RowwiseAddKernel : public framework::OpKernel { }; template -class RowwiseAddGradKernel : public framework::OpKernel { +class RowwiseAddGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* dout = context.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/scale_op.h b/paddle/operators/scale_op.h index 02fbdc52bbf89c9f2acc5eeaa1197e4ccbca9d31..dc6bc768997f4fdd049bb63bdc11252ab52fcda9 100644 --- a/paddle/operators/scale_op.h +++ b/paddle/operators/scale_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { template -class ScaleKernel : public framework::OpKernel { +class ScaleKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& context) const { auto* tensor = context.Output("Out"); diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 3fc4a39ebc5526bfed61ba667c3cdc214cdd056c..cadd8841b6ab3a3674054240265eb6d4b474db1e 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -48,6 +48,11 @@ class ScatterOp : public framework::OperatorWithKernel { } ctx->SetOutputDim("Out", ref_dims); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("Ref")->type()); + } }; class ScatterGradOp : public framework::OperatorWithKernel { @@ -60,6 +65,11 @@ class ScatterGradOp : public framework::OperatorWithKernel { ctx->GetInputDim("Updates")); ctx->SetOutputDim(framework::GradVarName("Ref"), ctx->GetInputDim("Ref")); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("Ref")->type()); + } }; class ScatterOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/operators/scatter_op.h b/paddle/operators/scatter_op.h index e9595638a86a4a4536ddad4e6f20fd80a54b1608..a8eb54399a932913de208e1ddc90a6ff0dfaa452 100644 --- a/paddle/operators/scatter_op.h +++ b/paddle/operators/scatter_op.h @@ -24,7 +24,7 @@ namespace operators { using Tensor = framework::Tensor; template -class ScatterOpKernel : public framework::OpKernel { +class ScatterOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto *Ref = ctx.Input("Ref"); @@ -40,7 +40,7 @@ class ScatterOpKernel : public framework::OpKernel { }; template -class ScatterGradientOpKernel : public framework::OpKernel { +class ScatterGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto *dRef = ctx.Output(framework::GradVarName("Ref")); diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h index cb80586e88f8d9e31b7b91a54f5e05ac6fa73f0f..752d714125578b2d1f926765b183495ec5cc203e 100644 --- a/paddle/operators/sequence_pool_op.h +++ b/paddle/operators/sequence_pool_op.h @@ -38,7 +38,7 @@ enum SeqPoolType { }; template -class SequencePoolKernel : public framework::OpKernel { +class SequencePoolKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); @@ -85,7 +85,7 @@ class SequencePoolKernel : public framework::OpKernel { }; template -class SequencePoolGradKernel : public framework::OpKernel { +class SequencePoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index f8888f9c362e1c39af42236bb3a23be37aa3ae15..a3fe3308942f98e2c28376b589b6fc930e6878a1 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -25,7 +25,7 @@ template ; template -class SGDOpKernel : public framework::OpKernel { +class SGDOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto param = ctx.Input("param"); diff --git a/paddle/operators/sigmoid_cross_entropy_with_logits_op.h b/paddle/operators/sigmoid_cross_entropy_with_logits_op.h index a6de9043fdbcdcca47407aac0b4892cbad3a9a42..41c619f181c878f08959a8ca461c60af5ffdff2a 100644 --- a/paddle/operators/sigmoid_cross_entropy_with_logits_op.h +++ b/paddle/operators/sigmoid_cross_entropy_with_logits_op.h @@ -21,7 +21,7 @@ namespace operators { // Out = max(X, 0) - X * Labels + log(1 + exp(-abs(X))) template -class SigmoidCrossEntropyWithLogitsKernel : public framework::OpKernel { +class SigmoidCrossEntropyWithLogitsKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { const framework::Tensor *X = context.Input("X"); @@ -48,7 +48,7 @@ class SigmoidCrossEntropyWithLogitsKernel : public framework::OpKernel { // dX = sigmoid(X) - labels template -class SigmoidCrossEntropyWithLogitsGradKernel : public framework::OpKernel { +class SigmoidCrossEntropyWithLogitsGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { const framework::Tensor *X = context.Input("X"); diff --git a/paddle/operators/smooth_l1_loss_op.h b/paddle/operators/smooth_l1_loss_op.h index 0604fb5e1c2f17c702208520a1d23bd5c3c65b5d..39d0070b6c8909b8f433de48038240e851d9d6cf 100644 --- a/paddle/operators/smooth_l1_loss_op.h +++ b/paddle/operators/smooth_l1_loss_op.h @@ -45,7 +45,7 @@ struct SmoothL1LossForward { }; template -class SmoothL1LossKernel : public framework::OpKernel { +class SmoothL1LossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("X"); @@ -115,7 +115,7 @@ struct SmoothL1LossBackward { }; template -class SmoothL1LossGradKernel : public framework::OpKernel { +class SmoothL1LossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("InsideWeight"); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 7220f486be055e1b841a06b15f519717c54f575c..9996536454b1b6b992385787301faa6d66a4cd20 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -26,7 +26,7 @@ template ; template -class SoftmaxKernel : public framework::OpKernel { +class SoftmaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto X = context.Input("X"); @@ -40,7 +40,7 @@ class SoftmaxKernel : public framework::OpKernel { }; template -class SoftmaxGradKernel : public framework::OpKernel { +class SoftmaxGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto Y = context.Input("Y"); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index e2299b254458cdd42dee4683561d4d5c81653fb1..a76489871f30dc8d852b6a783efeff41704fd4a4 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/operators/softmax_with_cross_entropy_op.h" +#include namespace paddle { namespace operators { @@ -115,6 +116,11 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { ctx->ShareLoD("Logits", /*->*/ "Softmax"); ctx->ShareLoD("Logits", /*->*/ "Loss"); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("Logits")->type()); + } }; class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { @@ -149,6 +155,12 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("Logits"), ctx->GetInputDim("Softmax")); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType( + ctx.Input(framework::GradVarName("Loss"))->type()); + } }; } // namespace operators diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu index 1cf4296dccf68aece6fdfb7910a9c68449633b76..c3086e729e493228e06a176e1a64e5e95fad148b 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -53,7 +53,7 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad, } // namespace template -class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { +class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), @@ -73,7 +73,7 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel { }; template -class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { +class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index bf792c1f59e2e43a98c93bddbc2aa63d646dee6f..a8b18504e1c3a1d617b6040d2c68f24f1cb2787d 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -27,7 +27,7 @@ template ; template -class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { +class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { PADDLE_ENFORCE(platform::is_cpu_place(context.GetPlace()), @@ -47,7 +47,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel { }; template -class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { +class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* out_grad = diff --git a/paddle/operators/split_op.h b/paddle/operators/split_op.h index 8ab8e0ee4fea621b34da73507c53846100d61a17..fa26e5f677b18c84b45dd583004d02cab4c1d375 100644 --- a/paddle/operators/split_op.h +++ b/paddle/operators/split_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { template -class SplitOpKernel : public framework::OpKernel { +class SplitOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index 097ac04fc09a10b3b624f491a847e281e41a802c..259ef4029646914f83a112b9c6d7fdf8401483f6 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -28,7 +28,7 @@ template ; template -class SquaredL2DistanceKernel : public framework::OpKernel { +class SquaredL2DistanceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("X"); @@ -68,7 +68,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel { }; template -class SquaredL2DistanceGradKernel : public framework::OpKernel { +class SquaredL2DistanceGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in0 = context.Input("sub_result"); diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index 0b1e9ebaa38d455fb5e3ce8c1a39cbbcdad9a940..7e8fbb9e41c694df9169ea583ce47c33d3bcf2bb 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -22,7 +22,7 @@ template ; template -class SumKernel : public framework::OpKernel { +class SumKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto ins = context.MultiInput("X"); @@ -43,7 +43,7 @@ class SumKernel : public framework::OpKernel { }; template -class SumGradKernel : public framework::OpKernel { +class SumGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* input = context.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/top_k_op.cu b/paddle/operators/top_k_op.cu index 53fe505b77bfac8a33803f082f8e935d3ed403b6..7be6932f1e301d06e0e232367a38bfa673ff45be 100644 --- a/paddle/operators/top_k_op.cu +++ b/paddle/operators/top_k_op.cu @@ -279,7 +279,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int* indices, } template -class TopkOpCUDAKernel : public framework::OpKernel { +class TopkOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), diff --git a/paddle/operators/top_k_op.h b/paddle/operators/top_k_op.h index ef66acc1d569282a42be64b7a5e90f3fbdb20690..4b248faa120bcfd20e70d288cce2d485d3e6371e 100644 --- a/paddle/operators/top_k_op.h +++ b/paddle/operators/top_k_op.h @@ -28,7 +28,7 @@ template ; template -class TopkKernel : public framework::OpKernel { +class TopkKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // Get the top k elements of each row of input tensor diff --git a/paddle/operators/transpose_op.h b/paddle/operators/transpose_op.h index ea299dce72ad340b0a65ee50582dc156b5ad7abb..aaa3f47ab5545accd4d1108e0ad6f5a3062186d0 100644 --- a/paddle/operators/transpose_op.h +++ b/paddle/operators/transpose_op.h @@ -38,7 +38,7 @@ void EigenTranspose(const framework::ExecutionContext& context, } template -class TransposeKernel : public framework::OpKernel { +class TransposeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* x = context.Input("X"); @@ -73,7 +73,7 @@ class TransposeKernel : public framework::OpKernel { }; template -class TransposeGradKernel : public framework::OpKernel { +class TransposeGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* out_grad = diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 2771df56086ff261728af84edcdf01cda3e45e9f..97b1d0bed4595cb750e4d2122f294f10edfbe0ff 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -21,7 +21,7 @@ namespace operators { // Use std::random and thrust::random(thrust is a std library in CUDA) to // implement uniform random. template -class CPUUniformRandomKernel : public framework::OpKernel { +class CPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* tensor = ctx.Output("Out"); @@ -62,6 +62,11 @@ class UniformRandomOp : public framework::OperatorWithKernel { } ctx->SetOutputDim("Out", framework::make_ddim(temp)); } + + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return static_cast(Attr("data_type")); + } }; class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { @@ -80,6 +85,8 @@ Used to initialize tensor with uniform random generator. "Random seed of uniform random. " "0 means generate a seed by system") .SetDefault(0); + AddAttr("data_type", "output tensor data type") + .SetDefault(framework::DataType::FP32); } }; } // namespace operators diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index 6614b53b3f990d10c82633f3c1f079acea0cd827..5612ce9eb1c644d6271b4a9bb949f685848e05c0 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -40,7 +40,7 @@ struct UniformGenerator { // Use std::random and thrust::random(thrust is a std library in CUDA) to // implement uniform random. template -class GPUUniformRandomKernel : public framework::OpKernel { +class GPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index b31515e1f028acac885a506ff1c20479407a05e3..856e54df89c1c18ade040957188a2fbda0901473 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -47,7 +47,7 @@ bool is_cpu_place(const Place &p) { } bool places_are_same_class(const Place &p1, const Place &p2) { - return is_gpu_place(p1) == is_gpu_place(p2); + return p1.which() == p2.which(); } std::ostream &operator<<(std::ostream &os, const Place &p) { diff --git a/paddle/platform/place.h b/paddle/platform/place.h index 1117476bb37f1b0f3876c55e610803d5ee2558ce..0efc6932349a5b3ad295d195a16737a642e18943 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/platform/variant.h" namespace paddle { @@ -46,8 +47,18 @@ struct IsGPUPlace : public boost::static_visitor { bool operator()(const GPUPlace &gpu) const { return true; } }; +// Define the max number of Place in bit length. i.e., the max number of places +// should be less equal than 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT) +#define NUM_PLACE_TYPE_LIMIT_IN_BIT 4 + typedef boost::variant Place; +// static check number of place types is less equal than +// 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT) +BOOST_MPL_ASSERT((boost::mpl::less_equal< + Place::types::size, + boost::mpl::long_<1 << NUM_PLACE_TYPE_LIMIT_IN_BIT>>)); + void set_place(const Place &); const Place &get_place(); diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h index c2257af1b5dd1a1e284979bf17e1a947072baa85..16ee00efe7a9b0406f8459e19a55e1e1b9ca7419 100644 --- a/paddle/platform/variant.h +++ b/paddle/platform/variant.h @@ -29,4 +29,6 @@ #endif #endif +#include +#include #include diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index f0d5a6f9ff963ecd80d0c261daff56bff50663d4..10621e90eebf5cf197893a548c32d8b67af8e0b6 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -42,7 +42,7 @@ template struct CastToPyBufferImpl { using CUR_TYPE = typename std::tuple_element>::type; py::buffer_info operator()(framework::Tensor &tensor) { - if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) { + if (std::type_index(typeid(CUR_TYPE)) == tensor.type()) { auto dim_vec = framework::vectorize(tensor.dims()); std::vector dims_outside; std::vector strides; @@ -56,13 +56,13 @@ struct CastToPyBufferImpl { prod *= dims_outside[i - 1]; } framework::Tensor dst_tensor; - if (paddle::platform::is_gpu_place(tensor.holder_->place())) { + if (paddle::platform::is_gpu_place(tensor.place())) { dst_tensor.CopyFrom(tensor, platform::CPUPlace()); - } else if (paddle::platform::is_cpu_place(tensor.holder_->place())) { + } else if (paddle::platform::is_cpu_place(tensor.place())) { dst_tensor = tensor; } return py::buffer_info( - dst_tensor.mutable_data(dst_tensor.holder_->place()), + dst_tensor.mutable_data(dst_tensor.place()), sizeof(CUR_TYPE), py::format_descriptor::format(), (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); } else {