Commit d1e67e86 authored by xiaoli.liu@intel.com

Modify quantize- and dequantize-related files to fix the MKL-DNN library check issue.

Parent ae4e9075
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/framework/data_layout_transform.h"
namespace paddle {
namespace operators {
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
template <typename T>
class DeQuantOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_data = ctx.Attr<float>("Scale");
    auto* output = ctx.Output<Tensor>("Output");
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();

    const T* input_data = input->data<T>();
    float* output_data = output->mutable_data<float>(ctx.GetPlace());
    // Dequantize via a reorder whose output scale is 1/Scale.
    std::vector<float> reorder_scale = {1.0f / scale_data};

    std::vector<primitive> pipeline;
    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
    mkldnn::memory::data_type src_dt =
        paddle::framework::ToMKLDNNDataType(input->type());
    mkldnn::memory::format src_fmt = input->format();

    // mask == 0 applies a single scale to the whole tensor.
    mkldnn::primitive_attr attri;
    int mask = 0;
    attri.set_output_scales(mask, reorder_scale);

    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
    auto src_memory =
        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
    std::shared_ptr<primitive::at> src_memory_p =
        std::make_shared<primitive::at>(*src_memory);

    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
                                          memory::format::nchw);
    auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));

    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, dst_pd, attri));
    auto reorder_p = std::shared_ptr<reorder>(
        new reorder(*reorder_pd, *src_memory_p, dst_memory));
    pipeline.push_back(*reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_format(GetMKLDNNFormat(dst_memory));
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace,
ops::DeQuantOpKernel<uint8_t>);
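
Numerically, the reorder above with output_scales = {1.0f / scale_data} simply
divides every INT8 element by Scale. A minimal standalone sketch of that
semantics in plain C++ (illustrative only; DequantizeRef is not part of this
commit):

#include <cstdint>
#include <vector>

// Reference semantics of the dequantize kernel: each INT8 element is
// multiplied by 1/Scale to recover an FP32 value (one scale, mask == 0).
std::vector<float> DequantizeRef(const std::vector<uint8_t>& quantized,
                                 float scale) {
  std::vector<float> result(quantized.size());
  for (size_t i = 0; i < quantized.size(); ++i) {
    result[i] = static_cast<float>(quantized[i]) * (1.0f / scale);
  }
  return result;
}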
@@ -12,74 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "mkldnn.hpp"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/operators/dequantize_op.h"
-#include "paddle/fluid/framework/data_layout_transform.h"
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif

 namespace paddle {
 namespace operators {

-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::reorder;
-using platform::to_void_cast;
-using Tensor = framework::Tensor;
-using framework::DataLayout;
-using mkldnn::stream;
-using platform::GetMKLDNNFormat;
-
-template <typename T>
-class DeQuantOpKernel : public framework::OpKernel<T> {
-  [Compute() body removed; identical to the DeQuantOpKernel in the new
-  MKL-DNN kernel file above]
-};
-
-framework::OpKernelType DeQuantOp::GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
+framework::OpKernelType DeQuantOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
   framework::LibraryType library_{framework::LibraryType::kPlain};
   std::string data_format = ctx.Attr<std::string>("data_format");
   framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
@@ -93,13 +35,15 @@ framework::OpKernelType DeQuantOp::GetExpectedKernelType(const framework::Execut
 #endif
   return framework::OpKernelType(
-      framework::ToDataType(ctx.Input<framework::Tensor>("Input")->type()),ctx.GetPlace(),layout_, library_);
+      framework::ToDataType(ctx.Input<framework::Tensor>("Input")->type()),
+      ctx.GetPlace(), layout_, library_);
 }

 void DeQuantOpMaker::Make() {
   AddInput("Input","input data");
   AddOutput("Output","output data");
-  AddAttr<float>("Scale","scale data").SetDefault({1.0f});
+  AddAttr<float>("Scale","scale data")
+      .SetDefault({1.0f});
   AddComment(R"DOC(This op will dequantize data from INT8 to FP32)DOC");
 }
@@ -108,7 +52,5 @@ void DeQuantOpMaker::Make() {
 namespace ops = paddle::operators;

-REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace, ops::DeQuantOpKernel<uint8_t>, ops::DeQuantOpKernel<int8_t>);
+REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
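
The hunks above elide the body of GetExpectedKernelType that performs the
MKL-DNN library check this commit fixes. In PaddlePaddle operators of this
period the check typically follows the pattern sketched below; this is an
assumption for illustration, not the exact code hidden by the hunk:

#ifdef PADDLE_WITH_MKLDNN
  // Assumed shape of the elided check: select the MKL-DNN kernel only when
  // no library has been chosen yet and MKL-DNN can be used for this op.
  if (library_ == framework::LibraryType::kPlain &&
      platform::CanMKLDNNBeUsed(ctx)) {
    library_ = framework::LibraryType::kMKLDNN;
    layout_ = framework::DataLayout::kMKLDNN;
  }
#endif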
@@ -28,15 +28,14 @@ class DeQuantOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

-  void InferShape(framework::InferShapeContext* ctx) const override{
+  void InferShape(framework::InferShapeContext* ctx) const override {
     ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
     ctx->ShareLoD("Input", /*->*/ "Output");
   }

  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override;
 };

 class DeQuantOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -53,4 +52,3 @@ class DeQuantGradOp : public framework::OperatorWithKernel {
 } // namespace operators
 } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/quantize_op.h"
namespace paddle {
namespace operators {
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::reorder;
using platform::to_void_cast;
using Tensor = framework::Tensor;
using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
template <typename T>
class QuantOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_data = ctx.Attr<float>("Scale");
    auto* output = ctx.Output<Tensor>("Output");
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();

    std::vector<primitive> pipeline;
    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());

    const T* input_data = input->data<T>();

    // Quantize via a reorder whose output scale is Scale;
    // mask == 0 applies a single scale to the whole tensor.
    mkldnn::primitive_attr attri;
    int mask = 0;
    attri.set_output_scales(mask, {scale_data});

    auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
                                          input->format());
    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
    auto src_memory =
        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
    std::shared_ptr<primitive::at> src_memory_p =
        std::make_shared<primitive::at>(*src_memory);

    // Signed input maps to an s8 destination, unsigned input to u8.
    bool is_negative = ctx.Attr<bool>("is_negative_input");
    mkldnn::memory::primitive_desc dst_pd;
    std::shared_ptr<mkldnn::memory> dst_memory;
    if (is_negative) {
      int8_t* output_data = output->mutable_data<int8_t>(ctx.GetPlace());
      auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::s8,
                                            memory::format::nhwc);
      dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
      dst_memory.reset(
          new mkldnn::memory(dst_pd, to_void_cast<int8_t>(output_data)));
    } else {
      uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
      auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::u8,
                                            memory::format::nhwc);
      dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
      dst_memory.reset(
          new mkldnn::memory(dst_pd, to_void_cast<uint8_t>(output_data)));
    }

    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
        new reorder::primitive_desc(src_pd, dst_pd, attri));
    auto reorder_p = std::shared_ptr<reorder>(
        new reorder(*reorder_pd, *src_memory_p, *dst_memory));
    pipeline.push_back(*reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_layout(DataLayout::kMKLDNN);
    output->set_format(GetMKLDNNFormat(*dst_memory));
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// TODO: Support FP32->S8 quantization.
REGISTER_OP_KERNEL(quantize, MKLDNN, ::paddle::platform::CPUPlace,
ops::QuantOpKernel<float>);
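
In the quantize direction, the reorder rounds and saturates input * Scale into
s8 or u8 depending on is_negative_input. A minimal scalar sketch of the signed
case (illustrative only; QuantizeRefS8 is not part of this commit, and
round-to-nearest with saturation is assumed to match the MKL-DNN reorder):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Reference semantics of the quantize kernel for signed output:
// scale, round to nearest, then saturate to the int8 range.
std::vector<int8_t> QuantizeRefS8(const std::vector<float>& input,
                                  float scale) {
  std::vector<int8_t> result(input.size());
  for (size_t i = 0; i < input.size(); ++i) {
    float scaled = std::nearbyint(input[i] * scale);
    scaled = std::min(127.0f, std::max(-128.0f, scaled));
    result[i] = static_cast<int8_t>(scaled);
  }
  return result;
}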
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */

-#include "mkldnn.hpp"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/operators/quantize_op.h"
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif

 namespace paddle {
 namespace operators {

-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::reorder;
-using platform::to_void_cast;
-using Tensor = framework::Tensor;
-using framework::DataLayout;
-using mkldnn::stream;
-using platform::GetMKLDNNFormat;
-
-template <typename T>
-class QuantOpKernel : public framework::OpKernel<T> {
-  [Compute() body removed; identical to the QuantOpKernel in the new
-  MKL-DNN kernel file above]
-};
-
-framework::OpKernelType QuantOp::GetExpectedKernelType(const framework::ExecutionContext& ctx) const {
+framework::OpKernelType QuantOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
   framework::LibraryType library_{framework::LibraryType::kPlain};
   std::string data_format = ctx.Attr<std::string>("data_format");
   framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
@@ -100,13 +35,13 @@ framework::OpKernelType QuantOp::GetExpectedKernelType(const framework::Executio
 #endif
   return framework::OpKernelType(
-      framework::ToDataType(ctx.Input<framework::Tensor>("Input")->type()),ctx.GetPlace(),layout_, library_);
+      framework::ToDataType(ctx.Input<framework::Tensor>("Input")->type()),
+      ctx.GetPlace(), layout_, library_);
 }

 void QuantOpMaker::Make() {
-  AddInput("Input","input data");
-  AddOutput("Output","output data");
+  AddInput("Input", "input data");
+  AddOutput("Output", "output data");
   AddAttr<bool>("is_negative_input",
                 "(bool, default false) Only used in mkldnn INT8 kernel")
       .SetDefault(false);
@@ -119,12 +54,5 @@ void QuantOpMaker::Make() {
 } // namespace paddle

 namespace ops = paddle::operators;

-//TODO Support FP32->S8 quantization.
-REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OP_KERNEL(quantize, MKLDNN, ::paddle::platform::CPUPlace, ops::QuantOpKernel<float>);
+REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
@@ -28,28 +28,19 @@ class QuantOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

-  void InferShape(framework::InferShapeContext* ctx) const override{
+  void InferShape(framework::InferShapeContext* ctx) const override {
     ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
     ctx->ShareLoD("Input", /*->*/ "Output");
   }

  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override;
 };

 class QuantOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override;
-  //void Make() {
-  //  AddInput("Input","input");
-  //  AddInput("Scale","scale...");
-  //  AddOutput("Output","output");
-  //}
 };

 } // namespace operators
 } // namespace paddle