Commit 1f3300ba authored by xiaolil1

modify quantization op name and fmt source

Parent 350a58b0
@@ -278,7 +278,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto* bias = ctx.HasInput("Bias") ? ctx.Input<Tensor>("Bias") : nullptr;
     auto* output = ctx.Output<Tensor>("Output");
-    bool is_INT8 = ctx.HasInput("Bias")? true : false;
+    bool is_INT8 = ctx.HasInput("Scale_in")? true : false;
     auto* scale_in = ctx.HasInput("Scale_in") ? ctx.Input<Tensor>("Scale_in") : nullptr;
     auto* scale_in_eltwise = ctx.HasInput("Scale_in_eltwise")? ctx.Input<Tensor>("Scale_in_eltwise") : nullptr;
     auto* scale_weights = ctx.HasInput("Scale_weights")? ctx.Input<Tensor>("Scale_weights") : nullptr;
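The gate above keys the INT8 path off the presence of the "Scale_in" input instead of "Bias", so a biased FP32 convolution no longer takes the INT8 branch by accident. A minimal sketch of the intended check (`HasInput` already returns a bool, so the ternary is redundant):

```cpp
// INT8 execution is requested iff a quantization scale input is wired up.
bool is_INT8 = ctx.HasInput("Scale_in");
```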
@@ -478,7 +478,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     if (fuse_relu) {
       constexpr float scale = 1.0f;
       constexpr float negative_slope = 0.0f;
-      constexpr float placeholder = 0.0f;
+      constexpr float placeholder = 0.0f;  // beta
       post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
                                      negative_slope, placeholder);
     }
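For context on the `//beta` note: `append_eltwise` takes `(scale, algorithm, alpha, beta)`; for `eltwise_relu` the alpha slot carries the negative slope and beta is unused, hence the placeholder. A hedged sketch of how such a post-op chain is attached to the convolution attributes (`conv_attr` is an illustrative name, not from this diff):

```cpp
mkldnn::post_ops post_operations;
// Fuse ReLU after the convolution: out = max(conv_out, 0).
// append_eltwise(scale, alg, alpha, beta): alpha is the negative slope
// for eltwise_relu, beta is ignored (the "placeholder" above).
post_operations.append_eltwise(1.0f, mkldnn::algorithm::eltwise_relu, 0.0f, 0.0f);
mkldnn::primitive_attr conv_attr;  // illustrative
conv_attr.set_post_ops(post_operations);
```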
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "mkldnn.hpp"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/operators/dequantization_op.h"
+#include "paddle/fluid/operators/dequantize_op.h"
 #include "paddle/fluid/framework/data_layout_transform.h"
 namespace paddle {
@@ -36,29 +36,6 @@ template <typename DeviceContext, typename T>
 class DeQuantOpKernel : public framework::OpKernel<T> {
  public:
-  // MKLDNNDataType ToMKLDNNDataType(const std::type_index type) {
-  //   static const std::map<std::type_index, MKLDNNDataType> dict{
-  //       {std::type_index(typeid(float)), MKLDNNDataType::f32},  // NOLINT
-  //       {std::type_index(typeid(char)), MKLDNNDataType::s8},  // NOLINT
-  //       {std::type_index(typeid(unsigned char)), MKLDNNDataType::u8},
-  //       {std::type_index(typeid(int16_t)), MKLDNNDataType::s16},
-  //       {std::type_index(typeid(int32_t)), MKLDNNDataType::s32}};
-  //   auto iter = dict.find(type);
-  //   if (iter != dict.end()) return iter->second;
-  //   return MKLDNNDataType::data_undef;
-  // }
-  //mkldnn::memory::data_type ToMKLDNNDataType(const std::type_index type) const override{
-  //  static const std::map<std::type_index, mkldnn::memory::data_type> dict{
-  //      {std::type_index(typeid(float)), mkldnn::memory::data_type::f32},  // NOLINT
-  //      {std::type_index(typeid(char)), mkldnn::memory::data_type::s8},  // NOLINT
-  //      {std::type_index(typeid(unsigned char)), mkldnn::memory::data_type::u8},
-  //      {std::type_index(typeid(int16_t)), mkldnn::memory::data_type::s16},
-  //      {std::type_index(typeid(int32_t)), mkldnn::memory::data_type::s32}};
-  //  auto iter = dict.find(type);
-  //  if (iter != dict.end()) return iter->second;
-  //  return mkldnn::memory::data_type::data_undef;
-  //}
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto* scale = ctx.Input<Tensor>("Scale");
@@ -77,15 +54,14 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
     mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type());
-    mkldnn::memory::format src_fmt = input->format();
+    mkldnn::memory::format src_fmt = memory::format::nhwc;  // input->format();
     mkldnn::primitive_attr attri;
     int mask = 0;
     attri.set_output_scales(mask, scale_data);
-    //attri.set_int_output_round_mode(round_nearest); //FIX ME
     auto src_md = platform::MKLDNNMemDesc(
-        {src_tz}, src_dt, src_fmt); //FIX ME WITH S8
+        {src_tz}, src_dt, src_fmt);
     auto src_pd = mkldnn::memory::primitive_desc{src_md, engine};
     auto src_memory = std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
     std::shared_ptr<primitive::at> src_memory_p = std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
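The hunk ends before the primitive itself is built. A hedged sketch of how the dequantize typically completes: an mkldnn reorder that applies `attri`'s output scales while converting INT8 data to FP32 (the dst-side names below are assumptions, not shown in this diff):

```cpp
// Assumed destination setup, mirroring the src side above.
auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
                                      memory::format::nhwc);
auto dst_pd = mkldnn::memory::primitive_desc{dst_md, engine};
auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));

// The reorder multiplies each element by the output scale while copying
// s8/u8 values into the f32 buffer, i.e. it performs the dequantization.
auto reorder_pd = mkldnn::reorder::primitive_desc(src_pd, dst_pd, attri);
auto reorder_p = mkldnn::reorder(reorder_pd, *src_memory, dst_memory);
mkldnn::stream(mkldnn::stream::kind::eager).submit({reorder_p}).wait();
```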
@@ -130,8 +106,8 @@ This op will quantize data from INT8 to FP32
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(dequantization, ops::DeQuantOp, ops::DeQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OP_CPU_KERNEL(dequantization, ops::DeQuantOpKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(dequantize, ops::DeQuantOpKernel<paddle::platform::CPUDeviceContext, float>);
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "mkldnn.hpp"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/operators/quantization_op.h"
+#include "paddle/fluid/operators/quantize_op.h"
 namespace paddle {
 namespace operators {
@@ -53,7 +53,6 @@ class QuantOpKernel : public framework::OpKernel<T> {
     mkldnn::primitive_attr attri;
     int mask = 0;
     attri.set_output_scales(mask, scale_data);
-    //attri.set_int_output_round_mode(round_nearest); //FIX ME
     auto src_md = platform::MKLDNNMemDesc(
         {src_tz}, memory::data_type::f32, input->format());
@@ -102,9 +101,9 @@ This op will quantize data from FP32 to INT8
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(quantization, ops::QuantOp, ops::QuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OP_CPU_KERNEL(quantization, ops::QuantOpKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(quantize, ops::QuantOpKernel<paddle::platform::CPUDeviceContext, float>);
 //REGISTER_OP_KERNEL(quantization, MKLDNN, paddle::platform::CPUPlace, ops::QuantOpKernel<paddle::platform::CPUDeviceContext, float>);
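Quantize is the same reorder pattern in the opposite direction: the attr's output scale maps FP32 values into the INT8 range, saturating to the destination type. A hedged sketch with assumed dst names (u8 shown; s8 is analogous):

```cpp
// src_pd/src_memory are assumed to be built from src_md as in the
// dequantize kernel; output_data is the op's u8 output buffer.
auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::u8,
                                      memory::format::nhwc);
auto dst_pd = mkldnn::memory::primitive_desc{dst_md, engine};
auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<uint8_t>(output_data));
auto reorder_pd = mkldnn::reorder::primitive_desc(src_pd, dst_pd, attri);
auto reorder_p = mkldnn::reorder(reorder_pd, src_memory, dst_memory);
mkldnn::stream(mkldnn::stream::kind::eager).submit({reorder_p}).wait();
```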
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "mkldnn.hpp"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/fluid/operators/requantization_op.h"
+#include "paddle/fluid/operators/requantize_op.h"
 #include "paddle/fluid/framework/data_layout_transform.h"
 namespace paddle {
@@ -48,8 +48,8 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
     mkldnn::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(input->type());
     mkldnn::memory::data_type dst_dt = paddle::framework::ToMKLDNNDataType(output->type());
-    mkldnn::memory::format src_fmt = input->format();
-    mkldnn::memory::format dst_fmt = output->format();
+    mkldnn::memory::format src_fmt = memory::format::nhwc;  // input->format();
+    mkldnn::memory::format dst_fmt = memory::format::nhwc;  // output->format();
     const T* input_data = input->data<T>();
     T* output_data = output->mutable_data<T>(ctx.GetPlace());
@@ -107,7 +107,7 @@ This op will requantize data from INT8 to INT8
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(requantization, ops::ReQuantOp, ops::ReQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(requantize, ops::ReQuantOp, ops::ReQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OP_CPU_KERNEL(requantization, ops::ReQuantOpKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(requantize, ops::ReQuantOpKernel<paddle::platform::CPUDeviceContext, float>);
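Requantize chains the two conversions without a round trip through FP32: an INT8-to-INT8 reorder whose single output scale is the ratio of output to input quantization scales. A hedged sketch under the same assumptions as above (`scale_out_data`/`scale_in_data` are illustrative names):

```cpp
// Rescale values quantized with the input scale into the output range.
mkldnn::primitive_attr requant_attr;
int mask = 0;  // one scale for the whole tensor
requant_attr.set_output_scales(mask, {scale_out_data / scale_in_data});
auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
auto dst_md = platform::MKLDNNMemDesc({dst_tz}, dst_dt, dst_fmt);
auto reorder_pd = mkldnn::reorder::primitive_desc(
    mkldnn::memory::primitive_desc{src_md, engine},
    mkldnn::memory::primitive_desc{dst_md, engine}, requant_attr);
```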