enable conv_int8 and quantization

cc50f7d5 · xiaolil1 · 606dfb13 · cc50f7d5 · cc50f7d5 · cc50f7d5
8 changed files
--- a/paddle/fluid/operators/conv_int8_op.cc
+++ b/paddle/fluid/operators/conv_int8_op.cc
--- a/paddle/fluid/operators/conv_int8_op.h
+++ b/paddle/fluid/operators/conv_int8_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+using framework::OpKernelType;
+// Operator class for the int8 convolution, built on OperatorWithKernel and
+// inheriting its constructors.
+class Convint8Op : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  // Intentionally empty here: no output shape is derived in this override.
+  // NOTE(review): confirm the Output dims are set somewhere else.
+  void InferShape(framework::InferShapeContext* ctx) const override{}
+ protected:
+  // Declared only; the definition is not part of this header.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
+};
+// Proto/checker maker for Convint8Op. Make() is declared here and must be
+// defined in a translation unit.
+class Convint8OpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override;
+};
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/operators/dequantization_op.cc
+++ b/paddle/fluid/operators/dequantization_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "mkldnn.hpp"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/operators/dequantization_op.h"
+#include "paddle/fluid/framework/data_layout_transform.h"
+namespace paddle {
+namespace operators {
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::reorder;
+using platform::to_void_cast;
+using Tensor = framework::Tensor;
+using framework::DataLayout;
+using mkldnn::stream;
+using platform::GetMKLDNNFormat;
+//using MKLDNNDataType = mkldnn::memory::data_type;
+// Kernel of the `dequantization` operator.
+//
+// Runs an MKL-DNN reorder from the "Input" tensor (data type and memory format
+// are taken from the tensor itself) into an f32 / nchw "Output" tensor, using
+// the first element of the "Scale" tensor as the reorder's output scale.
+template <typename DeviceContext, typename T>
+class DeQuantOpKernel : public framework::OpKernel<T> {
+ public:
+  // Reads "Input" and "Scale", writes "Output".
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("Input");
+    auto* scale = ctx.Input<Tensor>("Scale");
+    auto* output = ctx.Output<Tensor>("Output");
+    auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& engine = dev_ctx.GetEngine();
+    const T* input_data = input->data<T>();
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    // Only the first element of Scale is used; with mask 0 it is passed as a
+    // single common output scale.
+    std::vector<T> scale_data = {*(scale->data<T>())};
+    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
+    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
+    mkldnn::memory::data_type src_dt =
+        paddle::framework::ToMKLDNNDataType(input->type());
+    mkldnn::memory::format src_fmt = input->format();
+    mkldnn::primitive_attr attri;
+    int mask = 0;
+    attri.set_output_scales(mask, scale_data);
+    // attri.set_int_output_round_mode(round_nearest); //FIX ME
+    // NOTE(review): the input buffer is accessed as T while src_dt comes from
+    // the tensor's runtime type -- confirm both agree for int8 inputs.
+    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
+    auto src_pd = mkldnn::memory::primitive_desc{src_md, engine};
+    mkldnn::memory src_memory(src_pd, to_void_cast<T>(input_data));
+    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
+                                          memory::format::nchw);
+    auto dst_pd = mkldnn::memory::primitive_desc{dst_md, engine};
+    mkldnn::memory dst_memory(dst_pd, to_void_cast<T>(output_data));
+    // reorder::primitive_desc expects (input, output): source first.
+    reorder::primitive_desc reorder_pd(src_pd, dst_pd, attri);
+    reorder reorder_prim(reorder_pd, primitive::at(src_memory), dst_memory);
+    std::vector<primitive> pipeline;
+    pipeline.push_back(reorder_prim);
+    // A primitive does nothing until it is submitted; run it and wait so the
+    // stack-allocated memory objects outlive the execution.
+    stream(stream::kind::eager).submit(pipeline).wait();
+  }
+};
+// Picks the kernel from the data type currently held by the "Input" tensor
+// together with the device context of this execution.
+framework::OpKernelType DeQuantOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  const auto* in_tensor = ctx.Input<framework::LoDTensor>("Input");
+  const auto in_dtype = framework::ToDataType(in_tensor->type());
+  return framework::OpKernelType(in_dtype, ctx.device_context());
+}
+// Declares the operator's proto: two inputs ("Input", "Scale"), one output
+// ("Output") and the operator-level comment.
+void DeQuantOpMaker::Make() {
+  AddInput("Input","input");
+  AddInput("Scale","scale...");
+  AddOutput("Output","output");
+  // The OpProto needs an operator comment in addition to the per-variable
+  // ones; without it the proto is left incomplete.
+  AddComment(
+      "Dequantization operator: converts Input to f32 using Scale through an "
+      "MKL-DNN reorder.");
+}
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+// Register the `dequantization` operator with its proto maker and the default
+// grad-op desc maker, then attach the float instantiation of the kernel as
+// its CPU kernel.
+// NOTE(review): no `dequantization_grad` kernel is registered in this file --
+// confirm whether a gradient op is actually wanted.
+REGISTER_OPERATOR(dequantization, ops::DeQuantOp, ops::DeQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OP_CPU_KERNEL(dequantization, ops::DeQuantOpKernel<paddle::platform::CPUDeviceContext, float>);
--- a/paddle/fluid/operators/dequantization_op.h
+++ b/paddle/fluid/operators/dequantization_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+using framework::OpKernelType;
+using framework::Tensor;
+// Operator class of `dequantization`, built on OperatorWithKernel and
+// inheriting its constructors.
+class DeQuantOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  // Output has the same dimensions as Input. The kernel sizes its destination
+  // memory from the Output dims, so they have to be set here.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
+  }
+ protected:
+  // Defined in dequantization_op.cc.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
+};
+// Proto/checker maker for DeQuantOp. Make() is only declared in this header.
+class DeQuantOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override;
+};
+// Gradient operator class for dequantization. Its InferShape override is
+// empty, so it sets no output shapes.
+// NOTE(review): nothing in this header registers or uses this class --
+// confirm it is needed.
+class DeQuantGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {}
+};
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/operators/quantization_op.cc
+++ b/paddle/fluid/operators/quantization_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "mkldnn.hpp"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/operators/quantization_op.h"
+namespace paddle {
+namespace operators {
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::reorder;
+using platform::to_void_cast;
+using Tensor = framework::Tensor;
+using framework::DataLayout;
+using mkldnn::stream;
+using platform::GetMKLDNNFormat;
+// Kernel of the `quantization` operator.
+//
+// Runs an MKL-DNN reorder from the f32 "Input" tensor (memory format taken
+// from the tensor) into a u8 / nhwc destination backed by "Output", using the
+// first element of the "Scale" tensor as the reorder's output scale.
+template <typename DeviceContext, typename T>
+class QuantOpKernel : public framework::OpKernel<T> {
+ public:
+  // Reads "Input" and "Scale", writes "Output".
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("Input");
+    auto* scale = ctx.Input<Tensor>("Scale");
+    auto* output = ctx.Output<Tensor>("Output");
+    auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& engine = dev_ctx.GetEngine();
+    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
+    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
+    const T* input_data = input->data<T>();
+    // NOTE(review): Output is allocated as T although the destination
+    // descriptor below is u8 -- confirm the intended output element type.
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    // Only the first element of Scale is used; with mask 0 it is passed as a
+    // single common output scale.
+    std::vector<T> scale_data = {*(scale->data<T>())};
+    mkldnn::primitive_attr attri;
+    int mask = 0;
+    attri.set_output_scales(mask, scale_data);
+    // attri.set_int_output_round_mode(round_nearest); //FIX ME
+    auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
+                                          input->format());
+    auto src_pd = mkldnn::memory::primitive_desc{src_md, engine};
+    mkldnn::memory src_memory(src_pd, to_void_cast<T>(input_data));
+    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::u8,
+                                          memory::format::nhwc);
+    auto dst_pd = mkldnn::memory::primitive_desc{dst_md, engine};
+    mkldnn::memory dst_memory(dst_pd, to_void_cast<T>(output_data));
+    // reorder::primitive_desc expects (input, output): source first.
+    reorder::primitive_desc reorder_pd(src_pd, dst_pd, attri);
+    reorder reorder_prim(reorder_pd, primitive::at(src_memory), dst_memory);
+    std::vector<primitive> pipeline;
+    pipeline.push_back(reorder_prim);
+    // A primitive does nothing until it is submitted; run it and wait so the
+    // stack-allocated memory objects outlive the execution.
+    stream(stream::kind::eager).submit(pipeline).wait();
+  }
+};
+// Picks the kernel from the data type currently held by the "Input" tensor
+// together with the device context of this execution.
+framework::OpKernelType QuantOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  const auto* in_tensor = ctx.Input<framework::LoDTensor>("Input");
+  const auto in_dtype = framework::ToDataType(in_tensor->type());
+  return framework::OpKernelType(in_dtype, ctx.device_context());
+}
+// Declares the operator's proto: two inputs ("Input", "Scale"), one output
+// ("Output") and the operator-level comment.
+void QuantOpMaker::Make() {
+  AddInput("Input","input");
+  AddInput("Scale","scale...");
+  AddOutput("Output","output");
+  // The OpProto needs an operator comment in addition to the per-variable
+  // ones; without it the proto is left incomplete.
+  AddComment(
+      "Quantization operator: converts f32 Input to u8 using Scale through "
+      "an MKL-DNN reorder.");
+}
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+// Register the `quantization` operator with its proto maker and the default
+// grad-op desc maker, then attach the float instantiation of the kernel as
+// its CPU kernel.
+REGISTER_OPERATOR(quantization, ops::QuantOp, ops::QuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OP_CPU_KERNEL(quantization, ops::QuantOpKernel<paddle::platform::CPUDeviceContext, float>);
+// Disabled alternative: register the same kernel under the MKLDNN library tag.
+//REGISTER_OP_KERNEL(quantization, MKLDNN, paddle::platform::CPUPlace, ops::QuantOpKernel<paddle::platform::CPUDeviceContext, float>);
--- a/paddle/fluid/operators/quantization_op.h
+++ b/paddle/fluid/operators/quantization_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+using framework::OpKernelType;
+using framework::Tensor;
+// Operator class of `quantization`, built on OperatorWithKernel and
+// inheriting its constructors.
+class QuantOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  // Output has the same dimensions as Input. The kernel sizes its destination
+  // memory from the Output dims, so they have to be set here.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
+  }
+ protected:
+  // Defined in quantization_op.cc.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
+};
+// Proto/checker maker for QuantOp. Make() is only declared in this header;
+// the inputs and outputs are added by its out-of-line definition.
+class QuantOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override;
+};
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/operators/requantization_op.cc
+++ b/paddle/fluid/operators/requantization_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "mkldnn.hpp"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/operators/requantization_op.h"
+#include "paddle/fluid/framework/data_layout_transform.h"
+namespace paddle {
+namespace operators {
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::reorder;
+using platform::to_void_cast;
+using Tensor = framework::Tensor;
+using framework::DataLayout;
+using mkldnn::stream;
+using platform::GetMKLDNNFormat;
+// Kernel of the `requantization` operator.
+//
+// Runs an MKL-DNN reorder from "Input" to "Output", taking data type and
+// memory format for both sides from the tensors themselves and using the
+// first element of the "Scale" tensor as the reorder's output scale.
+template <typename DeviceContext, typename T>
+class ReQuantOpKernel : public framework::OpKernel<T> {
+ public:
+  // Reads "Input" and "Scale", writes "Output".
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("Input");
+    auto* scale = ctx.Input<Tensor>("Scale");
+    auto* output = ctx.Output<Tensor>("Output");
+    auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& engine = dev_ctx.GetEngine();
+    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
+    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
+    mkldnn::memory::data_type src_dt =
+        paddle::framework::ToMKLDNNDataType(input->type());
+    // NOTE(review): Output's type and format are read before mutable_data()
+    // is called on it -- confirm the tensor is already initialised here.
+    mkldnn::memory::data_type dst_dt =
+        paddle::framework::ToMKLDNNDataType(output->type());
+    mkldnn::memory::format src_fmt = input->format();
+    mkldnn::memory::format dst_fmt = output->format();
+    const T* input_data = input->data<T>();
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    // Only the first element of Scale is used; with mask 0 it is passed as a
+    // single common output scale.
+    std::vector<T> scale_data = {*(scale->data<T>())};
+    mkldnn::primitive_attr attri;
+    int mask = 0;
+    attri.set_output_scales(mask, scale_data);
+    // attri.set_int_output_round_mode(round_nearest); //FIX ME
+    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
+    auto src_pd = mkldnn::memory::primitive_desc{src_md, engine};
+    mkldnn::memory src_memory(src_pd, to_void_cast<T>(input_data));
+    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, dst_dt, dst_fmt);
+    auto dst_pd = mkldnn::memory::primitive_desc{dst_md, engine};
+    mkldnn::memory dst_memory(dst_pd, to_void_cast<T>(output_data));
+    // reorder::primitive_desc expects (input, output): source first.
+    reorder::primitive_desc reorder_pd(src_pd, dst_pd, attri);
+    reorder reorder_prim(reorder_pd, primitive::at(src_memory), dst_memory);
+    std::vector<primitive> pipeline;
+    pipeline.push_back(reorder_prim);
+    // A primitive does nothing until it is submitted; run it and wait so the
+    // stack-allocated memory objects outlive the execution.
+    stream(stream::kind::eager).submit(pipeline).wait();
+  }
+};
+// Picks the kernel from the data type currently held by the "Input" tensor
+// together with the device context of this execution.
+framework::OpKernelType ReQuantOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  const auto* in_tensor = ctx.Input<framework::LoDTensor>("Input");
+  const auto in_dtype = framework::ToDataType(in_tensor->type());
+  return framework::OpKernelType(in_dtype, ctx.device_context());
+}
+// Declares the operator's proto: two inputs ("Input", "Scale"), one output
+// ("Output") and the operator-level comment.
+void ReQuantOpMaker::Make() {
+  AddInput("Input","input");
+  AddInput("Scale","scale...");
+  AddOutput("Output","output");
+  // The OpProto needs an operator comment in addition to the per-variable
+  // ones; without it the proto is left incomplete.
+  AddComment(
+      "Requantization operator: rescales Input into Output using Scale "
+      "through an MKL-DNN reorder.");
+}
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+// Register the `requantization` operator with its proto maker and the default
+// grad-op desc maker, then attach the float instantiation of the kernel as
+// its CPU kernel.
+REGISTER_OPERATOR(requantization, ops::ReQuantOp, ops::ReQuantOpMaker, paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OP_CPU_KERNEL(requantization, ops::ReQuantOpKernel<paddle::platform::CPUDeviceContext, float>);
--- a/paddle/fluid/operators/requantization_op.h
+++ b/paddle/fluid/operators/requantization_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+using framework::OpKernelType;
+using framework::Tensor;
+// Operator class of `requantization`, built on OperatorWithKernel and
+// inheriting its constructors.
+class ReQuantOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  // Output has the same dimensions as Input. The kernel sizes its destination
+  // memory from the Output dims, so they have to be set here.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    ctx->SetOutputDim("Output", ctx->GetInputDim("Input"));
+  }
+ protected:
+  // Defined in requantization_op.cc.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
+};
+// Proto/checker maker for ReQuantOp. Make() is only declared in this header.
+class ReQuantOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override;
+};
+}  // namespace operators
+}  // namespace paddle