Commit 1e2f7619 authored by hjchen2

Refine: implement naive quantize and dequantize kernel

Parent a5360331
@@ -7,8 +7,8 @@ option(DEBUGING "enable debug mode" ON)
 option(USE_EXCEPTION "use std exception" OFF)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
-option(X86 "x86" OFF)
-option(CPU "armv7 with neon" ON)
+option(X86 "x86" ON)
+option(CPU "armv7 with neon" OFF)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
...
@@ -64,6 +64,9 @@ const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
 const char *G_OP_TYPE_FLATTEN = "flatten";
 const char *G_OP_TYPE_SHAPE = "shape";
+const char *G_OP_TYPE_QUANTIZE = "quantize";
+const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key = {
@@ -112,6 +115,9 @@ std::unordered_map<
         {G_OP_TYPE_BILINEAR_INTERP, {{"OutSize", "X"}, {"Out"}}},
         {G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
         {G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
-        {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
+        {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
+        {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
+        {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}
+};
 
 } // namespace paddle_mobile
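Each entry in op_input_output_key pairs an op type with the variable names it reads and writes, so the new quantize op declares one input "X" and two outputs "Out" and "OutScale". A self-contained sketch of that lookup (a hypothetical standalone copy, not the framework header):

// Sketch: how the op-type -> (inputs, outputs) registration above reads back.
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  std::unordered_map<
      std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
      io_key = {{"quantize", {{"X"}, {"Out", "OutScale"}}},
                {"dequantize", {{"X", "Scale"}, {"Out"}}}};
  for (const auto &out : io_key["quantize"].second) {
    printf("quantize output: %s\n", out.c_str());  // prints Out, then OutScale
  }
  return 0;
}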
@@ -82,8 +82,9 @@ enum PMStatus {
 enum RoundType {
   ROUND_UNK = 0,
-  ROUND_NEAREST_TOWARDS_ZERO = 1,
-  ROUND_NEAREST_TO_EVEN = 2
+  ROUND_NEAREST_AWAY_ZERO = 1,
+  ROUND_NEAREST_TOWARDS_ZERO = 2,
+  ROUND_NEAREST_TO_EVEN = 3
 };
 
 extern const char *G_OP_TYPE_CONV;
@@ -127,6 +128,9 @@ extern const char *G_OP_TYPE_FUSION_CONV_BN;
 extern const char *G_OP_TYPE_CONV_TRANSPOSE;
 extern const char *G_OP_TYPE_PRELU;
+extern const char *G_OP_TYPE_QUANTIZE;
+extern const char *G_OP_TYPE_DEQUANTIZE;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
...
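The three rounding modes the enum now distinguishes only differ on halfway values like 2.5. A standalone sketch (not part of the commit) using standard C++ analogues:

// Sketch: standard-C++ equivalents of the three RoundType modes, at 2.5.
#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  float value = 2.5f;
  // ROUND_NEAREST_AWAY_ZERO: ties round away from zero -> 3
  printf("away from zero: %.0f\n", std::round(value));
  // ROUND_NEAREST_TOWARDS_ZERO: the kernel below uses trunc, which drops
  // the fractional part entirely -> 2
  printf("towards zero:   %.0f\n", std::trunc(value));
  // ROUND_NEAREST_TO_EVEN: ties round to the even neighbor -> 2
  std::fesetround(FE_TONEAREST);
  printf("to even:        %.0f\n", std::nearbyint(value));
  return 0;
}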
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_MOBILE_CPU
+
 #include "operators/kernel/dequantize_kernel.h"
 
 namespace paddle_mobile {
@@ -26,7 +28,18 @@ template<>
 void DequantizeKernel<CPU, float>::Compute(
     const DequantizeParam<CPU> &param) const {
   // TODO
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  float activation_scale = param.activation_scale_->data<float>()[0];
+  float weight_scale = param.weight_scale_;
+  const int32_t *x = input->data<const int32_t>();
+  float *y = output->mutable_data<float>();
+  for (size_t i = 0; i < output->numel(); ++i) {
+    y[i] = x[i] / activation_scale / weight_scale;
+  }
 }
 
 } // namespace paddle_mobile
 } // namespace operators
+
+#endif
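The dequantize arithmetic simply divides out the two scale factors that quantization multiplied in: the per-batch activation scale and the per-layer weight scale. A minimal standalone sketch with made-up numbers (not the framework API):

// Sketch of the dequantize arithmetic above; all values are hypothetical.
#include <cstdint>
#include <cstdio>

int main() {
  // Suppose quantize chose activation_scale = 127 / max_abs = 25.4
  // (max_abs = 5.0), and the weights were quantized with weight_scale = 12.7.
  float activation_scale = 25.4f;
  float weight_scale = 12.7f;
  int32_t accumulator = 3226;  // int8 x int8 products accumulated in int32
  // Recover the float value by dividing out both scales, as the kernel does.
  float y = accumulator / activation_scale / weight_scale;
  printf("dequantized: %f\n", y);  // ~10.0
  return 0;
}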
@@ -12,11 +12,67 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_MOBILE_CPU
+
 #include "operators/kernel/quantize_kernel.h"
+
+#include <cmath>
+#include <limits>
 
 namespace paddle_mobile {
 namespace operators {
 
+static float find_abs_max(const Tensor *input) {
+  float max_abs = 0.f;
+  const float *x = input->data<const float>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = std::abs(x[i]);
+    if (value > max_abs) {
+      max_abs = value;
+    }
+  }
+  return max_abs;
+}
+
+static void quantize_round_to_even(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = x[i] * scale;
+    long long quant = std::llround(value);
+    if (std::abs(std::abs(std::round(value) - value) - 0.5f) > 0) {
+      // not a tie: the nearest integer is unambiguous
+      y[i] = quant;
+    } else if (quant % 2 == 0) {
+      // tie, and llround already landed on the even neighbor
+      y[i] = quant;
+    } else {
+      // tie on an odd value: step back towards zero to reach the even one
+      y[i] = quant + ((quant > 0) ? -1 : 1);
+    }
+  }
+}
+
+static void quantize_round_to_zero(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::trunc(x[i] * scale);
+  }
+}
+
+static void quantize_round_to_nearest(const Tensor *input, const float scale,
+                                      Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::round(x[i] * scale);
+  }
+}
+
 template<>
 bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
   return true;
@@ -26,7 +82,37 @@ template<>
 void QuantizeKernel<CPU, float>::Compute(
     const QuantizeParam<CPU> &param) const {
   // TODO
+  float max_abs = 0.f;
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  Tensor *output_scale = param.online_scale_;
+  if (param.is_static_) {
+    max_abs = param.static_scale_;
+  } else {
+    max_abs = find_abs_max(input);
+  }
+  // clamp to avoid dividing by zero on an all-zero input
+  if (max_abs < std::numeric_limits<float>::min()) {
+    max_abs = std::numeric_limits<float>::min();
+  }
+  // only support int8 currently
+  float online_scale = 127 / max_abs;
+  output_scale->mutable_data<float>()[0] = online_scale;
+  switch (param.round_type_) {
+    case ROUND_NEAREST_TO_EVEN:
+      quantize_round_to_even(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_TOWARDS_ZERO:
+      quantize_round_to_zero(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_AWAY_ZERO:
+      quantize_round_to_nearest(input, online_scale, output);
+      break;
+    default:
+      LOG(kLOG_ERROR) << "round type is not supported.";
+      break;
+  }
 }
 
 } // namespace paddle_mobile
 } // namespace operators
+
+#endif
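The round-to-even branch is the subtle one: llround resolves ties away from zero, so odd ties need one step back towards zero. A standalone sanity check of the same logic (framework Tensor types replaced by a plain array; an illustrative sketch, not the committed kernel):

// Sketch: the round-to-even tie-breaking logic above, on halfway values.
#include <cmath>
#include <cstdint>
#include <cstdio>

static int8_t round_to_even(float value) {
  long long quant = std::llround(value);  // rounds ties away from zero
  if (std::abs(std::abs(std::round(value) - value) - 0.5f) > 0) {
    return quant;  // not a tie: nearest integer is unambiguous
  }
  if (quant % 2 == 0) {
    return quant;  // tie, and llround already landed on the even neighbor
  }
  return quant + ((quant > 0) ? -1 : 1);  // tie on odd: step towards zero
}

int main() {
  const float inputs[] = {2.5f, 3.5f, -2.5f, -3.5f, 2.4f};
  for (float v : inputs) {
    printf("%+.1f -> %+d\n", v, (int)round_to_even(v));
  }
  // Expected: +2.5 -> +2, +3.5 -> +4, -2.5 -> -2, -3.5 -> -4, +2.4 -> +2
  return 0;
}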
@@ -26,6 +26,15 @@ template<>
 void DequantizeKernel<X86, float>::Compute(
     const DequantizeParam<X86> &param) const {
   // TODO
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  float activation_scale = param.activation_scale_->data<float>()[0];
+  float weight_scale = param.weight_scale_;
+  const int32_t *x = input->data<const int32_t>();
+  float *y = output->mutable_data<float>();
+  for (size_t i = 0; i < output->numel(); ++i) {
+    y[i] = x[i] / activation_scale / weight_scale;
+  }
 }
 
 } // namespace paddle_mobile
...
@@ -12,11 +12,67 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#ifdef PADDLE_MOBILE_X86
+
 #include "operators/kernel/quantize_kernel.h"
+
+#include <cmath>
+#include <limits>
 
 namespace paddle_mobile {
 namespace operators {
 
+static float find_abs_max(const Tensor *input) {
+  float max_abs = 0.f;
+  const float *x = input->data<const float>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = std::abs(x[i]);
+    if (value > max_abs) {
+      max_abs = value;
+    }
+  }
+  return max_abs;
+}
+
+static void quantize_round_to_even(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    float value = x[i] * scale;
+    long long quant = std::llround(value);
+    if (std::abs(std::abs(std::round(value) - value) - 0.5f) > 0) {
+      // not a tie: the nearest integer is unambiguous
+      y[i] = quant;
+    } else if (quant % 2 == 0) {
+      // tie, and llround already landed on the even neighbor
+      y[i] = quant;
+    } else {
+      // tie on an odd value: step back towards zero to reach the even one
+      y[i] = quant + ((quant > 0) ? -1 : 1);
+    }
+  }
+}
+
+static void quantize_round_to_zero(const Tensor *input, const float scale,
+                                   Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::trunc(x[i] * scale);
+  }
+}
+
+static void quantize_round_to_nearest(const Tensor *input, const float scale,
+                                      Tensor *output) {
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+  for (size_t i = 0; i < input->numel(); ++i) {
+    y[i] = std::round(x[i] * scale);
+  }
+}
+
 template<>
 bool QuantizeKernel<X86, float>::Init(QuantizeParam<X86> *param) {
   return true;
@@ -26,7 +82,37 @@ template<>
 void QuantizeKernel<X86, float>::Compute(
     const QuantizeParam<X86> &param) const {
   // TODO
+  float max_abs = 0.f;
+  const Tensor *input = param.input_;
+  Tensor *output = param.out_;
+  Tensor *output_scale = param.online_scale_;
+  if (param.is_static_) {
+    max_abs = param.static_scale_;
+  } else {
+    max_abs = find_abs_max(input);
+  }
+  // clamp to avoid dividing by zero on an all-zero input
+  if (max_abs < std::numeric_limits<float>::min()) {
+    max_abs = std::numeric_limits<float>::min();
+  }
+  // only support int8 currently
+  float online_scale = 127 / max_abs;
+  output_scale->mutable_data<float>()[0] = online_scale;
+  switch (param.round_type_) {
+    case ROUND_NEAREST_TO_EVEN:
+      quantize_round_to_even(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_TOWARDS_ZERO:
+      quantize_round_to_zero(input, online_scale, output);
+      break;
+    case ROUND_NEAREST_AWAY_ZERO:
+      quantize_round_to_nearest(input, online_scale, output);
+      break;
+    default:
+      LOG(kLOG_ERROR) << "round type is not supported.";
+      break;
+  }
 }
 
 } // namespace paddle_mobile
 } // namespace operators
+
+#endif
@@ -2349,12 +2349,6 @@ class QuantizeParam : public OpParam {
     // online
     // scale = max(abs(x))
     online_scale_ = GetVarValue<GType>("OutScale", outputs, scope);
-    if (HasAttr("is_signed", attrs)) {
-      is_signed_ = GetAttr<bool>("signed", attrs);
-    }
-    if (HasAttr("mantissa", attrs)) {
-      mantissa_bits_ = GetAttr<bool>("mantissa", attrs);
-    }
     // offline
     if (HasAttr("static_scale", attrs)) {
       static_scale_ = GetAttr<float>("static_scale", attrs);
@@ -2372,11 +2366,6 @@ class QuantizeParam : public OpParam {
   RType *out_;
   //
   RType *online_scale_;
-  // signed quantize or unsigned quantize
-  bool is_signed_ = true;
-  // mantissa bit width
-  // for int8, mantissa bits is 7
-  int mantissa_bits_ = 7;
   // if static scale or not
   bool is_static_ = false;
   // quantize scale
...
@@ -17,8 +17,11 @@ limitations under the License. */
 #include "../test_include.h"
 
 int main() {
+#if defined(PADDLE_MOBILE_CPU)
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#elif defined(PADDLE_MOBILE_X86)
   paddle_mobile::PaddleMobile<paddle_mobile::X86> paddle_mobile;
+#endif
   paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
...