提交 1e2f7619 编写于 作者: H hjchen2

Refine: implement naive quantize and dequantize kernel

上级 a5360331
......@@ -7,8 +7,8 @@ option(DEBUGING "enable debug mode" ON)
option(USE_EXCEPTION "use std exception" OFF)
option(LOG_PROFILE "log profile" OFF)
# select the platform to build
option(X86 "x86" OFF)
option(CPU "armv7 with neon" ON)
option(X86 "x86" ON)
option(CPU "armv7 with neon" OFF)
option(MALI_GPU "mali gpu" OFF)
option(FPGA "fpga" OFF)
......
......@@ -64,6 +64,9 @@ const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
const char *G_OP_TYPE_FLATTEN = "flatten";
const char *G_OP_TYPE_SHAPE = "shape";
const char *G_OP_TYPE_QUANTIZE = "quantize";
const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key = {
......@@ -112,6 +115,9 @@ std::unordered_map<
{G_OP_TYPE_BILINEAR_INTERP, {{"OutSize", "X"}, {"Out"}}},
{G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
{G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
{G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
{G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
{G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}}
};
} // namespace paddle_mobile
......@@ -82,8 +82,9 @@ enum PMStatus {
enum RoundType {
ROUND_UNK = 0,
ROUND_NEAREST_TOWARDS_ZERO = 1,
ROUND_NEAREST_TO_EVEN = 2
ROUND_NEAREST_AWAY_ZERO = 1,
ROUND_NEAREST_TOWARDS_ZERO = 2,
ROUND_NEAREST_TO_EVEN = 3
};
extern const char *G_OP_TYPE_CONV;
......@@ -127,6 +128,9 @@ extern const char *G_OP_TYPE_FUSION_CONV_BN;
extern const char *G_OP_TYPE_CONV_TRANSPOSE;
extern const char *G_OP_TYPE_PRELU;
extern const char *G_OP_TYPE_QUANTIZE;
extern const char *G_OP_TYPE_DEQUANTIZE;
extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key;
......
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_MOBILE_CPU
#include "operators/kernel/dequantize_kernel.h"
namespace paddle_mobile {
......@@ -26,7 +28,18 @@ template<>
void DequantizeKernel<CPU, float>::Compute(
    const DequantizeParam<CPU> &param) const {
  // Recover float values from the int32 accumulator produced by a quantized
  // op: y = x / activation_scale / weight_scale.
  const Tensor *in_tensor = param.input_;
  Tensor *out_tensor = param.out_;
  const float act_scale = param.activation_scale_->data<float>()[0];
  const float w_scale = param.weight_scale_;
  const int32_t *in_data = in_tensor->data<const int32_t>();
  float *out_data = out_tensor->mutable_data<float>();
  const size_t count = out_tensor->numel();
  for (size_t idx = 0; idx < count; ++idx) {
    out_data[idx] = in_data[idx] / act_scale / w_scale;
  }
}
} // namespace paddle_mobile
} // namespace operators
#endif
......@@ -12,11 +12,67 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_MOBILE_CPU
#include "operators/kernel/quantize_kernel.h"
#include <cmath>
#include <limits>
namespace paddle_mobile {
namespace operators {
// Return the maximum absolute value over all elements of |input|.
static float find_abs_max(const Tensor *input) {
  const float *data = input->data<const float>();
  const size_t count = input->numel();
  float result = 0.f;
  for (size_t idx = 0; idx < count; ++idx) {
    const float candidate = std::abs(data[idx]);
    result = (candidate > result) ? candidate : result;
  }
  return result;
}
// Quantize each element of |input| by |scale| using round-half-to-even
// (banker's rounding): a value exactly halfway between two integers is
// rounded to the even neighbor; every other value rounds to nearest.
static void quantize_round_to_even(const Tensor *input,
                                   const float scale,
                                   Tensor *output) {
  const float *x = input->data<const float>();
  int8_t *y = output->data<int8_t>();
  for (size_t i = 0; i < input->numel(); ++i) {
    float value = x[i] * scale;
    long long quant = std::llround(value);
    // Use std::fabs: plain abs() could bind to the C int overload and
    // silently truncate the fractional distance to 0.
    if (std::fabs(std::fabs(std::round(value) - value) - 0.5f) > 0.f) {
      // Not a .5 tie: round-to-nearest is already correct.
      y[i] = static_cast<int8_t>(quant);
    } else if (quant % 2 == 0) {
      // Tie, and llround already landed on an even value: keep it.
      y[i] = static_cast<int8_t>(quant);
    } else {
      // Tie rounded away from zero to an odd value: step back toward zero
      // to reach the even neighbor. The original expression
      // `quant + (quant > 0) ? -1 : 1` parsed as
      // `(quant + (quant > 0)) ? -1 : 1` due to operator precedence,
      // producing -1 or 1 instead of quant +/- 1.
      y[i] = static_cast<int8_t>(quant + ((quant > 0) ? -1 : 1));
    }
  }
}
// Quantize by truncation: scale each element, then drop the fractional
// part (round toward zero).
static void quantize_round_to_zero(const Tensor *input,
                                   const float scale,
                                   Tensor *output) {
  const float *in_data = input->data<const float>();
  int8_t *out_data = output->data<int8_t>();
  const size_t count = input->numel();
  for (size_t idx = 0; idx < count; ++idx) {
    out_data[idx] = static_cast<int8_t>(trunc(in_data[idx] * scale));
  }
}
// Quantize with round-half-away-from-zero: scale each element and round
// to the nearest integer (ties away from zero, as round() does).
static void quantize_round_to_nearest(const Tensor *input,
                                      const float scale,
                                      Tensor *output) {
  const float *in_data = input->data<const float>();
  int8_t *out_data = output->data<int8_t>();
  const size_t count = input->numel();
  for (size_t idx = 0; idx < count; ++idx) {
    out_data[idx] = static_cast<int8_t>(round(in_data[idx] * scale));
  }
}
template<>
bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
return true;
......@@ -26,7 +82,37 @@ template<>
void QuantizeKernel<CPU, float>::Compute(
    const QuantizeParam<CPU> &param) const {
  const Tensor *input = param.input_;
  Tensor *output = param.out_;
  // Scale source: either a statically provided scale, or the max |x|
  // discovered online over the input.
  float max_abs = 0.f;
  if (param.is_static_) {
    max_abs = param.static_scale_;
  } else {
    max_abs = find_abs_max(input);
  }
  // Guard against division by zero for an all-zero input.
  if (max_abs < std::numeric_limits<float>::min()) {
    max_abs = std::numeric_limits<float>::min();
  }
  // Only int8 is supported currently: map [-max_abs, max_abs] to [-127, 127].
  float online_scale = 127 / max_abs;
  param.online_scale_->mutable_data<float>()[0] = online_scale;
  switch (param.round_type_) {
    case ROUND_NEAREST_TO_EVEN:
      quantize_round_to_even(input, online_scale, output);
      break;
    case ROUND_NEAREST_TOWARDS_ZERO:
      quantize_round_to_zero(input, online_scale, output);
      break;
    case ROUND_NEAREST_AWAY_ZERO:
      quantize_round_to_nearest(input, online_scale, output);
      // The original omitted this break, so a valid AWAY_ZERO request
      // also fell through and logged "round type is not supported."
      break;
    default:
      LOG(kLOG_ERROR) << "round type is not supported.";
      break;
  }
}
} // namespace paddle_mobile
} // namespace operators
#endif
......@@ -26,6 +26,15 @@ template<>
void DequantizeKernel<X86, float>::Compute(
    const DequantizeParam<X86> &param) const {
  // Recover float values from the int32 accumulator produced by a quantized
  // op: y = x / activation_scale / weight_scale.
  const Tensor *in_tensor = param.input_;
  Tensor *out_tensor = param.out_;
  const float act_scale = param.activation_scale_->data<float>()[0];
  const float w_scale = param.weight_scale_;
  const int32_t *in_data = in_tensor->data<const int32_t>();
  float *out_data = out_tensor->mutable_data<float>();
  const size_t count = out_tensor->numel();
  for (size_t idx = 0; idx < count; ++idx) {
    out_data[idx] = in_data[idx] / act_scale / w_scale;
  }
}
} // namespace paddle_mobile
......
......@@ -12,11 +12,67 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_MOBILE_X86
#include "operators/kernel/quantize_kernel.h"
#include <cmath>
#include <limits>
namespace paddle_mobile {
namespace operators {
// Return the maximum absolute value over all elements of |input|.
static float find_abs_max(const Tensor *input) {
  const float *data = input->data<const float>();
  const size_t count = input->numel();
  float result = 0.f;
  for (size_t idx = 0; idx < count; ++idx) {
    const float candidate = std::abs(data[idx]);
    result = (candidate > result) ? candidate : result;
  }
  return result;
}
// Quantize each element of |input| by |scale| using round-half-to-even
// (banker's rounding): a value exactly halfway between two integers is
// rounded to the even neighbor; every other value rounds to nearest.
static void quantize_round_to_even(const Tensor *input,
                                   const float scale,
                                   Tensor *output) {
  const float *x = input->data<const float>();
  int8_t *y = output->data<int8_t>();
  for (size_t i = 0; i < input->numel(); ++i) {
    float value = x[i] * scale;
    long long quant = std::llround(value);
    // Use std::fabs: plain abs() could bind to the C int overload and
    // silently truncate the fractional distance to 0.
    if (std::fabs(std::fabs(std::round(value) - value) - 0.5f) > 0.f) {
      // Not a .5 tie: round-to-nearest is already correct.
      y[i] = static_cast<int8_t>(quant);
    } else if (quant % 2 == 0) {
      // Tie, and llround already landed on an even value: keep it.
      y[i] = static_cast<int8_t>(quant);
    } else {
      // Tie rounded away from zero to an odd value: step back toward zero
      // to reach the even neighbor. The original expression
      // `quant + (quant > 0) ? -1 : 1` parsed as
      // `(quant + (quant > 0)) ? -1 : 1` due to operator precedence,
      // producing -1 or 1 instead of quant +/- 1.
      y[i] = static_cast<int8_t>(quant + ((quant > 0) ? -1 : 1));
    }
  }
}
// Quantize by truncation: scale each element, then drop the fractional
// part (round toward zero).
static void quantize_round_to_zero(const Tensor *input,
                                   const float scale,
                                   Tensor *output) {
  const float *in_data = input->data<const float>();
  int8_t *out_data = output->data<int8_t>();
  const size_t count = input->numel();
  for (size_t idx = 0; idx < count; ++idx) {
    out_data[idx] = static_cast<int8_t>(trunc(in_data[idx] * scale));
  }
}
// Quantize with round-half-away-from-zero: scale each element and round
// to the nearest integer (ties away from zero, as round() does).
static void quantize_round_to_nearest(const Tensor *input,
                                      const float scale,
                                      Tensor *output) {
  const float *in_data = input->data<const float>();
  int8_t *out_data = output->data<int8_t>();
  const size_t count = input->numel();
  for (size_t idx = 0; idx < count; ++idx) {
    out_data[idx] = static_cast<int8_t>(round(in_data[idx] * scale));
  }
}
template<>
bool QuantizeKernel<X86, float>::Init(QuantizeParam<X86> *param) {
return true;
......@@ -26,7 +82,37 @@ template<>
void QuantizeKernel<X86, float>::Compute(
    const QuantizeParam<X86> &param) const {
  const Tensor *input = param.input_;
  Tensor *output = param.out_;
  // Scale source: either a statically provided scale, or the max |x|
  // discovered online over the input.
  float max_abs = 0.f;
  if (param.is_static_) {
    max_abs = param.static_scale_;
  } else {
    max_abs = find_abs_max(input);
  }
  // Guard against division by zero for an all-zero input.
  if (max_abs < std::numeric_limits<float>::min()) {
    max_abs = std::numeric_limits<float>::min();
  }
  // Only int8 is supported currently: map [-max_abs, max_abs] to [-127, 127].
  float online_scale = 127 / max_abs;
  param.online_scale_->mutable_data<float>()[0] = online_scale;
  switch (param.round_type_) {
    case ROUND_NEAREST_TO_EVEN:
      quantize_round_to_even(input, online_scale, output);
      break;
    case ROUND_NEAREST_TOWARDS_ZERO:
      quantize_round_to_zero(input, online_scale, output);
      break;
    case ROUND_NEAREST_AWAY_ZERO:
      quantize_round_to_nearest(input, online_scale, output);
      // The original omitted this break, so a valid AWAY_ZERO request
      // also fell through and logged "round type is not supported."
      break;
    default:
      LOG(kLOG_ERROR) << "round type is not supported.";
      break;
  }
}
} // namespace paddle_mobile
} // namespace operators
#endif
......@@ -2349,12 +2349,6 @@ class QuantizeParam : public OpParam {
// online
// scale = max(abs(x))
online_scale_ = GetVarValue<GType>("OutScale", outputs, scope);
if (HasAttr("is_signed", attrs)) {
is_signed_ = GetAttr<bool>("signed", attrs);
}
if (HasAttr("mantissa", attrs)) {
mantissa_bits_ = GetAttr<bool>("mantissa", attrs);
}
// offline
if (HasAttr("static_scale", attrs)) {
static_scale_ = GetAttr<float>("static_scale", attrs);
......@@ -2372,11 +2366,6 @@ class QuantizeParam : public OpParam {
RType *out_;
//
RType *online_scale_;
// signed quantize or unsigned quantize
bool is_signed_ = true;
// mantissa bit width
// for int8, mantissa bits is 7
int mantissa_bits_ = 7;
// if static scale or not
bool is_static_ = false;
// quantize scale
......
......@@ -17,8 +17,11 @@ limitations under the License. */
#include "../test_include.h"
int main() {
#if defined(PADDLE_MOBILE_CPU)
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
#elif defined(PADDLE_MOBILE_X86)
paddle_mobile::PaddleMobile<paddle_mobile::X86> paddle_mobile;
#endif
paddle_mobile.SetThreadNum(4);
bool optimize = true;
auto time1 = time();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册