Commit af034d37 authored by hjchen2

Merge branch 'dev-latest' of https://github.com/hjchen2/paddle-mobile into dev-latest

@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "io/executor.h"
-#include <operators/math/gemm.h>
 #include <algorithm>
+#include <utility>
 #include <vector>
 #include "common/enforce.h"
 #include "common/log.h"
@@ -26,7 +26,7 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
+#include "operators/math/gemm.h"
 namespace paddle_mobile {
@@ -34,9 +34,8 @@ using framework::Variable;
 template <typename Dtype, Precision P>
 Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
-                             const bool use_optimize,
-                             const bool loddable)
-    : program_(p), use_optimize_(use_optimize), loddable_(loddable) {
+                             const bool use_optimize, const bool loddable)
+    : program_(p), use_optimize_(use_optimize), loddable_(loddable) {
   Variable *variable_ptr = program_.scope->Var("batch_size");
   variable_ptr->SetValue<int>(1);
   to_predict_program_ =
@@ -77,20 +76,20 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
   }
 }
-template<typename Dtype>
+template <typename Dtype>
 void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
   char **data_buf = reinterpret_cast<char **>(data);
   int64_t size = tensor->numel();
-  Dtype* tensor_data = tensor->mutable_data<Dtype>();
+  Dtype *tensor_data = tensor->mutable_data<Dtype>();
   if (0) {
-    // TODO should be moved into operator init function
+    // TODO(hjchen2) should be moved into operator init function
     float min_value;
     float max_value;
     memcpy(&min_value, data_buf, sizeof(float));
     memcpy(&max_value, data_buf + sizeof(float), sizeof(float));
     data_buf += 2 * sizeof(float);
     const float factor = (max_value - min_value) / 255.0;
-    const uint8_t *uint8_data = reinterpret_cast<uint8_t*>(data_buf);
+    const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(data_buf);
     for (int k = 0; k < size; ++k) {
       tensor_data[k] = uint8_data[k] * factor + min_value;
     }
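Note: the disabled `if (0)` branch above is a dequantize-on-load path: two floats carry the tensor's value range, followed by one uint8 code per element, mapped back with `factor = (max - min) / 255`. A self-contained scalar sketch of that mapping (the helper name is illustrative, not part of the codebase):

```cpp
#include <cstdint>

// Maps uint8 codes back to float, mirroring the disabled branch above:
// out[k] = codes[k] * (max - min) / 255 + min.
void DequantizeOnLoad(const uint8_t *codes, int64_t size, float min_value,
                      float max_value, float *out) {
  const float factor = (max_value - min_value) / 255.0f;
  for (int64_t k = 0; k < size; ++k) {
    out[k] = codes[k] * factor + min_value;
  }
}
```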
@@ -103,21 +102,20 @@ void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::LoadMemory(
-    void **data,
-    const std::shared_ptr<framework::VarDesc> var_desc,
-    framework::LoDTensor *tensor) {
-  char **data_buf = reinterpret_cast<char**>(data);
+    void **data, const std::shared_ptr<framework::VarDesc> var_desc,
+    framework::LoDTensor *tensor) {
+  char **data_buf = reinterpret_cast<char **>(data);
   // version
-  uint32_t version = *(reinterpret_cast<uint32_t*>(*data_buf));
+  uint32_t version = *(reinterpret_cast<uint32_t *>(*data_buf));
   *data_buf += sizeof(uint32_t);
   // lod information
-  uint64_t lod_level = *(reinterpret_cast<uint64_t*>(*data_buf));
+  uint64_t lod_level = *(reinterpret_cast<uint64_t *>(*data_buf));
   *data_buf += sizeof(uint64_t);
   auto *lod = tensor->mutable_lod();
   lod->resize(lod_level);
   for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size = *(reinterpret_cast<uint64_t*>(*data_buf));
+    uint64_t size = *(reinterpret_cast<uint64_t *>(*data_buf));
     *data_buf += sizeof(uint64_t);
     std::vector<size_t> tmp_dim(size / sizeof(size_t));
     memcpy(tmp_dim.data(), *data_buf, size);
@@ -125,10 +123,10 @@ void Executor<Dtype, P>::LoadMemory(
     *data_buf += size;
   }
   // tensor version
-  uint32_t tensor_version = *(reinterpret_cast<uint32_t*>(*data_buf));
+  uint32_t tensor_version = *(reinterpret_cast<uint32_t *>(*data_buf));
   *data_buf += sizeof(uint32_t);
   // tensor desc size
-  int32_t tensor_desc_size = *(reinterpret_cast<int32_t*>(*data_buf));
+  int32_t tensor_desc_size = *(reinterpret_cast<int32_t *>(*data_buf));
   *data_buf += sizeof(int32_t);
   // skip tensor desc
   *data_buf += tensor_desc_size;
@@ -138,13 +136,13 @@ void Executor<Dtype, P>::LoadMemory(
   // parse tensor from stream
   switch (tensor_desc.DataType()) {
     case framework::VARTYPE_TYPE_FP32:
-      LoadMemInternal<float>((void**)data_buf, tensor);
+      LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     case framework::VARTYPE_TYPE_INT8:
-      LoadMemInternal<int8_t>((void**)data_buf, tensor);
+      LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     case framework::VARTYPE_TYPE_INT32:
-      LoadMemInternal<int>((void**)data_buf, tensor);
+      LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     default:
       LOG(kLOG_ERROR) << "data type is not supported";
@@ -164,8 +162,8 @@ void Executor<Dtype, P>::InitMemory() {
       char *origin_data =
           ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
       char *data = origin_data;
-      LoadMemory((void**)&data, var_desc, tensor);
-      delete [] origin_data;
+      LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
+      delete[] origin_data;
     } else {
       if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
         varInputMemory(var_desc, var, tensor);
@@ -180,7 +178,8 @@ void Executor<Dtype, P>::InitCombineMemory() {
   char *origin_data = nullptr;
   bool self_alloc = false;
   if (program_.combined_params_buf && program_.combined_params_len) {
-    origin_data = (char *)program_.combined_params_buf;
+    origin_data = reinterpret_cast<char *>(
+        const_cast<uint8_t *>(program_.combined_params_buf));
   } else {
     self_alloc = true;
     origin_data = ReadFileToBuff(program_.para_path);
@@ -195,7 +194,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
       if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
         continue;
       }
-      LoadMemory((void**)&data, var_desc, tensor);
+      LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
     } else {
       if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
         varInputMemory(var_desc, var, tensor);
@@ -204,7 +203,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
     }
   }
   if (self_alloc) {
-    delete [] origin_data;
+    delete[] origin_data;
   }
   LOG(kLOG_INFO) << "init combine memory finish";
 }
@@ -231,9 +230,9 @@ bool Executor<Dtype, P>::varInputMemory(
       break;
   }
   bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) ||
                        (type == framework::VARTYPE_TYPE_INT8) ||
                        (type == framework::VARTYPE_TYPE_INT32) ||
                        (type == framework::VARTYPE_TYPE_INT64);
   PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
   return is_mute_match;
 }
@@ -402,12 +401,12 @@ void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
       g_feed_value->GetMutable<framework::LoDTensor>();
   feed_tensor->Resize(t.dims());
   feed_tensor->ShareDataWith(t);
-};
+}
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
   InjectVariable(t, "feed");
-};
+}
 template <typename Dtype, Precision P>
 std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
@@ -423,14 +422,14 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
   auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
       out_keys[0], output_map, *(program_.scope));
   return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
-};
+}
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::Predict_From_To(int start, int end) {
   std::shared_ptr<framework::BlockDesc> to_predict_block =
       to_predict_program_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
-  end = end < 0 ? (int)ops.size() : end;
+  end = end < 0 ? static_cast<int>(ops.size()) : end;
   PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
                         "start or end parameter is wrong");
@@ -451,17 +450,17 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
     profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
   }
-};
+}
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::Predict_From(int start) {
   Predict_From_To(start);
-};
+}
 template <typename Dtype, Precision P>
 void Executor<Dtype, P>::Predict_To(int end) {
   Predict_From_To(0, end);
-};
+}
 #endif
 template class Executor<CPU, Precision::FP32>;
...
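Note: `Executor::LoadMemory` above walks a serialized parameter blob in a fixed order: a uint32 file version, a uint64 LoD level count (each level stored as a uint64 byte size followed by that many bytes), a uint32 tensor version, an int32 tensor-descriptor length that is skipped, and then the raw element data handed to `LoadMemInternal`. A minimal sketch of the same cursor-walking pattern, based only on the reads visible in this diff (helper names are illustrative):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Reads one POD field and advances the cursor, mirroring the
// "*data_buf += sizeof(T)" pattern used by Executor::LoadMemory.
template <typename T>
T ReadField(char **buf) {
  T value;
  std::memcpy(&value, *buf, sizeof(T));
  *buf += sizeof(T);
  return value;
}

// Walks the header fields in the order shown in the diff; the tensor's
// element data begins right after the skipped descriptor bytes.
void WalkParamHeader(char **buf) {
  uint32_t version = ReadField<uint32_t>(buf);    // file format version
  uint64_t lod_level = ReadField<uint64_t>(buf);  // number of LoD levels
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t size = ReadField<uint64_t>(buf);     // level size in bytes
    std::vector<size_t> level(size / sizeof(size_t));
    std::memcpy(level.data(), *buf, size);
    *buf += size;
  }
  ReadField<uint32_t>(buf);                       // tensor format version
  int32_t desc_size = ReadField<int32_t>(buf);    // serialized TensorDesc size
  *buf += desc_size;                              // skip the descriptor
  (void)version;
}
```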
@@ -14,16 +14,16 @@ limitations under the License. */
 #pragma once
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 #include "common/types.h"
 #include "common/util.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
 #include "framework/program/program.h"
 #include "framework/tensor.h"
-#include <memory>
-#include <string>
-#include <vector>
-#include <map>
 namespace paddle_mobile {
@@ -36,8 +36,7 @@ class Executor {
   // @param use_optimize bool whether use operator fusion to speed up or not
   // @param loddable bool
   Executor(const framework::Program<Dtype> program,
-           const bool use_optimize = true,
-           const bool loddable = false);
+           const bool use_optimize = true, const bool loddable = false);
   // predict with tensor input
   // @param t input tensor to do prediction
@@ -68,8 +67,8 @@ class Executor {
                  framework::LoDTensor *tensor) const;
   void InitMemory();
   void InitCombineMemory();
-  void LoadMemory(void** data,
+  void LoadMemory(void **data,
                   const std::shared_ptr<framework::VarDesc> var_desc,
                   framework::LoDTensor *tensor);
   framework::Program<Dtype> program_;
...
@@ -30,4 +30,3 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(dequantize, ops::DequantizeOp);
 #endif
@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "feed_op.h"
+#include "operators/feed_op.h"
 namespace ops = paddle_mobile::operators;
@@ -26,4 +25,3 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
 #endif
@@ -44,7 +44,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   }
   void RunImpl() const {
-    auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
+    auto input = reinterpret_cast<Tensor *>(param_.InputX());
     fpga::format_image(input);
     auto input_ptr = input->data<float>();
     Tensor *output = param_.Out();
@@ -53,7 +53,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     fpga::BypassArgs args;
     args.convert_type = fpga::DATA_FP32_TO_FP16;
     args.layout_type = fpga::LAYOUT_NO_CONVERT;
-    args.image.address = (void *)input_ptr;
+    args.image.address = input_ptr;
     args.image.channels = input->dims()[1];
     args.image.height = input->dims()[2];
     args.image.width = input->dims()[3];
@@ -78,4 +78,3 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
 }  // namespace operators
 }  // namespace paddle_mobile
@@ -12,10 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "fetch_op.h"
-namespace paddle_mobile {
-namespace operators {}
-}  // namespace paddle_mobile
+#include "operators/fetch_op.h"
 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
@@ -27,4 +24,3 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
 #ifdef PADDLE_MOBILE_FPGA
 REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
 #endif
@@ -23,16 +23,16 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-template<>
+template <>
 bool DequantizeKernel<CPU, float>::Init(DequantizeParam<CPU> *param) {
   return true;
 }
-template<>
+template <>
 void DequantizeKernel<CPU, float>::Compute(
     const DequantizeParam<CPU> &param) const {
   const Tensor *input = param.input_;
   Tensor *output = param.out_;
   float activation_scale = param.activation_scale_->data<float>()[0];
   float weight_scale = param.weight_scale_;
   const int32_t *x = input->data<const int32_t>();
@@ -70,7 +70,7 @@ void DequantizeKernel<CPU, float>::Compute(
   }
 }
-}  // namespace paddle_mobile
 }  // namespace operators
+}  // namespace paddle_mobile
 #endif
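The collapsed `Compute` body combines `activation_scale` and `weight_scale` to map int32 accumulator values back to float. Assuming the usual symmetric scheme, where each operand was quantized with a factor of the form S = 127 / max_abs, the inverse mapping would look like the scalar sketch below; the exact scale expression lives in the collapsed code, so treat this as an assumption rather than the kernel's literal formula:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical scalar dequantize: divides out both quantization scales.
// Assumes activation_scale and weight_scale are the multiplicative factors
// applied during quantization; the real kernel may combine them differently.
void DequantizeScalar(const int32_t *x, size_t size, float activation_scale,
                      float weight_scale, float *y) {
  const float scale = 1.f / (activation_scale * weight_scale);
  for (size_t i = 0; i < size; ++i) {
    y[i] = x[i] * scale;
  }
}
```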
@@ -28,14 +28,12 @@ float32_t vmaxvq_f32(float32x4_t r) {
 }
 #endif
-int32x4_t vrnd_towards_zero(float32x4_t r) {
-  return vcvtq_s32_f32(r);
-}
+int32x4_t vrnd_towards_zero(float32x4_t r) { return vcvtq_s32_f32(r); }
 int32x4_t vrnd_away_zero(float32x4_t r) {
   float32x4_t plus = vdupq_n_f32(0.5);
   float32x4_t minus = vdupq_n_f32(-0.5);
   float32x4_t zero = vdupq_n_f32(0);
   uint32x4_t more_than_zero = vcgtq_f32(r, zero);
   float32x4_t temp = vbslq_f32(more_than_zero, plus, minus);
   temp = vaddq_f32(r, temp);
@@ -62,7 +60,7 @@ int32x4_t vrnd_to_even(float32x4_t r) {
     }
   }
   return ret;
 #else
   float32x4_t point5 = vdupq_n_f32(0.5);
   int32x4_t one = vdupq_n_s32(1);
   int32x4_t zero = vdupq_n_s32(0);
@@ -83,9 +81,9 @@ int32x4_t vrnd_to_even(float32x4_t r) {
   mask = vaddq_u32(more_than_zero, mask);
   int32x4_t smask = vreinterpretq_s32_u32(mask);
   smask = vsubq_s32(smask, one);
   rnd = vaddq_s32(rnd, smask);
   return rnd;
 #endif
 }
 #endif
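The NEON helpers above implement three rounding modes; `vrnd_to_even` hand-builds round-half-to-even (banker's rounding) for targets without a native instruction. A scalar reference for the same rule, handy for checking the vector path (not part of the original source):

```cpp
#include <cmath>
#include <cstdint>

// Round-half-to-even: halfway cases go to the even neighbor, so 2.5 -> 2,
// 3.5 -> 4, -2.5 -> -2. Matches std::nearbyint under FE_TONEAREST.
int32_t round_half_to_even(float x) {
  float r = std::round(x);  // std::round sends halfway cases away from zero
  if (std::fabs(x - std::trunc(x)) == 0.5f) {
    if (static_cast<int64_t>(r) % 2 != 0) {
      r -= std::copysign(1.0f, x);  // step back to the even neighbor
    }
  }
  return static_cast<int32_t>(r);
}
```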
@@ -93,7 +91,7 @@ namespace paddle_mobile {
 namespace operators {
 static float find_abs_max(const Tensor *input) {
-  float max_abs = float(0);
+  float max_abs = 0.f;
   const float *x = input->data<const float>();
   size_t size = input->numel();
 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
@@ -130,8 +128,7 @@ static float find_abs_max(const Tensor *input) {
   return max_abs;
 }
-static void quantize_round_to_even(const Tensor *input,
-                                   const float scale,
+static void quantize_round_to_even(const Tensor *input, const float scale,
                                    Tensor *output) {
   const float *x = input->data<const float>();
   int8_t *y = output->data<int8_t>();
@@ -183,9 +180,8 @@ static void quantize_round_to_even(const Tensor *input,
   }
 }
-static void quantize_round_to_zero(const Tensor *input,
-                                   const float scale,
-                                   Tensor *output) {
+static void quantize_round_to_zero(const Tensor *input, const float scale,
+                                   Tensor *output) {
   const float *x = input->data<const float>();
   int8_t *y = output->data<int8_t>();
   size_t size = input->numel();
@@ -225,9 +221,8 @@ static void quantize_round_to_zero(const Tensor *input,
   }
 }
-static void quantize_round_to_nearest(const Tensor *input,
-                                      const float scale,
-                                      Tensor *output) {
+static void quantize_round_to_nearest(const Tensor *input, const float scale,
+                                      Tensor *output) {
   const float *x = input->data<const float>();
   int8_t *y = output->data<int8_t>();
   size_t size = input->numel();
@@ -267,15 +262,14 @@ static void quantize_round_to_nearest(const Tensor *input,
   }
 }
-template<>
+template <>
 bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
   return true;
 }
-template<>
+template <>
 void QuantizeKernel<CPU, float>::Compute(
     const QuantizeParam<CPU> &param) const {
-  // TODO
   float max_abs = 0.f;
   const Tensor *input = param.input_;
   Tensor *output = param.out_;
@@ -306,7 +300,7 @@ void QuantizeKernel<CPU, float>::Compute(
   }
 }
-}  // namespace paddle_mobile
 }  // namespace operators
+}  // namespace paddle_mobile
 #endif
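For orientation: `Compute` finds `max_abs` via `find_abs_max` and dispatches to one of the three `quantize_round_to_*` kernels; the scale computation itself is collapsed in this diff. Assuming the standard symmetric int8 choice of `scale = 127 / max_abs`, the round-to-zero path reduces to the scalar sketch below (a plain float-to-int cast in C++ truncates toward zero, which is exactly this rounding mode):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar sketch of quantize_round_to_zero; the 127 / max_abs scale is an
// assumption, since the actual scale computation is collapsed in the diff.
std::vector<int8_t> QuantizeRoundToZero(const std::vector<float> &x,
                                        float max_abs) {
  const float scale = 127.f / std::max(max_abs, 1e-6f);
  std::vector<int8_t> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = static_cast<int8_t>(x[i] * scale);  // truncates toward zero
  }
  return y;
}
```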
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "operators/quantize_op.h"
+#include <vector>
 namespace paddle_mobile {
 namespace operators {
@@ -32,4 +33,3 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(quantize, ops::QuantizeOp);
 #endif
This diff is collapsed.
@@ -3,7 +3,7 @@
 TOTAL_ERRORS=0
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
-for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v ".pb.cpp" | grep -v ".pb.h"); do
+for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v "protobuf-c.*"); do
     cpplint $file;
     TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
 done
...