Commit 1fbe4e79 authored by hjchen2

Merge branch 'dev-latest' of https://github.com/hjchen2/paddle-mobile into dev-latest

......@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "io/executor.h"
#include <operators/math/gemm.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "common/enforce.h"
#include "common/log.h"
......@@ -26,7 +26,7 @@ limitations under the License. */
#include "framework/program/var_desc.h"
#include "framework/scope.h"
#include "framework/tensor.h"
#include "operators/math/gemm.h"
namespace paddle_mobile {
......@@ -34,9 +34,8 @@ using framework::Variable;
template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
const bool use_optimize,
const bool loddable)
: program_(p), use_optimize_(use_optimize), loddable_(loddable) {
const bool use_optimize, const bool loddable)
: program_(p), use_optimize_(use_optimize), loddable_(loddable) {
Variable *variable_ptr = program_.scope->Var("batch_size");
variable_ptr->SetValue<int>(1);
to_predict_program_ =
......@@ -77,20 +76,20 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
}
}
template<typename Dtype>
template <typename Dtype>
void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
char **data_buf = reinterpret_cast<char **>(data);
int64_t size = tensor->numel();
Dtype* tensor_data = tensor->mutable_data<Dtype>();
Dtype *tensor_data = tensor->mutable_data<Dtype>();
if (0) {
// TODO should be moved into operator init function
// TODO(hjchen2) should be moved into operator init function
float min_value;
float max_value;
memcpy(&min_value, data_buf, sizeof(float));
memcpy(&max_value, data_buf + sizeof(float), sizeof(float));
data_buf += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
const uint8_t *uint8_data = reinterpret_cast<uint8_t*>(data_buf);
const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(data_buf);
for (int k = 0; k < size; ++k) {
tensor_data[k] = uint8_data[k] * factor + min_value;
}
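
For context, the hunk above maps stored uint8 weights back to float with a plain affine transform, factor = (max_value - min_value) / 255. A minimal standalone sketch of that mapping, assuming the raw bytes have already been read into a buffer (the function name and container here are illustrative, not part of this patch):

#include <cstdint>
#include <vector>

// Affine dequantization: value = q * (max - min) / 255 + min.
std::vector<float> DequantizeUint8(const std::vector<uint8_t> &q,
                                   float min_value, float max_value) {
  const float factor = (max_value - min_value) / 255.0f;
  std::vector<float> out(q.size());
  for (size_t i = 0; i < q.size(); ++i) {
    out[i] = q[i] * factor + min_value;
  }
  return out;
}
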
......@@ -103,21 +102,20 @@ void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
template <typename Dtype, Precision P>
void Executor<Dtype, P>::LoadMemory(
void **data,
const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor) {
char **data_buf = reinterpret_cast<char**>(data);
void **data, const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor) {
char **data_buf = reinterpret_cast<char **>(data);
// version
uint32_t version = *(reinterpret_cast<uint32_t*>(*data_buf));
uint32_t version = *(reinterpret_cast<uint32_t *>(*data_buf));
*data_buf += sizeof(uint32_t);
// lod information
uint64_t lod_level = *(reinterpret_cast<uint64_t*>(*data_buf));
uint64_t lod_level = *(reinterpret_cast<uint64_t *>(*data_buf));
*data_buf += sizeof(uint64_t);
auto *lod = tensor->mutable_lod();
lod->resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *(reinterpret_cast<uint64_t*>(*data_buf));
uint64_t size = *(reinterpret_cast<uint64_t *>(*data_buf));
*data_buf += sizeof(uint64_t);
std::vector<size_t> tmp_dim(size / sizeof(size_t));
memcpy(tmp_dim.data(), *data_buf, size);
......@@ -125,10 +123,10 @@ void Executor<Dtype, P>::LoadMemory(
*data_buf += size;
}
// tensor version
uint32_t tensor_version = *(reinterpret_cast<uint32_t*>(*data_buf));
uint32_t tensor_version = *(reinterpret_cast<uint32_t *>(*data_buf));
*data_buf += sizeof(uint32_t);
// tensor desc size
int32_t tensor_desc_size = *(reinterpret_cast<int32_t*>(*data_buf));
int32_t tensor_desc_size = *(reinterpret_cast<int32_t *>(*data_buf));
*data_buf += sizeof(int32_t);
// skip tensor desc
*data_buf += tensor_desc_size;
......@@ -138,13 +136,13 @@ void Executor<Dtype, P>::LoadMemory(
// parse tensor from stream
switch (tensor_desc.DataType()) {
case framework::VARTYPE_TYPE_FP32:
LoadMemInternal<float>((void**)data_buf, tensor);
LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor);
break;
case framework::VARTYPE_TYPE_INT8:
LoadMemInternal<int8_t>((void**)data_buf, tensor);
LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
break;
case framework::VARTYPE_TYPE_INT32:
LoadMemInternal<int>((void**)data_buf, tensor);
LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
break;
default:
LOG(kLOG_ERROR) << "data type is not supported";
......@@ -164,8 +162,8 @@ void Executor<Dtype, P>::InitMemory() {
char *origin_data =
ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
char *data = origin_data;
LoadMemory((void**)&data, var_desc, tensor);
delete [] origin_data;
LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
delete[] origin_data;
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
varInputMemory(var_desc, var, tensor);
......@@ -180,7 +178,8 @@ void Executor<Dtype, P>::InitCombineMemory() {
char *origin_data = nullptr;
bool self_alloc = false;
if (program_.combined_params_buf && program_.combined_params_len) {
origin_data = (char *)program_.combined_params_buf;
origin_data = reinterpret_cast<char *>(
const_cast<uint8_t *>(program_.combined_params_buf));
} else {
self_alloc = true;
origin_data = ReadFileToBuff(program_.para_path);
......@@ -195,7 +194,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
LoadMemory((void**)&data, var_desc, tensor);
LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
varInputMemory(var_desc, var, tensor);
......@@ -204,7 +203,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
}
}
if (self_alloc) {
delete [] origin_data;
delete[] origin_data;
}
LOG(kLOG_INFO) << "init combine memory finish";
}
......@@ -231,9 +230,9 @@ bool Executor<Dtype, P>::varInputMemory(
break;
}
bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) ||
(type == framework::VARTYPE_TYPE_INT8) ||
(type == framework::VARTYPE_TYPE_INT32) ||
(type == framework::VARTYPE_TYPE_INT64);
(type == framework::VARTYPE_TYPE_INT8) ||
(type == framework::VARTYPE_TYPE_INT32) ||
(type == framework::VARTYPE_TYPE_INT64);
PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
return is_mute_match;
}
......@@ -402,12 +401,12 @@ void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
g_feed_value->GetMutable<framework::LoDTensor>();
feed_tensor->Resize(t.dims());
feed_tensor->ShareDataWith(t);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
InjectVariable(t, "feed");
};
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
......@@ -423,14 +422,14 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope));
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From_To(int start, int end) {
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
end = end < 0 ? (int)ops.size() : end;
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
......@@ -451,17 +450,17 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
}
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From(int start) {
Predict_From_To(start);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_To(int end) {
Predict_From_To(0, end);
};
}
#endif
template class Executor<CPU, Precision::FP32>;
......
......@@ -14,16 +14,16 @@ limitations under the License. */
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
#include "common/util.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/program/program.h"
#include "framework/tensor.h"
#include <memory>
#include <string>
#include <vector>
#include <map>
namespace paddle_mobile {
......@@ -36,8 +36,7 @@ class Executor {
// @param use_optimize bool whether use operator fusion to speed up or not
// @param loddable bool
Executor(const framework::Program<Dtype> program,
const bool use_optimize = true,
const bool loddable = false);
const bool use_optimize = true, const bool loddable = false);
// predict with tensor input
// @param t input tensor to do prediction
......@@ -68,8 +67,8 @@ class Executor {
framework::LoDTensor *tensor) const;
void InitMemory();
void InitCombineMemory();
void LoadMemory(void** data,
const std::shared_ptr<framework::VarDesc> var_desc,
void LoadMemory(void **data,
const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor);
framework::Program<Dtype> program_;
......
......@@ -30,4 +30,3 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(dequantize, ops::DequantizeOp);
#endif
......@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "feed_op.h"
#include "operators/feed_op.h"
namespace ops = paddle_mobile::operators;
......@@ -26,4 +25,3 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif
......@@ -44,7 +44,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
}
void RunImpl() const {
auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
auto input = reinterpret_cast<Tensor *>(param_.InputX());
fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param_.Out();
......@@ -53,7 +53,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
fpga::BypassArgs args;
args.convert_type = fpga::DATA_FP32_TO_FP16;
args.layout_type = fpga::LAYOUT_NO_CONVERT;
args.image.address = (void *)input_ptr;
args.image.address = input_ptr;
args.image.channels = input->dims()[1];
args.image.height = input->dims()[2];
args.image.width = input->dims()[3];
......@@ -78,4 +78,3 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
......@@ -12,10 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "fetch_op.h"
namespace paddle_mobile {
namespace operators {}
} // namespace paddle_mobile
#include "operators/fetch_op.h"
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
......@@ -27,4 +24,3 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif
......@@ -46,4 +46,3 @@ class FetchOp : public framework::OperatorBase<DeviceType> {
} // namespace operators
} // namespace paddle_mobile
......@@ -23,16 +23,16 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template<>
template <>
bool DequantizeKernel<CPU, float>::Init(DequantizeParam<CPU> *param) {
return true;
}
template<>
template <>
void DequantizeKernel<CPU, float>::Compute(
const DequantizeParam<CPU> &param) const {
const Tensor *input = param.input_;
Tensor *output = param.out_;
Tensor *output = param.out_;
float activation_scale = param.activation_scale_->data<float>()[0];
float weight_scale = param.weight_scale_;
const int32_t *x = input->data<const int32_t>();
......@@ -70,7 +70,7 @@ void DequantizeKernel<CPU, float>::Compute(
}
}
} // namespace paddle_mobile
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -28,14 +28,12 @@ float32_t vmaxvq_f32(float32x4_t r) {
}
#endif
int32x4_t vrnd_towards_zero(float32x4_t r) {
return vcvtq_s32_f32(r);
}
int32x4_t vrnd_towards_zero(float32x4_t r) { return vcvtq_s32_f32(r); }
int32x4_t vrnd_away_zero(float32x4_t r) {
float32x4_t plus = vdupq_n_f32(0.5);
float32x4_t plus = vdupq_n_f32(0.5);
float32x4_t minus = vdupq_n_f32(-0.5);
float32x4_t zero = vdupq_n_f32(0);
float32x4_t zero = vdupq_n_f32(0);
uint32x4_t more_than_zero = vcgtq_f32(r, zero);
float32x4_t temp = vbslq_f32(more_than_zero, plus, minus);
temp = vaddq_f32(r, temp);
......@@ -62,7 +60,7 @@ int32x4_t vrnd_to_even(float32x4_t r) {
}
}
return ret;
#else
#else
float32x4_t point5 = vdupq_n_f32(0.5);
int32x4_t one = vdupq_n_s32(1);
int32x4_t zero = vdupq_n_s32(0);
......@@ -83,9 +81,9 @@ int32x4_t vrnd_to_even(float32x4_t r) {
mask = vaddq_u32(more_than_zero, mask);
int32x4_t smask = vreinterpretq_s32_u32(mask);
smask = vsubq_s32(smask, one);
rnd = vaddq_s32(rnd, smask);
rnd = vaddq_s32(rnd, smask);
return rnd;
#endif
#endif
}
#endif
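
vrnd_to_even above is the NEON counterpart of round-half-to-even (banker's rounding): halfway cases go to the nearest even integer. A scalar sketch of the same rule, independent of NEON (RoundToEven is a hypothetical helper, not part of this patch):

#include <cmath>
#include <cstdint>

// Round half to even: 0.5 -> 0, 1.5 -> 2, -2.5 -> -2.
int32_t RoundToEven(float x) {
  const float floor_val = std::floor(x);
  const float diff = x - floor_val;
  if (diff > 0.5f) return static_cast<int32_t>(floor_val) + 1;
  if (diff < 0.5f) return static_cast<int32_t>(floor_val);
  // Exactly halfway: pick the even neighbor.
  const int32_t lower = static_cast<int32_t>(floor_val);
  return (lower % 2 == 0) ? lower : lower + 1;
}
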
......@@ -93,7 +91,7 @@ namespace paddle_mobile {
namespace operators {
static float find_abs_max(const Tensor *input) {
float max_abs = float(0);
float max_abs = 0.f;
const float *x = input->data<const float>();
size_t size = input->numel();
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
......@@ -130,8 +128,7 @@ static float find_abs_max(const Tensor *input) {
return max_abs;
}
static void quantize_round_to_even(const Tensor *input,
const float scale,
static void quantize_round_to_even(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->data<int8_t>();
......@@ -183,9 +180,8 @@ static void quantize_round_to_even(const Tensor *input,
}
}
static void quantize_round_to_zero(const Tensor *input,
const float scale,
Tensor *output) {
static void quantize_round_to_zero(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->data<int8_t>();
size_t size = input->numel();
......@@ -225,9 +221,8 @@ static void quantize_round_to_zero(const Tensor *input,
}
}
static void quantize_round_to_nearest(const Tensor *input,
const float scale,
Tensor *output) {
static void quantize_round_to_nearest(const Tensor *input, const float scale,
Tensor *output) {
const float *x = input->data<const float>();
int8_t *y = output->data<int8_t>();
size_t size = input->numel();
......@@ -267,15 +262,14 @@ static void quantize_round_to_nearest(const Tensor *input,
}
}
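
The quantize_round_to_* variants above differ only in how x * scale is rounded before being narrowed to int8. A minimal scalar sketch of the three rules; the clamp to [-127, 127] is an assumption for illustration and is not shown in this hunk:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantize one float to int8 under a given scale and rounding rule:
// 0 = toward zero, 1 = half away from zero, 2 = half to even.
int8_t QuantizeOne(float x, float scale, int round_mode) {
  const float scaled = x * scale;
  float rounded;
  switch (round_mode) {
    case 0:
      rounded = std::trunc(scaled);
      break;
    case 1:
      rounded = std::round(scaled);
      break;
    default:
      // std::nearbyint rounds half to even in the default FE_TONEAREST mode.
      rounded = std::nearbyint(scaled);
      break;
  }
  const float clamped = std::max(-127.0f, std::min(127.0f, rounded));
  return static_cast<int8_t>(clamped);
}
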
template<>
template <>
bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
return true;
}
template<>
template <>
void QuantizeKernel<CPU, float>::Compute(
const QuantizeParam<CPU> &param) const {
// TODO
float max_abs = 0.f;
const Tensor *input = param.input_;
Tensor *output = param.out_;
......@@ -306,7 +300,7 @@ void QuantizeKernel<CPU, float>::Compute(
}
}
} // namespace paddle_mobile
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/quantize_op.h"
#include <vector>
namespace paddle_mobile {
namespace operators {
......@@ -32,4 +33,3 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(quantize, ops::QuantizeOp);
#endif
This diff is collapsed.
......@@ -3,7 +3,7 @@
TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v ".pb.cpp" | grep -v ".pb.h"); do
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}' | grep -v ".pb.cpp" | grep -v ".pb.h" | grep -v "protobuf-c.*"); do
cpplint $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
done
......