Commit e25940ee authored by liuruilong

Merge branch 'superresoltion' of https://github.com/codeWorm2015/paddle-mobile into superresoltion

@@ -238,7 +238,7 @@ void Executor<Device, T>::InitCombineMemory() {
 template <typename Device, typename T>
 void Executor<Device, T>::InitNoPersistableMemory(
-    const LoDTensor &input_tensor) {
+    const Tensor &input_tensor) {
   for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
@@ -336,9 +336,9 @@ void Executor<Device, T>::SetInput(const Tensor &input,
   auto *target_tensor = target_var->template GetMutable<LoDTensor>();
   if (config_.load_when_predict) {
-    if (target_tensor->IsInitialized() &&
-        target_tensor->dims() != input.dims()) {
-      InitNoPersistableMemory(*target_tensor);
+    if (input_dim_last_ != input.dims()) {
+      InitNoPersistableMemory(input);
+      input_dim_last_ = input.dims();
     }
   }
@@ -355,9 +355,9 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
   auto *target_tensor = target_var->template GetMutable<LoDTensor>();
   if (config_.load_when_predict) {
-    if (target_tensor->IsInitialized() &&
-        target_tensor->dims() != input.dims()) {
-      InitNoPersistableMemory(*target_tensor);
+    if (input_dim_last_ != input.dims()) {
+      InitNoPersistableMemory(*target_tensor);
+      input_dim_last_ = input.dims();
     }
   }
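The two SetInput hunks above share one idea: instead of probing target_tensor (whose state depends on lazy allocation), the executor caches the dims of the previous input in input_dim_last_ and re-runs InitNoPersistableMemory only when the input shape actually changes. The declaration of input_dim_last_ is not shown in this diff; presumably it is a DDim member of Executor. A minimal standalone sketch of the caching pattern, with a stand-in Shape type and a hypothetical Reallocate() in place of InitNoPersistableMemory():

```cpp
#include <cstdint>
#include <vector>

// Stand-in for framework::DDim: a comparable input shape.
using Shape = std::vector<int64_t>;

class ShapeCachingBuffer {
 public:
  // Redo buffer planning only when the input shape changes,
  // mirroring the input_dim_last_ check added in SetInput().
  void Prepare(const Shape &input_dims) {
    if (input_dim_last_ != input_dims) {
      Reallocate(input_dims);        // expensive path, now taken rarely
      input_dim_last_ = input_dims;  // remember the shape we planned for
    }
  }

 private:
  void Reallocate(const Shape &dims) {
    std::size_t numel = 1;
    for (int64_t d : dims) numel *= static_cast<std::size_t>(d);
    buffer_.resize(numel);  // hypothetical stand-in for
  }                         // InitNoPersistableMemory()

  Shape input_dim_last_;        // empty at first, so the very first
  std::vector<float> buffer_;   // Prepare() always reallocates
};
```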
......
@@ -65,7 +65,7 @@ class Executor {
                   LoDTensor *tensor) const;
   void InitMemory();
   void InitCombineMemory();
-  void InitNoPersistableMemory(const LoDTensor &input_tensor);
+  void InitNoPersistableMemory(const Tensor &input_tensor);
   void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc,
                   LoDTensor *tensor);
 #ifdef PADDLE_MOBILE_CL
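Widening the declaration from const LoDTensor & to const Tensor & lets both SetInput overloads call InitNoPersistableMemory directly: in Paddle's framework LoDTensor derives from Tensor, so a LoDTensor argument binds to the base-class reference. A toy illustration of that derived-to-base binding (the class bodies are stand-ins, not the framework headers):

```cpp
#include <iostream>

struct Tensor {                // stand-in for framework::Tensor
  virtual ~Tensor() = default;
};
struct LoDTensor : Tensor {};  // stand-in: LoDTensor extends Tensor

// Widened signature: accepts a Tensor or anything derived from it.
void InitNoPersistableMemory(const Tensor &input_tensor) {
  std::cout << "(re)planning non-persistable buffers\n";
}

int main() {
  Tensor plain;
  LoDTensor with_lod;
  InitNoPersistableMemory(plain);     // from SetInput(const Tensor &)
  InitNoPersistableMemory(with_lod);  // from SetInput(const LoDTensor &):
}                                     // implicit derived-to-base binding
```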
......
@@ -253,34 +253,29 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
                           framework::Tensor *output, framework::Tensor *bias,
                           bool if_bias, bool if_relu) {
 #if __ARM_NEON
-  const float *input_data = input->data<float>();
-  const float *filter_data = filter->data<float>();
-  float *output_data = output->mutable_data<float>();
-  const float *bias_data;
-  if (if_bias) {
-    bias_data = bias->data<float>();
-  }
-  const int h = static_cast<int>(input->dims()[2]);
-  const int w = static_cast<int>(input->dims()[3]);
-  // const int l = h;
+  const float *bias_data = bias->data<float>();
   const int batch_size = static_cast<int>(input->dims()[0]);
   const int c = static_cast<int>(input->dims()[1]);
+  const int h = static_cast<int>(input->dims()[2]);
+  const int w = static_cast<int>(input->dims()[3]);
   const int hxw = h * w;
-  float32x4_t vbias = vdupq_n_f32(0.0);
+  // const int l = h;
   // leftTop, rightTop, leftBottom, rightBottom
-  int lt = 0;
-  int rt = w - 1;
-  int lb = (h - 1) * w;
-  int rb = h * w - 1;
+  const int lt = 0;
+  const int rt = w - 1;
+  const int lb = (h - 1) * w;
+  const int rb = h * w - 1;
   float32x4_t zero = vdupq_n_f32(0.0);
   for (int b = 0; b < batch_size; ++b) {
-    const float *filter_data_tmp = filter_data;
+#pragma omp parallel for
     for (int j = 0; j < c; ++j) {
+      const float *filter_data_tmp = filter->data<float>() + j * 9;
+      const float *input_data = input->data<float>() + j * hxw;
+      float *output_data = output->mutable_data<float>() + j * hxw;
+      float32x4_t vbias;
       if (if_bias) {
         vbias = vdupq_n_f32(bias_data[j]);
       }
@@ -552,9 +547,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
           }
         }
       }
-      output_data += hxw;
-      input_data += hxw;
-      filter_data_tmp += 9;
     }
   }
 #endif
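The restructuring in this file removes the running pointers that were advanced at the end of each channel iteration (output_data += hxw; input_data += hxw; filter_data_tmp += 9;) and instead derives each channel's base addresses from the loop index j. With no loop-carried pointer state, the channel loop becomes safe to parallelize, which is exactly what the new #pragma omp parallel for exploits. A minimal sketch of the same transformation on plain float arrays (the function name and the trivial center-tap inner kernel are illustrative only, not the library's 3x3 NEON kernel):

```cpp
// Depthwise-style op skeleton: channel j reads input + j*hxw, its own
// 3x3 filter at filter + j*9, and writes output + j*hxw.
// Build with -fopenmp to enable the pragma; without it the loop is serial.
void DepthwiseChannels(const float *input, const float *filter,
                       float *output, int c, int h, int w) {
  const int hxw = h * w;
#pragma omp parallel for  // safe: iterations share no running pointers
  for (int j = 0; j < c; ++j) {
    // Per-channel bases computed from j, not advanced across iterations.
    const float *in = input + j * hxw;
    const float *filt = filter + j * 9;
    float *out = output + j * hxw;
    for (int i = 0; i < hxw; ++i) {
      out[i] = in[i] * filt[4];  // placeholder for the real 3x3 kernel
    }
  }
}
```

Because in, filt, and out are recomputed purely from j, OpenMP can hand channels to threads in any order; the old version's pointer increments created a dependency between consecutive iterations that made this impossible.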
......