Commit f9d0c03d authored by liuruilong

Merge branch 'superresoltion' of https://github.com/codeWorm2015/paddle-mobile into superresoltion

@@ -238,7 +238,7 @@ void Executor<Device, T>::InitCombineMemory() {
 template <typename Device, typename T>
 void Executor<Device, T>::InitNoPersistableMemory(
-    const LoDTensor &input_tensor) {
+    const Tensor &input_tensor) {
   for (const auto &block : program_desc_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
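
For context on this hunk: InitNoPersistableMemory now takes a plain Tensor reference instead of a LoDTensor reference. Assuming the usual framework relationship in which LoDTensor extends Tensor, the widened parameter accepts either type, so both SetInput overloads below can call it directly. A minimal, self-contained sketch of that idea (the types and main() below are illustrative stand-ins, not the real paddle-mobile classes):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-ins only; not the actual framework::Tensor/LoDTensor.
struct Tensor {
  std::vector<int64_t> dims;
};
struct LoDTensor : Tensor {
  std::vector<std::vector<std::size_t>> lod;  // extra level-of-detail info
};

// Taking the base type lets callers pass either a Tensor or a LoDTensor.
void InitNoPersistableMemory(const Tensor &input_tensor) {
  std::cout << "rank = " << input_tensor.dims.size() << "\n";
}

int main() {
  Tensor t{{1, 3, 224, 224}};
  LoDTensor lt;
  lt.dims = {1, 3, 448, 448};
  InitNoPersistableMemory(t);   // fine
  InitNoPersistableMemory(lt);  // fine: derived-to-base reference conversion
}
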
@@ -336,9 +336,9 @@ void Executor<Device, T>::SetInput(const Tensor &input,
   auto *target_tensor = target_var->template GetMutable<LoDTensor>();
   if (config_.load_when_predict) {
-    if (target_tensor->IsInitialized() &&
-        target_tensor->dims() != input.dims()) {
-      InitNoPersistableMemory(*target_tensor);
+    if (input_dim_last_ != input.dims()) {
+      InitNoPersistableMemory(input);
+      input_dim_last_ = input.dims();
     }
   }
@@ -355,9 +355,9 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
   auto *target_tensor = target_var->template GetMutable<LoDTensor>();
   if (config_.load_when_predict) {
-    if (target_tensor->IsInitialized() &&
-        target_tensor->dims() != input.dims()) {
+    if (input_dim_last_ != input.dims()) {
       InitNoPersistableMemory(*target_tensor);
+      input_dim_last_ = input.dims();
     }
   }
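
Both SetInput hunks replace the old guard, which re-initialized non-persistable memory whenever the target tensor was already initialized with dims different from the input, with a cached input_dim_last_ member: memory is re-initialized only when the incoming shape actually changes, and the cache is updated afterwards. A minimal sketch of that caching pattern under simplified, hypothetical types (not the real Executor, DDim, or config classes):

#include <cstdint>
#include <vector>

using DimVector = std::vector<int64_t>;  // stand-in for the framework's DDim

class ExecutorSketch {
 public:
  explicit ExecutorSketch(bool load_when_predict)
      : load_when_predict_(load_when_predict) {}

  void SetInput(const DimVector &input_dims) {
    if (load_when_predict_) {
      // Re-allocate intermediate buffers only when the input shape changed;
      // repeated predictions with the same shape reuse the existing memory.
      if (input_dim_last_ != input_dims) {
        InitNoPersistableMemory(input_dims);
        input_dim_last_ = input_dims;
      }
    }
  }

 private:
  void InitNoPersistableMemory(const DimVector & /*dims*/) {
    // (Re)size non-persistable tensors to match the new shape (omitted here).
  }

  bool load_when_predict_ = false;
  DimVector input_dim_last_;  // last input shape seen by SetInput
};
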
@@ -65,7 +65,7 @@ class Executor {
                   LoDTensor *tensor) const;
   void InitMemory();
   void InitCombineMemory();
-  void InitNoPersistableMemory(const LoDTensor &input_tensor);
+  void InitNoPersistableMemory(const Tensor &input_tensor);
   void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc,
                   LoDTensor *tensor);
 #ifdef PADDLE_MOBILE_CL
@@ -253,34 +253,29 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
                           framework::Tensor *output, framework::Tensor *bias,
                           bool if_bias, bool if_relu) {
 #if __ARM_NEON
-  const float *input_data = input->data<float>();
-  const float *filter_data = filter->data<float>();
-  float *output_data = output->mutable_data<float>();
-  const float *bias_data;
-  if (if_bias) {
-    bias_data = bias->data<float>();
-  }
-  const int h = static_cast<int>(input->dims()[2]);
-  const int w = static_cast<int>(input->dims()[3]);
-  // const int l = h;
+  const float *bias_data = bias->data<float>();
   const int batch_size = static_cast<int>(input->dims()[0]);
   const int c = static_cast<int>(input->dims()[1]);
+  const int h = static_cast<int>(input->dims()[2]);
+  const int w = static_cast<int>(input->dims()[3]);
   const int hxw = h * w;
-  float32x4_t vbias = vdupq_n_f32(0.0);
+  // const int l = h;
   // leftTop, rightTop, leftBottom, rightBottom
-  int lt = 0;
-  int rt = w - 1;
-  int lb = (h - 1) * w;
-  int rb = h * w - 1;
+  const int lt = 0;
+  const int rt = w - 1;
+  const int lb = (h - 1) * w;
+  const int rb = h * w - 1;
   float32x4_t zero = vdupq_n_f32(0.0);
   for (int b = 0; b < batch_size; ++b) {
-    const float *filter_data_tmp = filter_data;
+#pragma omp parallel for
     for (int j = 0; j < c; ++j) {
+      const float *filter_data_tmp = filter->data<float>() + j * 9;
+      const float *input_data = input->data<float>() + j * hxw;
+      float *output_data = output->mutable_data<float>() + j * hxw;
+      float32x4_t vbias;
       if (if_bias) {
         vbias = vdupq_n_f32(bias_data[j]);
       }
@@ -552,9 +547,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
          }
        }
      }
-      output_data += hxw;
-      input_data += hxw;
-      filter_data_tmp += 9;
     }
   }
 #endif
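
The depthwise-conv changes above restructure the per-channel loop so each iteration derives its own filter/input/output pointers from the channel index j, instead of advancing shared pointers at the end of every iteration; that independence is what makes the added #pragma omp parallel for safe. A minimal sketch of the same per-channel indexing pattern (a generic per-channel scale, not the actual 3x3 NEON kernel; compile with OpenMP enabled for the pragma to take effect):

#include <cstddef>

void PerChannelScale(const float *input, const float *scale, float *output,
                     int channels, int h, int w) {
  const int hxw = h * w;
#pragma omp parallel for
  for (int j = 0; j < channels; ++j) {
    // Each iteration computes its own offsets from j, so no pointer state is
    // shared between iterations and the loop can run in parallel.
    const float *in = input + static_cast<std::ptrdiff_t>(j) * hxw;
    float *out = output + static_cast<std::ptrdiff_t>(j) * hxw;
    for (int i = 0; i < hxw; ++i) {
      out[i] = in[i] * scale[j];
    }
  }
}
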