Commit 07afb230 authored by xiebaiyuan

fssd run pass.

Parent eb8f2740
@@ -59,6 +59,7 @@ template <typename Dtype>
 void OperatorBase<Dtype>::Run() const {
   RunImpl();
 #ifdef PADDLE_MOBILE_DEBUG
+  DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
   for (const auto key : input_keys) {
     Tensor *input = GetVarValue<framework::LoDTensor>(key, inputs_, *scope_);
......
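The new DLOG banner prints the op type ahead of the per-op tensor dump, so interleaved debug output can be attributed to the op that produced it. Below is a minimal sketch of this kind of compile-time-gated logging; `DEMO_DEBUG` and `DEMO_DLOG` are hypothetical stand-ins for `PADDLE_MOBILE_DEBUG` and `DLOG`, whose definitions are not part of this diff.

```cpp
// Minimal sketch of compile-time-gated debug logging (hypothetical names).
#include <iostream>
#include <string>

#define DEMO_DEBUG  // comment this out to compile all logging away

#ifdef DEMO_DEBUG
#define DEMO_DLOG std::cerr
#else
// A null stream: everything streamed into it is discarded.
struct NullStream {
  template <typename T>
  NullStream &operator<<(const T &) {
    return *this;
  }
};
static NullStream demo_null_stream;
#define DEMO_DLOG demo_null_stream
#endif

int main() {
  std::string type = "bilinear_interp";
  // Mirrors the banner added above: mark which op the following
  // tensor dumps belong to.
  DEMO_DLOG << "-------------" << type << "----------------------------\n";
  return 0;
}
```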
@@ -73,6 +73,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
 #ifdef PADDLE_EXECUTOR_MULTITHREAD
   depManager.resize(blocks.size());
 #endif
+  DLOG << "executor in loadable mode: " << loddable_;
   for (int i = 0; i < blocks.size(); ++i) {
     std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
     std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
@@ -82,7 +83,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
           op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
           program_.scope);
-      DLOG << "executor in loadable mode: " << loddable_;
       // use pre_infershape to pre-resize, but if you use an LoD-mode tensor
       // you need to resize at runtime
       if (!loddable_) {
......
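The mode banner now logs once, before the op loop, rather than once per created op. The comment above describes the pre-infershape strategy: when the program is not in LoD mode, output shapes are static and can be inferred once at construction; with LoD tensors the dims depend on the actual input, so InferShape must run per forward pass. A sketch of that split, using hypothetical names:

```cpp
// Sketch of the pre-infershape split, under assumed names: infer static
// shapes once at construction, defer LoD-dependent shapes to run time.
struct DemoOp {
  void InferShape() const { /* resize outputs from input dims */ }
  void Run() const { /* run the kernel */ }
};

void ConstructOps(DemoOp &op, bool loddable) {
  if (!loddable) {
    op.InferShape();  // shapes are static: resize outputs once, up front
  }
}

void Forward(DemoOp &op, bool loddable) {
  if (loddable) {
    op.InferShape();  // LoD tensors: dims depend on this batch, resize now
  }
  op.Run();
}
```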
@@ -19,22 +19,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
-static std::vector<int32_t> GetOutputShape(const int axis,
-                                           const framework::DDim &in_dims) {
-  int64_t outer = 1, inner = 1;
-  for (int i = 0; i < in_dims.size(); ++i) {
-    if (i < axis) {
-      outer *= in_dims[i];
-    } else {
-      inner *= in_dims[i];
-    }
-  }
-  std::vector<int32_t> out_shape(2);
-  out_shape[0] = static_cast<int>(outer);
-  out_shape[1] = static_cast<int>(inner);
-  return out_shape;
-}
 template <typename DeviceType, typename T>
 void FlattenOp<DeviceType, T>::InferShape() const {
   PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
......
@@ -24,7 +24,21 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
+inline std::vector<int32_t> GetOutputShape(const int axis,
+                                           const framework::DDim &in_dims) {
+  int64_t outer = 1, inner = 1;
+  for (int i = 0; i < in_dims.size(); ++i) {
+    if (i < axis) {
+      outer *= in_dims[i];
+    } else {
+      inner *= in_dims[i];
+    }
+  }
+  std::vector<int32_t> out_shape(2);
+  out_shape[0] = static_cast<int>(outer);
+  out_shape[1] = static_cast<int>(inner);
+  return out_shape;
+}
 using paddle_mobile::framework::Tensor;
 template <typename DeviceType, typename T>
......
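GetOutputShape moves from flatten_op.cpp into the header so the rewritten flatten kernel (later in this diff) can reuse it. It collapses every dim before `axis` into `outer` and the remaining dims into `inner`. A standalone, runnable copy of the same logic, with `std::vector<int64_t>` standing in for `framework::DDim`:

```cpp
// Standalone illustration of the GetOutputShape logic above.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int32_t> GetOutputShapeDemo(int axis,
                                        const std::vector<int64_t> &in_dims) {
  int64_t outer = 1, inner = 1;
  for (size_t i = 0; i < in_dims.size(); ++i) {
    if (static_cast<int>(i) < axis) {
      outer *= in_dims[i];  // dims before `axis` are flattened together
    } else {
      inner *= in_dims[i];  // dims from `axis` on form the second output dim
    }
  }
  return {static_cast<int32_t>(outer), static_cast<int32_t>(inner)};
}

int main() {
  // axis = 2 on a {2, 3, 4, 5} tensor: outer = 2*3 = 6, inner = 4*5 = 20.
  auto shape = GetOutputShapeDemo(2, {2, 3, 4, 5});
  std::cout << shape[0] << " x " << shape[1] << "\n";  // prints "6 x 20"
  return 0;
}
```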
@@ -23,69 +23,66 @@ namespace operators {
 template <typename P>
 void BilinearInterpCompute(const BilinearInterpParam<CPU>& param) {
   auto out_dims = param.Out()->dims();
   auto* input = param.InputX()->data<float>();
   auto out_size_t = param.InputOutPutSize();
   int out_h = param.OutH();
   int out_w = param.OutW();
   if (out_size_t != nullptr) {
     auto out_size_data = out_size_t->data<int>();
     out_h = out_size_data[0];
     out_w = out_size_data[1];
   }
   auto* output = param.Out()->mutable_data<float>(
       {out_dims[0], out_dims[1], out_h, out_w});
   auto batch_size = param.InputX()->dims()[0];
   auto channels = param.InputX()->dims()[1];
   auto in_h = param.InputX()->dims()[2];
   auto in_w = param.InputX()->dims()[3];
   auto in_hw = in_h * in_w;
   auto out_hw = out_h * out_w;
   auto in_chw = channels * in_hw;
   auto out_chw = channels * out_hw;
   float ratio_h =
       (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
   float ratio_w =
       (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
   if (in_h == out_h && in_w == out_w) {
     memcpy(output, input, param.InputX()->numel() * sizeof(float));
   } else {
     for (int k = 0; k < batch_size; ++k) {  // loop for batches
       for (int i = 0; i < out_h; ++i) {  // loop for images
         int h = ratio_h * i;
         int hid = (h < in_h - 1) ? 1 : 0;
         float h1lambda = ratio_h * i - h;
         float h2lambda = 1.f - h1lambda;
         for (int j = 0; j < out_w; ++j) {
           int w = ratio_w * j;
           int wid = (w < in_w - 1) ? 1 : 0;
           float w1lambda = ratio_w * j - w;
           float w2lambda = 1.f - w1lambda;
           // calculate the four positions for bilinear interpolation
           const float* in_pos = &input[k * in_chw + h * in_w + w];
           float* out_pos = &output[k * out_chw + i * out_w + j];
           for (int c = 0; c < channels; ++c) {  // loop for channels
             // bilinear interpolation
             out_pos[0] = static_cast<float>(
                 h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) +
                 h1lambda * (w2lambda * in_pos[hid * in_w] +
                             w1lambda * in_pos[hid * in_w + wid]));
             in_pos += in_hw;
             out_pos += out_hw;
           }
         }
       }
     }
   }
 }
 }  // namespace operators
......
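This hunk appears to touch only bracing and indentation (the hunk shrinks by three lines); the interpolation math is unchanged. Each output pixel (i, j) is mapped back to a fractional source coordinate via `ratio_h` and `ratio_w`, and the four neighbouring inputs are blended with the lambda weights. A one-pixel walk-through of the row weights, as a standalone sketch:

```cpp
// One-pixel walk-through of the row weights used in the kernel above.
#include <iostream>

int main() {
  const int in_h = 4, out_h = 7;  // example sizes
  // Same ratio as the kernel: maps output row i to a fractional input row.
  const float ratio_h =
      (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;

  const int i = 3;                         // an output row
  const int h = ratio_h * i;               // truncation: top source row
  const int hid = (h < in_h - 1) ? 1 : 0;  // 0 at the border, else 1 row down
  const float h1lambda = ratio_h * i - h;  // weight of the bottom row
  const float h2lambda = 1.f - h1lambda;   // weight of the top row

  // ratio_h = 3/6 = 0.5, so i = 3 maps to row 1.5: blend rows 1 and 2
  // with equal weight 0.5 each.
  std::cout << "h=" << h << " hid=" << hid << " h1lambda=" << h1lambda
            << " h2lambda=" << h2lambda << "\n";
  return 0;
}
```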
@@ -15,7 +15,9 @@ limitations under the License. */
 #ifdef FLATTEN_OP
 #pragma once
+#include <operators/kernel/reshape_kernel.h>
 #include <vector>
+#include "operators/flatten_op.h"
 #include "operators/op_param.h"
 namespace paddle_mobile {
@@ -23,9 +25,18 @@ namespace operators {
 template <typename P>
 void FlattenCompute(const FlattenParam<CPU> &param) {
-  param.Out()->mutable_data<float>();
-  framework::TensorCopy(*param.InputX(), param.Out());
-  param.Out()->Resize(param.Out()->dims());
+  const auto *input_x = param.InputX();
+  const auto axis = param.Axis();
+  const auto &input_x_dims = input_x->dims();
+  auto *out = param.Out();
+  const auto &out_shape_v = GetOutputShape(axis, input_x_dims);
+  const framework::DDim &out_dim = ValidateShape(out_shape_v, input_x_dims);
+  out->Resize(out_dim);
+  out->mutable_data<float>();
+  framework::TensorCopy(*input_x, out);
+  out->Resize(out_dim);
 }
 }  // namespace operators
......
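The kernel now computes the flattened dims itself instead of keeping the input's dims; `ValidateShape` presumably comes from the newly included reshape_kernel.h. Note that the output is resized to the flattened shape both before allocation and again after the copy. One plausible reading, and it is only an assumption, is that `TensorCopy` also copies the source dims into the destination, so the final `Resize` restores the flattened shape. A toy model of that behaviour, with hypothetical `DemoTensor` and `TensorCopyDemo`:

```cpp
// Toy model of the double Resize, assuming TensorCopy also copies the
// source dims into the destination (the assumption behind this sketch).
#include <utility>
#include <vector>

struct DemoTensor {
  std::vector<int> dims;
  std::vector<float> data;
  void Resize(std::vector<int> d) { dims = std::move(d); }
};

void TensorCopyDemo(const DemoTensor &src, DemoTensor *dst) {
  dst->data = src.data;
  dst->dims = src.dims;  // dims travel with the data -- the assumption
}

void FlattenDemo(const DemoTensor &x, DemoTensor *out) {
  const std::vector<int> out_dim = {6, 20};  // e.g. GetOutputShape on {2,3,4,5}
  out->Resize(out_dim);    // first Resize: set the shape before allocation
  TensorCopyDemo(x, out);  // would clobber dims with x's {2, 3, 4, 5}
  out->Resize(out_dim);    // second Resize: restore the flattened shape
}
```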
@@ -23,7 +23,7 @@ namespace operators {
 template <typename P>
 void ShapeCompute(const ShapeParam<CPU>& param) {
-  auto* in_t = param.InputX();
+  auto* in_t = param.Input();
   auto* out_t = param.Out();
   auto out_data = out_t->mutable_data<int32_t>();
   auto in_dims = in_t->dims();
......
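With the accessor renamed from `InputX()` to `Input()`, `ShapeCompute` goes on to write each input dimension into the int32 output buffer. A standalone sketch of that copy:

```cpp
// Standalone sketch of what ShapeCompute does with in_dims: write each
// dimension into the int32 output buffer.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> in_dims = {1, 3, 224, 224};  // e.g. an NCHW input
  std::vector<int32_t> out_data(in_dims.size());
  for (size_t i = 0; i < in_dims.size(); ++i) {
    out_data[i] = static_cast<int32_t>(in_dims[i]);
  }
  for (int32_t d : out_data) std::cout << d << " ";  // prints "1 3 224 224"
  std::cout << "\n";
  return 0;
}
```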
@@ -2366,7 +2366,7 @@ class ShapeParam : public OpParam {
     input_ = InputFrom<GType>(inputs, scope);
     out_ = OutFrom<GType>(outputs, scope);
   }
-  const RType *InputX() const { return input_; }
+  const RType *Input() const { return input_; }
   RType *Out() const { return out_; }
  private:
......
@@ -20,11 +20,11 @@ namespace paddle_mobile {
 namespace operators {
 template <typename DeviceType, typename T>
 void ShapeOp<DeviceType, T>::InferShape() const {
-  PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
+  PADDLE_MOBILE_ENFORCE(this->param_.Input() != nullptr,
                         "Input (Input) of get_shape op should not be null.");
   PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
                         "Output (Out) of get_shape op should not be null.");
-  this->param_.Out()->Resize(this->param_.InputX()->dims());
+  this->param_.Out()->Resize({this->param_.Input()->dims().size()});
 }
 }  // namespace operators
......
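The second change here is the substantive fix: the output of a shape op is a 1-D tensor whose length equals the input's rank, not a tensor shaped like the input. With the fix, a `{1, 3, 300, 300}` input yields an output of shape `{4}`. In miniature:

```cpp
// The corrected InferShape in miniature: the output of a shape op is a
// 1-D tensor of length rank(input), not a tensor with the input's dims.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> in_dims = {1, 3, 300, 300};  // rank-4 input
  // Before the fix: out was resized to {1, 3, 300, 300} (wrong).
  std::vector<int64_t> out_dims = {static_cast<int64_t>(in_dims.size())};
  std::cout << "out dims: {" << out_dims[0] << "}\n";  // prints "{4}"
  return 0;
}
```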
@@ -64,7 +64,7 @@ void SplitOp<DeviceType, T>::InferShape() const {
   PADDLE_MOBILE_ENFORCE(outs_dims.size() == outs.size(),
                         "length==dims.size() must be true!");
   for (int j = 0; j < outs_dims.size(); ++j) {
     outs[j]->Resize(outs_dims[j]);
   }
   // todo lod impl
......
@@ -23,7 +23,7 @@ int main() {
   // ../../../test/models/mobilenet
   auto time1 = time();
   if (paddle_mobile.Load(std::string(g_fluid_fssd_new) + "/model",
-                         std::string(g_fluid_fssd_new) + "/params", false)) {
+                         std::string(g_fluid_fssd_new) + "/params", true)) {
     auto time2 = time();
     std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
......
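The flipped third argument to `Load` is presumably what makes the fssd model run; the `Load` signature is not shown in this diff, so its exact semantics are not visible here. The load-cost line measures the span between the two timestamps. A minimal stand-in for the harness's `time()`/`time_diff()` helpers (their real definitions are not shown in this diff), using `std::chrono`:

```cpp
// Hypothetical stand-in for the test harness's time()/time_diff() helpers.
#include <chrono>
#include <iostream>

using Clock = std::chrono::steady_clock;

static Clock::time_point now() { return Clock::now(); }

static double ms_between(Clock::time_point a, Clock::time_point b) {
  return std::chrono::duration<double, std::milli>(b - a).count();
}

int main() {
  auto time1 = now();
  // ... load the model here ...
  auto time2 = now();
  // Diff the two distinct timestamps -- diffing time1 against itself
  // would always print 0 ms.
  std::cout << "load cost: " << ms_between(time1, time2) << " ms\n";
  return 0;
}
```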