Commit 7a3de8ab authored by xiebaiyuan, committed by GitHub

Merge pull request #940 from xiebaiyuan/develop

add fssd ops impls #924
......@@ -59,6 +59,7 @@ template <typename Dtype>
void OperatorBase<Dtype>::Run() const {
RunImpl();
#ifdef PADDLE_MOBILE_DEBUG
DLOG << "-------------" << type_ << "----------------------------";
vector<string> input_keys = GetInputKeys();
for (const auto &key : input_keys) {
Tensor *input = GetVarValue<framework::LoDTensor>(key, inputs_, *scope_);
......
......@@ -73,6 +73,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
#ifdef PADDLE_EXECUTOR_MULTITHREAD
depManager.resize(blocks.size());
#endif
DLOG << "executer in loaddable mode: " << loddable_;
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
......@@ -82,7 +83,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
auto op_base = framework::OpRegistry<Dtype>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
program_.scope);
DLOG << "executer in loaddable mode: " << loddable_;
// Use pre-InferShape to resize ahead of time, but a LoD-mode tensor
// must instead be resized at runtime.
if (!loddable_) {
......@@ -176,6 +176,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
type_size = 8;
break;
case framework::VARTYPE_TYPE_INT32:
memory = tensor->mutable_data<int32_t>();
type_size = 4;
break;
case framework::VARTYPE_TYPE_INT64:
......@@ -308,6 +309,9 @@ bool Executor<Dtype, P>::varInputMemory(
}
case framework::VARTYPE_TYPE_INT32: {
tensor = var->template GetMutable<framework::LoDTensor>();
tensor->template mutable_data<int32_t>();
is_mute_match = true;
break;
}
......
......@@ -20,8 +20,25 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void BilinearOp<DeviceType, T>::InferShape() const {
// todo check
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
"Input(X) of BilinearInterOp should not be null.");
PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
"Output(Out) of BilinearInterOp should not be null.");
auto dim_x = this->param_.InputX()->dims(); // NCHW format
int out_h = this->param_.OutH();
int out_w = this->param_.OutW();
PADDLE_MOBILE_ENFORCE(dim_x.size() == 4, "X's dimension must be 4");
if (this->param_.InputOutPutSize() != nullptr) {
auto out_size_dim = this->param_.InputOutPutSize()->dims();
PADDLE_MOBILE_ENFORCE(out_size_dim.size() == 1,
"OutSize's dimension size must be 1");
PADDLE_MOBILE_ENFORCE(out_size_dim[0] == 2, "OutSize's dim[0] must be 2");
}
std::vector<int64_t> dim_out({dim_x[0], dim_x[1], out_h, out_w});
this->param_.Out()->Resize(framework::make_ddim(dim_out));
}
} // namespace operators
......
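A worked example of the shape inference above (illustrative values, not part of the patch):

// X is NCHW {1, 3, 32, 32}; attrs out_h = 64, out_w = 64:
//   dim_out = {N, C, out_h, out_w} = {1, 3, 64, 64}
// If the optional OutSize input is present, its two int values override
// out_h/out_w at kernel time (see BilinearInterpCompute below).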
......@@ -18,10 +18,32 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void FlattenOp<DeviceType, T>::InferShape() const {
// todo check
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
"Input (X) of Flatten op should not be null.");
PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
"Output (Out) of Flatten op should not be null.");
auto &axis = this->param_.Axis();
PADDLE_MOBILE_ENFORCE(axis >= 0,
"The axis should be greater than or equal to 0.");
auto &in_dims = this->param_.InputX()->dims();
PADDLE_MOBILE_ENFORCE(
axis <= in_dims.size(),
"The axis should be less than or equal to input tensor's rank.");
const auto &out_dims = GetOutputShape(axis, in_dims);
this->param_.Out()->Resize(framework::make_ddim(
std::vector<int64_t>(out_dims.begin(), out_dims.end())));
// TODO: support LoDTensor
// if (in_dims[0] == out_dims[0]) {
// // Only pass LoD when the first dimension of output and Input(X)
// // are the same.
// ctx->ShareLoD("X", "Out");
// }
}
} // namespace operators
......
......@@ -24,7 +24,21 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
inline std::vector<int32_t> GetOutputShape(const int axis,
const framework::DDim &in_dims) {
int64_t outer = 1, inner = 1;
for (int i = 0; i < in_dims.size(); ++i) {
if (i < axis) {
outer *= in_dims[i];
} else {
inner *= in_dims[i];
}
}
std::vector<int32_t> out_shape(2);
out_shape[0] = static_cast<int>(outer);
out_shape[1] = static_cast<int>(inner);
return out_shape;
}
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
......
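A minimal usage sketch of GetOutputShape (the dims are illustrative; framework::make_ddim is the same helper this patch already uses):

inline void flatten_shape_example() {
std::vector<int64_t> dims_v({2, 3, 4, 5});
framework::DDim in_dims = framework::make_ddim(dims_v);
std::vector<int32_t> shape = GetOutputShape(/*axis=*/2, in_dims);
// outer = 2 * 3 = 6, inner = 4 * 5 = 20, so shape == {6, 20}
}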
......@@ -22,7 +22,68 @@ namespace paddle_mobile {
namespace operators {
template <typename P>
void BilinearInterpCompute(const BilinearInterpParam<CPU>& param) {}
void BilinearInterpCompute(const BilinearInterpParam<CPU>& param) {
auto out_dims = param.Out()->dims();
auto* input = param.InputX()->data<float>();
auto out_size_t = param.InputOutPutSize();
int out_h = param.OutH();
int out_w = param.OutW();
if (out_size_t != nullptr) {
auto out_size_data = out_size_t->data<int>();
out_h = out_size_data[0];
out_w = out_size_data[1];
}
auto* output = param.Out()->mutable_data<float>(
{out_dims[0], out_dims[1], out_h, out_w});
auto batch_size = param.InputX()->dims()[0];
auto channels = param.InputX()->dims()[1];
auto in_h = param.InputX()->dims()[2];
auto in_w = param.InputX()->dims()[3];
auto in_hw = in_h * in_w;
auto out_hw = out_h * out_w;
auto in_chw = channels * in_hw;
auto out_chw = channels * out_hw;
float ratio_h =
(out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
float ratio_w =
(out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
if (in_h == out_h && in_w == out_w) {
memcpy(output, input, param.InputX()->numel() * sizeof(float));
} else {
for (int k = 0; k < batch_size; ++k) { // loop for batches
for (int i = 0; i < out_h; ++i) { // loop for images
int h = ratio_h * i;
int hid = (h < in_h - 1) ? 1 : 0;
float h1lambda = ratio_h * i - h;
float h2lambda = 1.f - h1lambda;
for (int j = 0; j < out_w; ++j) {
int w = ratio_w * j;
int wid = (w < in_w - 1) ? 1 : 0;
float w1lambda = ratio_w * j - w;
float w2lambda = 1.f - w1lambda;
// calculate the four positions used for bilinear interpolation
const float* in_pos = &input[k * in_chw + h * in_w + w];
float* out_pos = &output[k * out_chw + i * out_w + j];
for (int c = 0; c < channels; ++c) { // loop for channels
// bilinear interpolation
out_pos[0] = static_cast<float>(
h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) +
h1lambda * (w2lambda * in_pos[hid * in_w] +
w1lambda * in_pos[hid * in_w + wid]));
in_pos += in_hw;
out_pos += out_hw;
}
}
}
}
}
}
} // namespace operators
} // namespace paddle_mobile
......
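A worked example of the interpolation arithmetic above (illustrative sizes): with in_h = in_w = 2 and out_h = out_w = 3, ratio_h = ratio_w = (2 - 1) / (3 - 1) = 0.5. For output pixel (i, j) = (1, 1): h = w = 0, hid = wid = 1, and h1lambda = w1lambda = 0.5, so

// out(1, 1) = 0.25 * (in(0, 0) + in(0, 1) + in(1, 0) + in(1, 1))

i.e. the centre output pixel is the average of the four surrounding input pixels, as expected for bilinear upsampling.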
......@@ -15,14 +15,29 @@ limitations under the License. */
#ifdef FLATTEN_OP
#pragma once
#include "operators/kernel/reshape_kernel.h"
#include <vector>
#include "operators/flatten_op.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void FlattenCompute(const FlattenParam<CPU>& param) {}
void FlattenCompute(const FlattenParam<CPU> &param) {
const auto *input_x = param.InputX();
const auto axis = param.Axis();
const auto &input_x_dims = input_x->dims();
auto *out = param.Out();
const auto &out_shape_v = GetOutputShape(axis, input_x_dims);
const framework::DDim &out_dim = ValidateShape(out_shape_v, input_x_dims);
out->Resize(out_dim);
out->mutable_data<float>();
framework::TensorCopy(*input_x, out);
// TensorCopy also copies the source dims; restore the flattened shape.
out->Resize(out_dim);
}
} // namespace operators
} // namespace paddle_mobile
......
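An illustrative trace of FlattenCompute (made-up dims):

// input {2, 3, 4, 5}, axis = 2 -> out_shape_v = {6, 20}, out_dim = {6, 20}
// 120 floats are copied verbatim; the trailing Resize re-applies {6, 20}
// because TensorCopy also copies the source dims onto out.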
......@@ -22,7 +22,15 @@ namespace paddle_mobile {
namespace operators {
template <typename P>
void ShapeCompute(const ShapeParam<CPU>& param) {}
void ShapeCompute(const ShapeParam<CPU>& param) {
auto* in_t = param.Input();
auto* out_t = param.Out();
auto out_data = out_t->mutable_data<int32_t>();
auto in_dims = in_t->dims();
for (int i = 0; i < in_dims.size(); ++i) {
out_data[i] = static_cast<int32_t>(in_dims[i]);
}
}
} // namespace operators
} // namespace paddle_mobile
......
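A short trace of the shape op (illustrative dims; the matching InferShape appears later in this commit):

// input dims {8, 3, 224, 224} -> Out is a 1-D int32 tensor of length 4
// holding the values {8, 3, 224, 224}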
......@@ -21,8 +21,64 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
// Strided numel memory copy from src to dst by the specified axis
//
// For example, for a tensor of dims [4, 20, 100], the strided numel is
// [8000, 2000, 100]
//
// NOTE: src and dst should have the same dims, except along the
// specified axis.
template <typename T>
inline void StridedNumelCopyWithAxis(int64_t axis, T* dst,
const framework::DDim& dst_stride_numel,
const T* src,
const framework::DDim& src_stride_numel,
int64_t size) {
int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
int64_t src_after = src_stride_numel[axis];
int64_t dst_after = dst_stride_numel[axis];
PADDLE_MOBILE_ENFORCE(src_stride_numel.size() == dst_stride_numel.size(),
"src and dst tensor should have the same dims size.");
for (int64_t i = 0; i < src_stride_numel.size(); ++i) {
if (i < axis) {
PADDLE_MOBILE_ENFORCE(src_stride_numel[i] / src_stride_numel[axis] ==
dst_stride_numel[i] / dst_stride_numel[axis],
"src and dst should have the same elements "
"except the specified axis.");
} else if (i == axis) {
continue;
} else {
PADDLE_MOBILE_ENFORCE(src_stride_numel[i] == dst_stride_numel[i],
"src and dst should have the same elements "
"except the specified axis.");
}
}
for (int64_t i = 0; i < before; ++i) {
memory::Copy(dst + i * dst_after, src + i * src_after, sizeof(T) * size);
}
}
template <typename P>
void SplitCompute(const SplitParam<CPU>& param) {}
void SplitCompute(const SplitParam<CPU>& param) {
auto* in = param.InputX();
auto outs = param.Outs();
auto in_stride = framework::stride_numel(in->dims());
int64_t axis = param.Axis();
size_t input_offset = 0;
for (auto& out : outs) {
out->mutable_data<float>();
auto out_stride = framework::stride_numel(out->dims());
StridedNumelCopyWithAxis<float>(axis, out->data<float>(), out_stride,
in->data<float>() + input_offset, in_stride,
out_stride[axis]);
input_offset += out_stride[axis];
}
}
} // namespace operators
} // namespace paddle_mobile
......
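A worked example of the strided copy above (illustrative dims): splitting input {4, 20, 100} along axis = 1 into two outputs of {4, 10, 100}:

// in_stride  = stride_numel({4, 20, 100}) = {8000, 2000, 100}
// out_stride = stride_numel({4, 10, 100}) = {4000, 1000, 100}
// before = 4000 / 1000 = 4, src_after = 2000, dst_after = 1000
// each output copies 4 runs of size = out_stride[1] = 1000 floats;
// input_offset then advances by 1000, so the second output reads the
// second half of each 2000-float source slice.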
......@@ -245,6 +245,12 @@ class OpParam {
return GetVarValue<T>("Out", outputs, scope);
}
template <typename T>
static vector<T *> OutMultiFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetMultiVarValue<T>("Out", outputs, scope);
}
template <typename T>
static T *OutputYFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<T>("Y", outputs, scope);
......@@ -2248,13 +2254,16 @@ class FlattenParam : public OpParam {
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
axis = GetAttr<int>("axis", attrs);
}
const RType *InputX() const { return input_x_; }
RType *Out() const { return out_; }
const int &Axis() const { return axis; }
private:
RType *input_x_;
RType *out_;
int axis;
};
#endif
......@@ -2268,14 +2277,29 @@ class SplitParam : public OpParam {
SplitParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
outs_ = OutMultiFrom<GType>(outputs, scope);
axis = GetAttr<int>("axis", attrs);
num = GetAttr<int>("num", attrs);
sections = GetAttr<std::vector<int>>("sections", attrs);
}
const RType *InputX() const { return input_x_; }
RType *Out() const { return out_; }
std::vector<GType *> Outs() const { return outs_; }
int Axis() const { return axis; }
int Num() const { return num; }
std::vector<int> Sections() const { return sections; }
private:
RType *input_x_;
RType *out_;
std::vector<GType *> outs_;
int axis;
int num;
std::vector<int> sections;
};
#endif
......@@ -2292,14 +2316,21 @@ class BilinearInterpParam : public OpParam {
input_x_ = InputXFrom<GType>(inputs, scope);
input_outsize_ = InputOutSizeFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
out_h_ = GetAttr<int>("out_h", attrs);
out_w_ = GetAttr<int>("out_w", attrs);
}
const RType *InputX() const { return input_x_; }
const RType *InputOutPutSize() const { return input_outsize_; }
RType *Out() const { return out_; }
int OutH() const { return out_h_; }
int OutW() const { return out_w_; }
private:
RType *input_x_;
RType *input_outsize_;
RType *out_;
int out_h_;
int out_w_;
};
#endif
......@@ -2315,7 +2346,7 @@ class ShapeParam : public OpParam {
input_ = InputFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
}
const RType *InputX() const { return input_; }
const RType *Input() const { return input_; }
RType *Out() const { return out_; }
private:
......
......@@ -20,7 +20,11 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void ShapeOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.Input() != nullptr,
"Input (Input) of get_shape op should not be null.");
PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
"Output (Out) of get_shape op should not be null.");
this->param_.Out()->Resize({this->param_.Input()->dims().size()});
}
} // namespace operators
......
......@@ -18,9 +18,62 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void SplitOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
"Input(X) of SplitOp should not be null.");
const auto &outs = this->param_.Outs();
PADDLE_MOBILE_ENFORCE(outs.size() >= 1UL,
"Outputs(Out) of SplitOp should not be empty.");
auto in_dims = this->param_.InputX()->dims();
size_t axis = static_cast<size_t>(this->param_.Axis());
size_t num = static_cast<size_t>(this->param_.Num());
const auto &sections = this->param_.Sections();
const size_t outs_number = outs.size();
std::vector<framework::DDim> outs_dims;
outs_dims.reserve(outs_number);
if (num > 0) {
int64_t in_axis_dim = in_dims[axis];
PADDLE_MOBILE_ENFORCE(in_axis_dim % num == 0,
"tensor split does not result"
" in an equal division");
size_t out_axis_dim = in_axis_dim / num;
for (size_t i = 0; i < outs_number; ++i) {
auto dim = in_dims;
dim[axis] = out_axis_dim;
outs_dims.push_back(dim);
}
} else if (sections.size() > 0) {
PADDLE_MOBILE_ENFORCE(sections.size() == outs_number,
"tensor split sections size"
"should be equal to output size.");
for (size_t i = 0; i < outs_number; ++i) {
auto dim = in_dims;
dim[axis] = sections[i];
outs_dims.push_back(dim);
}
}
PADDLE_MOBILE_ENFORCE(outs_dims.size() == outs.size(),
"outs_dims.size() must equal the number of outputs!");
for (size_t j = 0; j < outs_dims.size(); ++j) {
outs[j]->Resize(outs_dims[j]);
}
// TODO: LoD impl
// if (axis != 0) {
//   // Only pass LoD when not splitting along the first dim.
// for (size_t i = 0; i < outs_number; ++i) {
// ctx->ShareLoD("X", "Out", 0, i);
// }
// }
}
} // namespace operators
......
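Worked examples of the two branches above (illustrative dims), for input {4, 20, 100} with axis = 1:

// num = 2:            in_axis_dim = 20, out_axis_dim = 10
//                     -> two outputs resized to {4, 10, 100}
// sections = {5, 15}: -> outputs resized to {4, 5, 100} and {4, 15, 100}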
......@@ -44,7 +44,6 @@ class SplitOp : public framework::OperatorWithKernel<
operators::SplitKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
};
} // namespace operators
} // namespace paddle_mobile
......
......@@ -23,7 +23,7 @@ int main() {
// ../../../test/models/mobilenet
auto time1 = time();
if (paddle_mobile.Load(std::string(g_fluid_fssd_new) + "/model",
std::string(g_fluid_fssd_new) + "/params", false)) {
std::string(g_fluid_fssd_new) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
......