Unverified Commit cc501377 authored by H HappyAngel, committed by GitHub

Merge pull request #145 from PaddlePaddle/develop

pull new code
......@@ -60,7 +60,7 @@ For demands of Apple's GPU Metal and web front end inference, please see `./meta
Paddle Lite has referenced the following open-source projects:
- [ARM compute library](http://agroup.baidu.com/paddle-infer/md/article/%28https://github.com/ARM-software/ComputeLibrary%29)
- [ARM compute library](https://github.com/ARM-software/ComputeLibrary)
- [Anakin](https://github.com/PaddlePaddle/Anakin). The optimizations from Anakin have been incorporated into Paddle Lite, so there will be no further updates of Anakin. As another high-performance inference project under PaddlePaddle, Anakin has been forward-looking and helpful to the making of Paddle Lite.
......
......@@ -48,7 +48,7 @@ The metal and web modules are relatively independent and will continue to live under the `./metal` and `./web` directories
## Acknowledgements:
Paddle Lite has referenced the following open-source projects:
- [ARM compute library]((https://github.com/ARM-software/ComputeLibrary))
- [ARM compute library](https://github.com/ARM-software/ComputeLibrary)
- [Anakin](https://github.com/PaddlePaddle/Anakin). Some of Anakin's low-level optimizations have been integrated into Paddle Lite. As a high-performance inference project under the PaddlePaddle organization, Anakin was forward-looking and made important contributions to Paddle Lite. Anakin has been merged into this project and will no longer be updated.
## Communication and Feedback
......
......@@ -177,6 +177,8 @@ namespace lite_api {
template <>
std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(
const CxxConfig &config) {
static std::mutex mutex_conf;
std::unique_lock<std::mutex> lck(mutex_conf);
auto x = std::make_shared<lite::CxxPaddleApiImpl>();
x->Init(config);
return x;
......
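A note on the hunk above: declaring the mutex `static` inside `CreatePaddlePredictor` means every call shares one lock, so concurrent predictor creations are serialized and `Init(config)` never runs in two threads at once. Below is a minimal standalone sketch of the same pattern; the `Config` and `Predictor` types here are hypothetical stand-ins, not Paddle Lite's API.

```cpp
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

struct Config {};
struct Predictor {
  void Init(const Config&) { /* assume this is not thread-safe by itself */ }
};

std::shared_ptr<Predictor> CreatePredictor(const Config& config) {
  // A function-local static is initialized exactly once, so all callers
  // contend on the same mutex.
  static std::mutex mutex_conf;
  std::unique_lock<std::mutex> lck(mutex_conf);
  auto x = std::make_shared<Predictor>();
  x->Init(config);  // runs under the lock; Init calls never overlap
  return x;
}

int main() {
  Config config;
  std::vector<std::thread> threads;
  for (int i = 0; i < 4; ++i) {
    threads.emplace_back([&] { auto p = CreatePredictor(config); });
  }
  for (auto& t : threads) t.join();
  return 0;
}
```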
......@@ -33,19 +33,19 @@ class Variable {
template <typename T>
T* GetMutable() {
if (!blob_.is<T>()) blob_.set<T>();
if (!blob_.valid()) {
blob_.set<T>();
}
return blob_.get_mutable<T>();
}
template <typename T>
bool IsType() {
return blob_.type() == typeid(T).hash_code();
return blob_.is_type<T>();
}
private:
// variant<int, float, std::string, lite::Tensor> blob_;
variant<int, float, std::string, lite::Tensor, std::vector<lite::Tensor>>
blob_;
Any blob_;
};
} // namespace lite
......
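For context on the `Variable` change above: `blob_` is now a type-erased `Any`, so `GetMutable<T>()` only checks `valid()` and default-constructs a `T` on first use; after that, callers are expected to keep asking for the same `T`. The sketch below illustrates that contract with a deliberately tiny `Any` stand-in (hypothetical code, not the real `utils::Any`; cleanup of the stored object is omitted for brevity).

```cpp
#include <cassert>
#include <typeinfo>

// A stripped-down stand-in for utils::Any, just enough to show the
// GetMutable/IsType contract used by lite::Variable above.
class TinyAny {
 public:
  template <typename T>
  void set() { data_ = new T(); info_ = &typeid(T); }  // assume set once
  bool valid() const { return data_ != nullptr; }
  template <typename T>
  bool is_type() const { return info_ != nullptr && *info_ == typeid(T); }
  template <typename T>
  T* get_mutable() { return static_cast<T*>(data_); }
 private:
  void* data_ = nullptr;                 // leaked on purpose; sketch only
  const std::type_info* info_ = nullptr;
};

class Variable {
 public:
  template <typename T>
  T* GetMutable() {
    if (!blob_.valid()) blob_.set<T>();  // lazily default-construct
    return blob_.get_mutable<T>();       // caller must keep using the same T
  }
  template <typename T>
  bool IsType() { return blob_.is_type<T>(); }
 private:
  TinyAny blob_;
};

int main() {
  Variable v;
  *v.GetMutable<int>() = 42;  // first call fixes the stored type
  assert(v.IsType<int>());
  assert(!v.IsType<float>());
  return 0;
}
```

Note the design trade-off: unlike the old `variant`-based version, `GetMutable` no longer re-checks the stored type, so requesting a different `T` on a non-empty `Variable` would silently return a mis-typed pointer; `IsType` is the guard callers should use.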
......@@ -25,26 +25,6 @@ namespace lite {
namespace kernels {
namespace arm {
template <typename Dtype>
void trans_basic(const int count,
const Dtype* din,
const int* permute_order,
const int* old_steps,
const int* new_steps,
const int num_axes,
Dtype* dout) {
for (int i = 0; i < count; ++i) {
int old_idx = 0;
int idx = i;
for (int j = 0; j < num_axes; ++j) {
int order = permute_order[j];
old_idx += (idx / new_steps[j]) * old_steps[order];
idx %= new_steps[j];
}
dout[i] = din[old_idx];
}
}
template <typename Dtype>
void transpose_mat(const Dtype* din,
Dtype* dout,
......@@ -201,6 +181,61 @@ void TransposeCompute::PrepareForRun() {
_old_steps = get_stride(input->dims());
}
}
template <typename Dtype>
void TransposeCompute_(const std::vector<int>& axis,
const lite::Tensor* input,
lite::Tensor* output) {
const Dtype* input_ptr = input->data<Dtype>();
Dtype* output_ptr = output->mutable_data<Dtype>();
// The rank of input and output must be >= 2 and <= 6.
const DDim& in_dim = input->dims();
const DDim& out_dim = output->dims();
// precompute inverted output dim and strides
size_t rout_dim[6], strides[6];
int permute = axis.size(); // permute must be >= 2 and <= 6.
for (int i = 0; i < permute; ++i) {
int k = permute - 1 - i;
strides[k] = 1;
for (int j = axis[i] + 1; j < permute; ++j) {
strides[k] *= in_dim[j];
}
rout_dim[k] = out_dim[i];
}
// unroll the first 2 dimensions
int remain_dim = 1;
for (int i = 2; i < out_dim.size(); ++i) {
remain_dim *= out_dim[i];
}
#pragma omp parallel for collapse(2)
for (int batch = 0; batch < out_dim[0]; ++batch) {
for (int j = 0; j < out_dim[1]; ++j) {
size_t offset = batch * strides[permute - 1] + j * strides[permute - 2];
Dtype* out_ptr = output_ptr + (batch * out_dim[1] + j) * remain_dim;
int indices[4] = {0, 0, 0, 0};
for (int k = 0; k < remain_dim; ++k) {
out_ptr[k] = input_ptr[offset];
indices[0] += 1;
offset += strides[0];
for (int p = 0; p < permute - 3; ++p) {
if (indices[p] == rout_dim[p]) {
indices[p + 1] += 1;
indices[p] = 0;
offset += strides[p + 1];
offset -= rout_dim[p] * strides[p];
} else {
break;
}
}
}
}
}
}
// Transpose
void TransposeCompute::Run() {
auto& param = Param<operators::TransposeParam>();
......@@ -220,13 +255,7 @@ void TransposeCompute::Run() {
if (trans_mat) {
transpose_mat(din, dout, _trans_num, _trans_w, _trans_h);
} else {
trans_basic(output->numel(),
din,
param.axis.data(),
_old_steps.data(),
_new_steps.data(),
input->dims().size(),
dout);
TransposeCompute_<float>(axis, input, output);
}
}
......
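To make the index arithmetic in `TransposeCompute_` easier to verify, here is a self-contained toy that performs the same kind of permutation using explicit input/output strides, in the spirit of the removed `trans_basic` and without the OpenMP two-dimension unrolling. All names in it are illustrative only.

```cpp
#include <cassert>
#include <vector>

// Row-major strides: strides[i] = product of dims[i+1..].
std::vector<int> strides_of(const std::vector<int>& dims) {
  std::vector<int> s(dims.size(), 1);
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i) {
    s[i] = s[i + 1] * dims[i + 1];
  }
  return s;
}

int main() {
  // Transpose a 2x3x4 tensor with axis order {2, 0, 1} -> shape 4x2x3.
  std::vector<int> in_dims = {2, 3, 4}, axis = {2, 0, 1};
  std::vector<int> out_dims(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) out_dims[i] = in_dims[axis[i]];

  std::vector<int> old_steps = strides_of(in_dims);   // {12, 4, 1}
  std::vector<int> new_steps = strides_of(out_dims);  // {6, 3, 1}

  std::vector<float> din(24), dout(24);
  for (int i = 0; i < 24; ++i) din[i] = static_cast<float>(i);

  // For each output linear index: decode it against the output strides,
  // then re-encode the digits against the strides of the permuted axes.
  for (int i = 0; i < 24; ++i) {
    int old_idx = 0, idx = i;
    for (size_t j = 0; j < axis.size(); ++j) {
      old_idx += (idx / new_steps[j]) * old_steps[axis[j]];
      idx %= new_steps[j];
    }
    dout[i] = din[old_idx];
  }

  // Output (a, b, c) reads input (b, c, a); stepping c walks the old middle
  // axis (stride 4), so dout[0..2] == din[0], din[4], din[8].
  assert(dout[0] == 0.f && dout[1] == 4.f && dout[2] == 8.f);
  return 0;
}
```

The kernel above does the same decode/re-encode, but precomputes inverted strides and unrolls the first two output dimensions so the `collapse(2)` OpenMP pragma can parallelize over them.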
......@@ -62,6 +62,9 @@ class Any {
template <typename T, typename... Args>
inline void construct(Args&&... args);
template <typename T>
inline bool is_type() const;
private:
template <typename T>
class TypeOnHeap;
......@@ -214,6 +217,14 @@ inline const std::type_info& Any::type() const {
}
}
template <typename T>
inline bool Any::is_type() const {
if ((type_ == nullptr) || (*(type_->ptype_info) != typeid(T))) {
return false;
}
return true;
}
template <typename T>
inline void Any::check_type() const {
CHECK_EQ((type_ == nullptr), false);
......
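As a usage note on the new `Any::is_type<T>()` above: comparing `std::type_info` objects directly is exact and null-safe, whereas the old `Variable::IsType` compared `hash_code()` values, which in principle can collide and presupposes a stored type. A minimal sketch of the same pattern, assuming a simplified type tag rather than the real `Any` internals:

```cpp
#include <cassert>
#include <typeinfo>

// Hypothetical, simplified version of the type record kept by Any.
struct TypeTag {
  const std::type_info* ptype_info;
};

class MiniAny {
 public:
  template <typename T>
  void set() {
    static const TypeTag tag{&typeid(T)};  // one tag per instantiated T
    type_ = &tag;
  }
  template <typename T>
  bool is_type() const {
    // Null-safe: an empty MiniAny matches no type. Comparing type_info
    // objects avoids the theoretical hash_code() collision.
    return type_ != nullptr && *(type_->ptype_info) == typeid(T);
  }
 private:
  const TypeTag* type_ = nullptr;
};

int main() {
  MiniAny a;
  assert(!a.is_type<int>());  // safe even before set()
  a.set<int>();
  assert(a.is_type<int>());
  assert(!a.is_type<float>());
  return 0;
}
```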