提交 f1426f03 编写于 作者: L liuruilong

add log print module

---
Language: Cpp
BasedOnStyle: LLVM
Standard: Cpp11
IndentWidth: 4
NamespaceIndentation: All
...
......@@ -6,6 +6,7 @@ repos:
files: (src).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$
- id: remove-tabs
files: (src).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
hooks:
......@@ -18,11 +19,21 @@ repos:
files: (src).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$
- id: trailing-whitespace
files: (src).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$
- repo: local
hooks:
- id: clang-format-with-version-check
name: clang-format
description: Format files with ClangFormat.
entry: bash .clang_format.hook -i
entry: bash ./tools/pre-commit.hooks/.clang_format.hook -i
language: system
files: (src).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
#- repo: local
# hooks:
# - id: copyright_checker
# name: copyright_checker
# entry: python ./tools/pre-commit.hooks/.copyright.hook
# language: system
# files: (src).*\.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
# exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
......@@ -46,7 +46,8 @@ target_link_libraries(paddle-mobile-static protobuf-lite openblas)
add_dependencies(paddle-mobile openblas_proj)
# gen test
ADD_EXECUTABLE(paddle-mobile-test test/main.cpp test/test_helper.h)
ADD_EXECUTABLE(paddle-mobile-test test/main.cpp test/test_helper.h
test/elementwise_add_op_test.h test/test_include.h)
target_link_libraries(paddle-mobile-test paddle-mobile)
# gen test log
......
# Paddle-Mobile
# Paddle-Mobile
![License MIT](https://img.shields.io/github/license/mashape/apistatus.svg) [![Build Status](https://travis-ci.org/PaddlePaddle/paddle-mobile.svg?branch=develop&longCache=true&style=flat-square)](https://travis-ci.org/PaddlePaddle/paddle-mobile)
This project is used to develop the next-generation deep learning framework for mobile devices.
......
......@@ -23,30 +23,31 @@ SOFTWARE.
namespace paddle_mobile {
// Forward declarations of the framework types referenced by the aliases below.
namespace framework {
template <typename Dtype> class OperatorBase;
class OpDesc;
class BlockDesc;
class InferShapeContext;
}
// Maps a name to a list of variable names.
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
// Callable that returns an OperatorBase<Dtype>* given an operator type name,
// its input and output name maps, and its attribute map.
template <typename Dtype>
using OpCreator = std::function<framework::OperatorBase<Dtype> *(
const std::string & /*type*/, const VariableNameMap & /*inputs*/,
const VariableNameMap & /*outputs*/,
const framework::AttributeMap & /*attrs*/)>;
// Callable that returns a vector of owned OpDesc objects given an OpDesc,
// a no-grad name set, a grad-to-var name map and a list of blocks.
using GradOpMakerFN =
std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
const framework::OpDesc &,
const std::unordered_set<std::string> & /*no_grad_set*/,
std::unordered_map<std::string, std::string> * /*grad_to_var*/,
const std::vector<framework::BlockDesc *> &grad_block)>;
// Callable taking an OpDesc and a BlockDesc pointer; returns nothing.
using InferVarTypeFN = std::function<void(const framework::OpDesc & /*op_desc*/,
framework::BlockDesc * /*block*/)>;
// Callable taking an InferShapeContext pointer; returns nothing.
using InferShapeFN = std::function<void(framework::InferShapeContext *)>;
// Forward declarations of the framework types referenced by the aliases below.
namespace framework {
template <typename Dtype> class OperatorBase;
class OpDesc;
class BlockDesc;
class InferShapeContext;
}
// Maps a name to a list of variable names.
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
// Callable that returns an OperatorBase<Dtype>* given an operator type name,
// its input and output name maps, and its attribute map.
template <typename Dtype>
using OpCreator = std::function<framework::OperatorBase<Dtype> *(
const std::string & /*type*/, const VariableNameMap & /*inputs*/,
const VariableNameMap & /*outputs*/,
const framework::AttributeMap & /*attrs*/)>;
// Callable that returns a vector of owned OpDesc objects given an OpDesc,
// a no-grad name set, a grad-to-var name map and a list of blocks.
using GradOpMakerFN =
std::function<std::vector<std::unique_ptr<framework::OpDesc>>(
const framework::OpDesc &,
const std::unordered_set<std::string> & /*no_grad_set*/,
std::unordered_map<std::string, std::string> * /*grad_to_var*/,
const std::vector<framework::BlockDesc *> &grad_block)>;
// Callable taking an OpDesc and a BlockDesc pointer; returns nothing.
using InferVarTypeFN =
std::function<void(const framework::OpDesc & /*op_desc*/,
framework::BlockDesc * /*block*/)>;
// Callable taking an InferShapeContext pointer; returns nothing.
using InferShapeFN = std::function<void(framework::InferShapeContext *)>;
};
......@@ -19,45 +19,45 @@ SOFTWARE.
#pragma once;
namespace paddle_mobile {
// Numeric precision tag; FP32 is the only enumerator defined here.
enum class Precision : int { FP32 = 0 };
enum class Precision : int { FP32 = 0 };
//! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };
//! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 };
// Empty tag type parameterized by a DeviceTypeEnum value.
template <DeviceTypeEnum T> struct DeviceType {};
template <DeviceTypeEnum T> struct DeviceType {};
// Tag aliases for the kCPU, kFPGA and kGPU_MALI enumerators.
typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI;
typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI;
//! Data type identifiers. PM_INVALID (-1) marks an invalid type; the
//! remaining enumerators are numbered consecutively from 0 to 13.
enum DataType {
PM_INVALID = -1,
PM_HALF = 0,
PM_FLOAT = 1,
PM_DOUBLE = 2,
PM_INT8 = 3,
PM_INT16 = 4,
PM_INT32 = 5,
PM_INT64 = 6,
PM_UINT8 = 7,
PM_UINT16 = 8,
PM_UINT32 = 9,
PM_STRING = 10,
PM_BOOL = 11,
PM_SHAPE = 12,
PM_TENSOR = 13
};
//! Status codes. Note that success is 0xFF, not 0; the error codes run
//! from 0x01 to 0x08.
enum PMStatus {
PMSuccess = 0xFF, /*!< No errors. */
PMNotInitialized = 0x01, /*!< Data not initialized. */
PMInvalidValue = 0x02, /*!< Incorrect variable value. */
PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
PMUnKownError = 0x04, /*!< Unknown error. */
PMOutOfAuthority = 0x05, /*!< Attempt to modify data not owned by the caller. */
PMOutOfMem = 0x06, /*!< Out-of-memory error. */
PMUnImplError = 0x07, /*!< Not implemented. */
PMWrongDevice = 0x08 /*!< Incorrect device. */
};
//! Data type identifiers. PM_INVALID (-1) marks an invalid type; the
//! remaining enumerators are numbered consecutively from 0 to 13.
enum DataType {
PM_INVALID = -1,
PM_HALF = 0,
PM_FLOAT = 1,
PM_DOUBLE = 2,
PM_INT8 = 3,
PM_INT16 = 4,
PM_INT32 = 5,
PM_INT64 = 6,
PM_UINT8 = 7,
PM_UINT16 = 8,
PM_UINT32 = 9,
PM_STRING = 10,
PM_BOOL = 11,
PM_SHAPE = 12,
PM_TENSOR = 13
};
//! Status codes. Note that success is 0xFF, not 0; the error codes run
//! from 0x01 to 0x08.
enum PMStatus {
PMSuccess = 0xFF, /*!< No errors. */
PMNotInitialized = 0x01, /*!< Data not initialized. */
PMInvalidValue = 0x02, /*!< Incorrect variable value. */
PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
PMUnKownError = 0x04, /*!< Unknown error. */
PMOutOfAuthority = 0x05, /*!< Attempt to modify data not owned by the caller. */
PMOutOfMem = 0x06, /*!< Out-of-memory error. */
PMUnImplError = 0x07, /*!< Not implemented. */
PMWrongDevice = 0x08 /*!< Incorrect device. */
};
}
......@@ -21,79 +21,79 @@ SOFTWARE.
#pragma once
namespace paddle_mobile {
// Associates an integer ID with a type; the type is exposed as `type_t`.
template <int ID, typename Type> struct IDToType { typedef Type type_t; };
template <int ID, typename Type> struct IDToType { typedef Type type_t; };
// Recursive helper over a list of alternative types F, Ts...
// `size` is the larger of sizeof(F) and the size computed for Ts...
template <typename F, typename... Ts> struct VariantHelper {
static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
? sizeof(F)
: VariantHelper<Ts...>::size;
// Recursive helper over a list of alternative types F, Ts...
// `size` is the larger of sizeof(F) and the size computed for Ts...
template <typename F, typename... Ts> struct VariantHelper {
static const size_t size = sizeof(F) > VariantHelper<Ts...>::size
? sizeof(F)
: VariantHelper<Ts...>::size;
// Runs F's destructor on `data` when `id` equals typeid(F).hash_code();
// otherwise delegates to the helper for the remaining types.
inline static void Destroy(size_t id, void *data) {
if (id == typeid(F).hash_code()) {
reinterpret_cast<F *>(data)->~F();
} else {
VariantHelper<Ts...>::Destroy(id, data);
}
}
};
// Runs F's destructor on `data` when `id` equals typeid(F).hash_code();
// otherwise delegates to the helper for the remaining types.
inline static void Destroy(size_t id, void *data) {
if (id == typeid(F).hash_code()) {
reinterpret_cast<F *>(data)->~F();
} else {
VariantHelper<Ts...>::Destroy(id, data);
}
}
};
// Terminal case of VariantHelper for a single remaining type F.
template <typename F> struct VariantHelper<F> {
    static const size_t size = sizeof(F);

    // Currently a no-op for every id: the destructor call for F is commented
    // out, and an id that matches nothing is silently ignored.
    inline static void Destroy(size_t id, void *data) {
        const bool is_last_type = (id == typeid(F).hash_code());
        if (!is_last_type) {
            // std::cout << "no match" << std::endl;
            return;
        }
        // reinterpret_cast<F*>(data)->~F();
    }
};
// Terminal case of VariantHelper for a single remaining type F.
template <typename F> struct VariantHelper<F> {
    static const size_t size = sizeof(F);

    // Currently a no-op for every id: the destructor call for F is commented
    // out, and an id that matches nothing is silently ignored.
    inline static void Destroy(size_t id, void *data) {
        const bool is_last_type = (id == typeid(F).hash_code());
        if (!is_last_type) {
            // std::cout << "no match" << std::endl;
            return;
        }
        // reinterpret_cast<F*>(data)->~F();
    }
};
// Fixed-size, untyped byte buffer of `size` chars.
template <size_t size> class RawData {
  public:
    char data[size];

    // Leaves the buffer uninitialized.
    RawData() {}

    // Copies all `size` bytes. The buffer is not a NUL-terminated string, so
    // strcpy would stop at the first zero byte or read past the end when
    // there is none; memcpy copies exactly the buffer.
    RawData(const RawData &raw_data) { memcpy(data, raw_data.data, size); }

    // Copy assignment is left to the compiler-generated operator, which
    // copies the array member element by element.
};
// Fixed-size, untyped byte buffer of `size` chars.
template <size_t size> class RawData {
  public:
    char data[size];

    // Leaves the buffer uninitialized.
    RawData() {}

    // Copies all `size` bytes. The buffer is not a NUL-terminated string, so
    // strcpy would stop at the first zero byte or read past the end when
    // there is none; memcpy copies exactly the buffer.
    RawData(const RawData &raw_data) { memcpy(data, raw_data.data, size); }

    // Copy assignment is left to the compiler-generated operator, which
    // copies the array member element by element.
};
// Tagged storage holding at most one value of one of the types Ts...
// The active type is identified by typeid(T).hash_code().
template <typename... Ts> struct Variant {
// Copies the type tag and the raw storage bytes of `variant`.
// NOTE(review): the stored object's own copy constructor is not invoked, so
// alternatives that own resources (e.g. std::string) appear to end up
// aliased between the two variants - confirm.
Variant(const Variant &variant) {
// std::cout << " copy constructor " << std::endl;
type_id = variant.type_id;
data = variant.data;
}
// Tagged storage holding at most one value of one of the types Ts...
// The active type is identified by typeid(T).hash_code().
template <typename... Ts> struct Variant {
// Copies the type tag and the raw storage bytes of `variant`.
// NOTE(review): the stored object's own copy constructor is not invoked, so
// alternatives that own resources (e.g. std::string) appear to end up
// aliased between the two variants - confirm.
Variant(const Variant &variant) {
// std::cout << " copy constructor " << std::endl;
type_id = variant.type_id;
data = variant.data;
}
// Starts empty: the tag is the hash code of typeid(void).
Variant() : type_id(invalid_type()) {}
// Does not destroy the stored value; the Destroy call is commented out.
~Variant() {
// helper::Destroy(type_id, &data);
}
// Starts empty: the tag is the hash code of typeid(void).
Variant() : type_id(invalid_type()) {}
// Does not destroy the stored value; the Destroy call is commented out.
~Variant() {
// helper::Destroy(type_id, &data);
}
// Passes the current tag and storage to helper::Destroy, constructs a T in
// place from the forwarded arguments, then records T as the active type.
template <typename T, typename... Args> void Set(Args &&... args) {
helper::Destroy(type_id, &data);
new (&data) T(std::forward<Args>(args)...);
type_id = typeid(T).hash_code();
}
// Passes the current tag and storage to helper::Destroy, constructs a T in
// place from the forwarded arguments, then records T as the active type.
template <typename T, typename... Args> void Set(Args &&... args) {
helper::Destroy(type_id, &data);
new (&data) T(std::forward<Args>(args)...);
type_id = typeid(T).hash_code();
}
// Returns a mutable reference to the stored value when T is the active
// type (constness is cast away); throws std::bad_cast otherwise.
template <typename T> T &Get() const {
if (type_id == typeid(T).hash_code()) {
return *const_cast<T *>(reinterpret_cast<const T *>(&data));
} else {
// std::cout << " bad cast in variant " << std::endl;
throw std::bad_cast();
}
}
// Returns a mutable reference to the stored value when T is the active
// type (constness is cast away); throws std::bad_cast otherwise.
template <typename T> T &Get() const {
if (type_id == typeid(T).hash_code()) {
return *const_cast<T *>(reinterpret_cast<const T *>(&data));
} else {
// std::cout << " bad cast in variant " << std::endl;
throw std::bad_cast();
}
}
// Hash code of the active type, or of typeid(void) when empty.
size_t TypeId() const { return type_id; }
// Hash code of the active type, or of typeid(void) when empty.
size_t TypeId() const { return type_id; }
private:
// Tag value used for "no value stored".
static inline size_t invalid_type() { return typeid(void).hash_code(); }
typedef VariantHelper<Ts...> helper;
size_t type_id;
// Storage sized by helper::size.
RawData<helper::size> data;
};
private:
// Tag value used for "no value stored".
static inline size_t invalid_type() { return typeid(void).hash_code(); }
typedef VariantHelper<Ts...> helper;
size_t type_id;
// Storage sized by helper::size.
RawData<helper::size> data;
};
// Base for visitor functors; exposes the type parameter T as `type_t`.
template <typename T> struct Vistor { typedef T type_t; };
template <typename T> struct Vistor { typedef T type_t; };
} // namespace paddle_mobile
......@@ -19,5 +19,5 @@ SOFTWARE.
#include "attribute.h"
namespace paddle_mobile {
namespace framework {}
namespace framework {}
} // namespace paddle_mobile
......@@ -22,107 +22,110 @@ SOFTWARE.
#include "framework.pb.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
class BlockDesc;
class BlockDesc;
class Attribute {
public:
// Builds an Attribute from a protobuf attribute description.
// Scalar kinds (BOOLEAN, INT, FLOAT, STRING, LONG) store the scalar; list
// kinds (BOOLEANS, INTS, FLOATS, STRINGS) store a std::vector of the
// elements. Any other kind returns an Attribute with nothing set.
static Attribute GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
    Attribute result;
    const auto kind = attr_desc.type();
    if (kind == proto::AttrType::BOOLEAN) {
        result.Set<bool>(attr_desc.b());
    } else if (kind == proto::AttrType::INT) {
        result.Set<int>(attr_desc.i());
    } else if (kind == proto::AttrType::FLOAT) {
        result.Set<float>(attr_desc.f());
    } else if (kind == proto::AttrType::STRING) {
        result.Set<std::string>(attr_desc.s());
    } else if (kind == proto::AttrType::BOOLEANS) {
        std::vector<bool> flags;
        flags.reserve(attr_desc.bools_size());
        for (int k = 0; k < attr_desc.bools_size(); ++k) {
            flags.push_back(attr_desc.bools(k));
        }
        result.Set<std::vector<bool>>(flags);
    } else if (kind == proto::AttrType::INTS) {
        std::vector<int> ints;
        ints.reserve(attr_desc.ints_size());
        for (int k = 0; k < attr_desc.ints_size(); ++k) {
            ints.push_back(attr_desc.ints(k));
        }
        result.Set<std::vector<int>>(ints);
    } else if (kind == proto::AttrType::FLOATS) {
        std::vector<float> floats;
        floats.reserve(attr_desc.floats_size());
        for (int k = 0; k < attr_desc.floats_size(); ++k) {
            floats.push_back(attr_desc.floats(k));
        }
        result.Set<std::vector<float>>(floats);
    } else if (kind == proto::AttrType::STRINGS) {
        std::vector<std::string> strings;
        strings.reserve(attr_desc.strings_size());
        for (int k = 0; k < attr_desc.strings_size(); ++k) {
            strings.push_back(attr_desc.strings(k));
        }
        result.Set<std::vector<std::string>>(strings);
    } else if (kind == proto::AttrType::LONG) {
        result.Set<int64_t>(attr_desc.l());
    }
    // Unsupported kinds fall through and return the unset attribute.
    return result;
}
class Attribute {
public:
// Builds an Attribute from a protobuf attribute description.
// Scalar kinds (BOOLEAN, INT, FLOAT, STRING, LONG) store the scalar; list
// kinds (BOOLEANS, INTS, FLOATS, STRINGS) store a std::vector of the
// elements. Any other kind returns an Attribute with nothing set.
static Attribute GetAttrValue(const proto::OpDesc::Attr &attr_desc) {
    Attribute result;
    const auto kind = attr_desc.type();
    if (kind == proto::AttrType::BOOLEAN) {
        result.Set<bool>(attr_desc.b());
    } else if (kind == proto::AttrType::INT) {
        result.Set<int>(attr_desc.i());
    } else if (kind == proto::AttrType::FLOAT) {
        result.Set<float>(attr_desc.f());
    } else if (kind == proto::AttrType::STRING) {
        result.Set<std::string>(attr_desc.s());
    } else if (kind == proto::AttrType::BOOLEANS) {
        std::vector<bool> flags;
        flags.reserve(attr_desc.bools_size());
        for (int k = 0; k < attr_desc.bools_size(); ++k) {
            flags.push_back(attr_desc.bools(k));
        }
        result.Set<std::vector<bool>>(flags);
    } else if (kind == proto::AttrType::INTS) {
        std::vector<int> ints;
        ints.reserve(attr_desc.ints_size());
        for (int k = 0; k < attr_desc.ints_size(); ++k) {
            ints.push_back(attr_desc.ints(k));
        }
        result.Set<std::vector<int>>(ints);
    } else if (kind == proto::AttrType::FLOATS) {
        std::vector<float> floats;
        floats.reserve(attr_desc.floats_size());
        for (int k = 0; k < attr_desc.floats_size(); ++k) {
            floats.push_back(attr_desc.floats(k));
        }
        result.Set<std::vector<float>>(floats);
    } else if (kind == proto::AttrType::STRINGS) {
        std::vector<std::string> strings;
        strings.reserve(attr_desc.strings_size());
        for (int k = 0; k < attr_desc.strings_size(); ++k) {
            strings.push_back(attr_desc.strings(k));
        }
        result.Set<std::vector<std::string>>(strings);
    } else if (kind == proto::AttrType::LONG) {
        result.Set<int64_t>(attr_desc.l());
    }
    // Unsupported kinds fall through and return the unset attribute.
    return result;
}
Attribute() {}
// Stores a value of type T, constructed from `args`, in the underlying
// variant and returns *this so calls can be chained.
// The arguments are perfectly forwarded so that rvalues are moved into the
// stored value instead of being copied.
template <typename T, typename... Args> Attribute &Set(Args &&... args) {
    variant_.Set<T>(std::forward<Args>(args)...);
    return *this;
}
Attribute() {}
// Stores a value of type T, constructed from `args`, in the underlying
// variant and returns *this so calls can be chained.
// The arguments are perfectly forwarded so that rvalues are moved into the
// stored value instead of being copied.
template <typename T, typename... Args>
Attribute &Set(Args &&... args) {
    variant_.Set<T>(std::forward<Args>(args)...);
    return *this;
}
template <typename T> T &Get() const { return variant_.Get<T>(); }
template <typename T> T &Get() const { return variant_.Get<T>(); }
private:
Variant<int, float, std::string, std::vector<int>, std::vector<float>,
std::vector<std::string>, bool, std::vector<bool>, BlockDesc *,
int64_t>
variant_;
};
private:
Variant<int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDesc *, int64_t>
variant_;
};
using AttributeMap = std::unordered_map<std::string, Attribute>;
using AttributeMap = std::unordered_map<std::string, Attribute>;
class AttrReader {
public:
explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
class AttrReader {
public:
explicit AttrReader(const AttributeMap &attrs) : attrs_(attrs) {}
template <typename T> inline T Get(const std::string &name) const {
// PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in
// AttributeMap",
// name);
return ((Attribute)attrs_.at(name)).Get<T>();
}
template <typename T> inline T Get(const std::string &name) const {
// PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should
// be in
// AttributeMap",
// name);
return ((Attribute)attrs_.at(name)).Get<T>();
}
private:
const AttributeMap &attrs_;
};
private:
const AttributeMap &attrs_;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -19,32 +19,32 @@ SOFTWARE.
#include "block_desc.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
std::vector<std::shared_ptr<VarDesc>> res;
for (const auto &p : vars_) {
res.push_back(p.second);
}
return res;
}
std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
std::vector<std::shared_ptr<VarDesc>> res;
for (const auto &p : vars_) {
res.push_back(p.second);
}
return res;
}
std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const {
std::vector<std::shared_ptr<OpDesc>> res;
for (const auto &op : ops_) {
res.push_back(op);
}
return res;
}
std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const {
std::vector<std::shared_ptr<OpDesc>> res;
for (const auto &op : ops_) {
res.push_back(op);
}
return res;
}
// Builds the block from its protobuf description: keeps a copy of `desc`
// and wraps every contained variable and operator description in a
// shared_ptr-owned VarDesc / OpDesc.
BlockDesc::BlockDesc(const proto::BlockDesc &desc) : desc_(desc) {
    for (const proto::VarDesc &var_desc : desc_.vars()) {
        vars_[var_desc.name()] = std::make_shared<VarDesc>(var_desc);
    }
    for (const proto::OpDesc &op_desc : desc_.ops()) {
        // make_shared takes ownership before the vector can reallocate, so
        // the object is not leaked if push_back throws (a raw `new` handed
        // to emplace_back would be).
        ops_.push_back(std::make_shared<framework::OpDesc>(op_desc));
    }
}
// Builds the block from its protobuf description: keeps a copy of `desc`
// and wraps every contained variable and operator description in a
// shared_ptr-owned VarDesc / OpDesc.
BlockDesc::BlockDesc(const proto::BlockDesc &desc) : desc_(desc) {
    for (const proto::VarDesc &var_desc : desc_.vars()) {
        vars_[var_desc.name()] = std::make_shared<VarDesc>(var_desc);
    }
    for (const proto::OpDesc &op_desc : desc_.ops()) {
        // make_shared takes ownership before the vector can reallocate, so
        // the object is not leaked if push_back throws (a raw `new` handed
        // to emplace_back would be).
        ops_.push_back(std::make_shared<framework::OpDesc>(op_desc));
    }
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -24,46 +24,50 @@ SOFTWARE.
#include "var_desc.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
class BlockDesc : PaddleMobileObject {
public:
BlockDesc(const proto::BlockDesc &desc);
class BlockDesc : PaddleMobileObject {
public:
BlockDesc(const proto::BlockDesc &desc);
const int &ID() const { return desc_.idx(); }
const int &ID() const { return desc_.idx(); }
const int &Parent() const { return desc_.parent_idx(); }
const int &Parent() const { return desc_.parent_idx(); }
// Two blocks are equal when both their ID and their parent ID match.
bool operator==(const paddle_mobile::framework::BlockDesc &in_block) const {
    if (this->ID() != in_block.ID()) {
        return false;
    }
    return this->Parent() == in_block.Parent();
}
// Two blocks are equal when both their ID and their parent ID match.
bool operator==(
    const paddle_mobile::framework::BlockDesc &in_block) const {
    if (this->ID() != in_block.ID()) {
        return false;
    }
    return this->Parent() == in_block.Parent();
}
// Orders blocks lexicographically by (ID, Parent).
// Requiring both fields to be smaller at once is not a strict weak
// ordering: blocks such as (1, 5) and (2, 3) would compare "equivalent"
// although operator== says they differ, which breaks ordered containers
// and sorting.
bool operator<(const paddle_mobile::framework::BlockDesc &in_block) const {
    if (this->ID() != in_block.ID()) {
        return this->ID() < in_block.ID();
    }
    return this->Parent() < in_block.Parent();
}
// Orders blocks lexicographically by (ID, Parent).
// Requiring both fields to be smaller at once is not a strict weak
// ordering: blocks such as (1, 5) and (2, 3) would compare "equivalent"
// although operator== says they differ, which breaks ordered containers
// and sorting.
bool operator<(
    const paddle_mobile::framework::BlockDesc &in_block) const {
    if (this->ID() != in_block.ID()) {
        return this->ID() < in_block.ID();
    }
    return this->Parent() < in_block.Parent();
}
std::vector<std::shared_ptr<VarDesc>> Vars() const;
std::vector<std::shared_ptr<OpDesc>> Ops() const;
std::vector<std::shared_ptr<VarDesc>> Vars() const;
std::vector<std::shared_ptr<OpDesc>> Ops() const;
private:
proto::BlockDesc desc_;
std::vector<std::shared_ptr<OpDesc>> ops_;
std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
};
private:
proto::BlockDesc desc_;
std::vector<std::shared_ptr<OpDesc>> ops_;
std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
namespace std {
template <> struct hash<paddle_mobile::framework::BlockDesc> {
typedef paddle_mobile::framework::BlockDesc argument_type;
typedef std::size_t result_type;
result_type operator()(argument_type const &s) const noexcept {
result_type const h1(std::hash<int>{}(s.ID()));
result_type const h2(std::hash<int>{}(s.ID()));
return h1 ^ (h2 << 1);
}
};
template <> struct hash<paddle_mobile::framework::BlockDesc> {
typedef paddle_mobile::framework::BlockDesc argument_type;
typedef std::size_t result_type;
result_type operator()(argument_type const &s) const noexcept {
result_type const h1(std::hash<int>{}(s.ID()));
result_type const h2(std::hash<int>{}(s.ID()));
return h1 ^ (h2 << 1);
}
};
} // namespace std
......@@ -19,49 +19,50 @@ limitations under the License. */
#include <cctype>
#include <stdexcept>
#include <string>
namespace paddle_mobile {
namespace framework {
namespace framework {
// Data layout tags.
enum class DataLayout {
    kNHWC = 0,
    kNCHW = 1,
    kAnyLayout = 2,
};

// Parses a layout name, ignoring case.
// Accepts "NHWC", "NCHW" and "ANYLAYOUT", plus "ANY_LAYOUT" so that the
// output of DataLayoutToString() parses back to the same value.
// Throws std::invalid_argument for any other string (previously control
// fell off the end of the function without returning a value).
inline DataLayout StringToDataLayout(const std::string &str) {
    std::string s(str);
    for (size_t i = 0; i < s.size(); ++i) {
        // std::toupper requires a value representable as unsigned char.
        s[i] = static_cast<char>(
            std::toupper(static_cast<unsigned char>(s[i])));
    }

    if (s == "NHWC") {
        return DataLayout::kNHWC;
    }
    if (s == "NCHW") {
        return DataLayout::kNCHW;
    }
    if (s == "ANYLAYOUT" || s == "ANY_LAYOUT") {
        return DataLayout::kAnyLayout;
    }
    throw std::invalid_argument("Unknown storage order string: " + s);
}

// Returns the textual name of a layout: "NHWC", "NCHW" or "ANY_LAYOUT".
// Throws std::invalid_argument for a value that is not one of the declared
// enumerators (previously control fell off the end of the function).
inline std::string DataLayoutToString(const DataLayout &data_layout) {
    switch (data_layout) {
    case DataLayout::kNHWC:
        return "NHWC";
    case DataLayout::kNCHW:
        return "NCHW";
    case DataLayout::kAnyLayout:
        return "ANY_LAYOUT";
    default:
        break;
    }
    throw std::invalid_argument("unknown DataLayout");
}

// Streams the textual name of a layout.
inline std::ostream &operator<<(std::ostream &out, const DataLayout &l) {
    out << DataLayoutToString(l);
    return out;
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -21,67 +21,72 @@ SOFTWARE.
#include "data_transform.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
// Makes `to` share the data of `from`, then resets `from` to a
// default-constructed Tensor.
// NOTE(review): the only visible uses are inside commented-out code.
static void PassTensorData(Tensor *from, Tensor *to) {
to->ShareDataWith(*from);
*from = Tensor();
}
// Makes `to` share the data of `from`, then resets `from` to a
// default-constructed Tensor.
// NOTE(review): the only visible uses are inside commented-out code.
static void PassTensorData(Tensor *from, Tensor *to) {
to->ShareDataWith(*from);
*from = Tensor();
}
// Produces `output_tensor` from `input_tensor` for a kernel that expects
// `expected_kernel_type` while the variable currently has
// `kernel_type_for_var`.
// At present no conversion is performed: the layout, data-type and device
// transform steps are disabled (kept below for reference), so the output
// simply shares the input tensor's data and both kernel-type arguments are
// unused.
void DataTransform(const OpKernelType &expected_kernel_type,
                   const OpKernelType &kernel_type_for_var,
                   const Tensor &input_tensor, Tensor *output_tensor) {
    Tensor in;
    in.ShareDataWith(input_tensor);

    // Disabled transform pipeline. Re-enabling it requires the locals
    // `bool transformed = false;` and `Tensor out;`, which were removed
    // because nothing in the active code read them.
    //
    //  // do layout transform
    //  if (NeedTransformLayout(expected_kernel_type.data_layout_,
    //                          kernel_type_for_var.data_layout_)) {
    //    TransDataLayout(kernel_type_for_var, expected_kernel_type, in,
    //    &out);
    //    transformed = true;
    //    PassTensorData(&out, &in);
    //  }
    //
    //  // do data type transform
    //  if (expected_kernel_type.data_type_ !=
    //  kernel_type_for_var.data_type_) {
    //    TransDataType(kernel_type_for_var, expected_kernel_type, in,
    //    &out);
    //    transformed = true;
    //    PassTensorData(&out, &in);
    //  }
    //
    //  // do device transform
    //  if (!platform::is_same_place(kernel_type_for_var.place_,
    //                               expected_kernel_type.place_)) {
    //    TransDataDevice(in, expected_kernel_type.place_, &out);
    //    transformed = true;
    //    PassTensorData(&out, &in);
    //  }
    //
    //  PADDLE_ENFORCE(transformed, "No transform is applied, please
    //  check!");

    // get output data
    output_tensor->ShareDataWith(in);
}
// Currently a no-op: the whole body is commented out, so `out_var` is left
// untouched. The disabled code would copy LoD/layout metadata (LoDTensor)
// or height/rows metadata (SelectedRows) from `in_var` into `out_var` and
// make it share the data of `tensor`.
void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
Variable &out_var) {
// if (in_var.IsType<LoDTensor>()) {
// auto& in_lod_tensor = in_var.Get<LoDTensor>();
// auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
// tran_lod_tensor->set_lod(in_lod_tensor.lod());
// tran_lod_tensor->set_layout(in_lod_tensor.layout());
// tran_lod_tensor->ShareDataWith(tensor);
// } else if (in_var.IsType<SelectedRows>()) {
// auto& in_selected_rows = in_var.Get<SelectedRows>();
// auto* trans_selected_rows = out_var.GetMutable<SelectedRows>();
// trans_selected_rows->set_height(in_selected_rows.height());
// trans_selected_rows->set_rows(in_selected_rows.rows());
// trans_selected_rows->mutable_value()->ShareDataWith(tensor);
// } else {
// PADDLE_THROW("unknown var type");
// }
}
// Currently a no-op: the whole body is commented out, so `out_var` is left
// untouched. The disabled code would copy LoD/layout metadata (LoDTensor)
// or height/rows metadata (SelectedRows) from `in_var` into `out_var` and
// make it share the data of `tensor`.
void CopyVariableWithTensor(const Variable &in_var,
const Tensor &tensor, Variable &out_var) {
// if (in_var.IsType<LoDTensor>()) {
// auto& in_lod_tensor = in_var.Get<LoDTensor>();
// auto* tran_lod_tensor = out_var.GetMutable<LoDTensor>();
// tran_lod_tensor->set_lod(in_lod_tensor.lod());
// tran_lod_tensor->set_layout(in_lod_tensor.layout());
// tran_lod_tensor->ShareDataWith(tensor);
// } else if (in_var.IsType<SelectedRows>()) {
// auto& in_selected_rows = in_var.Get<SelectedRows>();
// auto* trans_selected_rows =
// out_var.GetMutable<SelectedRows>();
// trans_selected_rows->set_height(in_selected_rows.height());
// trans_selected_rows->set_rows(in_selected_rows.rows());
// trans_selected_rows->mutable_value()->ShareDataWith(tensor);
// } else {
// PADDLE_THROW("unknown var type");
// }
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -28,14 +28,14 @@ SOFTWARE.
#include "variable.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
// Fills `out` from `input_tensor` for a kernel expecting
// `expected_kernel_type`, given the variable's current kernel type.
void DataTransform(const OpKernelType &expected_kernel_type,
const OpKernelType &kernel_type_for_var,
const Tensor &input_tensor, Tensor *out);
// Fills `out` from `input_tensor` for a kernel expecting
// `expected_kernel_type`, given the variable's current kernel type.
void DataTransform(const OpKernelType &expected_kernel_type,
const OpKernelType &kernel_type_for_var,
const Tensor &input_tensor, Tensor *out);
// Takes an input variable, a tensor and an output variable.
void CopyVariableWithTensor(const Variable &in_var, const Tensor &tensor,
Variable &out_var);
// Takes an input variable, a tensor and an output variable.
void CopyVariableWithTensor(const Variable &in_var,
const Tensor &tensor, Variable &out_var);
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -21,23 +21,23 @@ SOFTWARE.
#include "framework.pb.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
// inline proto::VarType::Type ToDataType(std::type_index type) {
// using namespace paddle_mobile::framework::proto;
// if (typeid(float).hash_code() == type.hash_code()) {
// return proto::VarType::FP32;
// } else if (typeid(double).hash_code() == type.hash_code()) {
// return proto::VarType::FP64;
// } else if (typeid(int).hash_code() == type.hash_code()) {
// return proto::VarType::INT32;
// } else if (typeid(int64_t).hash_code() == type.hash_code()) {
// return proto::VarType::INT64;
// } else if (typeid(bool).hash_code() == type.hash_code()) {
// return proto::VarType::BOOL;
// } else {
//// PADDLE_THROW("Not supported");
// }
// }
}
// inline proto::VarType::Type ToDataType(std::type_index type) {
// using namespace paddle_mobile::framework::proto;
// if (typeid(float).hash_code() == type.hash_code()) {
// return proto::VarType::FP32;
// } else if (typeid(double).hash_code() == type.hash_code()) {
// return proto::VarType::FP64;
// } else if (typeid(int).hash_code() == type.hash_code()) {
// return proto::VarType::INT32;
// } else if (typeid(int64_t).hash_code() == type.hash_code()) {
// return proto::VarType::INT64;
// } else if (typeid(bool).hash_code() == type.hash_code()) {
// return proto::VarType::BOOL;
// } else {
//// PADDLE_THROW("Not supported");
// }
// }
}
} // namespace paddle_mobile
......@@ -15,311 +15,320 @@ limitations under the License. */
#include "ddim.h"
namespace paddle_mobile {
namespace framework {
/// @cond HIDDEN
// Builds a Dim<i> from the values starting at `d`: the first value becomes
// the head and the remaining values are consumed recursively.
template <int i> Dim<i> make_dim(const int64_t *d) {
    const int64_t *rest = d + 1;
    return Dim<i>(d[0], make_dim<i - 1>(rest));
}
// Recursion terminator: constructs Dim<0> from the value at `d`.
template <> Dim<0> make_dim<0>(const int64_t *d) { return Dim<0>(d[0]); }
// Assigns to `ddim` a Dim of rank `n` built from the values at `dims`.
// Ranks 0 through 9 are handled; for any other `n` the function leaves
// `ddim` unchanged and reports nothing (the diagnostic is commented out).
void make_ddim(DDim &ddim, const int64_t *dims, int n) {
switch (n) {
case 0:
ddim = make_dim<0>(dims);
break;
case 1:
ddim = make_dim<1>(dims);
break;
case 2:
ddim = make_dim<2>(dims);
break;
case 3:
ddim = make_dim<3>(dims);
break;
case 4:
ddim = make_dim<4>(dims);
break;
case 5:
ddim = make_dim<5>(dims);
break;
case 6:
ddim = make_dim<6>(dims);
break;
case 7:
ddim = make_dim<7>(dims);
break;
case 8:
ddim = make_dim<8>(dims);
break;
case 9:
ddim = make_dim<9>(dims);
break;
default:
// std::cout << "Dynamic dimensions must have between [1, 9]
// dimensions.";
break;
}
}
/// @endcond
// Builds a DDim whose extents are the values of `dims`, in order.
DDim make_ddim(std::initializer_list<int64_t> dims) {
    DDim ddim(make_dim(0));
    const int64_t *first = dims.begin();
    make_ddim(ddim, first, dims.size());
    return ddim;
}
// Builds a DDim whose extents are the values of `dims`, in order.
DDim make_ddim(const std::vector<int64_t> &dims) {
    DDim result(make_dim(0));
    // data() is well-defined for an empty vector, unlike &dims[0].
    make_ddim(result, dims.data(), dims.size());
    return result;
}
// Builds a DDim from int extents by widening each value to int64_t.
DDim make_ddim(const std::vector<int> &dims) {
    std::vector<int64_t> widened;
    widened.reserve(dims.size());
    for (auto it = dims.begin(); it != dims.end(); ++it) {
        widened.push_back(static_cast<int64_t>(*it));
    }
    return make_ddim(widened);
}
/// @cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors
// Visitor returning a mutable reference to element `idx` of the visited Dim.
struct DynamicMutableIndexer : Vistor<int64_t &> {
    explicit DynamicMutableIndexer(int idx) : idx_(idx) {}

    template <int D> int64_t &operator()(Dim<D> &dim) const {
        return dim[idx_];
    }

  private:
    int idx_;
};
// Visitor returning the value of element `idx` of the visited Dim.
struct DynamicConstIndexer : public Vistor<int64_t> {
    explicit DynamicConstIndexer(int idx) : idx_(idx) {}

    template <int D> int64_t operator()(const Dim<D> &dim) const {
        const int64_t extent = dim[idx_];
        return extent;
    }

  private:
    int idx_;
};
/// @endcond
// Mutable access to extent `idx`, resolved by applying a
// DynamicMutableIndexer visitor to this DDim.
int64_t &DDim::operator[](int idx) {
return DDim::ApplyVistor(DynamicMutableIndexer(idx), *this);
}
// Value of extent `idx`, resolved by applying a DynamicConstIndexer
// visitor to this DDim.
int64_t DDim::operator[](int idx) const {
return DDim::ApplyVistor(DynamicConstIndexer(idx), *this);
}
// Number of dimensions (the arity) of this DDim.
int DDim::size() const { return arity(*this); }
// Two DDims are equal when they have the same rank and identical extents.
// The vectors are compared as a whole so that a rank mismatch yields false;
// the previous element loop never compared sizes, so it read past the end
// of the shorter operand or reported a prefix match as equal.
bool DDim::operator==(DDim d) const {
    return vectorize(*this) == vectorize(d);
}
// Negation of operator==.
bool DDim::operator!=(DDim d) const {
    const bool same = (*this == d);
    return !same;
}
// Element-wise sum of two DDims; asserts that both have the same rank.
DDim DDim::operator+(DDim d) const {
    const std::vector<int64_t> lhs = vectorize(*this);
    const std::vector<int64_t> rhs = vectorize(d);
    assert(lhs.size() == rhs.size());

    std::vector<int64_t> sums;
    auto r = rhs.begin();
    for (auto l = lhs.begin(); l != lhs.end(); ++l, ++r) {
        sums.push_back(*l + *r);
    }
    return make_ddim(sums);
}
// Element-wise product of two DDims; asserts that both have the same rank.
DDim DDim::operator*(DDim d) const {
    const std::vector<int64_t> lhs = vectorize(*this);
    const std::vector<int64_t> rhs = vectorize(d);
    assert(lhs.size() == rhs.size());

    std::vector<int64_t> products;
    auto r = rhs.begin();
    for (auto l = lhs.begin(); l != lhs.end(); ++l, ++r) {
        products.push_back(*l * *r);
    }
    return make_ddim(products);
}
// Returns extent `idx` of `ddim`.
int64_t get(const DDim &ddim, int idx) { return ddim[idx]; }
// Sets extent `idx` of `ddim` to `value`. Note that `value` is an int
// although extents are stored as int64_t.
void set(DDim &ddim, int idx, int value) { ddim[idx] = value; }
/// @cond HIDDEN
// Visitor that appends every extent of the visited Dim, head first, to the
// referenced vector.
struct VectorizeVisitor : Vistor<void> {
    std::vector<int64_t> &vector;

    explicit VectorizeVisitor(std::vector<int64_t> &v) : vector(v) {}

    // Appends the head, then recurses into the tail.
    template <typename T> void operator()(const T &t) {
        vector.push_back(t.head);
        (*this)(t.tail);
    }

    // End of recursion: Dim<0> contributes nothing.
    void operator()(const Dim<0> &t) {}
};
/// @endcond
std::vector<int64_t> vectorize(const DDim &ddim) {
std::vector<int64_t> result;
VectorizeVisitor visitor(result);
DDim::ApplyVistor(visitor, ddim);
return result;
}
// NOTE: framework::vectorize converts to type int64_t
// which does not fit cudnn inputs.
// Returns the extents of `ddim` converted to int (each int64_t value is
// narrowed).
std::vector<int> vectorize2int(const DDim &ddim) {
    const std::vector<int64_t> wide = vectorize(ddim);
    std::vector<int> narrow;
    narrow.reserve(wide.size());
    for (auto it = wide.begin(); it != wide.end(); ++it) {
        narrow.push_back(static_cast<int>(*it));
    }
    return narrow;
}
// Visitor returning product(dim) for the visited Dim.
struct ProductVisitor : Vistor<int64_t> {
    template <int D> int64_t operator()(const Dim<D> &dim) {
        const int64_t total = product(dim);
        return total;
    }
};
int64_t product(const DDim &ddim) {
ProductVisitor visitor;
return DDim::ApplyVistor(visitor, ddim);
}
// Visitor that appends the extents in positions [begin, end) of the visited
// Dim to the referenced vector. `begin` and `end` are counted down as the
// visitor walks from head to tail.
struct SliceVectorizeVisitor : Vistor<void> {
    std::vector<int64_t> &vector;
    int begin;
    int end;

    SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
        : vector(v), begin(b), end(e) {
        // PADDLE_ENFORCE(begin < end,
        //                "Begin index must be less than end index in ddim
        //                slice.");
        // PADDLE_ENFORCE(begin >= 0,
        //                "Begin index can't be less than zero in ddim slice.");
    }

    template <int S> void operator()(const Dim<S> &dim) {
        if (begin != 0) {
            // Still before the slice: skip this extent.
            --begin;
        } else {
            vector.push_back(dim.head);
        }
        --end;
        if (end <= 0) {
            return;
        }
        (*this)(dim.tail);
    }

    void operator()(const Dim<0> &dim) {
        // PADDLE_ENFORCE(end == 0, "End index in ddim slice is out of bound.");
    }
};
// Returns a DDim made of the extents of `ddim` in positions [begin, end).
DDim slice_ddim(const DDim &ddim, int begin, int end) {
    std::vector<int64_t> picked;
    picked.reserve(end - begin);

    SliceVectorizeVisitor slicer(picked, begin, end);
    DDim::ApplyVistor(slicer, ddim);

    return make_ddim(picked);
}
/// \cond HIDDEN
struct ArityVisitor : Vistor<int> {
template <int D> int operator()(Dim<D>) const { return D; }
};
/// \endcond
int arity(const DDim &d) {
ArityVisitor arityVisitor = ArityVisitor();
return DDim::ApplyVistor(arityVisitor, d);
// return arityVisitor(d.var.Get<Dim<4>>());
// return boost::apply_visitor(ArityVisitor(), d); }
}
/// \cond HIDDEN
/// \endcond
struct OSVistor : Vistor<std::ostream &> {
OSVistor(std::ostream &os) : os_(os) {}
template <int D> std::ostream &operator()(Dim<D> dim) const {
return os_ << dim;
}
private:
std::ostream &os_;
};
std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
auto vistor = OSVistor(os);
DDim::ApplyVistor(vistor, ddim);
return os;
}
DDim::DDim(std::initializer_list<int64_t> init_list) {
*this = make_ddim(init_list);
}
DDim flatten_to_2d(const DDim &src, int num_col_dims) {
int rank = src.size();
return make_ddim({product(slice_ddim(src, 0, num_col_dims)),
product(slice_ddim(src, num_col_dims, rank))});
}
DDim flatten_to_1d(const DDim &src) { return make_ddim({product(src)}); }
/// Computes row-major element strides for `ddim`: the last extent gets
/// stride 1 and every earlier stride is the next stride times the next
/// extent. A rank-0 shape has no strides, so it is returned unchanged
/// instead of indexing element -1.
DDim stride(const DDim &ddim) {
    const int rank = ddim.size();
    if (rank < 1) {
        return ddim;
    }
    std::vector<int64_t> strides(rank);
    strides[rank - 1] = 1;
    for (int i = rank - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * ddim[i + 1];
    }
    return framework::make_ddim(strides);
}
/// Computes, for every axis i, the product of the extents from axis i to
/// the last axis. A rank-0 shape is returned unchanged instead of
/// indexing element -1.
DDim stride_numel(const framework::DDim &ddim) {
    const int rank = ddim.size();
    if (rank < 1) {
        return ddim;
    }
    std::vector<int64_t> strides(rank);
    strides[rank - 1] = ddim[rank - 1];
    for (int i = rank - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * ddim[i];
    }
    return framework::make_ddim(strides);
}
} // namespace framework
namespace framework {
/// @cond HIDDEN
/// Recursively assembles a Dim<i> from the i consecutive values starting at
/// `d`: the first value becomes the head, the remaining ones form the tail.
template <int i> Dim<i> make_dim(const int64_t *d) {
    const int64_t *rest = d + 1;
    return Dim<i>(d[0], make_dim<i - 1>(rest));
}
/// Recursion terminator for make_dim: a rank-0 Dim stores no extents.
/// The pointer is intentionally not dereferenced: when the recursion gets
/// here it points one past the last value that was consumed (and may not be
/// a valid pointer at all for an empty input).
template <> Dim<0> make_dim<0>(const int64_t * /*d*/) { return Dim<0>(); }
/// Overwrites `ddim` with a Dim of rank `n` built from the first `n` values
/// of `dims`. Ranks outside 0..9 leave `ddim` untouched.
void make_ddim(DDim &ddim, const int64_t *dims, int n) {
    switch (n) {
    case 0: ddim = make_dim<0>(dims); return;
    case 1: ddim = make_dim<1>(dims); return;
    case 2: ddim = make_dim<2>(dims); return;
    case 3: ddim = make_dim<3>(dims); return;
    case 4: ddim = make_dim<4>(dims); return;
    case 5: ddim = make_dim<5>(dims); return;
    case 6: ddim = make_dim<6>(dims); return;
    case 7: ddim = make_dim<7>(dims); return;
    case 8: ddim = make_dim<8>(dims); return;
    case 9: ddim = make_dim<9>(dims); return;
    default:
        // Unsupported rank: nothing is assigned and no diagnostic is
        // emitted (the message that used to be printed here is disabled).
        return;
    }
}
/// @endcond
/// Builds a DDim whose extents are the values of `dims`, in order.
DDim make_ddim(std::initializer_list<int64_t> dims) {
    const int64_t *first = dims.begin();
    const int rank = static_cast<int>(dims.size());
    // Start from a placeholder rank-1 value; it is overwritten below.
    DDim ddim(make_dim(0));
    make_ddim(ddim, first, rank);
    return ddim;
}
/// Builds a DDim whose extents are the elements of `dims`, in order.
///
/// An empty vector is accepted: it never has its first element addressed
/// (indexing element 0 of an empty vector is undefined), and the rank-0
/// path is handed a valid placeholder address instead.
DDim make_ddim(const std::vector<int64_t> &dims) {
    // Stand-in storage so the pointer passed below is always dereferenceable,
    // even when there are no extents at all.
    const int64_t no_extents = 0;
    const int64_t *first = dims.empty() ? &no_extents : dims.data();

    // Start from a placeholder rank-1 value; it is overwritten below.
    DDim result(make_dim(0));
    make_ddim(result, first, static_cast<int>(dims.size()));
    return result;
}
/// Builds a DDim from int extents by widening each one to int64_t.
DDim make_ddim(const std::vector<int> &dims) {
    // The range constructor performs the int -> int64_t conversion.
    std::vector<int64_t> widened(dims.begin(), dims.end());
    return make_ddim(widened);
}
/// @cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes
// errors
/// Visitor returning a mutable reference to one extent of whichever
/// Dim<D> it is applied to. The position is fixed at construction.
struct DynamicMutableIndexer : Vistor<int64_t &> {
    explicit DynamicMutableIndexer(int idx) : index_(idx) {}

    template <int D> int64_t &operator()(Dim<D> &dim) const {
        int64_t &slot = dim[index_];
        return slot;
    }

  private:
    int index_;
};
/// Visitor returning, by value, one extent of whichever Dim<D> it is
/// applied to. The position is fixed at construction.
struct DynamicConstIndexer : public Vistor<int64_t> {
    explicit DynamicConstIndexer(int idx) : index_(idx) {}

    template <int D> int64_t operator()(const Dim<D> &dim) const {
        const int64_t extent = dim[index_];
        return extent;
    }

  private:
    int index_;
};
/// @endcond
/// Mutable element access: dispatches on the stored rank and returns a
/// reference to extent `idx`.
int64_t &DDim::operator[](int idx) {
    const DynamicMutableIndexer pick(idx);
    return DDim::ApplyVistor(pick, *this);
}
/// Read-only element access: dispatches on the stored rank and returns
/// extent `idx` by value.
int64_t DDim::operator[](int idx) const {
    const DynamicConstIndexer pick(idx);
    return DDim::ApplyVistor(pick, *this);
}
/// Number of extents currently stored (the rank), as reported by arity().
int DDim::size() const {
    const int rank = arity(*this);
    return rank;
}
/// Two DDims are equal when they have the same rank and identical extents.
///
/// The comparison is done on the flattened extent lists; std::vector's
/// operator== checks the lengths first, so shapes of different rank compare
/// unequal instead of being matched on a common prefix or read past the end
/// of the shorter list.
bool DDim::operator==(DDim d) const {
    const std::vector<int64_t> lhs = vectorize(*this);
    const std::vector<int64_t> rhs = vectorize(d);
    return lhs == rhs;
}
/// Negation of operator==.
bool DDim::operator!=(DDim d) const {
    const bool same = (*this == d);
    return !same;
}
/// Element-wise sum of two shapes. Both operands are expected to have the
/// same rank; this is only checked by an assert.
DDim DDim::operator+(DDim d) const {
    const std::vector<int64_t> lhs = vectorize(*this);
    const std::vector<int64_t> rhs = vectorize(d);
    assert(lhs.size() == rhs.size());

    std::vector<int64_t> sums(lhs.size());
    for (std::vector<int64_t>::size_type k = 0; k < lhs.size(); ++k) {
        sums[k] = lhs[k] + rhs[k];
    }
    return make_ddim(sums);
}
/// Element-wise product of two shapes. Both operands are expected to have
/// the same rank; this is only checked by an assert.
DDim DDim::operator*(DDim d) const {
    const std::vector<int64_t> lhs = vectorize(*this);
    const std::vector<int64_t> rhs = vectorize(d);
    assert(lhs.size() == rhs.size());

    std::vector<int64_t> products(lhs.size());
    for (std::vector<int64_t>::size_type k = 0; k < lhs.size(); ++k) {
        products[k] = lhs[k] * rhs[k];
    }
    return make_ddim(products);
}
/// Returns extent `idx` of `ddim`.
int64_t get(const DDim &ddim, int idx) {
    const int64_t extent = ddim[idx];
    return extent;
}
/// Overwrites extent `idx` of `ddim` with `value`.
void set(DDim &ddim, int idx, int value) {
    int64_t &slot = ddim[idx];
    slot = value;
}
/// @cond HIDDEN
/// Visitor that appends every extent of a Dim, head first, to a vector
/// owned by the caller.
struct VectorizeVisitor : Vistor<void> {
    std::vector<int64_t> &vector;

    explicit VectorizeVisitor(std::vector<int64_t> &v) : vector(v) {}

    // Rank 0 holds no extents, so the recursion stops here.
    void operator()(const Dim<0> &t) {}

    // Record the leading extent, then continue with the remaining ones.
    template <typename T> void operator()(const T &t) {
        vector.push_back(t.head);
        (*this)(t.tail);
    }
};
/// @endcond
/// Flattens `ddim` into a vector holding its extents in order.
std::vector<int64_t> vectorize(const DDim &ddim) {
    std::vector<int64_t> extents;
    // The visitor keeps a reference to `extents`, so copying the visitor
    // into ApplyVistor still fills this vector.
    DDim::ApplyVistor(VectorizeVisitor(extents), ddim);
    return extents;
}
// NOTE: framework::vectorize produces int64_t values, which does not fit
// cudnn inputs; this variant narrows every extent to int.
std::vector<int> vectorize2int(const DDim &ddim) {
    const std::vector<int64_t> wide = vectorize(ddim);
    std::vector<int> narrow;
    narrow.reserve(wide.size());
    for (int64_t extent : wide) {
        narrow.push_back(static_cast<int>(extent));
    }
    return narrow;
}
/// Visitor that forwards whichever Dim<D> is stored to the Dim overload of
/// product().
struct ProductVisitor : Vistor<int64_t> {
    template <int D> int64_t operator()(const Dim<D> &dim) {
        const int64_t total = product(dim);
        return total;
    }
};
/// Product of all extents of `ddim`, computed by dispatching to the
/// statically sized Dim overload.
int64_t product(const DDim &ddim) {
    return DDim::ApplyVistor(ProductVisitor(), ddim);
}
/// Visitor that copies the extents at positions [begin, end) of a Dim into
/// a caller-owned vector.
///
/// `begin` and `end` are consumed as counters while the Dim is walked:
/// `begin` is the number of leading extents still to skip and `end` the
/// number of extents still to visit.
struct SliceVectorizeVisitor : Vistor<void> {
    std::vector<int64_t> &vector;
    int begin;
    int end;

    // No argument validation happens here: the checks that begin < end and
    // begin >= 0 are currently disabled.
    SliceVectorizeVisitor(std::vector<int64_t> &v, int b, int e)
        : vector(v), begin(b), end(e) {}

    template <int S> void operator()(const Dim<S> &dim) {
        if (begin != 0) {
            // Still in front of the slice: skip this extent.
            --begin;
        } else {
            vector.push_back(dim.head);
        }
        --end;
        if (end > 0) {
            (*this)(dim.tail);
        }
    }

    // Reached the end of the Dim; the out-of-bound check for `end` is
    // currently disabled, so nothing happens.
    void operator()(const Dim<0> &dim) {}
};
/// Returns a new DDim made of the extents of `ddim` selected by
/// SliceVectorizeVisitor for the range [begin, end).
DDim slice_ddim(const DDim &ddim, int begin, int end) {
    std::vector<int64_t> picked;
    picked.reserve(end - begin);
    DDim::ApplyVistor(SliceVectorizeVisitor(picked, begin, end), ddim);
    return make_ddim(picked);
}
/// \cond HIDDEN
/// Visitor that reports the static rank D of the Dim<D> it is applied to.
struct ArityVisitor : Vistor<int> {
    template <int D> int operator()(Dim<D>) const {
        const int rank = D;
        return rank;
    }
};
/// \endcond
/// Returns the rank of the Dim currently stored in `d`.
int arity(const DDim &d) {
    return DDim::ApplyVistor(ArityVisitor(), d);
}
/// \cond HIDDEN
/// \endcond
/// Visitor that streams whichever Dim<D> is stored into a fixed ostream.
struct OSVistor : Vistor<std::ostream &> {
    OSVistor(std::ostream &os) : os_(os) {}

    // Writes `dim` with the Dim stream operator and hands the stream back.
    template <int D> std::ostream &operator()(Dim<D> dim) const {
        os_ << dim;
        return os_;
    }

  private:
    std::ostream &os_;
};
std::ostream &operator<<(std::ostream &os, const DDim &ddim) {
auto vistor = OSVistor(os);
DDim::ApplyVistor(vistor, ddim);
return os;
}
/// Constructs a DDim from a braced list of extents by delegating to
/// make_ddim and assigning the result to this object.
DDim::DDim(std::initializer_list<int64_t> init_list) {
    this->operator=(make_ddim(init_list));
}
/// Collapses `src` into a 2-D shape: the first extent is the product of the
/// leading `num_col_dims` extents, the second the product of the rest.
DDim flatten_to_2d(const DDim &src, int num_col_dims) {
    const int rank = src.size();
    const int64_t leading = product(slice_ddim(src, 0, num_col_dims));
    const int64_t trailing = product(slice_ddim(src, num_col_dims, rank));
    return make_ddim({leading, trailing});
}
/// Collapses `src` into a 1-D shape whose single extent is the product of
/// all extents of `src`.
DDim flatten_to_1d(const DDim &src) {
    const int64_t total = product(src);
    return make_ddim({total});
}
/// Computes row-major element strides for `ddim`: the last extent gets
/// stride 1 and every earlier stride is the next stride times the next
/// extent. A rank-0 shape has no strides, so it is returned unchanged
/// instead of indexing element -1.
DDim stride(const DDim &ddim) {
    // Query the rank once; every size() call dispatches through a visitor.
    const int rank = ddim.size();
    if (rank < 1) {
        return ddim;
    }
    std::vector<int64_t> strides(rank);
    strides[rank - 1] = 1;
    for (int i = rank - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * ddim[i + 1];
    }
    return framework::make_ddim(strides);
}
/// Computes, for every axis i, the product of the extents from axis i to
/// the last axis. A rank-0 shape is returned unchanged instead of
/// indexing element -1.
DDim stride_numel(const framework::DDim &ddim) {
    // Query the rank once; every size() call dispatches through a visitor.
    const int rank = ddim.size();
    if (rank < 1) {
        return ddim;
    }
    std::vector<int64_t> strides(rank);
    strides[rank - 1] = ddim[rank - 1];
    for (int i = rank - 2; i >= 0; --i) {
        strides[i] = strides[i + 1] * ddim[i];
    }
    return framework::make_ddim(strides);
}
} // namespace framework
} // namespace paddle_mobile
......@@ -22,140 +22,145 @@ limitations under the License. */
#include <vector>
namespace paddle_mobile {
namespace framework {
/**
* \brief A dynamically sized dimension.
*
* The number of dimensions must be between [1, 9].
*/
struct DDim {
typedef Variant<Dim<0>, Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>,
Dim<7>, Dim<8>, Dim<9>>
DDimVar;
DDimVar var;
template <typename Vistor>
static typename Vistor::type_t ApplyVistor(Vistor vistor, const DDim &d) {
if (d.var.TypeId() == typeid(Dim<0>).hash_code()) {
return vistor(d.var.Get<Dim<0>>());
} else if (d.var.TypeId() == typeid(Dim<1>).hash_code()) {
return vistor(d.var.Get<Dim<1>>());
} else if (d.var.TypeId() == typeid(Dim<2>).hash_code()) {
return vistor(d.var.Get<Dim<2>>());
} else if (d.var.TypeId() == typeid(Dim<3>).hash_code()) {
return vistor(d.var.Get<Dim<3>>());
} else if (d.var.TypeId() == typeid(Dim<4>).hash_code()) {
return vistor(d.var.Get<Dim<4>>());
} else if (d.var.TypeId() == typeid(Dim<5>).hash_code()) {
return vistor(d.var.Get<Dim<5>>());
} else if (d.var.TypeId() == typeid(Dim<6>).hash_code()) {
return vistor(d.var.Get<Dim<6>>());
} else if (d.var.TypeId() == typeid(Dim<7>).hash_code()) {
return vistor(d.var.Get<Dim<7>>());
} else if (d.var.TypeId() == typeid(Dim<8>).hash_code()) {
return vistor(d.var.Get<Dim<8>>());
} else if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
return vistor(d.var.Get<Dim<9>>());
} else {
printf(" dim not support \n");
throw std::bad_exception();
// return typename Vistor::type_t();
}
}
DDim() { var.Set<Dim<1>>(Dim<1>()); }
template <int D> explicit DDim(const Dim<D> &in) { var.Set<Dim<D>>(in); }
/*implicit*/ DDim(std::initializer_list<int64_t> init_list);
template <int D> DDim &operator=(const Dim<D> &in) {
var.Set<Dim<D>>(in);
return *this;
}
int64_t &operator[](int idx);
int64_t operator[](int idx) const;
// template <typename Visitor>
// typename Visitor::result_type apply_visitor(Visitor& visitor) {
// return var.apply_visitor(visitor);
// }
//
// template <typename Visitor>
// typename Visitor::result_type apply_visitor(Visitor& visitor) const {
// return var.apply_visitor(visitor);
// }
DDimVar getVar() { return var; }
bool operator==(DDim d) const;
bool operator!=(DDim d) const;
DDim operator+(DDim d) const;
DDim operator*(DDim d) const;
int size() const;
};
/**
* \brief Make a DDim from std::vector<int64_t>
*
* \param dims A vector of ints. Must be sized between [1, 9]
*/
DDim make_ddim(const std::vector<int64_t> &dims);
DDim make_ddim(const std::vector<int> &dims);
/**
* \brief Make a DDim from an initializer list
*
* \param dims An initializer list of ints. Must be sized between [1, 9]
*
*/
DDim make_ddim(std::initializer_list<int64_t> dims);
int64_t get(const DDim &dim, int idx);
void set(DDim &dim, int idx, int val);
std::vector<int64_t> vectorize(const DDim &ddim);
std::vector<int> vectorize2int(const DDim &ddim);
int64_t product(const DDim &ddim);
/**
* \brief Slice a ddim
*
* Slice dim with [begin, end).
* e.g. DDim d = make_ddim({1,2,3,4,5});
* slice_ddim(d, 1, 3); ====> {2,3}
*/
DDim slice_ddim(const DDim &dim, int begin, int end);
/**
* \brief What is the length of this dimension?
*
* \param Dynamic dimension to inspect
*/
namespace framework {
/**
 * \brief A dimension whose rank is selected at run time.
 *
 * The value is stored as one of Dim<0> ... Dim<9> inside a Variant.
 * The documented rank range is [1, 9]; note that Dim<0> is nevertheless one
 * of the stored alternatives.
 */
struct DDim {
    typedef Variant<Dim<0>, Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>,
                    Dim<6>, Dim<7>, Dim<8>, Dim<9>>
        DDimVar;
    DDimVar var;

    /// Invokes `vistor` on the Dim<N> currently held by `d` and returns the
    /// visitor's result. The stored alternative is identified by comparing
    /// the variant's type id with the hash code of each Dim<N>; if none
    /// matches, a message is printed and std::bad_exception is thrown.
    template <typename Vistor>
    static typename Vistor::type_t ApplyVistor(Vistor vistor,
                                               const DDim &d) {
        if (d.var.TypeId() == typeid(Dim<0>).hash_code()) {
            return vistor(d.var.Get<Dim<0>>());
        }
        if (d.var.TypeId() == typeid(Dim<1>).hash_code()) {
            return vistor(d.var.Get<Dim<1>>());
        }
        if (d.var.TypeId() == typeid(Dim<2>).hash_code()) {
            return vistor(d.var.Get<Dim<2>>());
        }
        if (d.var.TypeId() == typeid(Dim<3>).hash_code()) {
            return vistor(d.var.Get<Dim<3>>());
        }
        if (d.var.TypeId() == typeid(Dim<4>).hash_code()) {
            return vistor(d.var.Get<Dim<4>>());
        }
        if (d.var.TypeId() == typeid(Dim<5>).hash_code()) {
            return vistor(d.var.Get<Dim<5>>());
        }
        if (d.var.TypeId() == typeid(Dim<6>).hash_code()) {
            return vistor(d.var.Get<Dim<6>>());
        }
        if (d.var.TypeId() == typeid(Dim<7>).hash_code()) {
            return vistor(d.var.Get<Dim<7>>());
        }
        if (d.var.TypeId() == typeid(Dim<8>).hash_code()) {
            return vistor(d.var.Get<Dim<8>>());
        }
        if (d.var.TypeId() == typeid(Dim<9>).hash_code()) {
            return vistor(d.var.Get<Dim<9>>());
        }
        printf(" dim not support \n");
        throw std::bad_exception();
    }

    /// Default state: a value-initialized rank-1 Dim.
    DDim() { var.Set<Dim<1>>(Dim<1>()); }

    /// Wraps a statically sized Dim.
    template <int D> explicit DDim(const Dim<D> &in) {
        var.Set<Dim<D>>(in);
    }

    /*implicit*/ DDim(std::initializer_list<int64_t> init_list);

    /// Replaces the stored value with `in`; the rank may change.
    template <int D> DDim &operator=(const Dim<D> &in) {
        var.Set<Dim<D>>(in);
        return *this;
    }

    int64_t &operator[](int idx);
    int64_t operator[](int idx) const;

    /// Returns a copy of the underlying variant.
    DDimVar getVar() { return var; }

    bool operator==(DDim d) const;
    bool operator!=(DDim d) const;

    DDim operator+(DDim d) const;
    DDim operator*(DDim d) const;

    int size() const;
};
/**
* \brief Make a DDim from std::vector<int64_t>
*
* \param dims A vector of ints. Must be sized between [1, 9]
*/
DDim make_ddim(const std::vector<int64_t> &dims);
DDim make_ddim(const std::vector<int> &dims);
/**
* \brief Make a DDim from an initializer list
*
* \param dims An initializer list of ints. Must be sized between [1, 9]
*
*/
DDim make_ddim(std::initializer_list<int64_t> dims);
int64_t get(const DDim &dim, int idx);
void set(DDim &dim, int idx, int val);
std::vector<int64_t> vectorize(const DDim &ddim);
std::vector<int> vectorize2int(const DDim &ddim);
int64_t product(const DDim &ddim);
/**
* \brief Slice a ddim
*
* Slice dim with [begin, end).
* e.g. DDim d = make_ddim({1,2,3,4,5});
* slice_ddim(d, 1, 3); ====> {2,3}
*/
DDim slice_ddim(const DDim &dim, int begin, int end);
/**
* \brief What is the length of this dimension?
*
* \param Dynamic dimension to inspect
*/
int arity(const DDim &ddim);
int arity(const DDim &ddim);
std::ostream &operator<<(std::ostream &, const DDim &);
std::ostream &operator<<(std::ostream &, const DDim &);
// Reshape a tensor to a matrix. The matrix's first dimension(column length)
// will be the product of tensor's first `num_col_dims` dimensions.
DDim flatten_to_2d(const DDim &src, int num_col_dims);
// Reshape a tensor to a matrix. The matrix's first dimension(column
// length)
// will be the product of tensor's first `num_col_dims` dimensions.
DDim flatten_to_2d(const DDim &src, int num_col_dims);
DDim flatten_to_1d(const DDim &src);
DDim flatten_to_1d(const DDim &src);
DDim stride(const DDim &ddim);
DDim stride(const DDim &ddim);
DDim stride_numel(const DDim &ddim);
} // namespace framework
DDim stride_numel(const DDim &ddim);
} // namespace framework
} // namespace paddle_mobile
......@@ -21,388 +21,410 @@
#include "platform/hostdevice.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
// Statically sized, statically indexed dimension
template <int i> struct Dim {
static constexpr int dimensions = i;
// Statically sized, statically indexed dimension
template <int i> struct Dim {
static constexpr int dimensions = i;
template <typename... Args>
HOSTDEVICE Dim(int64_t _head, Args... _tail) : head(_head), tail(_tail...) {
static_assert(sizeof...(_tail) == i - 1,
"Dim initialized with the wrong number of parameters");
}
template <typename... Args>
HOSTDEVICE Dim(int64_t _head, Args... _tail)
: head(_head), tail(_tail...) {
static_assert(
sizeof...(_tail) == i - 1,
"Dim initialized with the wrong number of parameters");
}
HOSTDEVICE
Dim(int64_t _head, const Dim<i - 1> &_tail) : head(_head), tail(_tail) {}
HOSTDEVICE
Dim(int64_t _head, const Dim<i - 1> &_tail)
: head(_head), tail(_tail) {}
HOSTDEVICE
Dim() : head(0), tail() {}
HOSTDEVICE
Dim() : head(0), tail() {}
/** Construct a Dim from a linear index and size. Uses Fortran order
* indexing. */
HOSTDEVICE
Dim(int64_t idx, const Dim<i> &size)
: head(idx % size.head), tail(idx / size.head, size.tail) {}
/** Construct a Dim from a linear index and size. Uses Fortran
* order
* indexing. */
HOSTDEVICE
Dim(int64_t idx, const Dim<i> &size)
: head(idx % size.head), tail(idx / size.head, size.tail) {}
/** Construct a Dim with each dimension set to the given index */
HOSTDEVICE
Dim(int64_t idx) : head(idx), tail(idx) {}
/** Construct a Dim with each dimension set to the given index */
HOSTDEVICE
Dim(int64_t idx) : head(idx), tail(idx) {}
HOSTDEVICE
bool operator==(const Dim<i> &o) const {
return (head == o.head) && (tail == o.tail);
}
HOSTDEVICE
bool operator==(const Dim<i> &o) const {
return (head == o.head) && (tail == o.tail);
}
HOSTDEVICE
bool operator!=(const Dim<i> &o) const { return !(*this == o); }
HOSTDEVICE
bool operator!=(const Dim<i> &o) const { return !(*this == o); }
HOSTDEVICE
int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const;
HOSTDEVICE
int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const;
HOST std::string to_string() const;
HOST std::string to_string() const;
int64_t head;
Dim<i - 1> tail;
};
int64_t head;
Dim<i - 1> tail;
};
// Base case specialization
template <> struct Dim<0> {
static constexpr int dimensions = 0;
// Base case specialization
template <> struct Dim<0> {
static constexpr int dimensions = 0;
HOSTDEVICE
Dim(int64_t _head) {}
HOSTDEVICE
Dim(int64_t _head) {}
HOSTDEVICE
Dim() {}
HOSTDEVICE
Dim() {}
HOSTDEVICE
Dim(int idx, const Dim<0> &size) {
HOSTDEVICE
Dim(int idx, const Dim<0> &size) {
#ifndef __CUDA_ARCH__
if (idx > 0) {
throw std::invalid_argument("Index out of range.");
}
if (idx > 0) {
throw std::invalid_argument("Index out of range.");
}
#else
PADDLE_ASSERT(idx == 0);
PADDLE_ASSERT(idx == 0);
#endif
}
HOSTDEVICE
bool operator==(const Dim<0> &o) const { return true; }
HOSTDEVICE
bool operator!=(const Dim<0> &o) const { return false; }
HOSTDEVICE
int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const;
};
namespace {
// Helper for accessing Dim classes
template <int i> struct DimGetter {
// Return a copy if Dim is const
template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
return DimGetter<i - 1>::impl(d.tail);
}
// Return a reference if Dim is mutable
template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
return DimGetter<i - 1>::impl(d.tail);
}
};
// Eureka! We found the element!
template <> struct DimGetter<0> {
// Return a copy if Dim is const
template <typename D> HOSTDEVICE static int64_t impl(const D &d) {
return d.head;
}
// Return a reference if Dim is mutable
template <typename D> HOSTDEVICE static int64_t &impl(D &d) { return d.head; }
};
template <int D> HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
}
HOSTDEVICE
bool operator==(const Dim<0> &o) const { return true; }
HOSTDEVICE
bool operator!=(const Dim<0> &o) const { return false; }
HOSTDEVICE
int64_t &operator[](int idx);
HOSTDEVICE
int64_t operator[](int idx) const;
};
namespace {
// Helper for compile-time indexed access into a Dim: each level of the
// recursion strips one head by descending into `tail`.
template <int i> struct DimGetter {
    // Const input: the selected extent is returned by value.
    template <typename D>
    HOSTDEVICE static int64_t impl(const D &d) {
        typedef DimGetter<i - 1> Next;
        return Next::impl(d.tail);
    }
    // Mutable input: a reference to the selected extent is returned.
    template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
        typedef DimGetter<i - 1> Next;
        return Next::impl(d.tail);
    }
};
// End of the recursion: the requested extent is the head of what is left.
template <> struct DimGetter<0> {
    // Const input: the head is returned by value.
    template <typename D>
    HOSTDEVICE static int64_t impl(const D &d) {
        const int64_t extent = d.head;
        return extent;
    }
    // Mutable input: a reference to the head is returned.
    template <typename D> HOSTDEVICE static int64_t &impl(D &d) {
        int64_t &slot = d.head;
        return slot;
    }
};
template <int D> HOSTDEVICE int64_t &indexer(Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension");
}
if (idx < 0) {
throw std::invalid_argument(
"Tried to access a negative dimension");
}
#else
PADDLE_ASSERT(idx >= 0);
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
}
return indexer(dim.tail, idx - 1);
}
if (idx == 0) {
return dim.head;
}
return indexer(dim.tail, idx - 1);
}
template <> HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
template <> HOSTDEVICE int64_t &indexer<0>(Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__
throw std::invalid_argument("Invalid index");
throw std::invalid_argument("Invalid index");
#else
PADDLE_ASSERT(false);
PADDLE_ASSERT(false);
#if CUDA_VERSION < 8000
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t head = 0;
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t head = 0;
#else
static int64_t head = 0;
static int64_t head = 0;
#endif
return head;
return head;
#endif
}
}
template <int D> HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
template <int D>
HOSTDEVICE int64_t indexer(const Dim<D> &dim, int idx) {
#ifndef __CUDA_ARCH__
if (idx < 0) {
throw std::invalid_argument("Tried to access a negative dimension");
}
if (idx < 0) {
throw std::invalid_argument(
"Tried to access a negative dimension");
}
#else
PADDLE_ASSERT(idx >= 0);
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
}
return indexer(dim.tail, idx - 1);
}
template <> HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
if (idx == 0) {
return dim.head;
}
return indexer(dim.tail, idx - 1);
}
template <>
HOSTDEVICE int64_t indexer<0>(const Dim<0> &dim, int idx) {
#ifndef __CUDA_ARCH__
throw std::invalid_argument("Invalid index");
throw std::invalid_argument("Invalid index");
#else
PADDLE_ASSERT(false);
PADDLE_ASSERT(false);
#if CUDA_VERSION < 8000
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t head = 0;
// On CUDA versions previous to 8.0, only __shared__ variables
// could be declared as static in the device code.
int64_t head = 0;
#else
static int64_t head = 0;
static int64_t head = 0;
#endif
return head;
return head;
#endif
}
} // namespace
// Static access to constant Dim
template <int i, int l> HOSTDEVICE int64_t get(const Dim<l> &d) {
return DimGetter<i>::impl(d);
}
// Static access to mutable Dim
template <int i, int l> HOSTDEVICE int64_t &get(Dim<l> &d) {
return DimGetter<i>::impl(d);
}
// Dynamic access to constant Dim
template <int l> HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
// std::cout << "l: " << l << std::endl;
return indexer(*this, i);
}
// Dynamic access to mutable Dim
template <int l> HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
return indexer(*this, i);
}
// Dynamic access to constant Dim
inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
return indexer(*this, i);
}
// Dynamic access to mutable Dim
inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
return indexer(*this, i);
}
// Dynamic access to constant Dim
// without std::enable_if will try to instantiate this on get<0>(d)
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type get(const Dim<l> &d,
int i) {
return d[i];
}
// Dynamic access to mutable Dim
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type get(Dim<l> &d,
int i) {
return d[i];
}
// Dot product of two dims
template <int i>
HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
return a.head * b.head + linearize(a.tail, b.tail);
}
// Base case dot product of two Dims
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
return 0;
}
// Product of a Dim
template <int i> HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
return prod * a.head * product(a.tail);
}
// Base case product of a Dim
// Notice it is inline because it is no longer a template
template <> HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
return prod;
}
// Is 0 <= idx_i < size_i for all i?
template <int i>
HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
return ((0 <= idx.head) && (idx.head < size.head) &&
contained(idx.tail, size.tail));
}
// Base case of is 0 <= idx_i < size_i ?
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline bool contained(const Dim<0> &idx, const Dim<0> &size) {
return true;
}
/**
 * \brief Compute exclusive prefix-multiply of a Dim.
 *
 * Entry k of the result is `mul` times the product of the extents that
 * precede position k in `src`. The running product is carried as int64_t so
 * it is not narrowed to int between recursion steps.
 */
template <int i>
HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int64_t mul = 1) {
    return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
}
///\cond HIDDEN
// Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int64_t mul) {
    return Dim<0>();
}
///\endcond
/**
* Add two dimensions together
*/
template <int i> HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
return Dim<i>(a.head + b.head, dim_plus(a.tail, b.tail));
}
// Base case
template <>
HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
return Dim<0>();
}
template <int i>
HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
return dim_plus(lhs, rhs);
}
/**
* Multiply two dimensions together
*/
template <int i> HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
return Dim<i>(a.head * b.head, dim_mult(a.tail, b.tail));
}
// Base case
template <>
HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
return Dim<0>();
}
template <int i>
HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
return dim_mult(lhs, rhs);
}
/**
* \brief Normalize strides to ensure any dimension with extent 1
* has stride 0.
*
* \param size Dim object containing the size of an array
* \param stride Dim object containing stride of an array
* \return Dim object the same size as \p size with normalized strides
*
*/
template <int i>
HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size, const Dim<i> &stride) {
    // Keep the stride in its native int64_t width; going through an int
    // temporary would truncate strides that do not fit in 32 bits.
    const int64_t norm_stride = size.head == 1 ? 0 : stride.head;
    return Dim<i>(norm_stride, normalize_strides(size.tail, stride.tail));
}
///\cond HIDDEN
template <>
HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
const Dim<0> &stride) {
return Dim<0>();
}
///\endcond
/**
* Helper function to create a Dim
*
* \param idxes The type of Dim constructed depends on the number of params
*
*/
template <typename... Args>
HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
return Dim<sizeof...(Args)>(idxes...);
}
// Allows us to output a Dim
// XXX For some reason, overloading fails to resolve this correctly
template <int i>
typename std::enable_if<(i > 1), std::ostream &>::type
operator<<(std::ostream &os, const Dim<i> &d) {
os << d.head << ", " << d.tail;
return os;
}
// Base case that allows us to output a Dim
// XXX I wish this could be an overload instead of a template
template <int i>
typename std::enable_if<(i == 1), std::ostream &>::type
operator<<(std::ostream &os, const Dim<i> &d) {
os << d.head;
return os;
}
inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
return os;
}
template <int i> HOST std::string Dim<i>::to_string() const {
std::stringstream stream;
stream << *this;
return stream.str();
}
template <int D>
HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
Dim<D> result;
for (int i = 0; i < D - 1; ++i) {
result[i] = linear_index % extents[i];
linear_index /= extents[i];
}
result[D - 1] = linear_index;
return result;
}
} // namespace framework
}
} // namespace
// Static access to constant Dim: returns extent number `i`, selected at
// compile time through DimGetter.
template <int i, int l> HOSTDEVICE int64_t get(const Dim<l> &d) {
    const int64_t extent = DimGetter<i>::impl(d);
    return extent;
}
// Static access to mutable Dim: returns a reference to extent number `i`,
// selected at compile time through DimGetter.
template <int i, int l> HOSTDEVICE int64_t &get(Dim<l> &d) {
    int64_t &slot = DimGetter<i>::impl(d);
    return slot;
}
// Dynamic access to constant Dim: run-time indexed read via indexer().
template <int l> HOSTDEVICE int64_t Dim<l>::operator[](int i) const {
    const Dim<l> &self = *this;
    return indexer(self, i);
}
// Dynamic access to mutable Dim: run-time indexed reference via indexer().
template <int l> HOSTDEVICE int64_t &Dim<l>::operator[](int i) {
    Dim<l> &self = *this;
    return indexer(self, i);
}
// Dynamic access to constant rank-0 Dim: forwards to the Dim<0> indexer.
inline HOSTDEVICE int64_t Dim<0>::operator[](int i) const {
    const Dim<0> &self = *this;
    return indexer(self, i);
}
// Dynamic access to mutable rank-0 Dim: forwards to the Dim<0> indexer.
inline HOSTDEVICE int64_t &Dim<0>::operator[](int i) {
    Dim<0> &self = *this;
    return indexer(self, i);
}
// Dynamic access to constant Dim through a free function.
// The enable_if restricts this overload to ranks above zero; without it the
// compiler would try to instantiate it for get<0>(d).
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t>::type
get(const Dim<l> &d, int i) {
    const int64_t extent = d[i];
    return extent;
}
// Dynamic access to mutable Dim through a free function; only enabled for
// ranks above zero.
template <int l>
HOSTDEVICE typename std::enable_if<(l > 0), int64_t &>::type
get(Dim<l> &d, int i) {
    int64_t &slot = d[i];
    return slot;
}
// Dot product of two dims: sum over all positions of a[k] * b[k].
template <int i>
HOSTDEVICE int64_t linearize(const Dim<i> &a, const Dim<i> &b) {
    const int64_t head_term = a.head * b.head;
    return head_term + linearize(a.tail, b.tail);
}
// Base case of the dot product: two rank-0 Dims contribute nothing.
// It is declared inline because a full specialization is no longer a
// template.
template <>
HOSTDEVICE inline int64_t linearize(const Dim<0> &a, const Dim<0> &b) {
    const int64_t nothing = 0;
    return nothing;
}
// Product of a Dim: `prod` times every extent of `a`. The recursive call
// uses the default factor of 1 for the tail.
template <int i>
HOSTDEVICE int64_t product(const Dim<i> &a, int prod = 1) {
    const int64_t scaled_head = prod * a.head;
    return scaled_head * product(a.tail);
}
// Base case of product: with no extents left only the factor remains.
// It is declared inline because a full specialization is no longer a
// template.
template <>
HOSTDEVICE inline int64_t product(const Dim<0> &a, int prod) {
    const int64_t remaining = prod;
    return remaining;
}
// Is 0 <= idx_k < size_k for every position k?
template <int i>
HOSTDEVICE bool contained(const Dim<i> &idx, const Dim<i> &size) {
    if (idx.head < 0) {
        return false;
    }
    if (idx.head >= size.head) {
        return false;
    }
    return contained(idx.tail, size.tail);
}
// Base case of the bounds check: with no positions left to test the answer
// is yes. It is declared inline because a full specialization is no longer
// a template.
template <>
HOSTDEVICE inline bool contained(const Dim<0> &idx,
                                 const Dim<0> &size) {
    const bool nothing_left_to_violate = true;
    return nothing_left_to_violate;
}
/**
 * \brief Compute exclusive prefix-multiply of a Dim.
 *
 * Entry k of the result is `mul` times the product of the extents that
 * precede position k in `src`. The running product is carried as int64_t so
 * it is not narrowed to int between recursion steps.
 */
template <int i>
HOSTDEVICE Dim<i> ex_prefix_mul(const Dim<i> &src, int64_t mul = 1) {
    return Dim<i>(mul, ex_prefix_mul(src.tail, mul * src.head));
}
///\cond HIDDEN
// Base case of ex_prefix_mul
// Notice it is inline because it is no longer a template
template <>
HOSTDEVICE inline Dim<0> ex_prefix_mul(const Dim<0> &src, int64_t mul) {
    return Dim<0>();
}
///\endcond
/**
 * Add two dimensions together, position by position.
 */
template <int i>
HOSTDEVICE Dim<i> dim_plus(const Dim<i> &a, const Dim<i> &b) {
    const int64_t head_sum = a.head + b.head;
    return Dim<i>(head_sum, dim_plus(a.tail, b.tail));
}
// Base case: the sum of two rank-0 Dims is a rank-0 Dim.
template <>
HOSTDEVICE inline Dim<0> dim_plus(const Dim<0> &a, const Dim<0> &b) {
    Dim<0> empty;
    return empty;
}
// Operator spelling of dim_plus.
template <int i>
HOSTDEVICE Dim<i> operator+(const Dim<i> &lhs, const Dim<i> &rhs) {
    const Dim<i> sum = dim_plus(lhs, rhs);
    return sum;
}
/**
 * Multiply two dimensions together, position by position.
 */
template <int i>
HOSTDEVICE Dim<i> dim_mult(const Dim<i> &a, const Dim<i> &b) {
    const int64_t head_product = a.head * b.head;
    return Dim<i>(head_product, dim_mult(a.tail, b.tail));
}
// Base case: the product of two rank-0 Dims is a rank-0 Dim.
template <>
HOSTDEVICE inline Dim<0> dim_mult(const Dim<0> &a, const Dim<0> &b) {
    Dim<0> empty;
    return empty;
}
// Operator spelling of dim_mult.
template <int i>
HOSTDEVICE Dim<i> operator*(const Dim<i> &lhs, const Dim<i> &rhs) {
    const Dim<i> scaled = dim_mult(lhs, rhs);
    return scaled;
}
/**
* \brief Normalize strides to ensure any dimension with extent 1
* has stride 0.
*
* \param size Dim object containing the size of an array
* \param stride Dim object containing stride of an array
* \return Dim object the same size as \p size with normalized strides
*
*/
template <int i>
HOSTDEVICE Dim<i> normalize_strides(const Dim<i> &size,
                                    const Dim<i> &stride) {
    // Keep the stride in its native int64_t width; going through an int
    // temporary would truncate strides that do not fit in 32 bits.
    const int64_t norm_stride = size.head == 1 ? 0 : stride.head;
    return Dim<i>(norm_stride,
                  normalize_strides(size.tail, stride.tail));
}
///\cond HIDDEN
template <>
HOSTDEVICE inline Dim<0> normalize_strides(const Dim<0> &size,
const Dim<0> &stride) {
return Dim<0>();
}
///\endcond
/**
 * \brief Helper function to create a Dim.
 *
 * \param idxes Values of the individual dimensions; the rank of the
 *              returned Dim equals the number of arguments passed
 */
template <typename... Args>
HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
    using Result = Dim<sizeof...(Args)>;
    return Result(idxes...);
}
// Stream a Dim of rank greater than one as "head, <remaining entries>".
// XXX written as an enable_if template because plain overloading did not
// resolve correctly here.
template <int i>
typename std::enable_if<(i > 1), std::ostream &>::type
operator<<(std::ostream &os, const Dim<i> &d) {
    return os << d.head << ", " << d.tail;
}

// Rank-one case: prints the single entry without a trailing separator.
// XXX ideally this would be an overload rather than a template.
template <int i>
typename std::enable_if<(i == 1), std::ostream &>::type
operator<<(std::ostream &os, const Dim<i> &d) {
    return os << d.head;
}

// Rank-zero case: there is nothing to print.
inline std::ostream &operator<<(std::ostream &os, const Dim<0> &d) {
    return os;
}
// Render this Dim through its operator<< and return the text.
template <int i> HOST std::string Dim<i>::to_string() const {
    std::ostringstream buffer;
    buffer << *this;
    return buffer.str();
}
/**
 * \brief Convert a linear index into a D-dimensional index.
 *
 * Dimension 0 varies fastest: each of the first D - 1 entries is the
 * remainder of the running index by that extent, and the last entry
 * receives whatever quotient is left (it is not reduced by its extent).
 *
 * \param linear_index Flat index to decompose
 * \param extents Extent of every dimension
 */
template <int D>
HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
    Dim<D> result;
    int remaining = linear_index;
    int axis = 0;
    while (axis < D - 1) {
        result[axis] = remaining % extents[axis];
        remaining /= extents[axis];
        ++axis;
    }
    result[D - 1] = remaining;
    return result;
}
} // namespace framework
} // namespace paddle_mobile
......@@ -23,72 +23,75 @@ SOFTWARE.
#include "variable.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
/**
 * Build an executor for program `p`.
 *
 * Selects the optimized or the original program description depending on
 * use_optimize_, then walks every block and instantiates a ConvOp for each
 * "conv2d" op whose first "Input" argument is the variable "pixel". The
 * created operators are stored per block in ops_of_block_.
 */
template <typename Dtype>
Executor<Dtype>::Executor(const Program<Dtype> p) : program_(p) {
    if (use_optimize_) {
        to_predict_program_ = program_.optimizeProgram;
    } else {
        to_predict_program_ = program_.originProgram;
    }

    const std::vector<std::shared_ptr<BlockDesc>> blocks =
        to_predict_program_->Blocks();
    for (const std::shared_ptr<BlockDesc> &block_desc : blocks) {
        const std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
        for (const std::shared_ptr<OpDesc> &op : ops) {
            const bool is_input_conv =
                op->Type() == "conv2d" && op->Input("Input")[0] == "pixel";
            if (!is_input_conv) {
                continue;
            }

            // Keep the original fail-fast lookup: at() throws
            // std::out_of_range when the op has no "strides" attribute.
            // The stride values themselves were only fed to an empty loop,
            // so they are no longer copied out.
            static_cast<void>(op->GetAttrMap().at("strides"));

            auto conv = std::make_shared<operators::ConvOp<Dtype, float>>(
                op->Type(), op->GetInputs(), op->GetOutputs(),
                op->GetAttrMap(), program_.scope);
            ops_of_block_[*block_desc].push_back(conv);
        }
    }
}
/**
 * Run block 0 of the program on input `t` and return the output tensor.
 *
 * The input is shared into the scope variable "pixel" and the result is
 * read from the scope variable "conv2d_0.tmp_0", which is allocated here
 * as float data of shape {1, 16, 32, 32} (both names and the shape are
 * hard-coded).
 *
 * The returned pointer does NOT own the tensor: the tensor comes from
 * Variable::GetMutable and is presumably owned by the scope variable, so
 * handing it to an owning shared_ptr (as the previous reset() did) would
 * delete it a second time. It therefore stays valid only as long as the
 * program scope keeps the variable alive.
 */
template <typename Dtype>
std::shared_ptr<Tensor> Executor<Dtype>::predict(Tensor &t) {
    // feed
    auto scope = program_.scope;
    Variable *g_feed_value = scope->Var("pixel");
    auto tensor = g_feed_value->GetMutable<Tensor>();
    tensor->ShareDataWith(t);

    // Allocate the output buffer before the operators run.
    Variable *con_output = scope->Var("conv2d_0.tmp_0");
    Tensor *output_tensor = con_output->GetMutable<Tensor>();
    output_tensor->mutable_data<float>({1, 16, 32, 32});

    predict(t, 0);

    // Non-owning handle: the no-op deleter leaves destruction to the scope.
    return std::shared_ptr<Tensor>(output_tensor, [](Tensor *) {});
}
/**
 * Run every operator registered for block `block_id`, in insertion order.
 *
 * `t` is not read here; the input has already been placed in the scope by
 * the public predict() overload.
 */
template <typename Dtype>
void Executor<Dtype>::predict(const Tensor &t, int block_id) {
    std::shared_ptr<BlockDesc> to_predict_block =
        to_predict_program_->Block(block_id);
    // Look the block up once instead of on every loop test and iteration.
    auto &block_ops = ops_of_block_[*to_predict_block];
    for (const auto &op : block_ops) {
        op->Run();
    }
}
template class Executor<CPU>;
template class Executor<CPU>;
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -32,22 +32,22 @@ SOFTWARE.
#include "variable.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
template <typename Dtype> class Executor {
public:
Executor(const Program<Dtype> p);
std::shared_ptr<Tensor> predict(Tensor &t);
template <typename Dtype> class Executor {
public:
Executor(const Program<Dtype> p);
std::shared_ptr<Tensor> predict(Tensor &t);
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
void predict(const Tensor &t, int block_id);
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
};
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
void predict(const Tensor &t, int block_id);
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
因为 它太大了无法显示 source diff 。你可以改为 查看blob
因为 它太大了无法显示 source diff 。你可以改为 查看blob
......@@ -19,280 +19,304 @@ limitations under the License. */
#include <string.h>
namespace paddle_mobile {
namespace framework {
// Print a LoD as nested brace lists, e.g. {{0, 2, 3}{0, 2, 4, 7}}.
// Offsets inside a level are separated by ", "; levels are not separated.
std::ostream &operator<<(std::ostream &os, const LoD &lod) {
    os << "{";
    for (const auto &level : lod) {
        os << "{";
        const char *separator = "";
        for (const auto &offset : level) {
            os << separator << offset;
            separator = ", ";
        }
        os << "}";
    }
    os << "}";
    return os;
}
// Print the dims, the LoD and at most the first ten elements of a tensor.
// The elements are read as float. The upstream type check and the
// copy-to-CPU path for non-CPU tensors are disabled in this port.
std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
    os << "dim: " << t.dims() << "\n";
    os << "lod: " << t.lod() << "\n";

    // only print first ten elements
    int64_t limit = 10;
    if (t.numel() < limit) {
        limit = t.numel();
    }
    for (int64_t pos = 0; pos < limit; ++pos) {
        os << t.data<float>()[pos] << " ";
    }
    return os;
}
// Return the textual form of `lod` as produced by operator<<.
std::string LoDToString(const LoD &lod) {
    std::ostringstream buffer;
    buffer << lod;
    return buffer.str();
}
// Cut elements [elem_begin, elem_end) of level `level` out of `in`,
// together with the matching parts of all lower levels. Every level of the
// result is rebased so that its first offset is 0.
// No bounds are checked: `level` and `elem_end` must be valid for `in`.
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                 size_t elem_end) {
    LoD res(in.size() - level);

    // copy the first level (elem_end itself is kept as the closing offset)
    const auto &top = in[level];
    res[0].assign(top.begin() + elem_begin, top.begin() + elem_end + 1);

    // each lower level is sliced by the range selected in the level above
    for (size_t depth = 1; depth < res.size(); ++depth) {
        const auto &source = in[level + depth];
        const size_t first = res[depth - 1].front();
        const size_t last = res[depth - 1].back();
        res[depth].assign(source.begin() + first, source.begin() + last + 1);
    }

    // subtract the first element so that every level starts at offset 0
    for (auto &offsets : res) {
        const size_t base = offsets.front();
        for (auto &offset : offsets) {
            offset -= base;
        }
    }
    return res;
}
// Transform a LoD from relative offsets to absolute offsets: working from
// the second-lowest level upwards, each offset is replaced by the entry of
// the (already converted) level below that it indexes.
LoD ToAbsOffset(const LoD &in) {
    // the lowest level stores relative offsets; with at most one level
    // there is nothing to convert
    if (in.size() <= 1) {
        return in;
    }
    LoD result = in;
    // counts down from in.size() - 2 to 0
    for (size_t level = in.size() - 1; level-- > 0;) {
        auto &upper = result[level];
        const auto &lower = result[level + 1];
        for (size_t i = 0; i < in[level].size(); ++i) {
            upper[i] = lower[in[level][i]];
        }
    }
    return result;
}
// Two LoDs are equal when they have the same number of levels and every
// pair of corresponding levels holds the same offsets.
bool operator==(const LoD &a, const LoD &b) {
    if (a.size() != b.size()) {
        return false;
    }
    auto rhs_level = b.begin();
    for (const auto &lhs_level : a) {
        // vector comparison checks the length first, then the elements
        if (lhs_level != *rhs_level) {
            return false;
        }
        ++rhs_level;
    }
    return true;
}
bool CheckLoD(const LoD &in, int tensor_height) {
if (in.empty())
return true;
for (const auto &level : in) {
// check: there should be more than 2 offsets existing in each level.
if (level.size() < 2)
return false;
// check: the first offset(the begin offset) of each level should be 0.
if (level.front() != 0)
return false;
// check: all the offsets in a level should be ascending(no same items
// allows).
if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
if (a < b)
namespace framework {
// Print a LoD as nested brace lists, e.g. {{0, 2, 3}{0, 2, 4, 7}}.
// Offsets inside a level are separated by ", "; levels are not separated.
std::ostream &operator<<(std::ostream &os, const LoD &lod) {
    os << "{";
    for (const auto &level : lod) {
        os << "{";
        const char *separator = "";
        for (const auto &offset : level) {
            os << separator << offset;
            separator = ", ";
        }
        os << "}";
    }
    os << "}";
    return os;
}
// Print the dims, the LoD and at most the first ten elements of a tensor.
// The elements are read as float. The upstream type check and the
// copy-to-CPU path for non-CPU tensors are disabled in this port.
std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
    os << "dim: " << t.dims() << "\n";
    os << "lod: " << t.lod() << "\n";

    // only print first ten elements
    int64_t limit = 10;
    if (t.numel() < limit) {
        limit = t.numel();
    }
    for (int64_t pos = 0; pos < limit; ++pos) {
        os << t.data<float>()[pos] << " ";
    }
    return os;
}
// Return the textual form of `lod` as produced by operator<<.
std::string LoDToString(const LoD &lod) {
    std::ostringstream buffer;
    buffer << lod;
    return buffer.str();
}
// Cut elements [elem_begin, elem_end) of level `level` out of `in`,
// together with the matching parts of all lower levels. Every level of the
// result is rebased so that its first offset is 0.
// No bounds are checked: `level` and `elem_end` must be valid for `in`.
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                 size_t elem_end) {
    LoD res(in.size() - level);

    // copy the first level (elem_end itself is kept as the closing offset)
    const auto &top = in[level];
    res[0].assign(top.begin() + elem_begin, top.begin() + elem_end + 1);

    // each lower level is sliced by the range selected in the level above
    for (size_t depth = 1; depth < res.size(); ++depth) {
        const auto &source = in[level + depth];
        const size_t first = res[depth - 1].front();
        const size_t last = res[depth - 1].back();
        res[depth].assign(source.begin() + first, source.begin() + last + 1);
    }

    // subtract the first element so that every level starts at offset 0
    for (auto &offsets : res) {
        const size_t base = offsets.front();
        for (auto &offset : offsets) {
            offset -= base;
        }
    }
    return res;
}
// Transform a LoD from relative offsets to absolute offsets: working from
// the second-lowest level upwards, each offset is replaced by the entry of
// the (already converted) level below that it indexes.
LoD ToAbsOffset(const LoD &in) {
    // the lowest level stores relative offsets; with at most one level
    // there is nothing to convert
    if (in.size() <= 1) {
        return in;
    }
    LoD result = in;
    // counts down from in.size() - 2 to 0
    for (size_t level = in.size() - 1; level-- > 0;) {
        auto &upper = result[level];
        const auto &lower = result[level + 1];
        for (size_t i = 0; i < in[level].size(); ++i) {
            upper[i] = lower[in[level][i]];
        }
    }
    return result;
}
bool operator==(const LoD &a, const LoD &b) {
if (a.size() != b.size()) {
return false;
}
for (size_t i = 0; i < a.size(); i++) {
const auto &a_level = a[i];
const auto &b_level = b[i];
if (a_level.size() != b_level.size()) {
return false;
}
for (size_t j = 0; j < a_level.size(); j++) {
if (a_level[j] != b_level[j]) {
return false;
}
}
}
return true;
return false;
})) {
std::cout << "ascending error";
return false;
}
}
// check: the lowest level's last offset should equals `tensor_height` if
// tensor_height>0.
if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
return false;
// check: the higher level's last offset should equals the lower level's
// size-1.
// NOTE LoD store the levels from top to bottom, so the higher level goes
// first.
for (size_t level = 0; level < in.size() - 1; level++) {
if (in[level].back() != in[level + 1].size() - 1)
return false;
}
return true;
}
bool CheckAbsLoD(const LoD &in, int tensor_height) {
if (in.empty())
return true;
for (const auto &level : in) {
// check: all the offsets in a level should be ascending(no same items
// allows).
if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
if (a < b)
}
bool CheckLoD(const LoD &in, int tensor_height) {
if (in.empty())
return true;
for (const auto &level : in) {
// check: there should be more than 2 offsets existing in each
// level.
if (level.size() < 2)
return false;
// check: the first offset(the begin offset) of each level
// should be 0.
if (level.front() != 0)
return false;
// check: all the offsets in a level should be ascending(no same
// items
// allows).
if (!std::is_sorted(level.begin(), level.begin(),
[](size_t a, size_t b) {
if (a < b)
return true;
return false;
})) {
std::cout << "ascending error";
return false;
}
}
// check: the lowest level's last offset should equals
// `tensor_height` if
// tensor_height>0.
if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
return false;
// check: the higher level's last offset should equals the lower
// level's
// size-1.
// NOTE LoD store the levels from top to bottom, so the higher level
// goes
// first.
for (size_t level = 0; level < in.size() - 1; level++) {
if (in[level].back() != in[level + 1].size() - 1)
return false;
}
return true;
return false;
})) {
return false;
}
// check: there should be more than 2 offsets existing in each level.
if (level.size() < 2)
return false;
// check: the first offset of each level should be 0, and the last should be
// the same(the height of underlying tensor).
if (level.front() != 0)
return false;
if (tensor_height < 0) {
tensor_height = level.back();
} else if ((size_t)tensor_height != level.back()) {
return false;
}
}
return true;
}
using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;

// Starting at `start_level`, collect for every level the lengths of the
// sequences in [start_idx, end_idx) and then map the index range down to
// the next level. Returns the per-level lengths together with the index
// range reached after the lowest level.
// No bounds are checked: the indices must be valid for every level visited.
LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
                                        size_t end_idx, size_t start_level) {
    LoD sub_lod;
    for (size_t depth = start_level; depth < lod.size(); ++depth) {
        const auto &offsets = lod[depth];
        std::vector<size_t> lengths;
        for (size_t i = start_idx; i < end_idx; ++i) {
            lengths.push_back(offsets[i + 1] - offsets[i]);
        }
        sub_lod.emplace_back(lengths);
        // translate the range into indices of the level below
        start_idx = offsets[start_idx];
        end_idx = offsets[end_idx];
    }
    return std::make_pair(sub_lod, std::make_pair(start_idx, end_idx));
}
// Append the sequence lengths in `lod_length` to `lod`, level by level:
// every length is added to the current last offset of its level and pushed
// as the new last offset.
//
// An empty `lod` is first initialized with one level per entry of
// `lod_length`, each holding the single offset 0. The earlier element-wise
// emplace_back loop was dropped because its result was overwritten by this
// assignment straight away.
//
// `lod_length` must provide at least as many levels as `lod`; this is not
// checked.
void AppendLoD(LoD *lod, const LoD &lod_length) {
    if (lod->empty()) {
        *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
    }
    for (size_t i = 0; i < lod->size(); ++i) {
        auto &level = (*lod)[i];
        for (size_t len : lod_length[i]) {
            level.push_back(level.back() + len);
        }
    }
}
// Write `tensor` to `os` as three consecutive fields:
//   1. uint32_t version (always 0)
//   2. LoD information: uint64_t number of levels, then for every level a
//      uint64_t size in bytes followed by the raw offsets
//   3. the tensor itself, written by TensorToStream
void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
    // the 1st field, uint32_t version for LoDTensor
    constexpr uint32_t version = 0;
    os.write(reinterpret_cast<const char *>(&version), sizeof(version));

    // the 2nd field, LoD information
    const auto &lod = tensor.lod();
    const uint64_t level_count = lod.size();
    os.write(reinterpret_cast<const char *>(&level_count),
             sizeof(level_count));
    for (const auto &level : lod) {
        const uint64_t byte_count =
            level.size() * sizeof(framework::LoD::value_type::value_type);
        os.write(reinterpret_cast<const char *>(&byte_count),
                 sizeof(byte_count));
        os.write(reinterpret_cast<const char *>(level.data()),
                 static_cast<std::streamsize>(byte_count));
    }

    // the 3rd field, Tensor
    TensorToStream(os, static_cast<Tensor>(tensor));
}
// Read a LoDTensor from `is` in the layout written by SerializeToStream:
// version, LoD information, tensor. The version is read but not validated
// and stream errors are not checked.
void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
    // the 1st field, uint32_t version for LoDTensor
    uint32_t version;
    is.read(reinterpret_cast<char *>(&version), sizeof(version));

    // the 2nd field, LoD information
    uint64_t lod_level;
    is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
    auto &lod = *tensor->mutable_lod();
    lod.resize(lod_level);
    for (auto &level : lod) {
        uint64_t size;
        is.read(reinterpret_cast<char *>(&size), sizeof(size));
        // `size` is a byte count; reset the level to that many zeroed
        // offsets and read the raw data over them
        level.assign(size / sizeof(size_t), 0);
        is.read(reinterpret_cast<char *>(level.data()),
                static_cast<std::streamsize>(size));
    }

    // the 3rd field, Tensor
    TensorFromStream(is, static_cast<Tensor *>(tensor));
}
} // namespace framework
}
// Check whether an absolute-offset LoD is well formed. An empty LoD is
// treated as valid. Every level must
//   - hold offsets in sorted order (equal neighbours pass this check, as
//     with the original comparator),
//   - contain at least two offsets,
//   - start at offset 0, and
//   - end at `tensor_height`; when `tensor_height` is negative the last
//     offset of the first level is used as the height for all levels.
bool CheckAbsLoD(const LoD &in, int tensor_height) {
    if (in.empty())
        return true;
    for (const auto &level : in) {
        // The range must run to level.end(): the previous
        // (begin(), begin()) range was empty, so the order was never
        // actually verified.
        if (!std::is_sorted(level.begin(), level.end())) {
            return false;
        }

        // check: there should be at least 2 offsets in each level.
        if (level.size() < 2)
            return false;

        // check: the first offset of each level should be 0, and the last
        // should be the same for all levels (the height of the underlying
        // tensor).
        if (level.front() != 0)
            return false;
        if (tensor_height < 0) {
            tensor_height = static_cast<int>(level.back());
        } else if (static_cast<size_t>(tensor_height) != level.back()) {
            return false;
        }
    }
    return true;
}
using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;

// Starting at `start_level`, collect for every level the lengths of the
// sequences in [start_idx, end_idx) and then map the index range down to
// the next level. Returns the per-level lengths together with the index
// range reached after the lowest level.
// No bounds are checked: the indices must be valid for every level visited.
LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
                                        size_t end_idx, size_t start_level) {
    LoD sub_lod;
    for (size_t depth = start_level; depth < lod.size(); ++depth) {
        const auto &offsets = lod[depth];
        std::vector<size_t> lengths;
        for (size_t i = start_idx; i < end_idx; ++i) {
            lengths.push_back(offsets[i + 1] - offsets[i]);
        }
        sub_lod.emplace_back(lengths);
        // translate the range into indices of the level below
        start_idx = offsets[start_idx];
        end_idx = offsets[end_idx];
    }
    return std::make_pair(sub_lod, std::make_pair(start_idx, end_idx));
}
// Append the sequence lengths in `lod_length` to `lod`, level by level:
// every length is added to the current last offset of its level and pushed
// as the new last offset.
//
// An empty `lod` is first initialized with one level per entry of
// `lod_length`, each holding the single offset 0. The earlier element-wise
// emplace_back loop was dropped because its result was overwritten by this
// assignment straight away.
//
// `lod_length` must provide at least as many levels as `lod`; this is not
// checked.
void AppendLoD(LoD *lod, const LoD &lod_length) {
    if (lod->empty()) {
        *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
    }
    for (size_t i = 0; i < lod->size(); ++i) {
        auto &level = (*lod)[i];
        for (size_t len : lod_length[i]) {
            level.push_back(level.back() + len);
        }
    }
}
// Write `tensor` to `os` as three consecutive fields:
//   1. uint32_t version (always 0)
//   2. LoD information: uint64_t number of levels, then for every level a
//      uint64_t size in bytes followed by the raw offsets
//   3. the tensor itself, written by TensorToStream
void SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
    // the 1st field, uint32_t version for LoDTensor
    constexpr uint32_t version = 0;
    os.write(reinterpret_cast<const char *>(&version), sizeof(version));

    // the 2nd field, LoD information
    const auto &lod = tensor.lod();
    const uint64_t level_count = lod.size();
    os.write(reinterpret_cast<const char *>(&level_count),
             sizeof(level_count));
    for (const auto &level : lod) {
        const uint64_t byte_count =
            level.size() * sizeof(framework::LoD::value_type::value_type);
        os.write(reinterpret_cast<const char *>(&byte_count),
                 sizeof(byte_count));
        os.write(reinterpret_cast<const char *>(level.data()),
                 static_cast<std::streamsize>(byte_count));
    }

    // the 3rd field, Tensor
    TensorToStream(os, static_cast<Tensor>(tensor));
}
// Read a LoDTensor from `is` in the layout written by SerializeToStream:
// version, LoD information, tensor. The version is read but not validated
// and stream errors are not checked.
void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
    // the 1st field, uint32_t version for LoDTensor
    uint32_t version;
    is.read(reinterpret_cast<char *>(&version), sizeof(version));

    // the 2nd field, LoD information
    uint64_t lod_level;
    is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
    auto &lod = *tensor->mutable_lod();
    lod.resize(lod_level);
    for (auto &level : lod) {
        uint64_t size;
        is.read(reinterpret_cast<char *>(&size), sizeof(size));
        // `size` is a byte count; reset the level to that many zeroed
        // offsets and read the raw data over them
        level.assign(size / sizeof(size_t), 0);
        is.read(reinterpret_cast<char *>(level.data()),
                static_cast<std::streamsize>(size));
    }

    // the 3rd field, Tensor
    TensorFromStream(is, static_cast<Tensor *>(tensor));
}
} // namespace framework
} // namespace paddle_mobile
......@@ -23,178 +23,190 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
/*
* LoD is short for Level of Details.
*
* - in a level, each element indicates relative offset of the lower level
* - the first element should be 0 and that indicates that this sequence start
* from 0
* - each sequence's begin and end(no-inclusive) is level[id, id+1]
*
* For example:
* 3-level LoD stores
*
* 0 2 3
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using LoD = std::vector<std::vector<size_t>>;
std::ostream &operator<<(std::ostream &os, const LoD &lod);
std::ostream &operator<<(std::ostream &os, const LoDTensor &t);
std::string LoDToString(const LoD &lod);
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
size_t elem_end);
/*
* Transform an LoD from relative offsets to absolute offsets.
*/
LoD ToAbsOffset(const LoD &in);
bool operator==(const LoD &a, const LoD &b);
/*
* Check whether this lod's format is valid.
*
* ATTENTION:
* - Empty lod is treated as valid.
*
* It checks five things:
*
* 1. all the offsets in a level should be ascending(no same items allows).
* 2. there should be more than 2 offsets existing in each level.
* 3. the higher level's last offset should equals the lower level's size-1.
* 4. the first offset(the begin offset) of each level should be 0.
* 5. the lowest level's last offset should equals `tensor_height` if
* tensor_height>0.
*/
bool CheckLoD(const LoD &in, int tensor_height = -1);
/*
* Check whether this absolute lod's format is valid.
*
* ATTENTION:
* - Empty lod is treated as valid.
*
* It checks three things:
* 1. all the offsets in a level should be ascending(no same items allows)
* 2. there should be more than 2 offsets existing in each level.
* 3. the first offset of each level should be 0, and the last should be the
* same(the height of underlying tensor) or `tensor_height` if
* tensor_height>0.
*/
bool CheckAbsLoD(const LoD &in, int tensor_height = -1);
/*
 * LoDTensor (Level of details Tensor)
 * see https://en.wikipedia.org/wiki/Level_of_details for reference.
 *
 * A Tensor that additionally carries a LoD describing how its rows are
 * grouped into nested sequences.
 */
class LoDTensor : public Tensor {
  public:
    LoDTensor() : Tensor() {}

    explicit LoDTensor(const LoD &lod) : lod_(lod) {}

    // Replace the stored LoD with a copy of `lod`.
    void set_lod(const LoD &lod) { lod_ = lod; }

    // Read-only access to the stored LoD.
    const LoD &lod() const { return lod_; }

    // Pointer to the stored LoD for in-place modification.
    LoD *mutable_lod() { return &lod_; }

    /*
     * Get the start offset and end offset of an element from LoD.
     * `level` and `elem` are not range-checked.
     */
    std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
        const auto &offsets = lod_[level];
        return std::make_pair(offsets[elem], offsets[elem + 1]);
    }

    /*
     * Number of LoDTensor's levels, each level has units of data, for
     * example, in the sentence's view, article, paragraph, sentence are 3
     * levels.
     */
    size_t NumLevels() const { return lod_.size(); }

    /*
     * Number of elements in a level. `level` is not range-checked.
     */
    size_t NumElements(size_t level = 0) const {
        // the last offset is the end of the last element, so there is one
        // element fewer than there are offsets
        return lod_[level].size() - 1;
    }

  private:
    LoD lod_;
};
/*
 * Expand the `source` to fit the LoD of `lod`. For example, a `source`
 * LoDTensor is
 *  - LoD: [0, 2]
 *  - tensor: [a0, a1]
 * a `lod` is
 *  - LoD: [0 3 5]
 * returns a new LoDTensor
 *  - [a0 a0 a0 a1 a1]
 *
 * Row `ins` of `source` is copied into rows
 * [lod[level][ins], lod[level][ins + 1]) of the result, which carries `lod`
 * as its LoD. lod[level] must hold one more offset than `source` has rows;
 * this is not checked.
 */
template <typename T>
LoDTensor LodExpand(const LoDTensor &source, const LoD &lod, size_t level) {
    // The absolute-offset form of `lod` was computed here before but never
    // read, so that conversion has been removed.
    const auto &lod_level = lod[level];
    size_t num_instances = source.dims()[0];

    // new tensor: same shape as the source except for the first dimension,
    // which becomes the total number of expanded rows
    LoDTensor tensor;
    tensor.set_lod(lod);
    auto dims = source.dims();
    dims[0] = lod_level.back();
    tensor.Resize(dims);
    tensor.mutable_data<T>();

    for (size_t ins = 0; ins < num_instances; ins++) {
        for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1];
             elem++) {
            auto slice = tensor.Slice(elem, elem + 1);
            TensorCopy(source.Slice(ins, ins + 1), &slice);
        }
    }
    return tensor;
}
// Get the absolute offset of a lod[start_level][start_idx:end_idx] and
// relative length of details for every levels(i.e., [start_level: ]).
//
// For example,
// lod = [[0, 3, 4, 8], [0, 9, 10, 11, 13, 17, 19, 22, 24]]
// start_level = 0
// start_idx = 1
// end_idx = 3
//
// Returns:
// LoD = [[1, 4], [2, 4, 2, 3, 2]]
// pair<size_t, size_t> = {11, 24}
std::pair<LoD, std::pair<size_t, size_t>>
GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx, size_t end_idx,
size_t start_level);
void AppendLoD(LoD *lod, const LoD &lod_length);
/*
* Serialize/deserialize a LoDTensor to/from a standard stream.
* You can pass an ofstream or ostringstream to serialize to a file
* or to an in-memory string. GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream &os, const LoDTensor &tensor);
void DeserializeFromStream(std::istream &is, LoDTensor *tensor);
} // namespace framework
namespace framework {
/*
* LoD is short for Level of Details.
*
* - in a level, each element indicates relative offset of the lower
* level
* - the first element should be 0 and that indicates that this sequence
* start
* from 0
* - each sequence's begin and end(no-inclusive) is level[id, id+1]
*
* For example:
* 3-level LoD stores
*
* 0 2 3
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using LoD = std::vector<std::vector<size_t>>;
std::ostream &operator<<(std::ostream &os, const LoD &lod);
std::ostream &operator<<(std::ostream &os, const LoDTensor &t);
std::string LoDToString(const LoD &lod);
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
size_t elem_end);
/*
* Transform an LoD from relative offsets to absolute offsets.
*/
LoD ToAbsOffset(const LoD &in);
bool operator==(const LoD &a, const LoD &b);
/*
* Check whether this lod's format is valid.
*
* ATTENTION:
* - Empty lod is treated as valid.
*
* It checks five things:
*
* 1. all the offsets in a level should be ascending(no same items
* allows).
* 2. there should be more than 2 offsets existing in each level.
* 3. the higher level's last offset should equals the lower level's
* size-1.
* 4. the first offset(the begin offset) of each level should be 0.
* 5. the lowest level's last offset should equals `tensor_height` if
* tensor_height>0.
*/
bool CheckLoD(const LoD &in, int tensor_height = -1);
/*
* Check whether this absolute lod's format is valid.
*
* ATTENTION:
* - Empty lod is treated as valid.
*
* It checks three things:
* 1. all the offsets in a level should be ascending(no same items
* allows)
* 2. there should be more than 2 offsets existing in each level.
* 3. the first offset of each level should be 0, and the last should
* be the
* same(the height of underlying tensor) or `tensor_height` if
* tensor_height>0.
*/
bool CheckAbsLoD(const LoD &in, int tensor_height = -1);
/*
 * LoDTensor (Level of details Tensor)
 * see https://en.wikipedia.org/wiki/Level_of_details for reference.
 *
 * A Tensor that additionally carries a LoD describing how its rows are
 * grouped into nested sequences.
 */
class LoDTensor : public Tensor {
  public:
    LoDTensor() : Tensor() {}

    explicit LoDTensor(const LoD &lod) : lod_(lod) {}

    // Replace the stored LoD with a copy of `lod`.
    void set_lod(const LoD &lod) { lod_ = lod; }

    // Read-only access to the stored LoD.
    const LoD &lod() const { return lod_; }

    // Pointer to the stored LoD for in-place modification.
    LoD *mutable_lod() { return &lod_; }

    /*
     * Get the start offset and end offset of an element from LoD.
     * `level` and `elem` are not range-checked.
     */
    std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
        const auto &offsets = lod_[level];
        return std::make_pair(offsets[elem], offsets[elem + 1]);
    }

    /*
     * Number of LoDTensor's levels, each level has units of data, for
     * example, in the sentence's view, article, paragraph, sentence are 3
     * levels.
     */
    size_t NumLevels() const { return lod_.size(); }

    /*
     * Number of elements in a level. `level` is not range-checked.
     */
    size_t NumElements(size_t level = 0) const {
        // the last offset is the end of the last element, so there is one
        // element fewer than there are offsets
        return lod_[level].size() - 1;
    }

  private:
    LoD lod_;
};
/*
 * Expand the `source` to fit the LoD of `lod`. For example, a `source`
 * LoDTensor is
 *  - LoD: [0, 2]
 *  - tensor: [a0, a1]
 * a `lod` is
 *  - LoD: [0 3 5]
 * returns a new LoDTensor
 *  - [a0 a0 a0 a1 a1]
 *
 * Row `ins` of `source` is copied into rows
 * [lod[level][ins], lod[level][ins + 1]) of the result, which carries `lod`
 * as its LoD. lod[level] must hold one more offset than `source` has rows;
 * this is not checked.
 */
template <typename T>
LoDTensor LodExpand(const LoDTensor &source, const LoD &lod, size_t level) {
    // The absolute-offset form of `lod` was computed here before but never
    // read, so that conversion has been removed.
    const auto &lod_level = lod[level];
    size_t num_instances = source.dims()[0];

    // new tensor: same shape as the source except for the first dimension,
    // which becomes the total number of expanded rows
    LoDTensor tensor;
    tensor.set_lod(lod);
    auto dims = source.dims();
    dims[0] = lod_level.back();
    tensor.Resize(dims);
    tensor.mutable_data<T>();

    for (size_t ins = 0; ins < num_instances; ins++) {
        for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1];
             elem++) {
            auto slice = tensor.Slice(elem, elem + 1);
            TensorCopy(source.Slice(ins, ins + 1), &slice);
        }
    }
    return tensor;
}
// Get the absolute offset of a lod[start_level][start_idx:end_idx] and
// relative length of details for every levels(i.e., [start_level: ]).
//
// For example,
// lod = [[0, 3, 4, 8], [0, 9, 10, 11, 13, 17, 19, 22, 24]]
// start_level = 0
// start_idx = 1
// end_idx = 3
//
// Returns:
// LoD = [[1, 4], [2, 4, 2, 3, 2]]
// pair<size_t, size_t> = {11, 24}
std::pair<LoD, std::pair<size_t, size_t>>
GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
size_t end_idx, size_t start_level);
void AppendLoD(LoD *lod, const LoD &lod_length);
/*
* Serialize/deserialize a LoDTensor to/from a standard stream.
* You can pass an ofstream or ostringstream to serialize to a file
* or to an in-memory string. GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream &os, const LoDTensor &tensor);
void DeserializeFromStream(std::istream &is, LoDTensor *tensor);
} // namespace framework
} // namespace paddle_mobile
......@@ -5,55 +5,58 @@
#include "op_desc.h"
namespace paddle_mobile {
namespace framework {
// Build an OpDesc from its protobuf message: the argument names of every
// input and output parameter are collected into inputs_ / outputs_, and
// every attribute except those of type BLOCK is converted and stored in
// attrs_.
OpDesc::OpDesc(const proto::OpDesc &desc) : desc_(desc) {
    const int input_count = desc_.inputs_size();
    for (int i = 0; i < input_count; ++i) {
        const proto::OpDesc::Var &var = desc_.inputs(i);
        // operator[] creates the entry, so a parameter without arguments
        // is still registered with an empty list
        std::vector<std::string> &names = inputs_[var.parameter()];
        for (int j = 0, n = var.arguments_size(); j < n; ++j) {
            names.push_back(var.arguments(j));
        }
    }

    const int output_count = desc_.outputs_size();
    for (int i = 0; i < output_count; ++i) {
        const proto::OpDesc::Var &var = desc_.outputs(i);
        std::vector<std::string> &names = outputs_[var.parameter()];
        for (int j = 0, n = var.arguments_size(); j < n; ++j) {
            names.push_back(var.arguments(j));
        }
    }

    for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
        // BLOCK attributes are skipped
        if (attr.type() == proto::AttrType::BLOCK) {
            continue;
        }
        std::string attr_name = attr.name();
        attrs_[attr_name] = Attribute::GetAttrValue(attr);
    }
}
// Returns the argument names bound to input slot `name`.
// An unknown slot yields an empty list; dereferencing the end iterator
// returned by find() would be undefined behaviour.
const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
    static const std::vector<std::string> kNoArguments;
    const auto it = inputs_.find(name);
    return it == inputs_.end() ? kNoArguments : it->second;
}
// Returns the argument names bound to output slot `name`.
// An unknown slot yields an empty list; dereferencing the end iterator
// returned by find() would be undefined behaviour.
const std::vector<std::string> &OpDesc::Output(const std::string &name) const {
    static const std::vector<std::string> kNoArguments;
    const auto it = outputs_.find(name);
    return it == outputs_.end() ? kNoArguments : it->second;
}
// Returns a copy of the attribute stored under `name`.
// A missing attribute yields a default-constructed Attribute instead of
// dereferencing the end iterator (undefined behaviour).
Attribute OpDesc::GetAttr(const std::string &name) const {
    const auto it = attrs_.find(name);
    if (it == attrs_.end()) {
        return Attribute();
    }
    return it->second;
}
// Read-only access to the attribute table filled in by the constructor
// (attributes of type BLOCK are not stored there).
const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
return attrs_;
}
} // namespace framework
namespace framework {
// Builds the lookup tables of an operator description from its protobuf
// message: one entry per input slot, one per output slot, and one per
// attribute whose type is not BLOCK.
OpDesc::OpDesc(const proto::OpDesc &desc) : desc_(desc) {
    // Input slots: parameter name -> argument names, in message order.
    for (const proto::OpDesc::Var &slot : desc_.inputs()) {
        std::vector<std::string> &names = inputs_[slot.parameter()];
        for (const auto &argument : slot.arguments()) {
            names.push_back(argument);
        }
    }

    // Output slots, handled exactly like the inputs.
    for (const proto::OpDesc::Var &slot : desc_.outputs()) {
        std::vector<std::string> &names = outputs_[slot.parameter()];
        for (const auto &argument : slot.arguments()) {
            names.push_back(argument);
        }
    }

    // Attributes; BLOCK-typed attributes are skipped.
    for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
        if (attr.type() == proto::AttrType::BLOCK) {
            continue;
        }
        attrs_[attr.name()] = Attribute::GetAttrValue(attr);
    }
}
// Returns the argument names bound to input slot `name`.
// An unknown slot yields an empty list; dereferencing the end iterator
// returned by find() would be undefined behaviour.
const std::vector<std::string> &
OpDesc::Input(const std::string &name) const {
    static const std::vector<std::string> kNoArguments;
    const auto it = inputs_.find(name);
    return it == inputs_.end() ? kNoArguments : it->second;
}
// Returns the argument names bound to output slot `name`.
// An unknown slot yields an empty list; dereferencing the end iterator
// returned by find() would be undefined behaviour.
const std::vector<std::string> &
OpDesc::Output(const std::string &name) const {
    static const std::vector<std::string> kNoArguments;
    const auto it = outputs_.find(name);
    return it == outputs_.end() ? kNoArguments : it->second;
}
// Returns a copy of the attribute stored under `name`.
// A missing attribute yields a default-constructed Attribute instead of
// dereferencing the end iterator (undefined behaviour).
Attribute OpDesc::GetAttr(const std::string &name) const {
    const auto it = attrs_.find(name);
    if (it == attrs_.end()) {
        return Attribute();
    }
    return it->second;
}
// Read-only access to the attribute table filled in by the constructor
// (attributes of type BLOCK are not stored there).
const std::unordered_map<std::string, Attribute> &
OpDesc::GetAttrMap() const {
return attrs_;
}
} // namespace framework
} // namespace paddle_mobile
......@@ -23,29 +23,31 @@ SOFTWARE.
#include "paddle_mobile_object.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
class OpDesc : PaddleMobileObject {
public:
OpDesc(const proto::OpDesc &desc);
const std::vector<std::string> &Input(const std::string &name) const;
const std::vector<std::string> &Output(const std::string &name) const;
Attribute GetAttr(const std::string &name) const;
class OpDesc : PaddleMobileObject {
public:
OpDesc(const proto::OpDesc &desc);
const std::vector<std::string> &
Input(const std::string &name) const;
const std::vector<std::string> &
Output(const std::string &name) const;
Attribute GetAttr(const std::string &name) const;
/// Input slots of the operator. Const-qualified so it can be called on a
/// const OpDesc, like GetAttrMap().
const VariableNameMap &GetInputs() const { return inputs_; }
/// Input slots of the operator. Const-qualified so it can be called on a
/// const OpDesc, like GetAttrMap().
const VariableNameMap &GetInputs() const { return inputs_; }
/// Output slots of the operator. Const-qualified so it can be called on a
/// const OpDesc, like GetAttrMap().
const VariableNameMap &GetOutputs() const { return outputs_; }
/// Output slots of the operator. Const-qualified so it can be called on a
/// const OpDesc, like GetAttrMap().
const VariableNameMap &GetOutputs() const { return outputs_; }
const AttributeMap &GetAttrMap() const;
const AttributeMap &GetAttrMap() const;
const std::string &Type() { return desc_.type(); };
const std::string &Type() { return desc_.type(); };
private:
proto::OpDesc desc_;
VariableNameMap inputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
};
private:
proto::OpDesc desc_;
VariableNameMap inputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -22,70 +22,74 @@ SOFTWARE.
#include "framework.pb.h"
namespace paddle_mobile {
namespace framework {
// Registration record for one operator type; it holds the creator object
// for that operator.
template <typename Dtype> struct OpInfo {
OpCreator<Dtype> creator_;
// Returns the stored creator. The null check below is disabled, so no
// validation is performed here.
const OpCreator<Dtype> &Creator() const {
// PADDLE_ENFORCE_NOT_NULL(creator_,
// "Operator Creator has not been registered");
return creator_;
}
};
template <typename Dtype> class OpInfoMap;

// Slot for the lazily created registry of each Dtype. It is intentionally
// not `static`: a `static` variable template in a header has internal
// linkage, so every translation unit would get its own pointer and hence its
// own registry. Without `static`, all translation units share one instance.
template <typename Dtype> OpInfoMap<Dtype> *g_op_info_map = nullptr;

// Registry mapping an operator type name to its OpInfo.
template <typename Dtype> class OpInfoMap {
  public:
    // Returns the registry for Dtype, creating it on first use.
    // The creation is not synchronized, and nothing in this header deletes
    // the instance.
    static OpInfoMap &Instance() {
        if (g_op_info_map<Dtype> == nullptr) {
            g_op_info_map<Dtype> = new OpInfoMap();
        }
        return *g_op_info_map<Dtype>;
    }

    // True when `op_type` has been inserted.
    bool Has(const std::string &op_type) const {
        return map_.find(op_type) != map_.end();
    }

    // Registers `info` under `type`. insert() keeps an existing entry, so
    // registering the same type twice leaves the first registration intact.
    void Insert(const std::string &type, const OpInfo<Dtype> &info) {
        // PADDLE_ENFORCE(!Has(type), "Operator %s has been registered", type);
        map_.insert({type, info});
    }

    // Returns the info registered under `type`.
    // Precondition: `type` is registered. The check is disabled, so an
    // unknown type dereferences a null pointer; use GetNullable() when the
    // type may be absent.
    const OpInfo<Dtype> &Get(const std::string &type) const {
        auto op_info_ptr = GetNullable(type);
        // PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been
        // registered",
        //                    type);
        return *op_info_ptr;
    }

    // Returns the info registered under `type`, or nullptr when absent.
    const OpInfo<Dtype> *GetNullable(const std::string &type) const {
        auto it = map_.find(type);
        if (it == map_.end()) {
            return nullptr;
        }
        return &it->second;
    }

    // Read-only view of the whole table.
    const std::unordered_map<std::string, OpInfo<Dtype>> &map() const {
        return map_;
    }

    // Mutable access to the whole table.
    std::unordered_map<std::string, OpInfo<Dtype>> *mutable_map() {
        return &map_;
    }

  private:
    // Only Instance() creates registries.
    OpInfoMap() = default;

    std::unordered_map<std::string, OpInfo<Dtype>> map_;

    // DISABLE_COPY_AND_ASSIGN(OpInfoMap);
};
} // namespace framework
namespace framework {
// Registration record for one operator type; it holds the creator object
// for that operator.
template <typename Dtype> struct OpInfo {
OpCreator<Dtype> creator_;
// Returns the stored creator. The null check below is disabled, so no
// validation is performed here.
const OpCreator<Dtype> &Creator() const {
// PADDLE_ENFORCE_NOT_NULL(creator_,
// "Operator Creator has not been
// registered");
return creator_;
}
};
template <typename Dtype> class OpInfoMap;

// Slot for the lazily created registry of each Dtype. It is intentionally
// not `static`: a `static` variable template in a header has internal
// linkage, so every translation unit would get its own pointer and hence its
// own registry. Without `static`, all translation units share one instance.
template <typename Dtype>
OpInfoMap<Dtype> *g_op_info_map = nullptr;

// Registry mapping an operator type name to its OpInfo.
template <typename Dtype> class OpInfoMap {
  public:
    // Returns the registry for Dtype, creating it on first use.
    // The creation is not synchronized, and nothing in this header deletes
    // the instance.
    static OpInfoMap &Instance() {
        if (g_op_info_map<Dtype> == nullptr) {
            g_op_info_map<Dtype> = new OpInfoMap();
        }
        return *g_op_info_map<Dtype>;
    }

    // True when `op_type` has been inserted.
    bool Has(const std::string &op_type) const {
        return map_.find(op_type) != map_.end();
    }

    // Registers `info` under `type`. insert() keeps an existing entry, so
    // registering the same type twice leaves the first registration intact.
    void Insert(const std::string &type, const OpInfo<Dtype> &info) {
        // PADDLE_ENFORCE(!Has(type), "Operator %s has been
        // registered", type);
        map_.insert({type, info});
    }

    // Returns the info registered under `type`.
    // Precondition: `type` is registered. The check is disabled, so an
    // unknown type dereferences a null pointer; use GetNullable() when the
    // type may be absent.
    const OpInfo<Dtype> &Get(const std::string &type) const {
        auto op_info_ptr = GetNullable(type);
        // PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not
        // been registered", type);
        return *op_info_ptr;
    }

    // Returns the info registered under `type`, or nullptr when absent.
    const OpInfo<Dtype> *GetNullable(const std::string &type) const {
        auto it = map_.find(type);
        if (it == map_.end()) {
            return nullptr;
        }
        return &it->second;
    }

    // Read-only view of the whole table.
    const std::unordered_map<std::string, OpInfo<Dtype>> &map() const {
        return map_;
    }

    // Mutable access to the whole table.
    std::unordered_map<std::string, OpInfo<Dtype>> *mutable_map() {
        return &map_;
    }

  private:
    // Only Instance() creates registries.
    OpInfoMap() = default;

    std::unordered_map<std::string, OpInfo<Dtype>> map_;

    // DISABLE_COPY_AND_ASSIGN(OpInfoMap);
};
} // namespace framework
} // namespace paddle_mobile
......@@ -22,43 +22,51 @@ SOFTWARE.
#include "framework.pb.h"
namespace paddle_mobile {
namespace framework {
struct OpKernelType {
struct Hash {
size_t operator()(const OpKernelType &key) const {
int data_type = static_cast<int>(key.data_type_) << LEFT_SHIFT;
int data_layout = static_cast<int>(key.data_layout_) << (LEFT_SHIFT * 2);
namespace framework {
struct OpKernelType {
struct Hash {
size_t operator()(const OpKernelType &key) const {
int data_type = static_cast<int>(key.data_type_)
<< LEFT_SHIFT;
int data_layout = static_cast<int>(key.data_layout_)
<< (LEFT_SHIFT * 2);
std::hash<int> hasher;
return hasher(data_type + data_layout);
}
};
std::hash<int> hasher;
return hasher(data_type + data_layout);
}
};
// place, data_type, library_type kinds less than 2^8
constexpr static int LEFT_SHIFT = 8;
// place, data_type, library_type kinds less than 2^8
constexpr static int LEFT_SHIFT = 8;
proto::VarType::Type data_type_;
DataLayout data_layout_;
proto::VarType::Type data_type_;
DataLayout data_layout_;
OpKernelType(proto::VarType::Type data_type,
DataLayout data_layout = DataLayout::kAnyLayout)
: data_type_(data_type), data_layout_(data_layout) {}
OpKernelType(proto::VarType::Type data_type,
DataLayout data_layout = DataLayout::kAnyLayout)
: data_type_(data_type), data_layout_(data_layout) {}
bool operator==(const OpKernelType &o) const {
return data_type_ == o.data_type_ && data_layout_ == o.data_layout_;
}
bool operator==(const OpKernelType &o) const {
return data_type_ == o.data_type_ &&
data_layout_ == o.data_layout_;
}
bool operator!=(const OpKernelType &o) const { return !(*this == o); }
};
bool operator!=(const OpKernelType &o) const {
return !(*this == o);
}
};
// A layout transform is needed only when both layouts are concrete (neither
// is kAnyLayout) and they differ.
inline bool NeedTransformLayout(const DataLayout &l, const DataLayout &r) {
    if (l == DataLayout::kAnyLayout || r == DataLayout::kAnyLayout) {
        return false;
    }
    return l != r;
}
// A layout transform is needed only when both layouts are concrete (neither
// is kAnyLayout) and they differ.
inline bool NeedTransformLayout(const DataLayout &l,
                                const DataLayout &r) {
    if (l == DataLayout::kAnyLayout || r == DataLayout::kAnyLayout) {
        return false;
    }
    return l != r;
}
// Two kernel types need a transform when their data types differ or when
// their layouts require one (see NeedTransformLayout).
inline bool TransFromNeeded(const OpKernelType &l, const OpKernelType &r) {
    if (l.data_type_ != r.data_type_) {
        return true;
    }
    return NeedTransformLayout(l.data_layout_, r.data_layout_);
}
// Two kernel types need a transform when their data types differ or when
// their layouts require one (see NeedTransformLayout).
inline bool TransFromNeeded(const OpKernelType &l,
                            const OpKernelType &r) {
    if (l.data_type_ != r.data_type_) {
        return true;
    }
    return NeedTransformLayout(l.data_layout_, r.data_layout_);
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -19,8 +19,8 @@ SOFTWARE.
#pragma once
namespace paddle_mobile {
namespace framework {
// This class is meant not only to build the operator proto but also to
// initialize the attribute checkers. It is currently an empty placeholder.
class OpProtoAndCheckerMaker {};
} // namespace framework
namespace framework {
// This class is meant not only to build the operator proto but also to
// initialize the attribute checkers. It is currently an empty placeholder.
class OpProtoAndCheckerMaker {};
} // namespace framework
} // namespace paddle_mobile
......@@ -20,26 +20,23 @@ SOFTWARE.
#include "op_info.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
// Stores the operator's type name, slot maps, attributes and scope, then
// runs the input/output validation hook.
// The initializers are listed in the members' declaration order, which is
// the order in which they are initialized anyway.
template <typename Dtype>
OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                  const VariableNameMap &inputs,
                                  const VariableNameMap &outputs,
                                  const AttributeMap &attrs,
                                  std::shared_ptr<Scope> scope)
    : scope_(scope), type_(type), inputs_(inputs), outputs_(outputs),
      attrs_(attrs) {
    this->CheckAllInputOutputSet();
}
// Stores the operator's type name, slot maps, attributes and scope, then
// runs the input/output validation hook.
// The initializers are listed in the members' declaration order, which is
// the order in which they are initialized anyway.
template <typename Dtype>
OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                  const VariableNameMap &inputs,
                                  const VariableNameMap &outputs,
                                  const AttributeMap &attrs,
                                  std::shared_ptr<Scope> scope)
    : scope_(scope), type_(type), inputs_(inputs), outputs_(outputs),
      attrs_(attrs) {
    this->CheckAllInputOutputSet();
}
// Validation hook called from the constructor. The body is empty, so no
// check is performed at present.
template <typename Dtype>
void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
// Runs the operator by delegating to the subclass-provided RunImpl().
template <typename Dtype> void OperatorBase<Dtype>::Run() { RunImpl(); }
template class OperatorBase<CPU>;
template class OperatorWithKernel<CPU>;
// Validation hook called from the constructor. The body is empty, so no
// check is performed at present.
template <typename Dtype>
void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template class OperatorBase<CPU>;
template class OperatorWithKernel<CPU>;
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -18,8 +18,6 @@ SOFTWARE.
#pragma once
#include <map>
#include "attribute.h"
#include "block_desc.h"
#include "common/type_define.h"
......@@ -31,62 +29,62 @@ SOFTWARE.
#include "scope.h"
#include "tensor.h"
#include "variable.h"
#include <map>
namespace paddle_mobile {
namespace framework {
template <typename Dtype> class OperatorBase : PaddleMobileObject {
public:
OperatorBase(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope);
virtual ~OperatorBase() {}
virtual void Run();
const VariableNameMap &Inputs() const { return inputs_; }
const VariableNameMap &Outputs() const { return outputs_; }
const std::string &Type() const { return type_; }
const AttributeMap &Attrs() const { return attrs_; }
protected:
std::shared_ptr<Scope> scope_;
std::string type_;
VariableNameMap inputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
namespace framework {
private:
void CheckAllInputOutputSet() const;
virtual void RunImpl() const = 0;
};
template <typename Dtype> class OperatorBase : PaddleMobileObject {
public:
OperatorBase(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs,
std::shared_ptr<Scope> scope);
virtual ~OperatorBase() {}
virtual void Run() const = 0;
template <typename Dtype>
class OperatorWithKernel : public OperatorBase<Dtype> {
public:
OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {}
virtual void InferShape() const = 0;
const VariableNameMap &Inputs() const { return inputs_; }
const VariableNameMap &Outputs() const { return outputs_; }
const std::string &Type() const { return type_; }
const AttributeMap &Attrs() const { return attrs_; }
// Erases from the scope the variables bound to the "Filter" and "Input"
// input slots. Does nothing without a scope.
// A slot the operator does not have is skipped: at() would throw
// std::out_of_range for every operator lacking either slot.
void ClearVariables() const {
    if (!this->scope_) {
        return;
    }
    static const char *const kSlots[] = {"Filter", "Input"};
    for (const char *slot : kSlots) {
        const auto it = this->inputs_.find(slot);
        if (it != this->inputs_.end()) {
            this->scope_->EraseVars(it->second);
        }
    }
}
// Erases from the scope the variables bound to the "Filter" and "Input"
// input slots. Does nothing without a scope.
// A slot the operator does not have is skipped: at() would throw
// std::out_of_range for every operator lacking either slot.
void ClearVariables() const {
    if (!this->scope_) {
        return;
    }
    static const char *const kSlots[] = {"Filter", "Input"};
    for (const char *slot : kSlots) {
        const auto it = this->inputs_.find(slot);
        if (it != this->inputs_.end()) {
            this->scope_->EraseVars(it->second);
        }
    }
}
protected:
std::shared_ptr<Scope> scope_;
std::string type_;
VariableNameMap inputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
protected:
virtual void RunImpl() const = 0;
private:
void CheckAllInputOutputSet() const;
};
private:
};
// Abstract operator layer on top of OperatorBase: it forwards construction
// to the base class and requires subclasses to implement InferShape() and
// Run().
template <typename Dtype>
class OperatorWithKernel : public OperatorBase<Dtype> {
public:
// Passes every argument straight through to OperatorBase.
OperatorWithKernel(const std::string &type,
const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {}
// Shape inference hook; must be provided by the concrete operator.
virtual void InferShape() const = 0;
// Execution hook; must be provided by the concrete operator.
virtual void Run() const = 0;
};
template <typename Dtype, typename P> class OpKernelBase : PaddleMobileObject {
public:
virtual void Compute(const P &para) const = 0;
template <typename Dtype, typename P>
class OpKernelBase : PaddleMobileObject {
public:
virtual void Compute(const P &para) const = 0;
virtual ~OpKernelBase() = default;
};
virtual ~OpKernelBase() = default;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -23,14 +23,14 @@ SOFTWARE.
namespace paddle_mobile {
class PaddleMobileObject {
public:
/// Textual identity of this object: its address formatted with "%p".
///
/// The text is kept in a per-thread buffer so that the returned reference
/// stays valid after the call; returning a local std::string by reference
/// would dangle. The buffer is overwritten by the next ToString() call made
/// on the same thread, so copy the result if it has to be kept.
virtual inline const std::string &ToString() {
    static thread_local std::string text;
    char address[128] = {0};
    // Bounded write; "%p" expects a void pointer.
    snprintf(address, sizeof(address), "%p", static_cast<void *>(this));
    text = address;
    return text;
}
class PaddleMobileObject {
public:
/// Textual identity of this object: its address formatted with "%p".
///
/// The text is kept in a per-thread buffer so that the returned reference
/// stays valid after the call; returning a local std::string by reference
/// would dangle. The buffer is overwritten by the next ToString() call made
/// on the same thread, so copy the result if it has to be kept.
virtual inline const std::string &ToString() {
    static thread_local std::string text;
    char address[128] = {0};
    // Bounded write; "%p" expects a void pointer.
    snprintf(address, sizeof(address), "%p", static_cast<void *>(this));
    text = address;
    return text;
}
private:
};
private:
};
} // namespace paddle_mobile
......@@ -17,5 +17,5 @@ SOFTWARE.
==============================================================================*/
namespace paddle_mobile {
namespace framework {}
namespace framework {}
} // namespace paddle_mobile
......@@ -24,17 +24,17 @@ SOFTWARE.
#include "scope.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
template <typename Dtype, Precision P = Precision::FP32>
class Program : PaddleMobileObject {
public:
std::shared_ptr<ProgramDesc> originProgram;
std::shared_ptr<ProgramDesc> optimizeProgram;
std::shared_ptr<Scope> scope;
template <typename Dtype, Precision P = Precision::FP32>
class Program : PaddleMobileObject {
public:
std::shared_ptr<ProgramDesc> originProgram;
std::shared_ptr<ProgramDesc> optimizeProgram;
std::shared_ptr<Scope> scope;
private:
};
private:
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -5,18 +5,18 @@
#include "program_desc.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
// Copies the protobuf program and wraps each of its blocks in a BlockDesc,
// keeping the blocks in message order.
ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) : desc_(desc) {
    auto *proto_blocks = desc_.mutable_blocks();
    for (auto it = proto_blocks->begin(); it != proto_blocks->end(); ++it) {
        blocks_.push_back(std::make_shared<BlockDesc>(*it));
    }
}
// Copies the protobuf program and wraps each of its blocks in a BlockDesc,
// keeping the blocks in message order.
ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) : desc_(desc) {
    auto *proto_blocks = desc_.mutable_blocks();
    for (auto it = proto_blocks->begin(); it != proto_blocks->end(); ++it) {
        blocks_.push_back(std::make_shared<BlockDesc>(*it));
    }
}
std::shared_ptr<BlockDesc> ProgramDesc::Block(size_t idx) {
return blocks_[idx];
}
std::shared_ptr<BlockDesc> ProgramDesc::Block(size_t idx) {
return blocks_[idx];
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -25,18 +25,20 @@ SOFTWARE.
#include "paddle_mobile_object.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
class ProgramDesc : PaddleMobileObject {
public:
ProgramDesc(const proto::ProgramDesc &desc);
std::shared_ptr<BlockDesc> Block(size_t idx);
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; };
class ProgramDesc : PaddleMobileObject {
public:
ProgramDesc(const proto::ProgramDesc &desc);
std::shared_ptr<BlockDesc> Block(size_t idx);
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() {
return blocks_;
};
private:
std::vector<std::shared_ptr<BlockDesc>> blocks_;
proto::ProgramDesc desc_;
};
private:
std::vector<std::shared_ptr<BlockDesc>> blocks_;
proto::ProgramDesc desc_;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -4,113 +4,116 @@
#include <vector>
namespace paddle_mobile {
namespace framework {
namespace framework {
// Creates a child scope whose parent is this scope, records it in kids_
// under the mutex, and returns it.
Scope &Scope::NewScope() const {
    std::unique_lock<std::mutex> guard(mutex_);
    Scope *child = new Scope(this);
    kids_.push_back(child);
    return *child;
}
// Creates a child scope whose parent is this scope, records it in kids_
// under the mutex, and returns it.
Scope &Scope::NewScope() const {
    std::unique_lock<std::mutex> guard(mutex_);
    Scope *child = new Scope(this);
    kids_.push_back(child);
    return *child;
}
// Returns the variable called `name` in this scope, creating it when it
// does not exist locally. A new variable's name_ points at the key stored
// in vars_.
Variable *Scope::Var(const std::string &name) {
    if (Variable *existing = FindVarLocally(name)) {
        return existing;
    }
    Variable *created = new Variable;
    vars_[name] = created;
    created->name_ = &(vars_.find(name)->first);
    return created;
}
// Returns the variable called `name` in this scope, creating it when it
// does not exist locally. A new variable's name_ points at the key stored
// in vars_.
Variable *Scope::Var(const std::string &name) {
    if (Variable *existing = FindVarLocally(name)) {
        return existing;
    }
    Variable *created = new Variable;
    vars_[name] = created;
    created->name_ = &(vars_.find(name)->first);
    return created;
}
// Variable* Scope::Var(std::string* name) {
// auto var_name = string::Sprintf("%p.%d", this, vars_.size());
// if (name != nullptr) {
// *name = var_name;
// }
// return Var(var_name);
// }
// Variable* Scope::Var(std::string* name) {
// auto var_name = string::Sprintf("%p.%d", this,
// vars_.size());
// if (name != nullptr) {
// *name = var_name;
// }
// return Var(var_name);
// }
// Looks `name` up in this scope and then in each ancestor in turn; returns
// nullptr when no scope on the chain holds it.
Variable *Scope::FindVar(const std::string &name) const {
    for (const Scope *scope = this; scope != nullptr;
         scope = scope->parent_) {
        if (Variable *found = scope->FindVarLocally(name)) {
            return found;
        }
    }
    return nullptr;
}
// Looks `name` up in this scope and then in each ancestor in turn; returns
// nullptr when no scope on the chain holds it.
Variable *Scope::FindVar(const std::string &name) const {
    for (const Scope *scope = this; scope != nullptr;
         scope = scope->parent_) {
        if (Variable *found = scope->FindVarLocally(name)) {
            return found;
        }
    }
    return nullptr;
}
// Returns the nearest scope, starting here and walking up the parent chain,
// whose local table contains the pointer `var`; nullptr when none does.
const Scope *Scope::FindScope(const Variable *var) const {
    for (const Scope *scope = this; scope != nullptr;
         scope = scope->parent_) {
        for (const auto &entry : scope->vars_) {
            if (entry.second == var) {
                return scope;
            }
        }
    }
    return nullptr;
}
// Returns the nearest scope, starting here and walking up the parent chain,
// whose local table contains the pointer `var`; nullptr when none does.
const Scope *Scope::FindScope(const Variable *var) const {
    for (const Scope *scope = this; scope != nullptr;
         scope = scope->parent_) {
        for (const auto &entry : scope->vars_) {
            if (entry.second == var) {
                return scope;
            }
        }
    }
    return nullptr;
}
// Deletes every child scope and leaves kids_ empty.
void Scope::DropKids() {
    for (auto it = kids_.begin(); it != kids_.end(); ++it) {
        delete *it;
    }
    kids_.clear();
}
// Deletes every child scope and leaves kids_ empty.
void Scope::DropKids() {
    for (auto it = kids_.begin(); it != kids_.end(); ++it) {
        delete *it;
    }
    kids_.clear();
}
// Names of the variables held directly by this scope (ancestors are not
// consulted), in the iteration order of vars_.
std::vector<std::string> Scope::LocalVarNames() const {
    std::vector<std::string> names;
    names.reserve(vars_.size());
    for (auto it = vars_.begin(); it != vars_.end(); ++it) {
        names.emplace_back(it->first);
    }
    return names;
}
// Names of the variables held directly by this scope (ancestors are not
// consulted), in the iteration order of vars_.
std::vector<std::string> Scope::LocalVarNames() const {
    std::vector<std::string> names;
    names.reserve(vars_.size());
    for (auto it = vars_.begin(); it != vars_.end(); ++it) {
        names.emplace_back(it->first);
    }
    return names;
}
// Removes the child `scope` from kids_ and deletes it, under the mutex.
// A pointer that is not one of this scope's children is ignored: erasing
// the end iterator is undefined behaviour, and this scope does not own a
// scope it never created.
void Scope::DeleteScope(Scope *scope) const {
    std::unique_lock<std::mutex> lock(mutex_);
    auto it = std::find(kids_.begin(), kids_.end(), scope);
    if (it == kids_.end()) {
        return;
    }
    kids_.erase(it);
    delete scope;
}
// Removes the child `scope` from kids_ and deletes it, under the mutex.
// A pointer that is not one of this scope's children is ignored: erasing
// the end iterator is undefined behaviour, and this scope does not own a
// scope it never created.
void Scope::DeleteScope(Scope *scope) const {
    std::unique_lock<std::mutex> lock(mutex_);
    auto it = std::find(kids_.begin(), kids_.end(), scope);
    if (it == kids_.end()) {
        return;
    }
    kids_.erase(it);
    delete scope;
}
void Scope::EraseVars(const std::vector<std::string> &var_names) {
std::set<std::string> var_set(var_names.begin(), var_names.end());
for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) {
delete it->second;
it = vars_.erase(it);
} else {
++it;
}
}
}
void Scope::EraseVars(const std::vector<std::string> &var_names) {
std::set<std::string> var_set(var_names.begin(), var_names.end());
for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) {
delete it->second;
it = vars_.erase(it);
} else {
++it;
}
}
}
void Scope::Rename(const std::string &origin_name,
const std::string &new_name) const {
auto origin_it = vars_.find(origin_name);
if (origin_it == vars_.end()) {
return;
}
auto new_it = vars_.find(new_name);
if (new_it != vars_.end()) {
return;
}
vars_[new_name] = origin_it->second;
vars_.erase(origin_it);
}
//
// std::string Scope::Rename(const std::string& origin_name) const {
// auto var_name = string::Sprintf("%p.%d", this, vars_.size());
// Rename(origin_name, var_name);
// return var_name;
// }
void Scope::Rename(const std::string &origin_name,
const std::string &new_name) const {
auto origin_it = vars_.find(origin_name);
if (origin_it == vars_.end()) {
return;
}
auto new_it = vars_.find(new_name);
if (new_it != vars_.end()) {
return;
}
vars_[new_name] = origin_it->second;
vars_.erase(origin_it);
}
//
// std::string Scope::Rename(const std::string& origin_name)
// const {
// auto var_name = string::Sprintf("%p.%d", this,
// vars_.size());
// Rename(origin_name, var_name);
// return var_name;
// }
// Looks `name` up in this scope only; returns nullptr when it is absent.
Variable *Scope::FindVarLocally(const std::string &name) const {
    const auto found = vars_.find(name);
    return found == vars_.end() ? nullptr : found->second;
}
// Looks `name` up in this scope only; returns nullptr when it is absent.
Variable *Scope::FindVarLocally(const std::string &name) const {
    const auto found = vars_.find(name);
    return found == vars_.end() ? nullptr : found->second;
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -24,57 +24,58 @@ SOFTWARE.
#include <unordered_map> //std::unordered_map
namespace paddle_mobile {
namespace framework {
class Scope {
public:
Scope() {}
/// Releases everything the scope owns: the Variables created by Var() and
/// the child scopes created by NewScope(). An empty destructor would leak
/// both.
~Scope() {
    for (auto &name_var : vars_) {
        delete name_var.second;
    }
    vars_.clear();
    for (Scope *kid : kids_) {
        delete kid;
    }
    kids_.clear();
}
namespace framework {
class Scope {
public:
Scope() {}
/// Releases everything the scope owns: the Variables created by Var() and
/// the child scopes created by NewScope(). An empty destructor would leak
/// both.
~Scope() {
    for (auto &name_var : vars_) {
        delete name_var.second;
    }
    vars_.clear();
    for (Scope *kid : kids_) {
        delete kid;
    }
    kids_.clear();
}
Scope &NewScope() const;
Scope &NewScope() const;
/// Create a variable with given name if it doesn't exist.
Variable *Var(const std::string &name);
/// Create a variable with given name if it doesn't exist.
Variable *Var(const std::string &name);
/// Create a variable with a scope-unique name.
Variable *Var(std::string *name = nullptr);
/// Create a variable with a scope-unique name.
Variable *Var(std::string *name = nullptr);
void EraseVars(const std::vector<std::string> &var_names);
void EraseVars(const std::vector<std::string> &var_names);
/// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find.
Variable *FindVar(const std::string &name) const;
/// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find.
Variable *FindVar(const std::string &name) const;
const Scope *parent() const { return parent_; }
const Scope *parent() const { return parent_; }
/// Find the scope or an ancestor scope that contains the given variable.
const Scope *FindScope(const Variable *var) const;
/// Find the scope or an ancestor scope that contains the given
/// variable.
const Scope *FindScope(const Variable *var) const;
void DeleteScope(Scope *scope) const;
void DeleteScope(Scope *scope) const;
/// Drop all kids scopes belonged to this scope.
void DropKids();
/// Drop all kids scopes belonged to this scope.
void DropKids();
// enumerate all the variables current contains.
std::vector<std::string> LocalVarNames() const;
// enumerate all the variables current contains.
std::vector<std::string> LocalVarNames() const;
// Rename variable to a new name
void Rename(const std::string &origin_name,
const std::string &new_name) const;
// Rename variable to a new name
void Rename(const std::string &origin_name,
const std::string &new_name) const;
// Rename variable to a new name and return the new name
std::string Rename(const std::string &origin_name) const;
// Rename variable to a new name and return the new name
std::string Rename(const std::string &origin_name) const;
Variable *FindVarLocally(const std::string &name) const;
Variable *FindVarLocally(const std::string &name) const;
private:
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const *parent) : parent_(parent) {}
private:
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const *parent) : parent_(parent) {}
mutable std::unordered_map<std::string, Variable *> vars_;
mutable std::list<Scope *> kids_;
Scope const *parent_{nullptr};
mutable std::unordered_map<std::string, Variable *> vars_;
mutable std::list<Scope *> kids_;
Scope const *parent_{nullptr};
mutable std::mutex mutex_;
};
} // namespace framework
mutable std::mutex mutex_;
};
} // namespace framework
} // namespace paddle_mobile
......@@ -24,57 +24,59 @@ SOFTWARE.
#include "tensor.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
class SelectedRows {
public:
SelectedRows(const std::vector<int64_t> &rows, const int64_t &height)
: rows_(rows), height_(height) {
value_.reset(new Tensor());
}
class SelectedRows {
public:
SelectedRows(const std::vector<int64_t> &rows,
const int64_t &height)
: rows_(rows), height_(height) {
value_.reset(new Tensor());
}
SelectedRows() {
height_ = 0;
value_.reset(new Tensor());
}
SelectedRows() {
height_ = 0;
value_.reset(new Tensor());
}
const Tensor &value() const { return *value_; }
const Tensor &value() const { return *value_; }
Tensor *mutable_value() { return value_.get(); }
Tensor *mutable_value() { return value_.get(); }
int64_t height() const { return height_; }
int64_t height() const { return height_; }
void set_height(int64_t height) { height_ = height; }
void set_height(int64_t height) { height_ = height; }
const std::vector<int64_t> &rows() const { return rows_; }
const std::vector<int64_t> &rows() const { return rows_; }
std::vector<int64_t> *mutable_rows() { return &rows_; }
std::vector<int64_t> *mutable_rows() { return &rows_; }
void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }
void set_rows(const std::vector<int64_t> &rows) { rows_ = rows; }
/**
 * Position of the first occurrence of `id` in rows_.
 * The membership check is disabled, so an absent `id` yields rows_.size().
 */
int64_t index(int64_t id) const {
    const auto first = rows_.begin();
    const auto hit = std::find(first, rows_.end(), id);
    return static_cast<int64_t>(hit - first);
}
/**
 * Position of the first occurrence of `id` in rows_.
 * The membership check is disabled, so an absent `id` yields rows_.size().
 */
int64_t index(int64_t id) const {
    const auto first = rows_.begin();
    const auto hit = std::find(first, rows_.end(), id);
    return static_cast<int64_t>(hit - first);
}
// Dimensions of the value tensor with the first extent replaced by height_.
DDim GetCompleteDims() const {
    std::vector<int64_t> complete = vectorize(value_->dims());
    complete[0] = height_;
    return make_ddim(complete);
}
// Dimensions of the value tensor with the first extent replaced by height_.
DDim GetCompleteDims() const {
    std::vector<int64_t> complete = vectorize(value_->dims());
    complete[0] = height_;
    return make_ddim(complete);
}
private:
// Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9} here.
// SelectedRows are simply concated when adding together. Until a
// SelectedRows add a Tensor, will the duplicate rows be handled.
std::vector<int64_t> rows_;
std::unique_ptr<Tensor> value_{nullptr};
int64_t height_;
};
private:
// Notice: rows can be duplicate. We can have {0, 4, 7, 0, 5, 7, 9}
// here.
// SelectedRows are simply concated when adding together. Until a
// SelectedRows add a Tensor, will the duplicate rows be handled.
std::vector<int64_t> rows_;
std::unique_ptr<Tensor> value_{nullptr};
int64_t height_;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -25,287 +25,316 @@ limitations under the License. */
#include "memory/t_malloc.h"
namespace paddle_mobile {
namespace framework {
// Maps a runtime std::type_index to sizeof() of the matching type from the
// template argument list; yields 0 when no listed type matches.
template <typename... T> struct SizeOfTypeFunctor;

// Single candidate: sizeof(T) on a match, 0 otherwise.
// The type_index objects are compared directly; hash_code() values are not
// guaranteed to be unique per type, so comparing them could report a match
// for the wrong type.
template <typename T> struct SizeOfTypeFunctor<T> {
    size_t operator()(std::type_index type) const {
        return type == std::type_index(typeid(T)) ? sizeof(T) : 0UL;
    }
};

// No candidates: never matches.
template <> struct SizeOfTypeFunctor<> {
    size_t operator()(std::type_index) const { return 0UL; }
};

// Several candidates: try HEAD first, then the remaining types in order.
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
    size_t operator()(std::type_index type) const {
        const size_t head_size = SizeOfTypeFunctor<HEAD>()(type);
        if (head_size != 0) {
            return head_size;
        }
        return SizeOfTypeFunctor<TAIL...>()(type);
    }
};
static inline size_t SizeOfType(std::type_index type) {
SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool, size_t> functor;
size_t size = functor(type);
// PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
return size;
}
class LoDTensor;
class Tensor {
public:
Tensor() : offset_(0) {}
/*! Return a pointer to mutable memory block. */
template <typename T> inline T *data() {
check_memory_size();
// PADDLE_ENFORCE(std::is_same<T, void>::value ||
// holder_->type().hash_code() == typeid(T).hash_code(),
// "Tensor holds the wrong type, it holds %s",
// this->holder_->type().name());
return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
/*! Return a pointer to constant memory block. */
template <typename T> inline const T *data() const {
check_memory_size();
// PADDLE_ENFORCE(std::is_same<T, void>::value ||
// holder_->type().hash_code() == typeid(T).hash_code(),
// "Tensor holds the wrong type, it holds %s",
// this->holder_->type().name());
return reinterpret_cast<const T *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
inline bool IsInitialized() const { return holder_ != nullptr; }
/**
* @brief Return a pointer to mutable memory block.
* @note If not exist, then allocation.
*/
template <typename T> inline T *mutable_data() {
static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T *>(mutable_data(typeid(T)));
}
inline void *mutable_data(std::type_index type) {
if (holder_ != nullptr) {
holder_->set_type(type);
}
// PADDLE_ENFORCE_GE(numel(), 0,
// "When calling this method, the Tensor's numel must be
// " "equal or larger than zero. " "Please check
// Tensor::Resize has been called first.");
int64_t size = numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || holder_->size() < size + offset_) {
holder_.reset(new PlaceholderImpl(size, type));
offset_ = 0;
}
return reinterpret_cast<void *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
inline void *mutable_data() {
// PADDLE_ENFORCE(this->holder_ != nullptr,
// "Cannot invoke mutable data if current hold nothing.");
return mutable_data(holder_->type());
}
/**
* @brief Return a pointer to mutable memory block.
*
* @param[in] dims The dimensions of the memory block.
* @param[in] place The place of the memory block.
*
* @note If not exist, then allocation.
*/
template <typename T> inline T *mutable_data(DDim dims) {
static_assert(std::is_pod<T>::value, "T must be POD");
Resize(dims);
return mutable_data<T>();
}
/*! Return the dimensions of the memory block. */
inline const DDim &dims() const { return dims_; }
/*! Return the numel of the memory block. */
inline int64_t numel() const { return product(dims_); }
/*! Resize the dimensions of the memory block. */
inline Tensor &Resize(const DDim &dims) {
dims_ = dims;
return *this;
}
/*! The internal of two tensors share the same memory block. */
inline Tensor &ShareDataWith(const Tensor &src) {
src.check_memory_size();
*this = src;
return *this;
}
/**
 * @brief Return a sub-tensor covering rows [begin_idx, end_idx) of dimension 0.
 *
 * @param[in] begin_idx Index of the first row (inclusive), counted from 0.
 * @param[in] end_idx   Index of the end row (exclusive), counted from 0.
 *
 * @note The range checks on the indices are commented out in the original,
 *       so the indices are not validated here. When dims_[0] == 1 the tensor
 *       itself is returned regardless of the indices.
 */
inline Tensor Slice(int begin_idx, int end_idx) const {
    check_memory_size();
    if (dims_[0] == 1) {
        return *this;
    }

    // Number of elements in one row of dimension 0.
    const size_t elems_per_row = numel() / dims_[0];
    DDim sliced_dims = dims_;
    sliced_dims[0] = end_idx - begin_idx;

    // The slice shares the holder and only moves the byte offset forward.
    Tensor sliced;
    sliced.holder_ = holder_;
    sliced.set_layout(layout_);
    sliced.Resize(sliced_dims);
    sliced.offset_ = offset_ + begin_idx * elems_per_row * SizeOfType(type());
    return sliced;
}
/**
 * @brief Element type recorded by the memory holder.
 *
 * @note holder_ is dereferenced unconditionally; the original not-null
 *       check is commented out.
 */
std::type_index type() const {
    const std::type_index held_type = holder_->type();
    return held_type;
}
// Size in bytes of the held memory from offset_ to the end of the block,
// or 0 when no memory is held.
size_t memory_size() const {
    if (holder_ == nullptr) {
        return 0UL;
    }
    return holder_->size() - offset_;
}
/**
 * @brief Hook for validating that the held memory matches dims_.
 *
 * @note Currently a no-op: the original checks (holder_ is not null, and
 *       numel() * SizeOfType(type()) <= memory_size()) are commented out.
 */
inline void check_memory_size() const {
    // No validation is performed; see the note above.
}
/*! Memory layout currently recorded for this tensor. */
inline DataLayout layout() const {
    return this->layout_;
}
inline void set_layout(const DataLayout layout) { layout_ = layout; }
private:
/**
 * @brief Abstract handle to a block of memory tagged with an element type.
 *
 * @note Placeholder hides type T, so it doesn't appear as a template
 *       parameter of Variable.
 */
struct Placeholder {
    virtual ~Placeholder() = default;

    /*! Start address of the memory block. */
    virtual void *ptr() const = 0;

    /*! Size of the memory block. */
    virtual size_t size() const = 0;

    /*! Element type currently associated with the block. */
    virtual std::type_index type() const = 0;

    /*! Replace the element type associated with the block. */
    virtual void set_type(std::type_index type) = 0;
};
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, std::type_index type)
: ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
memory::PODDeleter<uint8_t>()),
size_(size), type_(type) {
// PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s
// memory to allocation.",
// (is_cpu_place(place_) ?
// "CPU" : "GPU"));
}
virtual size_t size() const { return size_; }
virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
virtual std::type_index type() const { return type_; }
virtual void set_type(std::type_index type) { type_ = type; }
/*! the pointer of memory block. */
std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;
/*! the size of memory block. */
size_t size_;
/* the current type of memory */
std::type_index type_;
};
/*! Holds the memory block once allocated; shared_ptr, so copies of a tensor
 *  refer to the same holder. */
std::shared_ptr<Placeholder> holder_;

/**
 * @brief Dimensions of the tensor's elements.
 *
 * @note dims_ does not indicate the size of the memory block.
 */
DDim dims_;

/**
 * @brief Layout of the memory block; the default is NHWC.
 *
 * @note The layout is the memory order in which weights/data are stored.
 *       For a 4-D tensor (rank 4) three layouts are commonly used:
 *       NCHW, NHWC and CHWN, where N is the batch size, C the number of
 *       feature maps, H the height and W the width.
 */
DataLayout layout_ = DataLayout::kNHWC;

/**
 * @brief Byte offset of this tensor's data inside the held block.
 *
 * @note A Placeholder may be shared by more than one tensor, and some of
 *       them may be slices of the others. offset_ is the byte offset
 *       between Placeholder::ptr_ and where this tensor's data begins.
 */
size_t offset_;
};
/**
 * @brief Return a tensor that shares @p src's data and whose dimensions are
 *        flatten_to_2d(src.dims(), num_col_dims).
 */
inline Tensor ReshapeToMatrix(const Tensor &src, int num_col_dims) {
    Tensor matrix;
    matrix.ShareDataWith(src);
    const auto matrix_dims = flatten_to_2d(src.dims(), num_col_dims);
    matrix.Resize(matrix_dims);
    return matrix;
}
} // namespace framework
namespace framework {
/**
 * @brief Maps a std::type_index to sizeof() of the matching type in the
 *        template parameter list, or 0 when no listed type matches.
 */
template <typename... T> struct SizeOfTypeFunctor;

/*! Single candidate: sizeof(T) when @p type identifies T, otherwise 0. */
template <typename T> struct SizeOfTypeFunctor<T> {
    size_t operator()(std::type_index type) const {
        // Compare the type identities directly: hash_code() values are not
        // guaranteed to be distinct for distinct types.
        if (type == std::type_index(typeid(T))) {
            return sizeof(T);
        }
        return 0UL;
    }
};

/*! Empty candidate list: nothing can match, so the size is always 0. */
template <> struct SizeOfTypeFunctor<> {
    size_t operator()(std::type_index) const { return 0UL; }
};

/*! Several candidates: try HEAD first, then fall back to the TAIL list. */
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
    size_t operator()(std::type_index type) const {
        const size_t head_size = SizeOfTypeFunctor<HEAD>()(type);
        if (head_size != 0) {
            return head_size;
        }
        return SizeOfTypeFunctor<TAIL...>()(type);
    }
};
static inline size_t SizeOfType(std::type_index type) {
SizeOfTypeFunctor<int, float, double, int16_t, int64_t, bool,
size_t>
functor;
size_t size = functor(type);
// PADDLE_ENFORCE(size != 0UL, "Cannot get size of type %s",
// type.name());
return size;
}
class LoDTensor;
class Tensor {
public:
Tensor() : offset_(0) {}
/*! Return a pointer to mutable memory block. */
template <typename T> inline T *data() {
check_memory_size();
// PADDLE_ENFORCE(std::is_same<T, void>::value ||
// holder_->type().hash_code() ==
// typeid(T).hash_code(),
// "Tensor holds the wrong type, it holds %s",
// this->holder_->type().name());
return reinterpret_cast<T *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
/*! Return a pointer to constant memory block. */
template <typename T> inline const T *data() const {
check_memory_size();
// PADDLE_ENFORCE(std::is_same<T, void>::value ||
// holder_->type().hash_code() ==
// typeid(T).hash_code(),
// "Tensor holds the wrong type, it holds %s",
// this->holder_->type().name());
return reinterpret_cast<const T *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
inline bool IsInitialized() const { return holder_ != nullptr; }
/**
* @brief Return a pointer to mutable memory block.
* @note If not exist, then allocation.
*/
template <typename T> inline T *mutable_data() {
static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T *>(mutable_data(typeid(T)));
}
inline void *mutable_data(std::type_index type) {
if (holder_ != nullptr) {
holder_->set_type(type);
}
// PADDLE_ENFORCE_GE(numel(), 0,
// "When calling this method, the Tensor's
// numel must be
// " "equal or larger than zero. " "Please
// check
// Tensor::Resize has been called first.");
int64_t size = numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || holder_->size() < size + offset_) {
holder_.reset(new PlaceholderImpl(size, type));
offset_ = 0;
}
return reinterpret_cast<void *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
inline void *mutable_data() {
// PADDLE_ENFORCE(this->holder_ != nullptr,
// "Cannot invoke mutable data if current hold
// nothing.");
return mutable_data(holder_->type());
}
/**
* @brief Return a pointer to mutable memory block.
*
* @param[in] dims The dimensions of the memory block.
* @param[in] place The place of the memory block.
*
* @note If not exist, then allocation.
*/
template <typename T> inline T *mutable_data(DDim dims) {
static_assert(std::is_pod<T>::value, "T must be POD");
Resize(dims);
return mutable_data<T>();
}
/*! Return the dimensions of the memory block. */
inline const DDim &dims() const { return dims_; }
/*! Return the numel of the memory block. */
inline int64_t numel() const { return product(dims_); }
/*! Resize the dimensions of the memory block. */
inline Tensor &Resize(const DDim &dims) {
dims_ = dims;
return *this;
}
/*! The internal of two tensors share the same memory block. */
inline Tensor &ShareDataWith(const Tensor &src) {
src.check_memory_size();
*this = src;
return *this;
}
/**
* @brief Return a sub-tensor of the given tensor.
*
* @param[in] begin_idx The index of the start row(inclusive) to
* slice.
* The index number begins from 0.
* @param[in] end_idx The index of the end row(exclusive) to
* slice.
* The index number begins from 0.
*/
inline Tensor Slice(int begin_idx, int end_idx) const {
check_memory_size();
// PADDLE_ENFORCE_GE(begin_idx, 0,
// "The start row index must be greater than
// 0.");
// PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is
// out of
// bound."); PADDLE_ENFORCE_LT(
// begin_idx, end_idx,
// "The start row index must be lesser than the end row
// index.");
if (dims_[0] == 1) {
return *this;
} else {
size_t base = numel() / dims_[0];
Tensor dst;
dst.holder_ = holder_;
dst.set_layout(layout_);
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.Resize(dst_dims);
dst.offset_ =
offset_ + begin_idx * base * SizeOfType(type());
return dst;
}
}
std::type_index type() const {
// PADDLE_ENFORCE_NOT_NULL(
// holder_, "Tensor not initialized yet
// when
// Tensor::type() is called.");
return holder_->type();
}
// memory size returns the holding memory size in byte.
size_t memory_size() const {
return holder_ == nullptr ? 0UL : holder_->size() - offset_;
}
inline void check_memory_size() const {
// PADDLE_ENFORCE_NOT_NULL(
// holder_, "Tensor holds no memory. Call
// Tensor::mutable_data
// first.");
// PADDLE_ENFORCE_LE(
// numel() * SizeOfType(type()), memory_size(),
// "Tensor's dims_ is out of bound. Call
// Tensor::mutable_data "
// "first to re-allocate memory.\n"
// "or maybe the required data-type mismatches the data
// already
// stored.");
}
inline DataLayout layout() const { return layout_; }
inline void set_layout(const DataLayout layout) {
layout_ = layout;
}
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a
* template
* parameter of Variable.
*/
struct Placeholder {
virtual ~Placeholder() = default;
virtual void *ptr() const = 0;
virtual size_t size() const = 0;
virtual std::type_index type() const = 0;
virtual void set_type(std::type_index type) = 0;
};
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, std::type_index type)
: ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
memory::PODDeleter<uint8_t>()),
size_(size), type_(type) {
// PADDLE_ENFORCE_NOT_NULL(ptr_,
// "Insufficient %s
// memory to allocation.",
// (is_cpu_place(place_)
// ?
// "CPU" :
// "GPU"));
}
virtual size_t size() const { return size_; }
virtual void *ptr() const {
return static_cast<void *>(ptr_.get());
}
virtual std::type_index type() const { return type_; }
virtual void set_type(std::type_index type) { type_ = type; }
/*! the pointer of memory block. */
std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;
/*! the size of memory block. */
size_t size_;
/* the current type of memory */
std::type_index type_;
};
/*! holds the memory block if allocated. */
std::shared_ptr<Placeholder> holder_;
/**
* @brief points to elements dimensions.
*
* @note dims_ do not indicate the memory block size.
*/
DDim dims_;
/**
* @brief the layout of memory block, default is NHWC.
*
* @note the memory allocation order, describe how weight/data is
* stored
* For example, in 4-D Tensor(rank=4), there are three
* commonly
* used layout. They are
* NCHW, NHWC, CHWN.
* N,C,H,W for respectively the batch size, the number of
* feature maps, the height, the width.
*/
DataLayout layout_ = DataLayout::kNHWC;
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
* @note Some of them may be slices of the others. So the offset_
* is introduced here to indicate the byte offset between
* PlaceHolder::ptr_ and where the tensor data really
* begins.
*/
size_t offset_;
};
/**
 * @brief Return a tensor that shares @p src's data and whose dimensions are
 *        flatten_to_2d(src.dims(), num_col_dims).
 */
inline Tensor ReshapeToMatrix(const Tensor &src, int num_col_dims) {
    Tensor matrix;
    matrix.ShareDataWith(src);
    const auto matrix_dims = flatten_to_2d(src.dims(), num_col_dims);
    matrix.Resize(matrix_dims);
    return matrix;
}
} // namespace framework
} // namespace paddle_mobile
......@@ -18,183 +18,189 @@
#include <vector>
namespace paddle_mobile {
namespace framework {
/**
 * @brief Copy dimensions, layout and element data from @p src into @p dst.
 *
 * @param[in]  src Tensor to copy from.
 * @param[out] dst Tensor resized to src.dims() and filled with src's data.
 */
void TensorCopy(const Tensor &src, Tensor *dst) {
    src.check_memory_size();

    // Mirror shape and layout before requesting destination memory.
    dst->Resize(src.dims());
    dst->set_layout(src.layout());

    const auto *from = src.data<void>();
    auto *to = dst->mutable_data(src.type());
    const auto byte_count = src.numel() * SizeOfType(src.type());
    memory::Copy(to, from, byte_count);
}
void TensorCopySync(const Tensor &src, Tensor *dst) {
// VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
// << " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(src.type());
auto size = src.numel() * SizeOfType(src.type());
memory::Copy(dst_ptr, src_ptr, size);
}
/**
 * @brief Per-data-type visitor used by AnyImpl.
 *
 * @note operator() is currently empty: the Eigen-based evaluation of the
 *       predicate is commented out, so nothing is written to out_.
 */
template <typename Predicate> struct AnyDTypeVisitor {
    Predicate predicate_;
    const Tensor &tensor_;
    Tensor *out_;

    AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
        : predicate_(predicate), tensor_(tensor), out_(out) {}

    template <typename T> void operator()() const {
        // Disabled implementation:
        //   auto t = EigenVector<T>::Flatten(tensor_);
        //   auto o = EigenScalar<bool>::From(*out_);
        //   o.device(*ctx_.eigen_device()) = predicate_(t).any();
    }
};
/*! Dispatch an AnyDTypeVisitor on the data type of @p tensor. */
template <typename Predicate>
inline void AnyImpl(Predicate predicate, const Tensor &tensor,
                    framework::Tensor *out) {
    const auto data_type = ToDataType(tensor.type());
    VisitDataType(data_type,
                  AnyDTypeVisitor<Predicate>(predicate, tensor, out));
}
/**
 * @brief Evaluates a predicate over a tensor and returns the single bool
 *        result stored in a one-element output tensor.
 */
template <typename Predicate> struct AnyVisitor {
    const framework::Tensor &tensor_;
    Predicate predicate_;

    AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
        : tensor_(tensor), predicate_(std::move(predicate)) {}

    /*! Run the predicate through AnyImpl and return the stored result. */
    bool operator()(void) const {
        framework::Tensor out;
        out.Resize({1});
        // Start from a defined value: AnyDTypeVisitor currently writes
        // nothing, so without this the result would be read uninitialized.
        *out.mutable_data<bool>() = false;
        AnyImpl(predicate_, tensor_, &out);
        return this->GetResult(out);
    }

    /*! Read the bool held by the one-element result tensor. */
    bool GetResult(const framework::Tensor &out) const {
        return *out.data<bool>();
    }
};
/*! Build an AnyVisitor for @p tensor and @p predicate and return its result. */
template <typename Predicate>
inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
    return AnyVisitor<Predicate>(tensor, predicate)();
}
/*! Predicate that forwards to the argument's isnan() member. */
struct ContainsNANPredicate {
    template <typename T>
    auto operator()(const T &eigen_vec) const
        -> decltype(std::declval<T>().isnan()) {
        // The result is whatever isnan() yields for the given vector.
        return eigen_vec.isnan();
    }
};
/*! Evaluate ContainsNANPredicate over @p tensor via Any(). */
bool TensorContainsNAN(const framework::Tensor &tensor) {
    return Any(tensor, ContainsNANPredicate());
}
/*! Predicate that forwards to the argument's isinf() member. */
struct ContainsInfPredicate {
    template <typename T>
    auto operator()(const T &eigen_vec) const
        -> decltype(std::declval<T>().isinf()) {
        // The result is whatever isinf() yields for the given vector.
        return eigen_vec.isinf();
    }
};
/*! Evaluate ContainsInfPredicate over @p tensor via Any(). */
bool TensorContainsInf(const framework::Tensor &tensor) {
    return Any(tensor, ContainsInfPredicate());
}
/**
 * @brief Serialize @p tensor to @p os as three consecutive fields.
 *
 * Field 1: uint32_t version (0).
 * Field 2: int32_t byte size followed by the serialized TensorDesc
 *          (data type and dimensions).
 * Field 3: tensor.memory_size() bytes of raw tensor data.
 */
void TensorToStream(std::ostream &os, const Tensor &tensor) {
    // Field 1: format version.
    constexpr uint32_t kVersion = 0;
    os.write(reinterpret_cast<const char *>(&kVersion), sizeof(kVersion));

    // Field 2: tensor description, prefixed with its byte size.
    proto::VarType::TensorDesc desc;
    desc.set_data_type(framework::ToDataType(tensor.type()));
    const auto shape = framework::vectorize(tensor.dims());
    auto *proto_dims = desc.mutable_dims();
    proto_dims->Resize(static_cast<int>(shape.size()), 0);
    std::copy(shape.begin(), shape.end(), proto_dims->begin());
    const int32_t desc_bytes = desc.ByteSize();
    os.write(reinterpret_cast<const char *>(&desc_bytes),
             sizeof(desc_bytes));
    const auto serialized = desc.SerializeAsString();
    os.write(serialized.data(), desc_bytes);

    // Field 3: raw tensor data (no overflow check on the size; the
    // original enforcement is commented out).
    const uint64_t data_bytes = tensor.memory_size();
    const auto *raw = tensor.data<void>();
    os.write(static_cast<const char *>(raw),
             static_cast<std::streamsize>(data_bytes));
}
/**
 * @brief Visitor that requests mutable memory of element type T from a
 *        tensor and stores the resulting pointer in *buf_.
 */
struct DeserializedDataFunctor {
    DeserializedDataFunctor(void **buf, Tensor *tensor)
        : buf_(buf), tensor_(tensor) {}

    template <typename T> void operator()() {
        T *typed = tensor_->mutable_data<T>();
        *buf_ = typed;
    }

    void **buf_;
    Tensor *tensor_;
};
/**
 * @brief Deserialize a tensor written by TensorToStream from @p is.
 *
 * Reads the uint32_t version, the int32_t-prefixed TensorDesc and then the
 * raw tensor data into @p tensor.
 *
 * @note On a short read, a negative descriptor size or an unparsable
 *       descriptor the function sets failbit on @p is and returns without
 *       touching the tensor data. The version value is read but not
 *       validated (the original check is commented out).
 */
void TensorFromStream(std::istream &is, framework::Tensor *tensor) {
    // Field 1: format version.
    uint32_t version = 0;
    is.read(reinterpret_cast<char *>(&version), sizeof(version));

    // Field 2: int32_t byte size followed by the serialized TensorDesc.
    proto::VarType::TensorDesc desc;
    {
        int32_t size = 0;
        is.read(reinterpret_cast<char *>(&size), sizeof(size));
        if (!is || size < 0) {
            is.setstate(std::ios_base::failbit);
            return;
        }
        std::unique_ptr<char[]> buf(new char[size]);
        is.read(buf.get(), size);
        // The descriptor must actually be parsed; otherwise dims and data
        // type below would come from an empty message.
        if (!is || !desc.ParseFromArray(buf.get(), size)) {
            is.setstate(std::ios_base::failbit);
            return;
        }
    }

    // Field 3: raw tensor data.
    {
        std::vector<int64_t> dims;
        dims.reserve(static_cast<size_t>(desc.dims().size()));
        std::copy(desc.dims().begin(), desc.dims().end(),
                  std::back_inserter(dims));
        tensor->Resize(framework::make_ddim(dims));

        void *buf = nullptr;
        framework::VisitDataType(desc.data_type(),
                                 DeserializedDataFunctor(&buf, tensor));
        if (buf == nullptr) {
            // The visitor did not provide a destination buffer.
            is.setstate(std::ios_base::failbit);
            return;
        }
        is.read(static_cast<char *>(buf), tensor->memory_size());
    }
}
} // namespace framework
namespace framework {
/**
 * @brief Copy dimensions, layout and element data from @p src into @p dst.
 *
 * @param[in]  src Tensor to copy from.
 * @param[out] dst Tensor resized to src.dims() and filled with src's data.
 */
void TensorCopy(const Tensor &src, Tensor *dst) {
    src.check_memory_size();

    // Mirror shape and layout before requesting destination memory.
    dst->Resize(src.dims());
    dst->set_layout(src.layout());

    const auto *from = src.data<void>();
    auto *to = dst->mutable_data(src.type());
    const auto byte_count = src.numel() * SizeOfType(src.type());
    memory::Copy(to, from, byte_count);
}
void TensorCopySync(const Tensor &src, Tensor *dst) {
// VLOG(3) << "TensorCopySync " << src.dims() << " from " <<
// src.place()
// << " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(src.type());
auto size = src.numel() * SizeOfType(src.type());
memory::Copy(dst_ptr, src_ptr, size);
}
/**
 * @brief Per-data-type visitor used by AnyImpl.
 *
 * @note operator() is currently empty: the Eigen-based evaluation of the
 *       predicate is commented out, so nothing is written to out_.
 */
template <typename Predicate> struct AnyDTypeVisitor {
    Predicate predicate_;
    const Tensor &tensor_;
    Tensor *out_;

    AnyDTypeVisitor(Predicate predicate, const Tensor &tensor, Tensor *out)
        : predicate_(predicate), tensor_(tensor), out_(out) {}

    template <typename T> void operator()() const {
        // Disabled implementation:
        //   auto t = EigenVector<T>::Flatten(tensor_);
        //   auto o = EigenScalar<bool>::From(*out_);
        //   o.device(*ctx_.eigen_device()) = predicate_(t).any();
    }
};
/*! Dispatch an AnyDTypeVisitor on the data type of @p tensor. */
template <typename Predicate>
inline void AnyImpl(Predicate predicate, const Tensor &tensor,
                    framework::Tensor *out) {
    const auto data_type = ToDataType(tensor.type());
    VisitDataType(data_type,
                  AnyDTypeVisitor<Predicate>(predicate, tensor, out));
}
/**
 * @brief Evaluates a predicate over a tensor and returns the single bool
 *        result stored in a one-element output tensor.
 */
template <typename Predicate> struct AnyVisitor {
    const framework::Tensor &tensor_;
    Predicate predicate_;

    AnyVisitor(const framework::Tensor &tensor, Predicate predicate)
        : tensor_(tensor), predicate_(std::move(predicate)) {}

    /*! Run the predicate through AnyImpl and return the stored result. */
    bool operator()(void) const {
        framework::Tensor out;
        out.Resize({1});
        // Start from a defined value: AnyDTypeVisitor currently writes
        // nothing, so without this the result would be read uninitialized.
        *out.mutable_data<bool>() = false;
        AnyImpl(predicate_, tensor_, &out);
        return this->GetResult(out);
    }

    /*! Read the bool held by the one-element result tensor. */
    bool GetResult(const framework::Tensor &out) const {
        return *out.data<bool>();
    }
};
/*! Build an AnyVisitor for @p tensor and @p predicate and return its result. */
template <typename Predicate>
inline bool Any(const framework::Tensor &tensor, Predicate predicate) {
    return AnyVisitor<Predicate>(tensor, predicate)();
}
/*! Predicate that forwards to the argument's isnan() member. */
struct ContainsNANPredicate {
    template <typename T>
    auto operator()(const T &eigen_vec) const
        -> decltype(std::declval<T>().isnan()) {
        // The result is whatever isnan() yields for the given vector.
        return eigen_vec.isnan();
    }
};
/*! Evaluate ContainsNANPredicate over @p tensor via Any(). */
bool TensorContainsNAN(const framework::Tensor &tensor) {
    return Any(tensor, ContainsNANPredicate());
}
/*! Predicate that forwards to the argument's isinf() member. */
struct ContainsInfPredicate {
    template <typename T>
    auto operator()(const T &eigen_vec) const
        -> decltype(std::declval<T>().isinf()) {
        // The result is whatever isinf() yields for the given vector.
        return eigen_vec.isinf();
    }
};
/*! Evaluate ContainsInfPredicate over @p tensor via Any(). */
bool TensorContainsInf(const framework::Tensor &tensor) {
    return Any(tensor, ContainsInfPredicate());
}
/**
 * @brief Serialize @p tensor to @p os as three consecutive fields.
 *
 * Field 1: uint32_t version (0).
 * Field 2: int32_t byte size followed by the serialized TensorDesc
 *          (data type and dimensions).
 * Field 3: tensor.memory_size() bytes of raw tensor data.
 */
void TensorToStream(std::ostream &os, const Tensor &tensor) {
    // Field 1: format version.
    constexpr uint32_t kVersion = 0;
    os.write(reinterpret_cast<const char *>(&kVersion), sizeof(kVersion));

    // Field 2: tensor description, prefixed with its byte size.
    proto::VarType::TensorDesc desc;
    desc.set_data_type(framework::ToDataType(tensor.type()));
    const auto shape = framework::vectorize(tensor.dims());
    auto *proto_dims = desc.mutable_dims();
    proto_dims->Resize(static_cast<int>(shape.size()), 0);
    std::copy(shape.begin(), shape.end(), proto_dims->begin());
    const int32_t desc_bytes = desc.ByteSize();
    os.write(reinterpret_cast<const char *>(&desc_bytes),
             sizeof(desc_bytes));
    const auto serialized = desc.SerializeAsString();
    os.write(serialized.data(), desc_bytes);

    // Field 3: raw tensor data (no overflow check on the size; the
    // original enforcement is commented out).
    const uint64_t data_bytes = tensor.memory_size();
    const auto *raw = tensor.data<void>();
    os.write(static_cast<const char *>(raw),
             static_cast<std::streamsize>(data_bytes));
}
/**
 * @brief Visitor that requests mutable memory of element type T from a
 *        tensor and stores the resulting pointer in *buf_.
 */
struct DeserializedDataFunctor {
    DeserializedDataFunctor(void **buf, Tensor *tensor)
        : buf_(buf), tensor_(tensor) {}

    template <typename T> void operator()() {
        T *typed = tensor_->mutable_data<T>();
        *buf_ = typed;
    }

    void **buf_;
    Tensor *tensor_;
};
/**
 * @brief Deserialize a tensor written by TensorToStream from @p is.
 *
 * Reads the uint32_t version, the int32_t-prefixed TensorDesc and then the
 * raw tensor data into @p tensor.
 *
 * @note On a short read, a negative descriptor size or an unparsable
 *       descriptor the function sets failbit on @p is and returns without
 *       touching the tensor data. The version value is read but not
 *       validated (the original check is commented out).
 */
void TensorFromStream(std::istream &is, framework::Tensor *tensor) {
    // Field 1: format version.
    uint32_t version = 0;
    is.read(reinterpret_cast<char *>(&version), sizeof(version));

    // Field 2: int32_t byte size followed by the serialized TensorDesc.
    proto::VarType::TensorDesc desc;
    {
        int32_t size = 0;
        is.read(reinterpret_cast<char *>(&size), sizeof(size));
        if (!is || size < 0) {
            is.setstate(std::ios_base::failbit);
            return;
        }
        std::unique_ptr<char[]> buf(new char[size]);
        is.read(buf.get(), size);
        // The descriptor must actually be parsed; otherwise dims and data
        // type below would come from an empty message.
        if (!is || !desc.ParseFromArray(buf.get(), size)) {
            is.setstate(std::ios_base::failbit);
            return;
        }
    }

    // Field 3: raw tensor data.
    {
        std::vector<int64_t> dims;
        dims.reserve(static_cast<size_t>(desc.dims().size()));
        std::copy(desc.dims().begin(), desc.dims().end(),
                  std::back_inserter(dims));
        tensor->Resize(framework::make_ddim(dims));

        void *buf = nullptr;
        framework::VisitDataType(desc.data_type(),
                                 DeserializedDataFunctor(&buf, tensor));
        if (buf == nullptr) {
            // The visitor did not provide a destination buffer.
            is.setstate(std::ios_base::failbit);
            return;
        }
        is.read(static_cast<char *>(buf), tensor->memory_size());
    }
}
} // namespace framework
} // namespace paddle_mobile
......@@ -20,47 +20,47 @@ limitations under the License. */
#include <vector>
namespace paddle_mobile {
namespace framework {
namespace framework {
// Copy dimensions, layout and element data from src into dst.
void TensorCopy(const Tensor &src, Tensor *dst);
// Same contract as TensorCopy.
void TensorCopySync(const Tensor &src, Tensor *dst);
// Copy dimensions, layout and element data from src into dst.
void TensorCopy(const Tensor &src, Tensor *dst);
// Same contract as TensorCopy.
void TensorCopySync(const Tensor &src, Tensor *dst);
// Resize dst to a 1-D tensor of src.size() elements and copy src into it.
template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst);
// Resize dst to a 1-D tensor of src.size() elements and copy src into it.
template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst);
// Resize dst to src.numel() elements and copy the tensor data into it.
// The name matches the TensorToVector definition in this header (the
// declaration was previously misspelled and therefore never defined).
template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst);
// Resize dst to src.numel() elements and copy the tensor data into it.
// The name matches the TensorToVector definition in this header (the
// declaration was previously misspelled and therefore never defined).
template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst);
// Evaluate the NaN predicate over the tensor.
bool TensorContainsNAN(const framework::Tensor &tensor);
// Evaluate the Inf predicate over the tensor.
bool TensorContainsInf(const framework::Tensor &tensor);
// Evaluate the NaN predicate over the tensor.
bool TensorContainsNAN(const framework::Tensor &tensor);
// Evaluate the Inf predicate over the tensor.
bool TensorContainsInf(const framework::Tensor &tensor);
// Write version, tensor description and raw data of the tensor to os.
void TensorToStream(std::ostream &os, const Tensor &tensor);
// Read version, tensor description and raw data from is into the tensor.
void TensorFromStream(std::istream &is, Tensor *tensor);
// Write version, tensor description and raw data of the tensor to os.
void TensorToStream(std::ostream &os, const Tensor &tensor);
// Read version, tensor description and raw data from is into the tensor.
void TensorFromStream(std::istream &is, Tensor *tensor);
//
// The implementation of template functions.
//
//
// The implementation of template functions.
//
// Copy the contents of a std::vector into a tensor: dst is resized to a 1-D
// tensor with src.size() elements, memory for element type T is requested
// via mutable_data<T>(), and src.size() * sizeof(T) bytes are copied.
// NOTE(review): the statements of this function appear twice in this span
// (two copies interleaved); confirm which copy is the intended one.
template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
auto src_ptr = static_cast<const void *>(src.data());
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
auto size = src.size() * sizeof(T);
template <typename T>
void TensorFromVector(const std::vector<T> &src, Tensor *dst) {
auto src_ptr = static_cast<const void *>(src.data());
dst->Resize({static_cast<int64_t>(src.size())});
auto dst_ptr = static_cast<void *>(dst->mutable_data<T>());
auto size = src.size() * sizeof(T);
// Byte-wise copy from the vector's storage into the tensor's memory.
memory::Copy(dst_ptr, src_ptr, size);
}
memory::Copy(dst_ptr, src_ptr, size);
}
// Copy the contents of a tensor into a std::vector: dst is resized to
// src.numel() elements and src.numel() * sizeof(T) bytes are copied from
// the tensor's data pointer.
// NOTE(review): the statements of this function appear twice in this span
// (two copies interleaved); confirm which copy is the intended one.
template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst) {
auto src_ptr = static_cast<const void *>(src.data<T>());
auto size = src.numel() * sizeof(T);
template <typename T>
void TensorToVector(const Tensor &src, std::vector<T> *dst) {
auto src_ptr = static_cast<const void *>(src.data<T>());
auto size = src.numel() * sizeof(T);
// The destination pointer is taken after the resize, so it refers to the
// vector's resized storage.
dst->resize(src.numel());
auto dst_ptr = static_cast<void *>(dst->data());
dst->resize(src.numel());
auto dst_ptr = static_cast<void *>(dst->data());
memory::Copy(dst_ptr, src_ptr, size);
}
memory::Copy(dst_ptr, src_ptr, size);
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -20,9 +20,9 @@ SOFTWARE.
namespace paddle_mobile {
namespace framework {
namespace framework {
// Store a copy of the protobuf variable description.
VarDesc::VarDesc(const proto::VarDesc &desc)
    : desc_(desc) {
}
// Store a copy of the protobuf variable description.
VarDesc::VarDesc(const proto::VarDesc &desc)
    : desc_(desc) {
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -22,67 +22,68 @@ SOFTWARE.
#include "paddle_mobile_object.h"
namespace paddle_mobile {
namespace framework {
namespace framework {
class VarDesc {
public:
VarDesc(const proto::VarDesc &desc);
class VarDesc {
public:
VarDesc(const proto::VarDesc &desc);
// Name stored in the wrapped description (returned by value).
std::string Name() const {
    return desc_.name();
}
// Name stored in the wrapped description (returned by value).
std::string Name() const {
    return desc_.name();
}
// Variable type recorded in the wrapped description.
proto::VarType::Type GetType() const {
    const auto &var_type = desc_.type();
    return var_type.type();
}
// Variable type recorded in the wrapped description.
proto::VarType::Type GetType() const {
    const auto &var_type = desc_.type();
    return var_type.type();
}
// Value of the persistable field of the wrapped description.
bool Persistable() const {
    return desc_.persistable();
}
// Value of the persistable field of the wrapped description.
bool Persistable() const {
    return desc_.persistable();
}
// Channel description of a CHANNEL variable.
// For any other variable type the message's default instance is returned;
// previously control fell off the end of this reference-returning function,
// which is undefined behavior.
const proto::VarType::ChannelDesc &channel_desc() const {
    if (desc_.type().type() == proto::VarType::CHANNEL) {
        return desc_.type().channel();
    }
    return proto::VarType::ChannelDesc::default_instance();
}
// Channel description of a CHANNEL variable.
// For any other variable type the message's default instance is returned;
// previously control fell off the end of this reference-returning function,
// which is undefined behavior.
const proto::VarType::ChannelDesc &channel_desc() const {
    if (desc_.type().type() == proto::VarType::CHANNEL) {
        return desc_.type().channel();
    }
    return proto::VarType::ChannelDesc::default_instance();
}
// Tensor description of a SELECTED_ROWS, LOD_TENSOR or LOD_TENSOR_ARRAY
// variable.
// For any other variable type the message's default instance is returned;
// previously control fell off the end of this reference-returning function,
// which is undefined behavior.
const proto::VarType::TensorDesc &tensor_desc() const {
    switch (desc_.type().type()) {
    case proto::VarType::SELECTED_ROWS:
        return desc_.type().selected_rows();
    case proto::VarType::LOD_TENSOR:
        return desc_.type().lod_tensor().tensor();
    case proto::VarType::LOD_TENSOR_ARRAY:
        return desc_.type().tensor_array().tensor();
    default:
        return proto::VarType::TensorDesc::default_instance();
    }
}
// Tensor description of a SELECTED_ROWS, LOD_TENSOR or LOD_TENSOR_ARRAY
// variable.
// For any other variable type the message's default instance is returned;
// previously control fell off the end of this reference-returning function,
// which is undefined behavior.
const proto::VarType::TensorDesc &tensor_desc() const {
    switch (desc_.type().type()) {
    case proto::VarType::SELECTED_ROWS:
        return desc_.type().selected_rows();
    case proto::VarType::LOD_TENSOR:
        return desc_.type().lod_tensor().tensor();
    case proto::VarType::LOD_TENSOR_ARRAY:
        return desc_.type().tensor_array().tensor();
    default:
        return proto::VarType::TensorDesc::default_instance();
    }
}
// Element data type: taken from the channel description for CHANNEL
// variables and from the tensor description for every other type.
proto::VarType::Type GetDataType() const {
    const bool is_channel = desc_.type().type() == proto::VarType::CHANNEL;
    if (is_channel) {
        return channel_desc().data_type();
    }
    return tensor_desc().data_type();
}
// Element data type: taken from the channel description for CHANNEL
// variables and from the tensor description for every other type.
proto::VarType::Type GetDataType() const {
    const bool is_channel = desc_.type().type() == proto::VarType::CHANNEL;
    if (is_channel) {
        return channel_desc().data_type();
    }
    return tensor_desc().data_type();
}
// Copy every element of a protobuf repeated field into a std::vector,
// preserving order.
template <typename T>
std::vector<T> RepeatedToVector(
    const google::protobuf::RepeatedField<T> &repeated_field) const {
    std::vector<T> values;
    values.reserve(repeated_field.size());
    for (auto it = repeated_field.begin(); it != repeated_field.end(); ++it) {
        values.push_back(*it);
    }
    return values;
}
// Copy every element of a protobuf repeated field into a std::vector,
// preserving order.
template <typename T>
std::vector<T> RepeatedToVector(
    const google::protobuf::RepeatedField<T> &repeated_field) const {
    std::vector<T> values;
    values.reserve(repeated_field.size());
    for (auto it = repeated_field.begin(); it != repeated_field.end(); ++it) {
        values.push_back(*it);
    }
    return values;
}
// Dimensions of the tensor description, as a vector.
std::vector<int64_t> GetShape() const {
    const auto &proto_dims = tensor_desc().dims();
    return this->RepeatedToVector(proto_dims);
}
// Dimensions of the tensor description, as a vector.
std::vector<int64_t> GetShape() const {
    const auto &proto_dims = tensor_desc().dims();
    return this->RepeatedToVector(proto_dims);
}
private:
proto::VarDesc desc_;
};
private:
proto::VarDesc desc_;
};
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -23,16 +23,17 @@ SOFTWARE.
#include "variable.h"
#include <stdexcept>
namespace paddle_mobile {
namespace framework {
/**
 * @brief Map a C++ type identity to the corresponding proto variable type.
 *
 * @param type Type identity; LoDTensor and SelectedRows are supported.
 * @return LOD_TENSOR for LoDTensor, SELECTED_ROWS for SelectedRows.
 * @throws std::invalid_argument for any other type. Previously control fell
 *         off the end of this non-void function, which is undefined behavior.
 */
inline proto::VarType::Type ToVarType(std::type_index type) {
    // Compare type identities directly; hash_code() values are not
    // guaranteed to be distinct for distinct types.
    if (type == std::type_index(typeid(LoDTensor))) {
        return proto::VarType_Type_LOD_TENSOR;
    }
    if (type == std::type_index(typeid(SelectedRows))) {
        return proto::VarType_Type_SELECTED_ROWS;
    }
    throw std::invalid_argument(
        std::string("ToVarType:Unsupported type ") + type.name());
}
namespace framework {
/**
 * @brief Map a C++ type identity to the corresponding proto variable type.
 *
 * @param type Type identity; LoDTensor and SelectedRows are supported.
 * @return LOD_TENSOR for LoDTensor, SELECTED_ROWS for SelectedRows.
 * @throws std::invalid_argument for any other type. Previously control fell
 *         off the end of this non-void function, which is undefined behavior.
 */
inline proto::VarType::Type ToVarType(std::type_index type) {
    // Compare type identities directly; hash_code() values are not
    // guaranteed to be distinct for distinct types.
    if (type == std::type_index(typeid(LoDTensor))) {
        return proto::VarType_Type_LOD_TENSOR;
    }
    if (type == std::type_index(typeid(SelectedRows))) {
        return proto::VarType_Type_SELECTED_ROWS;
    }
    throw std::invalid_argument(
        std::string("ToVarType:Unsupported type ") + type.name());
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -26,72 +26,71 @@ SOFTWARE.
#include <typeinfo>
namespace paddle_mobile {
namespace framework {
/**
 * @brief Type-erased container that owns at most one value of any type.
 *
 * The value lives in a PlaceholderImp<T> behind holder_; name_ is a
 * non-owning pointer set through SetName().
 */
class Variable : public PaddleMobileObject {
  public:
    Variable() {}
    ~Variable() {}

    /*! Typed read-only pointer to the held value. holder_ is dereferenced
     *  without a null or type check. */
    template <typename T> const T *Get() const {
        return static_cast<const T *>(holder_->Ptr());
    }

    /*! True once a value is held. */
    bool IsInitialized() const { return holder_ != nullptr; }

    /*! Pointer passed to SetName(), or nullptr when no name was set. */
    const std::string *Name() { return name_; }

    /**
     * @brief Typed mutable pointer to the held value.
     *
     * When no value of type T is held, the current value (if any) is
     * replaced by a default-constructed T.
     */
    template <typename T> T *GetMutable() {
        if (!IsType<T>()) {
            // The former debug branch dereferenced name_ here, which is
            // invalid when SetName() was never called; it had no effect
            // and has been removed.
            holder_.reset(new PlaceholderImp<T>(new T()));
        }
        return static_cast<T *>(holder_->Ptr());
    }

    /*! True when a value is held and its type is exactly T. */
    template <typename T> bool IsType() const {
        if (holder_) {
            // NOTE(review): debug trace printed on every call; consider
            // routing it through the project's log module.
            printf(" holder type : %s, this type %s \n",
                   holder_->Type().name(), typeid(T).name());
        }
        return holder_ != nullptr && holder_->Type() == typeid(T);
    }

    /*! Destroy the held value, if any. */
    void Clear() { holder_.reset(); }

    /*! Type of the held value. holder_ is dereferenced without a null check. */
    std::type_index Type() const { return holder_->Type(); }

    /*! Store @p name as a non-owning pointer. */
    void SetName(const std::string *name) { name_ = name; }

  private:
    /*! Type-erased owner interface for the held value. */
    struct Placeholder {
        Placeholder() = default;
        virtual ~Placeholder() = default;
        virtual const std::type_info &Type() const = 0;
        virtual void *Ptr() const = 0;
    };

    /*! Owner of a heap-allocated T together with its type information. */
    template <typename T> struct PlaceholderImp : public Placeholder {
        explicit PlaceholderImp(T *ptr) : ptr_(ptr), type_(typeid(T)) {}

        const std::type_info &Type() const override { return type_; }

        void *Ptr() const override {
            return static_cast<void *>(ptr_.get());
        }

        std::unique_ptr<T> ptr_;
        const std::type_info &type_;
    };

    std::unique_ptr<Placeholder> holder_;
    friend class Scope;
    // Initialized so that Name() yields nullptr before SetName() is called.
    const std::string *name_ = nullptr;
};
} // namespace framework
namespace framework {
/**
 * @brief Type-erased container that owns at most one value of any type.
 *
 * The value lives in a PlaceholderImp<T> behind holder_; name_ is a
 * non-owning pointer set through SetName().
 */
class Variable : public PaddleMobileObject {
  public:
    /*! Typed read-only pointer to the held value. holder_ is dereferenced
     *  without a null or type check. */
    template <typename T> const T *Get() const {
        return static_cast<const T *>(holder_->Ptr());
    }

    /*! True once a value is held. */
    bool IsInitialized() const { return holder_ != nullptr; }

    /*! Pointer passed to SetName(), or nullptr when no name was set. */
    const std::string *Name() { return name_; }

    /**
     * @brief Typed mutable pointer to the held value.
     *
     * When no value of type T is held, the current value (if any) is
     * replaced by a default-constructed T.
     */
    template <typename T> T *GetMutable() {
        if (!IsType<T>()) {
            // The former debug branch dereferenced name_ here, which is
            // invalid when SetName() was never called; it had no effect
            // and has been removed.
            holder_.reset(new PlaceholderImp<T>(new T()));
        }
        return static_cast<T *>(holder_->Ptr());
    }

    /*! True when a value is held and its type is exactly T. */
    template <typename T> bool IsType() const {
        if (holder_) {
            // NOTE(review): debug trace printed on every call; consider
            // routing it through the project's log module.
            printf(" holder type : %s, this type %s \n",
                   holder_->Type().name(), typeid(T).name());
        }
        return holder_ != nullptr && holder_->Type() == typeid(T);
    }

    /*! Destroy the held value, if any. */
    void Clear() { holder_.reset(); }

    /*! Type of the held value. holder_ is dereferenced without a null check. */
    std::type_index Type() const { return holder_->Type(); }

    /*! Store @p name as a non-owning pointer. */
    void SetName(const std::string *name) { name_ = name; }

  private:
    /*! Type-erased owner interface for the held value. */
    struct Placeholder {
        Placeholder() = default;
        virtual ~Placeholder() = default;
        virtual const std::type_info &Type() const = 0;
        virtual void *Ptr() const = 0;
    };

    /*! Owner of a heap-allocated T together with its type information. */
    template <typename T> struct PlaceholderImp : public Placeholder {
        explicit PlaceholderImp(T *ptr) : ptr_(ptr), type_(typeid(T)) {}

        const std::type_info &Type() const override { return type_; }

        void *Ptr() const override {
            return static_cast<void *>(ptr_.get());
        }

        std::unique_ptr<T> ptr_;
        const std::type_info &type_;
    };

    std::unique_ptr<Placeholder> holder_;
    friend class Scope;
    // Initialized so that Name() yields nullptr before SetName() is called.
    const std::string *name_ = nullptr;
};
} // namespace framework
} // namespace paddle_mobile
......@@ -29,359 +29,405 @@ SOFTWARE.
namespace paddle_mobile {
/**
 * @brief Read the whole file @p filename into @p contents as raw bytes.
 *
 * @param[in]  filename Path of the file to read.
 * @param[out] contents Receives the file bytes; any previous content is
 *                      discarded.
 *
 * @note When the file cannot be opened or is empty, @p contents is left
 *       empty. Previously a missing file led to a resize with an invalid
 *       size and an empty file made at(0) throw.
 */
void ReadBinaryFile(const std::string &filename, std::string *contents) {
    contents->clear();

    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open()) {
        return;
    }

    // Determine the file length by seeking to the end.
    fin.seekg(0, std::ios::end);
    const std::streamoff length = fin.tellg();
    if (length <= 0) {
        return;
    }

    contents->resize(static_cast<size_t>(length));
    fin.seekg(0, std::ios::beg);
    fin.read(&(*contents)[0],
             static_cast<std::streamsize>(contents->size()));
    // Keep only the bytes that were actually read.
    contents->resize(static_cast<size_t>(fin.gcount()));
}
// Reads one serialized LoD tensor from `file_path` into `*tensor`.
//
// The file is consumed in this order: a uint32 version, the LoD information
// (a uint64 level count, then per level a uint64 byte size followed by that
// many bytes of size_t entries), a uint32 tensor version, an int32 length
// followed by a serialized TensorDesc, and finally the raw element data.
//
// Only FP32 obtains a destination buffer from the tensor; for every other
// data type the tensor is resized but no element data is read.
//
// NOTE(review): LoD entries are read as host `size_t`; presumably the files
// are written with 8-byte entries - confirm behaviour on 32-bit targets.
template <typename Dtype, Precision P>
void Loader<Dtype, P>::LoadVar(framework::LoDTensor *tensor,
                               const std::string &file_path) {
    LOG(kLOG_DEBUG) << " to load " << file_path;
    std::ifstream is(file_path, std::ios::in | std::ios::binary);
    if (!is) {
        // Every read below would fail and leave the header fields unset.
        LOG(kLOG_DEBUG) << " failed to open " << file_path;
        return;
    }
    std::streampos pos = is.tellg(); // save current position
    is.seekg(0, std::ios::end);
    LOG(kLOG_DEBUG) << " file length = " << is.tellg();
    is.seekg(pos); // restore saved position

    // 1. version
    uint32_t version = 0;
    is.read(reinterpret_cast<char *>(&version), sizeof(version));
    LOG(kLOG_INFO) << " version: " << version;

    // 2. LoD information
    uint64_t lod_level = 0;
    is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
    LOG(kLOG_DEBUG) << " load level: " << lod_level;
    LOG(kLOG_DEBUG) << " lod info: ";
    auto &lod = *tensor->mutable_lod();
    lod.resize(lod_level);
    for (uint64_t i = 0; i < lod_level; ++i) {
        uint64_t level_bytes = 0;
        is.read(reinterpret_cast<char *>(&level_bytes), sizeof(level_bytes));
        std::vector<size_t> tmp(level_bytes / sizeof(size_t));
        // Never read more bytes than `tmp` can hold; skip any remainder so
        // the stream position still advances by `level_bytes`.
        is.read(reinterpret_cast<char *>(tmp.data()),
                static_cast<std::streamsize>(tmp.size() * sizeof(size_t)));
        is.ignore(static_cast<std::streamsize>(level_bytes % sizeof(size_t)));
        for (size_t j = 0; j < tmp.size(); ++j) {
            LOG(kLOG_DEBUG1) << " lod - " << tmp[j];
        }
        lod[i] = tmp;
    }

    // 3. tensor version
    uint32_t tensor_version = 0;
    is.read(reinterpret_cast<char *>(&tensor_version),
            sizeof(tensor_version));

    // 4. tensor desc
    int32_t desc_size = 0;
    is.read(reinterpret_cast<char *>(&desc_size), sizeof(desc_size));
    if (!is || desc_size < 0) {
        LOG(kLOG_DEBUG) << " truncated or corrupt tensor file " << file_path;
        return;
    }
    std::unique_ptr<char[]> buf(new char[desc_size]);
    is.read(reinterpret_cast<char *>(buf.get()), desc_size);
    framework::proto::VarType::TensorDesc desc;
    desc.ParseFromArray(buf.get(), desc_size);

    // Element count is the product of all dims.
    int64_t memory_size = 1;
    std::vector<int64_t> dims;
    dims.reserve(static_cast<size_t>(desc.dims().size()));
    for (int l = 0; l < desc.dims().size(); ++l) {
        memory_size *= desc.dims()[l];
        dims.push_back(desc.dims()[l]);
    }
    tensor->Resize(framework::make_ddim(dims));

    // `memory` stays null unless the data type provides a destination.
    void *memory = nullptr;
    int type_size = 0;
    switch (desc.data_type()) {
    case framework::proto::VarType::FP16:
        type_size = 2;
        break;
    case framework::proto::VarType::FP32:
        type_size = 4;
        memory = tensor->mutable_data<float>();
        break;
    case framework::proto::VarType::FP64:
        type_size = 8;
        break;
    case framework::proto::VarType::INT32:
        type_size = 4;
        break;
    case framework::proto::VarType::INT64:
        type_size = 8;
        break;
    case framework::proto::VarType::BOOL:
        type_size = 1;
        break;
    default:
        break;
    }

    if (memory == nullptr || memory_size <= 0) {
        // Reading through an unset pointer would be undefined behaviour.
        LOG(kLOG_DEBUG) << " tensor data not loaded for " << file_path;
        return;
    }
    is.read(static_cast<char *>(memory),
            static_cast<std::streamsize>(memory_size * type_size));
}
template <typename Dtype, Precision P>
const framework::Program<Dtype, P>
Loader<Dtype, P>::Load(const std::string &dirname) {
std::string model_filename = dirname + "/__model__";
std::string program_desc_str;
ReadBinaryFile(model_filename, &program_desc_str);
framework::proto::ProgramDesc program_desc_proto;
program_desc_proto.ParseFromString(program_desc_str);
std::shared_ptr<framework::ProgramDesc> originProgramDesc =
std::make_shared<framework::ProgramDesc>(program_desc_proto);
framework::Program<Dtype, P> program;
program.originProgram = originProgramDesc;
std::shared_ptr<framework::Scope> scope =
std::make_shared<framework::Scope>();
program.scope = scope;
auto block = originProgramDesc->Block(0);
for (auto block : originProgramDesc->Blocks()) {
// std::cout << "for block" << std::endl;
for (int i = 0; i < block->Vars().size(); ++i) {
std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
auto var = scope->Var(var_desc->Name());
if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
if (var_desc->Persistable() &&
var_desc->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
var_desc->GetType() != framework::proto::VarType::FETCH_LIST) {
framework::LoDTensor *tensor =
var->GetMutable<framework::LoDTensor>();
// to load
LoadVar(tensor, dirname + "/" + var_desc->Name());
}
} else {
// std::cout << "非 lod" << std::endl;
}
}
}
#ifdef PADDLE_MOBILE_DEBUG
for (int i = 0; i < program_desc_proto.blocks().size(); ++i) {
framework::proto::BlockDesc block = program_desc_proto.blocks()[i];
// std::cout << "block: " << block.idx() << std::endl;
for (int j = 0; j < block.ops().size(); ++j) {
framework::proto::OpDesc op = block.ops()[j];
// std::cout << " op: " << op.type() << std::endl;
for (int m = 0; m < op.inputs_size(); ++m) {
const framework::proto::OpDesc::Var &var = op.inputs(m);
// std::cout << " input parameter: " << var.parameter() <<
// std::endl;
for (int n = 0; n < var.arguments().size(); ++n) {
// std::cout << " argument - " << var.arguments()[n] <<
// std::endl;
}
}
for (int y = 0; y < op.outputs_size(); ++y) {
const framework::proto::OpDesc::Var &var = op.outputs(y);
// std::cout << " output parameter: " << var.parameter() <<
// std::endl;
for (int z = 0; z < var.arguments().size(); ++z) {
// std::cout << " argument - " << var.arguments()[z] <<
// std::endl;
}
}
for (int x = 0; x < op.attrs().size(); ++x) {
const framework::proto::OpDesc_Attr attr = op.attrs()[x];
// std::cout << " attr name: " << attr.name() << std::endl;
// std::cout << " attr type: " << attr.type() << std::endl;
switch (attr.type()) {
case framework::proto::AttrType::BOOLEAN:
// std::cout << " boolen: " << attr.b() << std::endl;
break;
case framework::proto::AttrType::INT:
// std::cout << " int: " << attr.i() << std::endl;
break;
case framework::proto::AttrType::FLOAT:
// std::cout << " float: " << attr.f() << std::endl;
case framework::proto::AttrType::STRING:
// std::cout << " string: " << attr.s() << std::endl;
case framework::proto::AttrType::BOOLEANS:
// std::vector<bool>
// bools(attr.bools_size());
for (int y = 0; y < attr.bools_size(); ++y) {
// std::cout << " bool - " << attr.bools(y) <<
// std::endl;
}
case framework::proto::AttrType::LONG:
// std::cout << " long: " << attr.l() << std::endl;
case framework::proto::AttrType::FLOATS:
for (int y = 0; y < attr.floats_size(); ++y) {
// std::cout << " float - " << y << ": " <<
// attr.floats(y)
// << std::endl;
}
case framework::proto::AttrType::INTS:
for (int y = 0; y < attr.ints_size(); ++y) {
// std::cout << " int - " << y << ": " <<
// attr.ints(y)
// << std::endl;
}
case framework::proto::AttrType::STRINGS:
for (int y = 0; y < attr.strings_size(); ++y) {
// std::cout << " string - " << y << ": " <<
// attr.strings(y)
// << std::endl;
}
}
}
}
template <typename Dtype, Precision P>
void Loader<Dtype, P>::LoadVar(framework::LoDTensor *tensor,
const std::string &file_path) {
for (int k = 0; k < block.vars().size(); ++k) {
framework::proto::VarDesc var = block.vars()[k];
if (var.type().type() == framework::proto::VarType::LOD_TENSOR) {
// std::cout << " var name: " << var.name() << std::endl;
const framework::proto::VarType::TensorDesc &tensor_desc =
var.type().lod_tensor().tensor();
// std::cout << " in var tensor desc dims size "
// << tensor_desc.dims().size() << std::endl;
int memory_size = 1;
for (int l = 0; l < tensor_desc.dims().size(); ++l) {
// std::cout << " var tensor desc dim " << l
// << " value: " << tensor_desc.dims()[l] <<
// std::endl;
}
}
LOG(kLOG_DEBUG) << " to load " << file_path;
// Log(kLOG_DEBUG) << "123";
if (var.persistable() &&
var.type().type() != framework::proto::VarType::FEED_MINIBATCH &&
var.type().type() != framework::proto::VarType::FETCH_LIST) {
// std::cout << " to load " << var.name() << std::endl;
std::string file_path = dirname + "/" + var.name();
std::ifstream is(file_path);
std::streampos pos = is.tellg(); // save current position
is.seekg(0, std::ios::end);
// std::cout << " file length = " << is.tellg() << std::endl;
LOG(kLOG_DEBUG) << " file length = " << is.tellg();
is.seekg(pos); // restore saved position
// 1. version
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
// std::cout << " version: " << version << std::endl;
LOG(kLOG_INFO) << " version: " << version;
// 2 Lod information
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
// std::cout << " load level: " << lod_level << std::endl;
// std::cout << " lod info: " << std::endl;
LOG(kLOG_DEBUG) << " load level: " << lod_level;
LOG(kLOG_DEBUG) << " lod info: ";
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
for (int j = 0; j < tmp.size(); ++j) {
// std::cout << " lod - " << tmp[j] << std::endl;
}
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
for (int j = 0; j < tmp.size(); ++j) {
LOG(kLOG_DEBUG1) << " lod - " << tmp[j];
}
lod[i] = tmp;
}
// 3. tensor version
uint32_t tensor_version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
// std::cout << " tensor_version: " << tensor_version <<
// std::endl;
is.read(reinterpret_cast<char *>(&tensor_version),
sizeof(tensor_version));
// std::cout << " tensor_version: " << tensor_version << std::endl;
// 4. tensor desc
int32_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
// std::cout << " tensor desc size: " << size << std::endl;
// std::cout << " tensor desc size: " << size << std::endl;
std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char *>(buf.get()), size);
framework::proto::VarType::TensorDesc desc;
desc.ParseFromArray(buf.get(), size);
// std::cout << " desc dims size " << desc.dims().size() <<
// std::endl;
// std::cout << " desc dims size " << desc.dims().size() <<
// std::endl;
int memory_size = 1;
for (int l = 0; l < desc.dims().size(); ++l) {
// std::cout << " dim " << l << " value: " <<
// desc.dims()[l]
// << std::endl;
memory_size *= desc.dims()[l];
// std::cout << " dim " << l << " value: " << desc.dims()[l]
// <<
// std::endl;
memory_size *= desc.dims()[l];
}
std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(),
std::back_inserter(dims));
tensor->Resize(framework::make_ddim(dims));
void *memory;
int type_size = 0;
// std::cout << " desc pre type: ";
// std::cout << " desc pre type: ";
switch (desc.data_type()) {
case framework::proto::VarType::FP16:
// std::cout << "FP16" << std::endl;
type_size = 2;
break;
// std::cout << "FP16" << std::endl;
type_size = 2;
break;
case framework::proto::VarType::FP32:
type_size = 4;
// std::cout << "FP32" << std::endl;
break;
type_size = 4;
memory = tensor->mutable_data<float>();
// std::cout << "FP32" << std::endl;
break;
case framework::proto::VarType::FP64:
type_size = 8;
// std::cout << "FP64" << std::endl;
break;
type_size = 8;
// std::cout << "FP64" << std::endl;
break;
case framework::proto::VarType::INT32:
type_size = 4;
// std::cout << "INT32" << std::endl;
break;
type_size = 4;
// std::cout << "INT32" << std::endl;
break;
case framework::proto::VarType::INT64:
type_size = 8;
// std::cout << "INT64" << std::endl;
break;
type_size = 8;
// std::cout << "INT64" << std::endl;
break;
case framework::proto::VarType::BOOL:
type_size = 1;
// std::cout << "BOOL" << std::endl;
break;
type_size = 1;
// std::cout << "BOOL" << std::endl;
break;
default:
break;
// std::cout << " not support" << std::endl;
break;
// std::cout << " not support" << std::endl;
}
// std::cout << " malloc size: " << memory_size * type_size
// << std::endl;
void *memory = malloc(memory_size * type_size);
// std::cout << " malloc size: " << memory_size * type_size <<
// std::endl;
is.read(static_cast<char *>(memory), memory_size * type_size);
// std::cout << " memory: " << memory << std::endl;
// std::cout << " memory: " << memory << std::endl;
is.close();
} else {
// std::cout << " *not load "
// << " var : " << var.name() << std::endl;
}
}
}
};
// Loads a program from the model directory `dirname`.
//
// Parses `dirname`/__model__ into a ProgramDesc, asks a fresh Scope for a
// variable per VarDesc of every block, and calls LoadVar for each
// persistable LoD tensor using the file `dirname`/<variable name>.
// Returns the Program carrying the parsed description and the scope.
//
// NOTE(review): the result of ParseFromString is not checked here.
template <typename Dtype, Precision P>
const framework::Program<Dtype, P>
Loader<Dtype, P>::Load(const std::string &dirname) {
    std::string model_filename = dirname + "/__model__";
    std::string program_desc_str;
    ReadBinaryFile(model_filename, &program_desc_str);
    framework::proto::ProgramDesc program_desc_proto;
    program_desc_proto.ParseFromString(program_desc_str);

    std::shared_ptr<framework::ProgramDesc> originProgramDesc =
        std::make_shared<framework::ProgramDesc>(program_desc_proto);

    framework::Program<Dtype, P> program;
    program.originProgram = originProgramDesc;

    std::shared_ptr<framework::Scope> scope =
        std::make_shared<framework::Scope>();
    program.scope = scope;

    for (auto block : originProgramDesc->Blocks()) {
        for (int i = 0; i < block->Vars().size(); ++i) {
            std::shared_ptr<framework::VarDesc> var_desc = block->Vars()[i];
            auto var = scope->Var(var_desc->Name());
            if (var_desc->GetType() ==
                framework::proto::VarType::LOD_TENSOR) {
                if (var_desc->Persistable() &&
                    var_desc->GetType() !=
                        framework::proto::VarType::FEED_MINIBATCH &&
                    var_desc->GetType() !=
                        framework::proto::VarType::FETCH_LIST) {
                    framework::LoDTensor *tensor =
                        var->GetMutable<framework::LoDTensor>();
                    // to load
                    LoadVar(tensor, dirname + "/" + var_desc->Name());
                }
            } else {
                // not a LoD tensor: nothing to load
            }
        }
    }

#ifdef PADDLE_MOBILE_DEBUG
    // Debug-only walk over the raw protobuf description. Every diagnostic
    // print below is currently commented out, so nothing is emitted.
    for (int i = 0; i < program_desc_proto.blocks().size(); ++i) {
        const framework::proto::BlockDesc &block =
            program_desc_proto.blocks()[i];
        // std::cout << "block: " << block.idx() << std::endl;
        for (int j = 0; j < block.ops().size(); ++j) {
            const framework::proto::OpDesc &op = block.ops()[j];
            // std::cout << " op: " << op.type() << std::endl;
            for (int m = 0; m < op.inputs_size(); ++m) {
                const framework::proto::OpDesc::Var &var = op.inputs(m);
                // std::cout << " input parameter: " << var.parameter()
                //           << std::endl;
                for (int n = 0; n < var.arguments().size(); ++n) {
                    // std::cout << " argument - " << var.arguments()[n]
                    //           << std::endl;
                }
            }

            for (int y = 0; y < op.outputs_size(); ++y) {
                const framework::proto::OpDesc::Var &var = op.outputs(y);
                // std::cout << " output parameter: " << var.parameter()
                //           << std::endl;
                for (int z = 0; z < var.arguments().size(); ++z) {
                    // std::cout << " argument - " << var.arguments()[z]
                    //           << std::endl;
                }
            }

            for (int x = 0; x < op.attrs().size(); ++x) {
                const framework::proto::OpDesc_Attr &attr = op.attrs()[x];
                // std::cout << " attr name: " << attr.name() << std::endl;
                // std::cout << " attr type: " << attr.type() << std::endl;
                // Each case ends in `break` so that re-enabling a print
                // does not also run the cases below it.
                switch (attr.type()) {
                case framework::proto::AttrType::BOOLEAN:
                    // std::cout << " boolen: " << attr.b() << std::endl;
                    break;
                case framework::proto::AttrType::INT:
                    // std::cout << " int: " << attr.i() << std::endl;
                    break;
                case framework::proto::AttrType::FLOAT:
                    // std::cout << " float: " << attr.f() << std::endl;
                    break;
                case framework::proto::AttrType::STRING:
                    // std::cout << " string: " << attr.s() << std::endl;
                    break;
                case framework::proto::AttrType::BOOLEANS:
                    for (int y = 0; y < attr.bools_size(); ++y) {
                        // std::cout << " bool - " << attr.bools(y)
                        //           << std::endl;
                    }
                    break;
                case framework::proto::AttrType::LONG:
                    // std::cout << " long: " << attr.l() << std::endl;
                    break;
                case framework::proto::AttrType::FLOATS:
                    for (int y = 0; y < attr.floats_size(); ++y) {
                        // std::cout << " float - " << y << ": "
                        //           << attr.floats(y) << std::endl;
                    }
                    break;
                case framework::proto::AttrType::INTS:
                    for (int y = 0; y < attr.ints_size(); ++y) {
                        // std::cout << " int - " << y << ": "
                        //           << attr.ints(y) << std::endl;
                    }
                    break;
                case framework::proto::AttrType::STRINGS:
                    for (int y = 0; y < attr.strings_size(); ++y) {
                        // std::cout << " string - " << y << ": "
                        //           << attr.strings(y) << std::endl;
                    }
                    break;
                default:
                    break;
                }
            }
        }

        for (int k = 0; k < block.vars().size(); ++k) {
            const framework::proto::VarDesc &var = block.vars()[k];
            if (var.type().type() == framework::proto::VarType::LOD_TENSOR) {
                // std::cout << " var name: " << var.name() << std::endl;
                const framework::proto::VarType::TensorDesc &tensor_desc =
                    var.type().lod_tensor().tensor();
                // std::cout << " in var tensor desc dims size "
                //           << tensor_desc.dims().size() << std::endl;
                for (int l = 0; l < tensor_desc.dims().size(); ++l) {
                    // std::cout << " var tensor desc dim " << l
                    //           << " value: " << tensor_desc.dims()[l]
                    //           << std::endl;
                }
            }

            if (!(var.persistable() &&
                  var.type().type() !=
                      framework::proto::VarType::FEED_MINIBATCH &&
                  var.type().type() !=
                      framework::proto::VarType::FETCH_LIST)) {
                // std::cout << " *not load "
                //           << " var : " << var.name() << std::endl;
                continue;
            }

            // Walk the variable's file: version, LoD information, tensor
            // version, tensor desc, raw data.
            // std::cout << " to load " << var.name() << std::endl;
            std::string file_path = dirname + "/" + var.name();
            std::ifstream is(file_path, std::ios::in | std::ios::binary);
            if (!is) {
                continue;
            }
            std::streampos pos = is.tellg(); // save current position
            is.seekg(0, std::ios::end);
            // std::cout << " file length = " << is.tellg() << std::endl;
            is.seekg(pos); // restore saved position

            // 1. version
            uint32_t version = 0;
            is.read(reinterpret_cast<char *>(&version), sizeof(version));
            // std::cout << " version: " << version << std::endl;

            // 2. LoD information
            uint64_t lod_level = 0;
            is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
            // std::cout << " load level: " << lod_level << std::endl;
            for (uint64_t level = 0; level < lod_level; ++level) {
                uint64_t level_bytes = 0;
                is.read(reinterpret_cast<char *>(&level_bytes),
                        sizeof(level_bytes));
                std::vector<size_t> tmp(level_bytes / sizeof(size_t));
                // Bound the read by the buffer; skip any remainder.
                is.read(reinterpret_cast<char *>(tmp.data()),
                        static_cast<std::streamsize>(tmp.size() *
                                                     sizeof(size_t)));
                is.ignore(static_cast<std::streamsize>(level_bytes %
                                                       sizeof(size_t)));
                for (size_t j = 0; j < tmp.size(); ++j) {
                    // std::cout << " lod - " << tmp[j] << std::endl;
                }
            }

            // 3. tensor version (previously read into `version` by mistake)
            uint32_t tensor_version = 0;
            is.read(reinterpret_cast<char *>(&tensor_version),
                    sizeof(tensor_version));
            // std::cout << " tensor_version: " << tensor_version
            //           << std::endl;

            // 4. tensor desc
            int32_t desc_size = 0;
            is.read(reinterpret_cast<char *>(&desc_size), sizeof(desc_size));
            // std::cout << " tensor desc size: " << desc_size << std::endl;
            if (!is || desc_size < 0) {
                continue;
            }
            std::unique_ptr<char[]> buf(new char[desc_size]);
            is.read(reinterpret_cast<char *>(buf.get()), desc_size);
            framework::proto::VarType::TensorDesc desc;
            desc.ParseFromArray(buf.get(), desc_size);
            // std::cout << " desc dims size " << desc.dims().size()
            //           << std::endl;

            int64_t memory_size = 1;
            for (int l = 0; l < desc.dims().size(); ++l) {
                // std::cout << " dim " << l << " value: "
                //           << desc.dims()[l] << std::endl;
                memory_size *= desc.dims()[l];
            }

            int type_size = 0;
            switch (desc.data_type()) {
            case framework::proto::VarType::FP16:
                type_size = 2;
                break;
            case framework::proto::VarType::FP32:
                type_size = 4;
                break;
            case framework::proto::VarType::FP64:
                type_size = 8;
                break;
            case framework::proto::VarType::INT32:
                type_size = 4;
                break;
            case framework::proto::VarType::INT64:
                type_size = 8;
                break;
            case framework::proto::VarType::BOOL:
                type_size = 1;
                break;
            default:
                // std::cout << " not support" << std::endl;
                break;
            }

            // The scratch buffer is owned by a unique_ptr so it is released
            // on every iteration (the malloc'ed buffer was never freed).
            const int64_t byte_count = memory_size * type_size;
            // std::cout << " malloc size: " << byte_count << std::endl;
            if (byte_count > 0) {
                std::unique_ptr<char[]> memory(new char[byte_count]);
                is.read(memory.get(),
                        static_cast<std::streamsize>(byte_count));
                // std::cout << " memory: " << memory.get() << std::endl;
            }
        }
    }
#endif
    return program;
}
return program;
}
template class Loader<CPU, Precision::FP32>;
template class Loader<CPU, Precision::FP32>;
} // namespace paddle_mobile
......@@ -27,13 +27,14 @@ SOFTWARE.
namespace paddle_mobile {
template <typename Dtype, Precision P = Precision::FP32>
class Loader : PaddleMobileObject {
public:
const framework::Program<Dtype, P> Load(const std::string &dirname);
// Loads a serialized program and its parameter files from a model
// directory. `Dtype` selects the device type (the implementation file
// instantiates Loader<CPU, Precision::FP32>); `P` defaults to FP32.
// NOTE(review): `class Loader : PaddleMobileObject` uses the default
// (private) inheritance of `class` - confirm that is intended.
template <typename Dtype, Precision P = Precision::FP32>
class Loader : PaddleMobileObject {
public:
// Loads the program stored under `dirname` (its description is read from
// `dirname`/__model__) and returns it by value.
const framework::Program<Dtype, P> Load(const std::string &dirname);
private:
// Fills `tensor` from the serialized tensor file at `file_path`.
void LoadVar(framework::LoDTensor *tensor, const std::string &file_path);
};
private:
void LoadVar(framework::LoDTensor *tensor,
const std::string &file_path);
};
} // namespace paddle_mobile
......@@ -22,30 +22,30 @@ SOFTWARE.
#include <cstring>
namespace paddle_mobile {
namespace memory {
const int MALLOC_ALIGN = 16;
namespace memory {
const int MALLOC_ALIGN = 16;
// Copies `num` bytes from `src` to `dst` with std::memcpy, so the two
// regions must not overlap. A zero-length copy returns immediately, which
// keeps null or dangling pointers away from memcpy in that case.
void Copy(void *dst, const void *src, size_t num) {
    if (num == 0) {
        return;
    }
    std::memcpy(dst, src, num);
}
// Copies `num` bytes from `src` to `dst` with std::memcpy, so the two
// regions must not overlap. A zero-length copy returns immediately, which
// keeps null or dangling pointers away from memcpy in that case.
void Copy(void *dst, const void *src, size_t num) {
    if (num == 0) {
        return;
    }
    std::memcpy(dst, src, num);
}
// Allocates `size` bytes aligned to MALLOC_ALIGN (which must be a power of
// two for the mask below to work).
//
// The request is padded by sizeof(void *) + MALLOC_ALIGN - 1 bytes, the
// padded address is rounded down to the alignment, and the pointer returned
// by malloc is stored in the slot just before the aligned address, which is
// where Free() reads it back. Returns nullptr if malloc fails or if the
// padded size would not fit in size_t.
void *Alloc(size_t size) {
    const size_t offset = sizeof(void *) + MALLOC_ALIGN - 1;
    // Reject sizes whose padded total would wrap around.
    if (size > static_cast<size_t>(-1) - offset) {
        return nullptr;
    }
    char *raw = static_cast<char *>(malloc(offset + size));
    if (raw == nullptr) {
        return nullptr;
    }
    void *aligned = reinterpret_cast<void *>(
        reinterpret_cast<size_t>(raw + offset) & (~(MALLOC_ALIGN - 1)));
    static_cast<void **>(aligned)[-1] = raw;
    return aligned;
}
// Allocates `size` bytes aligned to MALLOC_ALIGN (which must be a power of
// two for the mask below to work).
//
// The request is padded by sizeof(void *) + MALLOC_ALIGN - 1 bytes, the
// padded address is rounded down to the alignment, and the pointer returned
// by malloc is stored in the slot just before the aligned address, which is
// where Free() reads it back. Returns nullptr if malloc fails or if the
// padded size would not fit in size_t.
void *Alloc(size_t size) {
    const size_t offset = sizeof(void *) + MALLOC_ALIGN - 1;
    // Reject sizes whose padded total would wrap around.
    if (size > static_cast<size_t>(-1) - offset) {
        return nullptr;
    }
    char *raw = static_cast<char *>(malloc(offset + size));
    if (raw == nullptr) {
        return nullptr;
    }
    void *aligned = reinterpret_cast<void *>(
        reinterpret_cast<size_t>(raw + offset) & (~(MALLOC_ALIGN - 1)));
    static_cast<void **>(aligned)[-1] = raw;
    return aligned;
}
// Releases a block in the layout produced by Alloc(): the pointer stored in
// the slot immediately before `ptr` is handed to free(). A null `ptr` is
// ignored.
void Free(void *ptr) {
    if (ptr == nullptr) {
        return;
    }
    void **slots = static_cast<void **>(ptr);
    free(slots[-1]);
}
// Releases a block in the layout produced by Alloc(): the pointer stored in
// the slot immediately before `ptr` is handed to free(). A null `ptr` is
// ignored.
void Free(void *ptr) {
    if (ptr == nullptr) {
        return;
    }
    void **slots = static_cast<void **>(ptr);
    free(slots[-1]);
}
} // namespace memory
} // namespace memory
} // namespace paddle_mobile
......@@ -21,44 +21,44 @@ SOFTWARE.
#include <type_traits>
namespace paddle_mobile {
namespace memory {
namespace memory {
void Copy(void *dst, const void *src, size_t num);
void Copy(void *dst, const void *src, size_t num);
void *Alloc(size_t size);
void *Alloc(size_t size);
void Free(void *ptr);
void Free(void *ptr);
/**
* \brief Free memory block in one place.
*
* \note In some cases, custom deleter is used to
* deallocate the memory automatically for
* std::unique_ptr<T> in tensor.h.
* static_cast
*/
template <typename T> class PODDeleter {
static_assert(std::is_pod<T>::value, "T must be POD");
/**
* \brief Free memory block in one place.
*
* \note In some cases, custom deleter is used to
* deallocate the memory automatically for
* std::unique_ptr<T> in tensor.h.
* static_cast
*/
template <typename T> class PODDeleter {
static_assert(std::is_pod<T>::value, "T must be POD");
public:
explicit PODDeleter(){};
public:
explicit PODDeleter(){};
void operator()(T *ptr) { Free(static_cast<void *>(ptr)); }
};
void operator()(T *ptr) { Free(static_cast<void *>(ptr)); }
};
/**
* \brief Free, in one place, a memory block whose type does not meet POD
*
* \note In some cases, custom deleter is used to
* deallocate the memory automatically for
* std::unique_ptr<T> in tensor.h.
* reinterpret_cast
*/
template <typename T> class PlainDeleter {
public:
explicit PlainDeleter(){};
/**
 * \brief Deleter for objects whose type does not meet the POD requirements;
 *        the storage is released through Free().
 *
 * \note Intended as a custom deleter so that std::unique_ptr<T> in tensor.h
 *       deallocates the memory automatically. The pointer is converted to
 *       void * with reinterpret_cast.
 *       NOTE(review): T's destructor is not invoked here - confirm that
 *       callers do not rely on it.
 */
template <typename T> class PlainDeleter {
  public:
    explicit PlainDeleter() {}

    void operator()(T *ptr) {
        void *raw = reinterpret_cast<void *>(ptr);
        Free(raw);
    }
};
} // namespace memory
void operator()(T *ptr) { Free(reinterpret_cast<void *>(ptr)); }
};
} // namespace memory
} // namespace paddle_mobile
......@@ -22,55 +22,55 @@ SOFTWARE.
#include "framework/operator.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
// Output extent of a convolution along one dimension:
//   (input_size + 2 * padding - (dilation * (filter_size - 1) + 1)) / stride + 1
// using integer division.
int ConvOutputSize(int input_size, int filter_size, int dilation, int padding,
                   int stride) {
    // Extent covered by the filter once dilation gaps are included.
    const int effective_kernel = dilation * (filter_size - 1) + 1;
    return (input_size + 2 * padding - effective_kernel) / stride + 1;
}
// Output extent of a convolution along one dimension:
//   (input_size + 2 * padding - (dilation * (filter_size - 1) + 1)) / stride + 1
// using integer division.
int ConvOutputSize(int input_size, int filter_size, int dilation,
                   int padding, int stride) {
    // Extent covered by the filter once dilation gaps are included.
    const int effective_kernel = dilation * (filter_size - 1) + 1;
    return (input_size + 2 * padding - effective_kernel) / stride + 1;
}
template <typename Dtype, typename T>
void ConvOp<Dtype, T>::InferShape() const {
// std::cout << " begin get dims: " << std::endl;
template <typename Dtype, typename T>
void ConvOp<Dtype, T>::InferShape() const {
// std::cout << " begin get dims: " << std::endl;
auto in_dims = param_.Input()->dims();
auto in_dims = param_.Input()->dims();
// std::cout << " end get in dims: " << std::endl;
// std::cout << " end get in dims: " << std::endl;
// std::cout << " in_dims: " << in_dims << std::endl;
// std::cout << " in_dims: " << in_dims << std::endl;
// std::cout << " begin get Filter " << std::endl;
// std::cout << " begin get Filter " << std::endl;
auto filter_dims = param_.Filter()->dims();
auto filter_dims = param_.Filter()->dims();
// std::cout << " end get Filter " << std::endl;
// std::cout << " end get Filter " << std::endl;
// std::cout << " begin get Attrs " << std::endl;
// std::cout << " begin get Attrs " << std::endl;
const std::vector<int> &strides = param_.Strides();
const std::vector<int> &strides = param_.Strides();
// std::cout << " end get Attrs " << strides[0] << std::endl;
// std::cout << " end get Attrs " << strides[0] << std::endl;
std::vector<int> paddings = param_.Paddings();
std::vector<int> paddings = param_.Paddings();
int groups = param_.Groups();
int groups = param_.Groups();
std::vector<int> dilations = param_.Dilations();
std::vector<int> dilations = param_.Dilations();
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
}
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
param_.Output()->Resize(ddim);
}
framework::DDim ddim = framework::make_ddim(output_shape);
param_.Output()->Resize(ddim);
}
template class ConvOp<CPU, float>;
template class ConvOp<CPU, float>;
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
......@@ -22,32 +22,33 @@ SOFTWARE.
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
using namespace framework;
// Convolution operator. Builds a ConvParam from its inputs, outputs,
// attributes and scope, and runs ConvKernel on it.
template <typename DeviceType, typename T>
class ConvOp : public framework::OperatorWithKernel<DeviceType> {
public:
// Forwards all arguments to OperatorWithKernel and constructs param_ from
// the same inputs/outputs/attrs plus the dereferenced scope.
ConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, *scope) {}
// Also inherits the base-class constructors.
// NOTE(review): an inherited constructor would not initialise param_ -
// confirm only the constructor above is used.
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
// Declared here; defined out of line.
void InferShape() const override;
protected:
// Creates a ConvKernel, computes with param_, then calls ClearVariables().
void RunImpl() const {
operators::ConvKernel<DeviceType, T, ConvParam> kernel;
kernel.Compute(param_);
this->ClearVariables();
}
// Parameters handed to the kernel.
ConvParam param_;
};
} // operators
namespace operators {
using namespace framework;
// Convolution operator. Builds a ConvParam from its inputs, outputs,
// attributes and scope, and runs ConvKernel on it.
template <typename DeviceType, typename T>
class ConvOp : public framework::OperatorWithKernel<DeviceType> {
public:
// Forwards all arguments to OperatorWithKernel and constructs param_ from
// the same inputs/outputs/attrs plus the dereferenced scope.
ConvOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType>(
type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {}
// Also inherits the base-class constructors.
// NOTE(review): an inherited constructor would not initialise param_ -
// confirm only the constructor above is used.
using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;
// Declared here; defined out of line.
void InferShape() const override;
// Creates a ConvKernel, computes with param_, then calls ClearVariables().
// NOTE(review): not marked `override`; confirm whether the base class
// declares a virtual Run().
void Run() const {
operators::ConvKernel<DeviceType, T, ConvParam> kernel;
kernel.Compute(param_);
this->ClearVariables();
}
private:
// Parameters handed to the kernel.
ConvParam param_;
};
} // operators
} // paddle_mobile
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "elementwise_add_op.h"
namespace paddle_mobile {
namespace operators {
// Shape inference: the output is resized to the dims of input X.
template <typename Dtype, typename T>
void ElementwiseAddOp<Dtype, T>::InferShape() const {
    auto input_dims = param_.InputX()->dims();
    param_.Out()->Resize(input_dims);
}
template class ElementwiseAddOp<CPU, float>;
}
}
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "framework/operator.h"
#include "kernel/elementwise_add_kernel.h"
#include "op_param.h"
namespace paddle_mobile {
namespace operators {
using namespace framework;
// Element-wise add operator. Builds an ElementwiseAddParam from its inputs,
// outputs, attributes and scope, and runs ElementwiseAddKernel on it.
template <typename DeviceType, typename T>
class ElementwiseAddOp : public framework::OperatorWithKernel<DeviceType> {
  public:
    // Forwards all arguments to OperatorWithKernel and constructs param_
    // from the same inputs/outputs/attrs plus the dereferenced scope.
    // `attrs` is taken by const reference (as ConvOp does) so the attribute
    // map is not copied on every construction.
    ElementwiseAddOp(const std::string &type, const VariableNameMap &inputs,
                     const VariableNameMap &outputs,
                     const framework::AttributeMap &attrs,
                     std::shared_ptr<framework::Scope> scope)
        : framework::OperatorWithKernel<DeviceType>(type, inputs, outputs,
                                                    attrs, scope),
          param_(inputs, outputs, attrs, *scope) {}

    // Creates an ElementwiseAddKernel and computes with param_.
    void Run() const {
        operators::ElementwiseAddKernel<DeviceType, T, ElementwiseAddParam>
            kernel;
        kernel.Compute(param_);
    }

    // Also inherits the base-class constructors.
    // NOTE(review): an inherited constructor would not initialise param_ -
    // confirm only the constructor above is used.
    using framework::OperatorWithKernel<DeviceType>::OperatorWithKernel;

    // Declared here; defined out of line.
    void InferShape() const override;

  protected:
    // Parameters handed to the kernel.
    ElementwiseAddParam param_;
};
}
}
......@@ -19,135 +19,146 @@ SOFTWARE.
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
// Returns false only when, for every index j of `strides`, the filter extent
// filter_dim[j + 2] is 1, the stride is 1, the padding is 0 and the dilation
// is 1; returns true otherwise. The first two entries of `filter_dim` are
// not inspected.
bool IsExpand(const std::vector<int64_t> &filter_dim,
              const std::vector<int> &strides, const std::vector<int> &paddings,
              const std::vector<int> &dilations) {
    for (size_t axis = 0; axis < strides.size(); ++axis) {
        const bool trivial_axis =
            static_cast<int>(filter_dim[axis + 2]) == 1 &&
            strides[axis] == 1 && paddings[axis] == 0 &&
            dilations[axis] == 1;
        if (!trivial_axis) {
            return true;
        }
    }
    return false;
}
// CPU/float specialization of the convolution kernel.
//
// The convolution is evaluated batch by batch and group by group: each input
// slice is laid out as a column matrix (im2col for 2 spatial dims, vol2col
// for 3, or used as-is when IsExpand() reports that no expansion is needed)
// and multiplied with the matching rows of the flattened filter through
// math::matmul. Results are written into the tensor returned by
// param.Output().
//
// param supplies the input, filter and output tensors plus the groups,
// strides, paddings and dilations attributes.
template <>
void ConvKernel<CPU, float, ConvParam>::Compute(const ConvParam &param) const {
const Tensor *input = param.Input();
LOG(kLOG_DEBUG) << param;
// The filter will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *param.Filter();
Tensor *output = param.Output();
// output->mutable_data<T>(context.GetPlace());
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
// NOTE(review): strides[0] is read unconditionally, so strides must be
// non-empty here.
DLOG << " compute end get Attrs " << strides[0];
const int batch_size = static_cast<int>(input->dims()[0]);
// filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w}
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
// output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w}
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
// use col_shape in the im2col calculation
// col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d,
// o_h, o_w}
// data_dim is the number of spatial dims: the filter rank minus its two
// leading (k_o, k_i) entries.
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation
// size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d *
// o_h * o_w)
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix;
// The column buffer is only allocated when an expansion is needed; otherwise
// col is pointed at the input slice inside the loop below.
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
// input_shape is the input shape with its leading (batch) entry removed.
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
// View the local filter copy as a 2-D matrix: (k_o, everything else).
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
DLOG << " input dim " << input->dims();
DLOG << " output dim " << output->dims();
// Per-batch output viewed as a 2-D matrix: (o_c, everything else).
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
// Number of input / output channels handled by one group.
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
// auto& dev_ctx = context.template
// device_context<DeviceContext>();
for (int i = 0; i < batch_size; i++) {
// presumably Slice(i, i + 1) selects batch element i -- confirm against Tensor
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
// No expansion needed: the input slice itself serves as the column matrix.
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
// The two configured paddings are repeated to form the four-entry padding
// vector passed to im2col.
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// NOTE(review): when is_expand is true and data_dim is neither 2 nor 3, col
// is not filled before the gemm below -- confirm such inputs cannot occur.
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
// presumably computes out_slice = 1.0 * filter_slice * col_matrix +
// 0.0 * out_slice -- confirm against math::matmul
math::matmul<float>(filter_slice, false, col_matrix, false, float(1.0),
&out_slice, float(0.0));
}
}
}
template class ConvKernel<CPU, float, ConvParam>;
} // namespace operators
namespace operators {
// Returns false only when, for every spatial axis j in [0, strides.size()),
// the filter extent filter_dim[j + 2] is 1, the stride is 1, the padding is 0
// and the dilation is 1; returns true otherwise. The caller uses the result
// to decide whether the im2col/vol2col expansion is required.
bool IsExpand(const std::vector<int64_t> &filter_dim,
              const std::vector<int> &strides,
              const std::vector<int> &paddings,
              const std::vector<int> &dilations) {
    const size_t spatial_rank = strides.size();
    for (size_t axis = 0; axis < spatial_rank; ++axis) {
        // filter_dim carries two leading non-spatial entries, hence the +2.
        const bool unit_filter =
            static_cast<int>(filter_dim[axis + 2]) == 1;
        const bool trivial_axis = unit_filter && strides[axis] == 1 &&
                                  paddings[axis] == 0 &&
                                  dilations[axis] == 1;
        if (!trivial_axis) {
            return true;
        }
    }
    return false;
}
// CPU/float specialization of the convolution kernel.
//
// The convolution is evaluated batch by batch and group by group: each input
// slice is laid out as a column matrix (im2col for 2 spatial dims, vol2col
// for 3, or used as-is when IsExpand() reports that no expansion is needed)
// and multiplied with the matching rows of the flattened filter through
// math::matmul. Results are written into the tensor returned by
// param.Output().
//
// param supplies the input, filter and output tensors plus the groups,
// strides, paddings and dilations attributes.
template <>
void ConvKernel<CPU, float, ConvParam>::Compute(
const ConvParam &param) const {
LOG(kLOG_DEBUG) << param;
const Tensor *input = param.Input();
// The filter will be reshaped in the calculations,
// so here use an assignment operation,
// that avoids modifying the variable in the Scope.
Tensor filter = *param.Filter();
Tensor *output = param.Output();
// output->mutable_data<T>(context.GetPlace());
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
// NOTE(review): strides[0] is read unconditionally, so strides must be
// non-empty here.
DLOG << " compute end get Attrs " << strides[0];
const int batch_size = static_cast<int>(input->dims()[0]);
// filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h,
// k_w}
std::vector<int64_t> filter_shape_vec(
framework::vectorize(filter.dims()));
// output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h,
// o_w}
std::vector<int64_t> output_shape_vec(
framework::vectorize(output->dims()));
// use col_shape in the im2col calculation
// col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h,
// k_w, o_d,
// o_h, o_w}
// data_dim is the number of spatial dims: the filter rank minus its two
// leading (k_o, k_i) entries.
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation
// size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w,
// o_d *
// o_h * o_w)
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix;
// The column buffer is only allocated when an expansion is needed; otherwise
// col is pointed at the input slice inside the loop below.
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
// input_shape is the input shape with its leading (batch) entry removed.
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
// View the local filter copy as a 2-D matrix: (k_o, everything else).
framework::DDim filter_matrix_shape = {
filter.dims()[0], filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
// Per-batch output viewed as a 2-D matrix: (o_c, everything else).
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
// Number of input / output channels handled by one group.
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
// auto& dev_ctx = context.template
// device_context<DeviceContext>();
for (int i = 0; i < batch_size; i++) {
// presumably Slice(i, i + 1) selects batch element i -- confirm against Tensor
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch =
output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice =
in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
// No expansion needed: the input slice itself serves as the column matrix.
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
// The two configured paddings are repeated to form the four-entry padding
// vector passed to im2col.
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1],
paddings[0], paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// NOTE(review): when is_expand is true and data_dim is neither 2 nor 3, col
// is not filled before the gemm below -- confirm such inputs cannot occur.
// gemm
Tensor out_slice =
out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice =
filter.Slice(g * out_step, (g + 1) * out_step);
// presumably computes out_slice = 1.0 * filter_slice * col_matrix +
// 0.0 * out_slice -- confirm against math::matmul
math::matmul<float>(filter_slice, false, col_matrix, false,
float(1.0), &out_slice, float(0.0));
}
}
}
template class ConvKernel<CPU, float, ConvParam>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "operators/kernel/elementwise_add_kernel.h"
namespace paddle_mobile {
namespace operators {
/// Binary function object that returns the sum of its two arguments.
template <typename T> struct AddFunctor {
    inline T operator()(T lhs, T rhs) const {
        return lhs + rhs;
    }
};
/// CPU/float specialization: writes InputX + InputY (combined through
/// ElementwiseComputeEx with AddFunctor<float>) into Out, using the axis
/// stored in param.
template <>
void ElementwiseAddKernel<CPU, float, ElementwiseAddParam>::Compute(
    const ElementwiseAddParam &param) const {
    using Adder = AddFunctor<float>;

    const Tensor *lhs = param.InputX();
    const Tensor *rhs = param.InputY();

    Tensor *result = param.Out();
    // Request the float buffer of the output before it is written.
    result->mutable_data<float>();

    const int broadcast_axis = param.Axis();
    ElementwiseComputeEx<Adder, float>(lhs, rhs, broadcast_axis, Adder(),
                                       result);
}
template class ElementwiseAddKernel<CPU, float, ElementwiseAddParam>;
} // namespace operators
} // namespace paddle_mobile
......@@ -25,14 +25,15 @@ SOFTWARE.
#pragma once;
namespace paddle_mobile {
namespace operators {
namespace operators {
using namespace framework;
using namespace framework;
// Convolution kernel interface, parameterized by device tag and element type.
// Compute() is only declared here; specializations are defined elsewhere.
// NOTE(review): template parameter P is not referenced in this declaration --
// the base class and Compute() both name ConvParam directly.
template <typename DeviceType, typename T, typename P>
class ConvKernel : public framework::OpKernelBase<DeviceType, ConvParam> {
public:
// Runs the convolution described by param.
void Compute(const ConvParam &param) const;
};
}
// Convolution kernel interface, parameterized by device tag and element type.
// Compute() is only declared here; specializations are defined elsewhere.
// NOTE(review): template parameter P is not referenced in this declaration --
// the base class and Compute() both name ConvParam directly.
template <typename DeviceType, typename T, typename P>
class ConvKernel
: public framework::OpKernelBase<DeviceType, ConvParam> {
public:
// Runs the convolution described by param.
void Compute(const ConvParam &param) const;
};
}
}
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#pragma once
#include "framework/operator.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using namespace framework;
// Element-wise add kernel interface, parameterized by device tag and element
// type. Compute() is only declared here; specializations are defined
// elsewhere.
// NOTE(review): template parameter P is not referenced in this declaration --
// the base class and Compute() both name ElementwiseAddParam directly.
template <typename DeviceType, typename T, typename P>
class ElementwiseAddKernel
: public framework::OpKernelBase<DeviceType, ElementwiseAddParam> {
public:
// Runs the element-wise add described by param.
void Compute(const ElementwiseAddParam &param) const;
};
}
}
......@@ -19,11 +19,12 @@ SOFTWARE.
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
// template<>
// void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {}
//
// template class ConvKernel<FPGA, float>;
}
// template<>
// void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const
// {}
//
// template class ConvKernel<FPGA, float>;
}
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "transform.h"
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
namespace paddle_mobile {
namespace operators {
/*
* Out = X ⊙ Y
* If Y's shape does not match X's shape, they will be reshaped.
* For example:
* 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
* pre=2, n=3*4, post=5
* x.shape(2, 12, 5) * y.shape(1, 12, 1).broadcast(2, 12, 5)
* 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
* pre=2*3, n=4*5, post=1
* x.shape(6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1)
*/
/// Splits x_dims into three products around the span that y_dims occupies
/// starting at position `axis`.
///
/// @param x_dims shape of the larger operand.
/// @param y_dims shape of the operand being broadcast; asserted to equal
///               x_dims[axis .. axis + y_dims.size()).
/// @param axis   index in x_dims at which y_dims starts.
/// @param pre    out: product of x_dims[0 .. axis).
/// @param n      out: product of all entries of y_dims.
/// @param post   out: product of x_dims[axis + y_dims.size() .. end).
inline void get_mid_dims(const framework::DDim &x_dims,
                         const framework::DDim &y_dims, const int axis,
                         int *pre, int *n, int *post) {
    // Entries of x_dims in front of the matched span.
    int leading = 1;
    for (int d = 0; d < axis; ++d) {
        leading *= x_dims[d];
    }

    // The matched span itself; a mismatch is a broadcast dimension mismatch.
    int matched = 1;
    for (int d = 0; d < y_dims.size(); ++d) {
        assert(x_dims[d + axis] == y_dims[d]);
        matched *= y_dims[d];
    }

    // Entries of x_dims behind the matched span.
    int trailing = 1;
    for (int d = axis + y_dims.size(); d < x_dims.size(); ++d) {
        trailing *= x_dims[d];
    }

    *pre = leading;
    *n = matched;
    *post = trailing;
}
/// Drops every trailing entry equal to 1 from *dims, in place.
/// Example: (4, 20, 1, 1) becomes (4, 20). *dims is left untouched when its
/// last entry is not 1.
inline void trim_trailing_singular_dims(framework::DDim *dims) {
    // Walk backwards over the trailing 1s to find how many entries to keep.
    auto kept = dims->size();
    while (kept != 0 && (*dims)[kept - 1] == 1) {
        --kept;
    }

    if (kept == dims->size()) {
        return;
    }

    auto shape = framework::vectorize(*dims);
    shape.resize(kept);
    *dims = framework::make_ddim(shape);
}
/// Read-only cursor over a buffer of `n` values that wraps back to the first
/// value after the last one, so the same row can be replayed repeatedly.
///
/// operator* is const-qualified: operator== and operator!= dereference their
/// const right-hand side, which cannot compile against a non-const operator*.
template <typename T> class RowwiseTransformIterator {
  public:
    /// @param ptr first element of the row to cycle through.
    /// @param n   number of elements in the row.
    RowwiseTransformIterator(const T *ptr, int n)
        : ptr_(ptr), i_(0), n_(n) {}

    /// Advances by one element, wrapping to index 0 after index n - 1.
    RowwiseTransformIterator<T> &operator++() {
        ++i_;
        if (UNLIKELY(i_ == n_)) {
            i_ = 0;
        }
        return *this;
    }

    /// Two iterators are equal when they currently point at the same address.
    bool operator==(const RowwiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) == &(*rhs);
    }

    bool operator!=(const RowwiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) != &(*rhs);
    }

    /// Returns the element at the current position.
    const T &operator*() const { return ptr_[i_]; }

  private:
    const T *ptr_;
    int64_t i_; // current index, always in [0, n_) for n_ > 0
    int64_t n_; // row length
};
/// Read-only cursor over a buffer of `n` values in which every value is
/// yielded `post` times in a row before the cursor moves on; after the last
/// value it wraps back to the first.
///
/// Example: for (4,20,2) + (20,), the (20,) operand is treated like (20,1):
/// n = 20 and post = 2, so each of the 20 values is paired with 2 consecutive
/// elements of the (4,20,2) operand.
///
/// operator* is const-qualified: operator== and operator!= dereference their
/// const right-hand side, which cannot compile against a non-const operator*.
template <typename T> class MidWiseTransformIterator {
  public:
    /// @param ptr  first element of the values to cycle through.
    /// @param n    number of values.
    /// @param post how many consecutive times each value is yielded.
    MidWiseTransformIterator(const T *ptr, int n, int post)
        : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}

    /// Advances the repeat counter; once a value has been yielded `post`
    /// times, moves to the next value, wrapping to index 0 after index n - 1.
    MidWiseTransformIterator<T> &operator++() {
        ++j_;
        if (UNLIKELY(j_ == post_)) {
            ++i_;
            j_ = 0;
            if (UNLIKELY(i_ == n_)) {
                i_ = 0;
            }
        }
        return *this;
    }

    /// Two iterators are equal when they currently point at the same address;
    /// the repeat counter is not part of the comparison.
    bool operator==(const MidWiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) == &(*rhs);
    }

    bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
        return (ptr_ + i_) != &(*rhs);
    }

    /// Returns the value at the current position.
    const T &operator*() const { return ptr_[i_]; }

  private:
    const T *ptr_;
    int64_t i_;    // index of the current value
    int64_t j_;    // how many times the current value has been yielded
    int64_t n_;    // number of values
    int64_t post_; // repeats per value
};
// Binds two input tensors, one output tensor and a binary functor, and
// applies the functor over the whole of x through math::Transform.
//
// Functor: binary function object applied to each (x, y) pair.
// T:       element type read from x and y.
// OutType: element type written to z (defaults to T).
template <typename Functor, typename T, typename OutType = T>
class TransformFunctor {
public:
// Captures the data pointers of x and y, the mutable data pointer of z, and
// the element count of x; func is stored by value.
TransformFunctor(const framework::Tensor *x,
const framework::Tensor *y, framework::Tensor *z,
Functor func)
: x_(x->data<T>()), y_(y->data<T>()),
z_(z->mutable_data<OutType>()), nx_(x->numel()), func_(func) {
}
// Same-shape case: y is read linearly alongside x.
inline void Run() const {
math::Transform trans;
// Apply func(x_, y_) element-wise and store the results in z_.
trans(x_, x_ + nx_, y_, z_, func_);
}
// y is read through a RowwiseTransformIterator over n values.
// NOTE(review): pre is accepted but not used in this body.
inline void RunRowWise(int n, int pre) const {
math::Transform trans;
trans(x_, x_ + nx_, RowwiseTransformIterator<T>(y_, n), z_,
func_);
}
// y is read through a MidWiseTransformIterator over n values, each repeated
// post times.
// NOTE(review): pre is accepted but not used in this body.
inline void RunMidWise(int n, int pre, int post) const {
math::Transform trans;
trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post),
z_, func_);
}
private:
const T *x_;
const T *y_;
OutType *z_;
// Number of elements in x; bounds the range handed to math::Transform.
int64_t nx_;
Functor func_;
};
/// Applies `func` element-wise to x and y and writes the result to z.
///
/// When x and y have identical dims the functor is applied directly.
/// Otherwise y (with trailing 1-sized dims removed) is matched against x
/// starting at `axis`, and is replayed row-wise (post == 1) or mid-wise
/// (post != 1) while x is traversed.
///
/// @param x    first operand.
/// @param y    second operand.
/// @param axis position in x's dims where y's dims start; -1 aligns y with
///             the trailing dims of x.
/// @param func binary functor applied to each element pair.
/// @param z    output tensor.
template <typename Functor, typename T, typename OutType = T>
void ElementwiseComputeEx(const framework::Tensor *x,
                          const framework::Tensor *y, int axis,
                          Functor func, framework::Tensor *z) {
    TransformFunctor<Functor, T, OutType> apply(x, y, z, func);

    auto lhs_dims = x->dims();
    auto rhs_dims = y->dims();
    // The rank of x is expected to be >= the rank of y (not enforced here).

    // Identical shapes need no broadcasting.
    if (lhs_dims == rhs_dims) {
        apply.Run();
        return;
    }

    // axis == -1 aligns y with the last dimensions of x.
    if (axis == -1) {
        axis = lhs_dims.size() - rhs_dims.size();
    }
    // axis is expected to lie in [0, rank of x) (not enforced here).

    trim_trailing_singular_dims(&rhs_dims);
    if (rhs_dims.size() == 0) {
        axis = lhs_dims.size();
    }

    int pre = 0;
    int n = 0;
    int post = 0;
    get_mid_dims(lhs_dims, rhs_dims, axis, &pre, &n, &post);

    if (post != 1) {
        apply.RunMidWise(n, pre, post);
    } else {
        apply.RunRowWise(n, pre);
    }
}
} // namespace operators
} // namespace paddle_mobile
......@@ -16,275 +16,349 @@ limitations under the License. */
#include "common/types.h"
namespace paddle_mobile {
namespace operators {
namespace math {
namespace operators {
namespace math {
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height, output_width]
*/
template <class T> class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
public:
void operator()(const framework::Tensor &im, const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding, framework::Tensor *col) {
// PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(col->dims().size() == 5);
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height,
* output_width]
*/
template <class T> class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
public:
void operator()(const framework::Tensor &im,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding,
framework::Tensor *col) {
// PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(col->dims().size() == 5);
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int col_height = col->dims()[3];
int col_width = col->dims()[4];
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
int filter_height = col->dims()[1];
int filter_width = col->dims()[2];
int col_height = col->dims()[3];
int col_width = col->dims()[4];
// PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
// ((dilation[0] * (filter_height - 1) + 1))) /
// stride[0] +
// 1,
// col_height,
// "Output_height and padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
// ((dilation[1] * (filter_width - 1) + 1))) /
// stride[1] +
// 1,
// col_width,
// "Output_height and padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
// -
// ((dilation[0] * (filter_height - 1)
// + 1))) /
// stride[0] +
// 1,
// col_height,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
// -
// ((dilation[1] * (filter_width - 1)
// + 1))) /
// stride[1] +
// 1,
// col_width,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
int channels_col = im_channels * filter_height * filter_width;
int channels_col =
im_channels * filter_height * filter_width;
const T *im_data = im.data<T>();
T *col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
int col_idx = (c * col_height + h) * col_width + w;
int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
const T *im_data = im.data<T>();
T *col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] +
h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] +
w_offset * dilation[1];
int col_idx =
(c * col_height + h) * col_width + w;
int im_idx =
(im_row_idx + c_im * im_height) * im_width +
im_col_idx;
col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
im_col_idx < 0 || im_col_idx >= im_width)
? static_cast<T>(0)
: im_data[im_idx];
}
}
}
}
};
col_data[col_idx] =
(im_row_idx < 0 ||
im_row_idx >= im_height ||
im_col_idx < 0 || im_col_idx >= im_width)
? static_cast<T>(0)
: im_data[im_idx];
}
}
}
}
};
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height, output_width]
*/
template <class T> class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
public:
void operator()(const framework::Tensor &col,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding, framework::Tensor *im) {
// PADDLE_ENFORCE(im->dims().size() == 3);
// PADDLE_ENFORCE(col.dims().size() == 5);
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
int filter_height = col.dims()[1];
int filter_width = col.dims()[2];
int col_height = col.dims()[3];
int col_width = col.dims()[4];
/*
* im = [input_channels, input_height, input_width]
* col =
* [input_channels, filter_height, filter_width, output_height,
* output_width]
*/
template <class T> class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
public:
void operator()(const framework::Tensor &col,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding,
framework::Tensor *im) {
// PADDLE_ENFORCE(im->dims().size() == 3);
// PADDLE_ENFORCE(col.dims().size() == 5);
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
int filter_height = col.dims()[1];
int filter_width = col.dims()[2];
int col_height = col.dims()[3];
int col_width = col.dims()[4];
// PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] -
// ((dilation[0] * (filter_height - 1) + 1))) /
// stride[0] +
// 1,
// col_height,
// "Output_height and padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] -
// ((dilation[1] * (filter_width - 1) + 1))) /
// stride[1] +
// 1,
// col_width,
// "Output_height and padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
// -
// ((dilation[0] * (filter_height - 1)
// + 1))) /
// stride[0] +
// 1,
// col_height,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
// -
// ((dilation[1] * (filter_width - 1)
// + 1))) /
// stride[1] +
// 1,
// col_width,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
int channels_col = im_channels * filter_height * filter_width;
int channels_col =
im_channels * filter_height * filter_width;
T *im_data = im->data<T>();
const T *col_data = col.data<T>();
T *im_data = im->data<T>();
const T *col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
(im_col_idx) >= 0 && (im_col_idx) < im_width) {
im_data[(im_row_idx + c_im * im_height) * im_width + im_col_idx] +=
col_data[(c * col_height + h) * col_width + w];
}
}
}
}
}
};
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
int im_row_idx = h * stride[0] - padding[0] +
h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] +
w_offset * dilation[1];
if ((im_row_idx) >= 0 &&
(im_row_idx) < im_height &&
(im_col_idx) >= 0 &&
(im_col_idx) < im_width) {
im_data[(im_row_idx + c_im * im_height) *
im_width +
im_col_idx] +=
col_data[(c * col_height + h) *
col_width +
w];
}
}
}
}
}
};
template class Im2ColFunctor<ColFormat::kCFO, CPU, float>;
template class Im2ColFunctor<ColFormat::kCFO, CPU, double>;
template class Col2ImFunctor<ColFormat::kCFO, CPU, float>;
template class Col2ImFunctor<ColFormat::kCFO, CPU, double>;
template class Im2ColFunctor<ColFormat::kCFO, CPU, float>;
template class Im2ColFunctor<ColFormat::kCFO, CPU, double>;
template class Col2ImFunctor<ColFormat::kCFO, CPU, float>;
template class Col2ImFunctor<ColFormat::kCFO, CPU, double>;
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height, filter_width]
*/
template <class T> class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
public:
void operator()(const framework::Tensor &im, const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding, framework::Tensor *col) {
// PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(col->dims().size() == 5);
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
int filter_height = col->dims()[3];
int filter_width = col->dims()[4];
int col_height = col->dims()[0];
int col_width = col->dims()[1];
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height,
* filter_width]
*/
template <class T> class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
public:
void operator()(const framework::Tensor &im,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding,
framework::Tensor *col) {
// PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(col->dims().size() == 5);
int im_channels = im.dims()[0];
int im_height = im.dims()[1];
int im_width = im.dims()[2];
int filter_height = col->dims()[3];
int filter_width = col->dims()[4];
int col_height = col->dims()[0];
int col_width = col->dims()[1];
// PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] - filter_height) / stride[0]
// + 1, col_height, "Output_height and padding(padding_up,
// padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] - filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left, padding_right)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] -
// filter_height) / stride[0]
// + 1, col_height, "Output_height and
// padding(padding_up,
// padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] -
// filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left,
// padding_right)
// are " "inconsistent.");
const T *im_data = im.data<T>();
T *col_data = col->data<T>();
const T *im_data = im.data<T>();
T *col_data = col->data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
for (int col_row_idx = 0; col_row_idx < col_height;
++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width;
++col_col_idx) {
for (int channel = 0; channel < im_channels;
++channel) {
for (int filter_row_idx = 0;
filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] +
filter_row_idx - padding[0];
for (int filter_col_idx = 0;
filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] +
filter_col_idx - padding[1];
int col_offset =
((((col_row_idx)*col_width + col_col_idx) * im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
int col_offset =
((((col_row_idx)*col_width +
col_col_idx) *
im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
int im_offset = (channel * im_height + im_row_offset) * im_width +
im_col_offset;
col_data[col_offset] =
(im_row_offset < 0 || im_row_offset >= im_height ||
im_col_offset < 0 || im_col_offset >= im_width)
? static_cast<T>(0)
: im_data[im_offset];
}
}
}
}
}
}
};
int im_offset = (channel * im_height +
im_row_offset) *
im_width +
im_col_offset;
col_data[col_offset] =
(im_row_offset < 0 ||
im_row_offset >= im_height ||
im_col_offset < 0 ||
im_col_offset >= im_width)
? static_cast<T>(0)
: im_data[im_offset];
}
}
}
}
}
}
};
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height, filter_width]
*/
template <class T> class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
public:
void operator()(const framework::Tensor &col,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding, framework::Tensor *im) {
// PADDLE_ENFORCE(im->dims().size() == 3);
// PADDLE_ENFORCE(col.dims().size() == 5);
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
int filter_height = col.dims()[3];
int filter_width = col.dims()[4];
int col_height = col.dims()[0];
int col_width = col.dims()[1];
/*
* im = [input_channels, input_height, input_width]
* col =
* [output_height, output_width, input_channels, filter_height,
* filter_width]
*/
template <class T> class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
public:
void operator()(const framework::Tensor &col,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding,
framework::Tensor *im) {
// PADDLE_ENFORCE(im->dims().size() == 3);
// PADDLE_ENFORCE(col.dims().size() == 5);
int im_channels = im->dims()[0];
int im_height = im->dims()[1];
int im_width = im->dims()[2];
int filter_height = col.dims()[3];
int filter_width = col.dims()[4];
int col_height = col.dims()[0];
int col_width = col.dims()[1];
// PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] - filter_height) / stride[0]
// + 1, col_height, "Output_height and padding(padding_up,
// padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] - filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left, padding_right)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] -
// filter_height) / stride[0]
// + 1, col_height, "Output_height and
// padding(padding_up,
// padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] -
// filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left,
// padding_right)
// are " "inconsistent.");
T *im_data = im->data<T>();
const T *col_data = col.data<T>();
T *im_data = im->data<T>();
const T *col_data = col.data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
for (int col_row_idx = 0; col_row_idx < col_height;
++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width;
++col_col_idx) {
for (int channel = 0; channel < im_channels;
++channel) {
for (int filter_row_idx = 0;
filter_row_idx < filter_height;
++filter_row_idx) {
int im_row_offset =
col_row_idx * stride[0] +
filter_row_idx - padding[0];
for (int filter_col_idx = 0;
filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] +
filter_col_idx - padding[1];
int col_offset =
(((col_row_idx * col_width + col_col_idx) * im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
int col_offset =
(((col_row_idx * col_width +
col_col_idx) *
im_channels +
channel) *
filter_height +
filter_row_idx) *
filter_width +
filter_col_idx;
if (im_row_offset >= 0 && im_row_offset < im_height &&
im_col_offset >= 0 && im_col_offset < im_width) {
int im_offset =
(channel * im_height + im_row_offset) * im_width +
im_col_offset;
im_data[im_offset] += col_data[col_offset];
}
}
}
}
}
}
}
};
if (im_row_offset >= 0 &&
im_row_offset < im_height &&
im_col_offset >= 0 &&
im_col_offset < im_width) {
int im_offset =
(channel * im_height +
im_row_offset) *
im_width +
im_col_offset;
im_data[im_offset] +=
col_data[col_offset];
}
}
}
}
}
}
}
};
// Explicit instantiations of the kOCF-format CPU functors for float and
// double element types.
template class Im2ColFunctor<ColFormat::kOCF, CPU, float>;
template class Im2ColFunctor<ColFormat::kOCF, CPU, double>;
template class Col2ImFunctor<ColFormat::kOCF, CPU, float>;
template class Col2ImFunctor<ColFormat::kOCF, CPU, double>;
template class Im2ColFunctor<ColFormat::kOCF, CPU, float>;
template class Im2ColFunctor<ColFormat::kOCF, CPU, double>;
template class Col2ImFunctor<ColFormat::kOCF, CPU, float>;
template class Col2ImFunctor<ColFormat::kOCF, CPU, double>;
} // namespace math
} // namespace operators
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -17,83 +17,96 @@ limitations under the License. */
#include "framework/tensor.h"
namespace paddle_mobile {
namespace operators {
namespace math {
namespace operators {
namespace math {
// Storage layout of the column data used by Im2ColFunctor and Col2ImFunctor.
//   kCFO: [input_channels, filter_height, filter_width,
//          output_height, output_width]
//   kOCF: [output_height, output_width, input_channels,
//          filter_height, filter_width]
enum class ColFormat { kCFO = 0, kOCF = 1 };
// Storage layout of the column data used by Im2ColFunctor and Col2ImFunctor.
//   kCFO: [input_channels, filter_height, filter_width,
//          output_height, output_width]
//   kOCF: [output_height, output_width, input_channels,
//          filter_height, filter_width]
enum class ColFormat { kCFO = 0, kOCF = 1 };
/*
* \brief Converts the image data of three dimensions(CHW) into a colData of
* five dimensions in the Im2ColFunctor calculation,
* And in the Col2ImFunctor calculation, it is reversed.
*
* \param imData Image data.
* \param imShape The shape of imData,
* [input_channels, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 2-dimension [dilation_height, dilation_width].
*
* \param strides stride data.
* \param 2-dimension [stride_height, stride_width].
*
* \param paddings padding data.
* \param 4-dimension [up_pad, left_pad, down_pad, right_pad].
*
* If the template argument Format is kCFO, the shape of colData is:
* [input_channels, filter_height, filter_width, output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
* The shape of convolution matrix is [height, width], where the height is equal
* input_channels * filter_height * filter_width, and the width is equal
* output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_height,
* filter_width, ======> [height, width]
* output_height,
* output_width]
*
* If the template argument Format is kOCF, the shape of colData is:
* [output_height, output_width, input_channels, filter_height, filter_width]
* So, it is easy to reshape into a sequence matrix for rnn calculation.
* The shape of sequence matrix is [seq_length, step_size], where the seq_length
* is equal output_height * output_width, and the step_size is equal
* input_channels * filter_height * filter_width.
*
* Reshape:
* shape of colData shape of sequence matrix
* [output_height,
* output_width,
* input_channels, ======> [seqLength, stepSize]
* filter_height,
* filter_width]
*
* \note The caller needs to ensure that imShape.inputChannels is equal to
* colShape.inputChannels.
*/
// Primary template of the image-to-column functor. Only the call operator
// is declared here; no body is provided in this declaration.
//   im       - input image tensor (read-only).
//   dilation - [dilation_height, dilation_width].
//   stride   - [stride_height, stride_width].
//   padding  - [up_pad, left_pad, down_pad, right_pad].
//   col      - output column tensor, laid out according to Format.
template <ColFormat Format, typename DeviceType, typename T>
class Im2ColFunctor {
public:
void operator()(const framework::Tensor &im, const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding, framework::Tensor *col);
};
/*
* \brief Converts the image data of three dimensions(CHW) into a
* colData of
* five dimensions in the Im2ColFunctor calculation,
* And in the Col2ImFunctor calculation, it is reversed.
*
* \param imData Image data.
* \param imShape The shape of imData,
* [input_channels, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 2-dimension [dilation_height, dilation_width].
*
* \param strides stride data.
* \param 2-dimension [stride_height, stride_width].
*
* \param paddings padding data.
* \param 4-dimension [up_pad, left_pad, down_pad, right_pad].
*
* If the template argument Format is kCFO, the shape of colData is:
* [input_channels, filter_height, filter_width, output_height,
* output_width].
* This makes it easy to reshape colData into a convolution matrix for
* convolution based on matrix multiplication.
* The shape of the convolution matrix is [height, width], where height
* equals input_channels * filter_height * filter_width and width equals
* output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_height,
* filter_width, ======> [height, width]
* output_height,
* output_width]
*
* If the template argument Format is kOCF, the shape of colData is:
* [output_height, output_width, input_channels, filter_height,
* filter_width].
* This makes it easy to reshape colData into a sequence matrix for RNN
* calculation.
* The shape of the sequence matrix is [seq_length, step_size], where
* seq_length equals output_height * output_width and step_size equals
* input_channels * filter_height * filter_width.
*
* Reshape:
* shape of colData shape of sequence matrix
* [output_height,
* output_width,
* input_channels, ======> [seqLength, stepSize]
* filter_height,
* filter_width]
*
* \note The caller needs to ensure that imShape.inputChannels is
* equal to
* colShape.inputChannels.
*/
// Primary template of the image-to-column functor. Only the call operator
// is declared here; no body is provided in this declaration.
//   im       - input image tensor (read-only).
//   dilation - [dilation_height, dilation_width].
//   stride   - [stride_height, stride_width].
//   padding  - [up_pad, left_pad, down_pad, right_pad].
//   col      - output column tensor, laid out according to Format.
template <ColFormat Format, typename DeviceType, typename T>
class Im2ColFunctor {
public:
void operator()(const framework::Tensor &im,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding,
framework::Tensor *col);
};
// Primary template of the column-to-image functor, the reverse direction of
// Im2ColFunctor. Only the call operator is declared here.
//   col      - input column tensor (read-only), laid out according to Format.
//   dilation - [dilation_height, dilation_width].
//   stride   - [stride_height, stride_width].
//   padding  - [up_pad, left_pad, down_pad, right_pad].
//   im       - output image tensor.
template <ColFormat Format, typename DeviceType, typename T>
class Col2ImFunctor {
public:
void operator()(const framework::Tensor &col,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding, framework::Tensor *im);
};
// Primary template of the column-to-image functor, the reverse direction of
// Im2ColFunctor. Only the call operator is declared here.
//   col      - input column tensor (read-only), laid out according to Format.
//   dilation - [dilation_height, dilation_width].
//   stride   - [stride_height, stride_width].
//   padding  - [up_pad, left_pad, down_pad, right_pad].
//   im       - output image tensor.
template <ColFormat Format, typename DeviceType, typename T>
class Col2ImFunctor {
public:
void operator()(const framework::Tensor &col,
const std::vector<int> &dilation,
const std::vector<int> &stride,
const std::vector<int> &padding,
framework::Tensor *im);
};
} // namespace math
} // namespace operators
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -15,106 +15,125 @@ limitations under the License. */
#include "math_function.h"
namespace paddle_mobile {
namespace operators {
namespace math {
// Single-precision GEMM taking CBLAS transpose flags.
// Forwards to cblas_sgemm in row-major order. The row strides of A, B and C
// are not supplied by the caller; they are derived from M, N, K and the
// transpose flags.
template <>
void gemm<float>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                 const int M, const int N, const int K, const float alpha,
                 const float *A, const float *B, const float beta, float *C) {
    // Row stride of A: K when transA is CblasNoTrans, otherwise M.
    const int stride_a = (transA != CblasNoTrans) ? M : K;
    // Row stride of B: N when transB is CblasNoTrans, otherwise K.
    const int stride_b = (transB != CblasNoTrans) ? K : N;
    // Row stride of C is always N.
    const int stride_c = N;
    cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, stride_a,
                B, stride_b, beta, C, stride_c);
}
// Double-precision GEMM taking CBLAS transpose flags.
// Forwards to cblas_dgemm in row-major order. The row strides of A, B and C
// are not supplied by the caller; they are derived from M, N, K and the
// transpose flags.
template <>
void gemm<double>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                  const int M, const int N, const int K, const double alpha,
                  const double *A, const double *B, const double beta,
                  double *C) {
    // Row stride of A: K when transA is CblasNoTrans, otherwise M.
    const int stride_a = (transA != CblasNoTrans) ? M : K;
    // Row stride of B: N when transB is CblasNoTrans, otherwise K.
    const int stride_b = (transB != CblasNoTrans) ? K : N;
    // Row stride of C is always N.
    const int stride_c = N;
    cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, stride_a,
                B, stride_b, beta, C, stride_c);
}
// Single-precision GEMM taking boolean transpose flags and caller-supplied
// leading dimensions. Forwards to cblas_sgemm in row-major order.
template <>
void gemm<float>(const bool transA, const bool transB, const int M, const int N,
                 const int K, const float alpha, const float *A, const int lda,
                 const float *B, const int ldb, const float beta, float *C,
                 const int ldc) {
    // Translate the boolean flags into the CBLAS transpose enumerators.
    const CBLAS_TRANSPOSE op_a = transA ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE op_b = transB ? CblasTrans : CblasNoTrans;
    cblas_sgemm(CblasRowMajor, op_a, op_b, M, N, K, alpha, A, lda, B, ldb,
                beta, C, ldc);
}
// Double-precision GEMM taking boolean transpose flags and caller-supplied
// leading dimensions. Forwards to cblas_dgemm in row-major order.
template <>
void gemm<double>(const bool transA, const bool transB, const int M,
                  const int N, const int K, const double alpha, const double *A,
                  const int lda, const double *B, const int ldb,
                  const double beta, double *C, const int ldc) {
    // Translate the boolean flags into the CBLAS transpose enumerators.
    const CBLAS_TRANSPOSE op_a = transA ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE op_b = transB ? CblasTrans : CblasNoTrans;
    cblas_dgemm(CblasRowMajor, op_a, op_b, M, N, K, alpha, A, lda, B, ldb,
                beta, C, ldc);
}
// Single-precision matrix multiply on tensors, delegating to gemm<float>.
// M and N are taken from the dimensions of matrix_out; K is taken from
// matrix_a (its second dimension, or its first when trans_a is set).
template <>
void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
                   const framework::Tensor &matrix_b, bool trans_b, float alpha,
                   framework::Tensor *matrix_out, float beta) {
    auto dim_a = matrix_a.dims();
    auto dim_b = matrix_b.dims();  // only referenced by the disabled checks
    auto dim_out = matrix_out->dims();

    // Disabled precondition checks, kept for reference:
    // PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
    //                    dim_out.size() == 2,
    //                "The input and output of matmul be matrix");
    //
    // PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
    //                    platform::is_cpu_place(matrix_b.place()) &&
    //                    platform::is_cpu_place(matrix_out->place()),
    //                "Matrix must all be in CPUPlace");

    const int M = dim_out[0];
    const int N = dim_out[1];
    const int K = trans_a ? dim_a[0] : dim_a[1];

    const CBLAS_TRANSPOSE transA = trans_a ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE transB = trans_b ? CblasTrans : CblasNoTrans;

    const float *a_data = matrix_a.data<float>();
    const float *b_data = matrix_b.data<float>();
    float *out_data = matrix_out->data<float>();
    gemm<float>(transA, transB, M, N, K, alpha, a_data, b_data, beta,
                out_data);
}
// Double-precision matrix multiply on tensors, delegating to gemm<double>.
// M and N are taken from the dimensions of matrix_out; K is taken from
// matrix_a (its second dimension, or its first when trans_a is set).
template <>
void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
                    const framework::Tensor &matrix_b, bool trans_b,
                    double alpha, framework::Tensor *matrix_out, double beta) {
    auto dim_a = matrix_a.dims();
    auto dim_b = matrix_b.dims();  // only referenced by the disabled checks
    auto dim_out = matrix_out->dims();

    // Disabled precondition checks, kept for reference:
    // PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
    //                    dim_out.size() == 2,
    //                "The input and output of matmul be matrix");
    //
    // PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
    //                    platform::is_cpu_place(matrix_b.place()) &&
    //                    platform::is_cpu_place(matrix_out->place()),
    //                "Matrix must all be in CPUPlace");

    const int M = dim_out[0];
    const int N = dim_out[1];
    const int K = trans_a ? dim_a[0] : dim_a[1];

    const CBLAS_TRANSPOSE transA = trans_a ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE transB = trans_b ? CblasTrans : CblasNoTrans;

    const double *a_data = matrix_a.data<double>();
    const double *b_data = matrix_b.data<double>();
    double *out_data = matrix_out->data<double>();
    gemm<double>(transA, transB, M, N, K, alpha, a_data, b_data, beta,
                 out_data);
}
} // namespace math
} // namespace operators
namespace operators {
namespace math {
// Single-precision GEMM taking CBLAS transpose flags.
// Forwards to cblas_sgemm in row-major order. The row strides of A, B and C
// are not supplied by the caller; they are derived from M, N, K and the
// transpose flags.
template <>
void gemm<float>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                 const int M, const int N, const int K, const float alpha,
                 const float *A, const float *B, const float beta, float *C) {
    // Row stride of A: K when transA is CblasNoTrans, otherwise M.
    const int stride_a = (transA != CblasNoTrans) ? M : K;
    // Row stride of B: N when transB is CblasNoTrans, otherwise K.
    const int stride_b = (transB != CblasNoTrans) ? K : N;
    // Row stride of C is always N.
    const int stride_c = N;
    cblas_sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, stride_a,
                B, stride_b, beta, C, stride_c);
}
// Double-precision GEMM taking CBLAS transpose flags.
// Forwards to cblas_dgemm in row-major order. The row strides of A, B and C
// are not supplied by the caller; they are derived from M, N, K and the
// transpose flags.
template <>
void gemm<double>(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
                  const int M, const int N, const int K, const double alpha,
                  const double *A, const double *B, const double beta,
                  double *C) {
    // Row stride of A: K when transA is CblasNoTrans, otherwise M.
    const int stride_a = (transA != CblasNoTrans) ? M : K;
    // Row stride of B: N when transB is CblasNoTrans, otherwise K.
    const int stride_b = (transB != CblasNoTrans) ? K : N;
    // Row stride of C is always N.
    const int stride_c = N;
    cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, A, stride_a,
                B, stride_b, beta, C, stride_c);
}
// Single-precision GEMM taking boolean transpose flags and caller-supplied
// leading dimensions. Forwards to cblas_sgemm in row-major order.
template <>
void gemm<float>(const bool transA, const bool transB, const int M, const int N,
                 const int K, const float alpha, const float *A, const int lda,
                 const float *B, const int ldb, const float beta, float *C,
                 const int ldc) {
    // Translate the boolean flags into the CBLAS transpose enumerators.
    const CBLAS_TRANSPOSE op_a = transA ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE op_b = transB ? CblasTrans : CblasNoTrans;
    cblas_sgemm(CblasRowMajor, op_a, op_b, M, N, K, alpha, A, lda, B, ldb,
                beta, C, ldc);
}
// Double-precision GEMM taking boolean transpose flags and caller-supplied
// leading dimensions. Forwards to cblas_dgemm in row-major order.
template <>
void gemm<double>(const bool transA, const bool transB, const int M,
                  const int N, const int K, const double alpha, const double *A,
                  const int lda, const double *B, const int ldb,
                  const double beta, double *C, const int ldc) {
    // Translate the boolean flags into the CBLAS transpose enumerators.
    const CBLAS_TRANSPOSE op_a = transA ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE op_b = transB ? CblasTrans : CblasNoTrans;
    cblas_dgemm(CblasRowMajor, op_a, op_b, M, N, K, alpha, A, lda, B, ldb,
                beta, C, ldc);
}
// Single-precision matrix multiply on tensors, delegating to gemm<float>.
// M and N are taken from the dimensions of matrix_out; K is taken from
// matrix_a (its second dimension, or its first when trans_a is set).
template <>
void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
                   const framework::Tensor &matrix_b, bool trans_b, float alpha,
                   framework::Tensor *matrix_out, float beta) {
    auto dim_a = matrix_a.dims();
    auto dim_b = matrix_b.dims();  // only referenced by the disabled checks
    auto dim_out = matrix_out->dims();

    // Disabled precondition checks, kept for reference:
    // PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
    //                    dim_out.size() == 2,
    //                "The input and output of matmul be matrix");
    //
    // PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
    //                    platform::is_cpu_place(matrix_b.place()) &&
    //                    platform::is_cpu_place(matrix_out->place()),
    //                "Matrix must all be in CPUPlace");

    const int M = dim_out[0];
    const int N = dim_out[1];
    const int K = trans_a ? dim_a[0] : dim_a[1];

    const CBLAS_TRANSPOSE transA = trans_a ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE transB = trans_b ? CblasTrans : CblasNoTrans;

    const float *a_data = matrix_a.data<float>();
    const float *b_data = matrix_b.data<float>();
    float *out_data = matrix_out->data<float>();
    gemm<float>(transA, transB, M, N, K, alpha, a_data, b_data, beta,
                out_data);
}
// Double-precision matrix multiply on tensors, delegating to gemm<double>.
// M and N are taken from the dimensions of matrix_out; K is taken from
// matrix_a (its second dimension, or its first when trans_a is set).
template <>
void matmul<double>(const framework::Tensor &matrix_a, bool trans_a,
                    const framework::Tensor &matrix_b, bool trans_b,
                    double alpha, framework::Tensor *matrix_out, double beta) {
    auto dim_a = matrix_a.dims();
    auto dim_b = matrix_b.dims();  // only referenced by the disabled checks
    auto dim_out = matrix_out->dims();

    // Disabled precondition checks, kept for reference:
    // PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 &&
    //                    dim_out.size() == 2,
    //                "The input and output of matmul be matrix");
    //
    // PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
    //                    platform::is_cpu_place(matrix_b.place()) &&
    //                    platform::is_cpu_place(matrix_out->place()),
    //                "Matrix must all be in CPUPlace");

    const int M = dim_out[0];
    const int N = dim_out[1];
    const int K = trans_a ? dim_a[0] : dim_a[1];

    const CBLAS_TRANSPOSE transA = trans_a ? CblasTrans : CblasNoTrans;
    const CBLAS_TRANSPOSE transB = trans_b ? CblasTrans : CblasNoTrans;

    const double *a_data = matrix_a.data<double>();
    const double *b_data = matrix_b.data<double>();
    double *out_data = matrix_out->data<double>();
    gemm<double>(transA, transB, M, N, K, alpha, a_data, b_data, beta,
                 out_data);
}
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -19,24 +19,26 @@ limitations under the License. */
#include <cmath>
namespace paddle_mobile {
namespace operators {
namespace math {
// GEMM taking CBLAS transpose flags. No leading dimensions are passed:
// M, N and K describe the operation, A and B are the inputs, and C is the
// in/out result scaled by beta.
template <typename T>
void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB,
const int M, const int N, const int K, const T alpha, const T *A,
const T *B, const T beta, T *C);
// GEMM taking boolean transpose flags and explicit leading dimensions
// (lda, ldb, ldc) for A, B and C.
template <typename T>
void gemm(const bool transA, const bool transB, const int M, const int N,
const int K, const T alpha, const T *A, const int lda, const T *B,
const int ldb, const T beta, T *C, const int ldc);
// Matrix multiply on tensors stored in contiguous memory; matrix_out is
// the output, and alpha and beta are the scalar factors.
template <typename T>
void matmul(const framework::Tensor &matrix_a, bool trans_a,
const framework::Tensor &matrix_b, bool trans_b, T alpha,
framework::Tensor *matrix_out, T beta);
} // namespace math
} // namespace operators
namespace operators {
namespace math {
// GEMM taking CBLAS transpose flags. No leading dimensions are passed:
// M, N and K describe the operation, A and B are the inputs, and C is the
// in/out result scaled by beta.
template <typename T>
void gemm(const CBLAS_TRANSPOSE transA,
const CBLAS_TRANSPOSE transB, const int M, const int N,
const int K, const T alpha, const T *A, const T *B,
const T beta, T *C);
// GEMM taking boolean transpose flags and explicit leading dimensions
// (lda, ldb, ldc) for A, B and C.
template <typename T>
void gemm(const bool transA, const bool transB, const int M,
const int N, const int K, const T alpha, const T *A,
const int lda, const T *B, const int ldb, const T beta,
T *C, const int ldc);
// Matrix multiply on tensors stored in contiguous memory; matrix_out is
// the output, and alpha and beta are the scalar factors.
template <typename T>
void matmul(const framework::Tensor &matrix_a, bool trans_a,
const framework::Tensor &matrix_b, bool trans_b,
T alpha, framework::Tensor *matrix_out, T beta);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
namespace paddle_mobile {
namespace operators {
namespace math {
// Transform applies a unary or a binary functor to each element of a range
// delimited by a pair of iterators and writes the results through an output
// iterator. Both overloads forward directly to std::transform.
//
// NOTE: Input and output iterators are separate template parameters because
//       an op's inputs and outputs may be reached through different
//       iterator types.
//
// NOTE: The iterators are not assumed to be raw pointers (const InputType*
//       / OutputType*); callers may pass an iterator class instead, e.g. a
//       row-wise transform iterator.
//
// The call operators are const: the functor holds no state, so it can also
// be invoked through a const object or a const reference.
struct Transform {
    // Unary form: stores op(x) for every x in [first, last), starting at
    // result.
    template <typename InputIter, typename OutputIter,
              typename UnaryOperation>
    void operator()(InputIter first, InputIter last, OutputIter result,
                    UnaryOperation op) const {
        std::transform(first, last, result, op);
    }

    // Binary form: stores op(x, y) for every x in [first1, last1) paired
    // with the element at the same offset from first2, starting at result.
    template <typename InputIter1, typename InputIter2, typename OutputIter,
              typename BinaryOperation>
    void operator()(InputIter1 first1, InputIter1 last1, InputIter2 first2,
                    OutputIter result, BinaryOperation op) const {
        std::transform(first1, last1, first2, result, op);
    }
};
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -15,179 +15,212 @@ limitations under the License. */
#include "vol2col.h"
namespace paddle_mobile {
namespace operators {
namespace math {
using Tensor = paddle_mobile::framework::Tensor;
/*
 * CPU specialization of Vol2ColFunctor.
 *
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 *
 * Every element of col is written: it receives the vol element it maps to,
 * or zero when the mapped position lies outside vol.
 */
template <typename T> class Vol2ColFunctor<CPU, T> {
  public:
    void operator()(const Tensor &vol, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *col) const {
        // Rank checks are disabled:
        // PADDLE_ENFORCE(vol.dims().size() == 4);
        // PADDLE_ENFORCE(col->dims().size() == 7);

        const int input_channels = vol.dims()[0];
        const int input_depth = vol.dims()[1];
        const int input_height = vol.dims()[2];
        const int input_width = vol.dims()[3];
        const int filter_depth = col->dims()[1];
        const int filter_height = col->dims()[2];
        const int filter_width = col->dims()[3];
        const int output_depth = col->dims()[4];
        const int output_height = col->dims()[5];
        const int output_width = col->dims()[6];
        const int channels_col =
            input_channels * filter_depth * filter_height * filter_width;

        // The shape-consistency checks (PADDLE_ENFORCE_EQ) are disabled.
        // They required, for each axis i in {depth, height, width}:
        //   (input_i + 2 * paddings[i] -
        //    (dilations[i] * (filter_i - 1) + 1)) / strides[i] + 1
        //       == output_i
        // with the messages "input_depth and output_depth are mismatching.",
        // "input_height and output_height are mismatching." and
        // "input_width and output_width are mismatching.".

        const T *src = vol.data<T>();
        T *dst = col->data<T>();

        for (int row = 0; row < channels_col; ++row) {
            // Split the flattened row index into the input channel and the
            // filter offsets along depth, height and width.
            const int kw = row % filter_width;
            const int kh = (row / filter_width) % filter_height;
            const int kd = (row / filter_width / filter_height) % filter_depth;
            const int c_in = row / filter_width / filter_height / filter_depth;

            for (int od = 0; od < output_depth; ++od) {
                const int in_d =
                    od * strides[0] - paddings[0] + kd * dilations[0];
                const bool d_ok = in_d >= 0 && in_d < input_depth;
                const int col_plane = row * output_depth + od;
                const int vol_plane = c_in * input_depth + in_d;

                for (int oh = 0; oh < output_height; ++oh) {
                    const int in_h =
                        oh * strides[1] - paddings[1] + kh * dilations[1];
                    const bool h_ok = in_h >= 0 && in_h < input_height;
                    const int col_line = col_plane * output_height + oh;
                    const int vol_line = vol_plane * input_height + in_h;

                    for (int ow = 0; ow < output_width; ++ow) {
                        const int in_w =
                            ow * strides[2] - paddings[2] + kw * dilations[2];
                        const int col_idx = col_line * output_width + ow;
                        if (d_ok && h_ok && in_w >= 0 && in_w < input_width) {
                            dst[col_idx] = src[vol_line * input_width + in_w];
                        } else {
                            // Position outside vol: emit zero.
                            dst[col_idx] = static_cast<T>(0);
                        }
                    }
                }
            }
        }
    }
};
/*
 * CPU specialization of Col2VolFunctor.
 *
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 *
 * Each col element whose mapped position lies inside vol is added (+=) to
 * that vol element; vol is not cleared here.
 */
template <typename T> class Col2VolFunctor<CPU, T> {
  public:
    void operator()(const Tensor &col, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *vol) const {
        // Rank checks are disabled:
        // PADDLE_ENFORCE(vol->dims().size() == 4);
        // PADDLE_ENFORCE(col.dims().size() == 7);

        const int input_channels = vol->dims()[0];
        const int input_depth = vol->dims()[1];
        const int input_height = vol->dims()[2];
        const int input_width = vol->dims()[3];
        const int filter_depth = col.dims()[1];
        const int filter_height = col.dims()[2];
        const int filter_width = col.dims()[3];
        const int output_depth = col.dims()[4];
        const int output_height = col.dims()[5];
        const int output_width = col.dims()[6];
        const int channels_col =
            input_channels * filter_depth * filter_height * filter_width;

        // The shape-consistency checks (PADDLE_ENFORCE_EQ) are disabled.
        // They required, for each axis i in {depth, height, width}:
        //   (input_i + 2 * paddings[i] -
        //    (dilations[i] * (filter_i - 1) + 1)) / strides[i] + 1
        //       == output_i
        // with the messages "input_depth and output_depth are mismatching.",
        // "input_height and output_height are mismatching." and
        // "input_width and output_width are mismatching.".

        T *dst = vol->data<T>();
        const T *src = col.data<T>();

        for (int row = 0; row < channels_col; ++row) {
            // Split the flattened row index into the image channel and the
            // filter offsets along depth, height and width.
            const int kw = row % filter_width;
            const int kh = (row / filter_width) % filter_height;
            const int kd = (row / filter_width / filter_height) % filter_depth;
            const int c_im = row / filter_width / filter_height / filter_depth;

            for (int od = 0; od < output_depth; ++od) {
                const int in_d =
                    od * strides[0] - paddings[0] + kd * dilations[0];
                const bool d_ok = in_d >= 0 && in_d < input_depth;
                const int col_plane = row * output_depth + od;
                const int vol_plane = c_im * input_depth + in_d;

                for (int oh = 0; oh < output_height; ++oh) {
                    const int in_h =
                        oh * strides[1] - paddings[1] + kh * dilations[1];
                    const bool h_ok = in_h >= 0 && in_h < input_height;
                    const int col_line = col_plane * output_height + oh;
                    const int vol_line = vol_plane * input_height + in_h;

                    for (int ow = 0; ow < output_width; ++ow) {
                        const int in_w =
                            ow * strides[2] - paddings[2] + kw * dilations[2];
                        const bool inside =
                            d_ok && h_ok && in_w >= 0 && in_w < input_width;
                        if (!inside) {
                            // Position outside vol: nothing to add.
                            continue;
                        }
                        dst[vol_line * input_width + in_w] +=
                            src[col_line * output_width + ow];
                    }
                }
            }
        }
    }
};
// Explicit instantiations of the CPU functors for float and double element
// types.
template class Vol2ColFunctor<CPU, float>;
template class Vol2ColFunctor<CPU, double>;
template class Col2VolFunctor<CPU, float>;
template class Col2VolFunctor<CPU, double>;
} // namespace math
} // namespace operators
namespace operators {
namespace math {
using Tensor = paddle_mobile::framework::Tensor;
/*
 * CPU specialization of Vol2ColFunctor.
 *
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 *
 * Every element of col is written: it receives the vol element it maps to,
 * or zero when the mapped position lies outside vol.
 */
template <typename T> class Vol2ColFunctor<CPU, T> {
  public:
    void operator()(const Tensor &vol, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *col) const {
        // Rank checks are disabled:
        // PADDLE_ENFORCE(vol.dims().size() == 4);
        // PADDLE_ENFORCE(col->dims().size() == 7);

        const int input_channels = vol.dims()[0];
        const int input_depth = vol.dims()[1];
        const int input_height = vol.dims()[2];
        const int input_width = vol.dims()[3];
        const int filter_depth = col->dims()[1];
        const int filter_height = col->dims()[2];
        const int filter_width = col->dims()[3];
        const int output_depth = col->dims()[4];
        const int output_height = col->dims()[5];
        const int output_width = col->dims()[6];
        const int channels_col =
            input_channels * filter_depth * filter_height * filter_width;

        // The shape-consistency checks (PADDLE_ENFORCE_EQ) are disabled.
        // They required, for each axis i in {depth, height, width}:
        //   (input_i + 2 * paddings[i] -
        //    (dilations[i] * (filter_i - 1) + 1)) / strides[i] + 1
        //       == output_i
        // with the messages "input_depth and output_depth are mismatching.",
        // "input_height and output_height are mismatching." and
        // "input_width and output_width are mismatching.".

        const T *src = vol.data<T>();
        T *dst = col->data<T>();

        for (int row = 0; row < channels_col; ++row) {
            // Split the flattened row index into the input channel and the
            // filter offsets along depth, height and width.
            const int kw = row % filter_width;
            const int kh = (row / filter_width) % filter_height;
            const int kd = (row / filter_width / filter_height) % filter_depth;
            const int c_in = row / filter_width / filter_height / filter_depth;

            for (int od = 0; od < output_depth; ++od) {
                const int in_d =
                    od * strides[0] - paddings[0] + kd * dilations[0];
                const bool d_ok = in_d >= 0 && in_d < input_depth;
                const int col_plane = row * output_depth + od;
                const int vol_plane = c_in * input_depth + in_d;

                for (int oh = 0; oh < output_height; ++oh) {
                    const int in_h =
                        oh * strides[1] - paddings[1] + kh * dilations[1];
                    const bool h_ok = in_h >= 0 && in_h < input_height;
                    const int col_line = col_plane * output_height + oh;
                    const int vol_line = vol_plane * input_height + in_h;

                    for (int ow = 0; ow < output_width; ++ow) {
                        const int in_w =
                            ow * strides[2] - paddings[2] + kw * dilations[2];
                        const int col_idx = col_line * output_width + ow;
                        if (d_ok && h_ok && in_w >= 0 && in_w < input_width) {
                            dst[col_idx] = src[vol_line * input_width + in_w];
                        } else {
                            // Position outside vol: emit zero.
                            dst[col_idx] = static_cast<T>(0);
                        }
                    }
                }
            }
        }
    }
};
/*
 * CPU specialization of Col2VolFunctor.
 *
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *    output_depth, output_height, output_width]
 *
 * Each col element whose mapped position lies inside vol is added (+=) to
 * that vol element; vol is not cleared here.
 */
template <typename T> class Col2VolFunctor<CPU, T> {
  public:
    void operator()(const Tensor &col, const std::vector<int> &dilations,
                    const std::vector<int> &strides,
                    const std::vector<int> &paddings, Tensor *vol) const {
        // Rank checks are disabled:
        // PADDLE_ENFORCE(vol->dims().size() == 4);
        // PADDLE_ENFORCE(col.dims().size() == 7);

        const int input_channels = vol->dims()[0];
        const int input_depth = vol->dims()[1];
        const int input_height = vol->dims()[2];
        const int input_width = vol->dims()[3];
        const int filter_depth = col.dims()[1];
        const int filter_height = col.dims()[2];
        const int filter_width = col.dims()[3];
        const int output_depth = col.dims()[4];
        const int output_height = col.dims()[5];
        const int output_width = col.dims()[6];
        const int channels_col =
            input_channels * filter_depth * filter_height * filter_width;

        // The shape-consistency checks (PADDLE_ENFORCE_EQ) are disabled.
        // They required, for each axis i in {depth, height, width}:
        //   (input_i + 2 * paddings[i] -
        //    (dilations[i] * (filter_i - 1) + 1)) / strides[i] + 1
        //       == output_i
        // with the messages "input_depth and output_depth are mismatching.",
        // "input_height and output_height are mismatching." and
        // "input_width and output_width are mismatching.".

        T *dst = vol->data<T>();
        const T *src = col.data<T>();

        for (int row = 0; row < channels_col; ++row) {
            // Split the flattened row index into the image channel and the
            // filter offsets along depth, height and width.
            const int kw = row % filter_width;
            const int kh = (row / filter_width) % filter_height;
            const int kd = (row / filter_width / filter_height) % filter_depth;
            const int c_im = row / filter_width / filter_height / filter_depth;

            for (int od = 0; od < output_depth; ++od) {
                const int in_d =
                    od * strides[0] - paddings[0] + kd * dilations[0];
                const bool d_ok = in_d >= 0 && in_d < input_depth;
                const int col_plane = row * output_depth + od;
                const int vol_plane = c_im * input_depth + in_d;

                for (int oh = 0; oh < output_height; ++oh) {
                    const int in_h =
                        oh * strides[1] - paddings[1] + kh * dilations[1];
                    const bool h_ok = in_h >= 0 && in_h < input_height;
                    const int col_line = col_plane * output_height + oh;
                    const int vol_line = vol_plane * input_height + in_h;

                    for (int ow = 0; ow < output_width; ++ow) {
                        const int in_w =
                            ow * strides[2] - paddings[2] + kw * dilations[2];
                        const bool inside =
                            d_ok && h_ok && in_w >= 0 && in_w < input_width;
                        if (!inside) {
                            // Position outside vol: nothing to add.
                            continue;
                        }
                        dst[vol_line * input_width + in_w] +=
                            src[col_line * output_width + ow];
                    }
                }
            }
        }
    }
};
// Explicit instantiations of the CPU functors for float and double element
// types.
template class Vol2ColFunctor<CPU, float>;
template class Vol2ColFunctor<CPU, double>;
template class Col2VolFunctor<CPU, float>;
template class Col2VolFunctor<CPU, double>;
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -18,66 +18,78 @@ limitations under the License. */
#include "framework/tensor.h"
namespace paddle_mobile {
namespace operators {
namespace math {
/*
* \brief Converts the feature data of four dimensions(CDHW) into a colData of
* seven dimensions in the Vol2ColFunctor calculation,
* And in the Col2VolFunctor calculation, it is reversed.
*
* \param volData Vol data.
* \param volShape The shape of volData,
* [input_channels, input_depth, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations dilation data.
* \param 3-dimension [dilation_depth, dilation_height, dilation_width].
*
* \param strides stride data.
* \param 3-dimension [stride_depth, stride_height, stride_width].
*
* \param paddings padding data.
* \param 3-dimension [d_pad, h_pad, w_pad].
*
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
* The shape of convolution matrix is [height, width], where the height is equal
* input_channels * filter_depth * filter_height * filter_width, and the width
* is equal output_depth * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_depth,
* filter_height,
* filter_width, ======> [height, width]
* output_depth,
* output_height,
* output_width]
*
* \note The caller needs to ensure that volShape.inputChannels is equal to
* colShape.inputChannels.
*/
using Tensor = paddle_mobile::framework::Tensor;
namespace operators {
namespace math {
/*
* \brief Converts the feature data of four dimensions(CDHW) into a
* colData of
* seven dimensions in the Vol2ColFunctor calculation,
* And in the Col2VolFunctor calculation, it is reversed.
*
* \param volData Vol data.
* \param volShape The shape of volData,
* [input_channels, input_depth, input_height,
* input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* \param dilations Dilation data, 3-dimension
* [dilation_depth, dilation_height, dilation_width].
*
* \param strides Stride data, 3-dimension
* [stride_depth, stride_height, stride_width].
*
* \param paddings Padding data, 3-dimension [d_pad, h_pad, w_pad].
*
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth,
* output_height, output_width]
* So, it is easy to reshape colData into a convolution matrix for a
* convolution based on matrix multiplication.
* The shape of the convolution matrix is [height, width], where
* height = input_channels * filter_depth * filter_height * filter_width
* and width = output_depth * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_depth,
* filter_height,
* filter_width, ======> [height, width]
* output_depth,
* output_height,
* output_width]
*
* \note The caller needs to ensure that volShape.inputChannels is
* equal to
* colShape.inputChannels.
*/
using Tensor = paddle_mobile::framework::Tensor;
/*
 * \brief Functor that unrolls volume data into column data.
 *
 * Only the call operator is declared here; it is defined out of line.
 *
 * \param vol        Input volume tensor (read-only).
 * \param dilations  Dilation per spatial axis.
 * \param strides    Stride per spatial axis.
 * \param paddings   Padding per spatial axis.
 * \param col        Output column tensor, written through the pointer.
 */
template <typename DeviceType, typename T> class Vol2ColFunctor {
public:
void operator()(const Tensor &vol, const std::vector<int> &dilations,
const std::vector<int> &strides,
const std::vector<int> &paddings, Tensor *col) const;
};
/*
 * \brief Functor that unrolls volume data into column data.
 *
 * Only the call operator is declared here; it is defined out of line.
 *
 * \param vol        Input volume tensor (read-only).
 * \param dilations  Dilation per spatial axis.
 * \param strides    Stride per spatial axis.
 * \param paddings   Padding per spatial axis.
 * \param col        Output column tensor, written through the pointer.
 */
template <typename DeviceType, typename T> class Vol2ColFunctor {
public:
void operator()(const Tensor &vol,
const std::vector<int> &dilations,
const std::vector<int> &strides,
const std::vector<int> &paddings,
Tensor *col) const;
};
/*
 * \brief Functor for the reverse of Vol2ColFunctor: column data back to
 * volume data.
 *
 * Only the call operator is declared here; it is defined out of line.
 *
 * \param col        Input column tensor (read-only).
 * \param dilations  Dilation per spatial axis.
 * \param strides    Stride per spatial axis.
 * \param paddings   Padding per spatial axis.
 * \param vol        Output volume tensor, written through the pointer.
 */
template <typename DeviceType, typename T> class Col2VolFunctor {
public:
void operator()(const Tensor &col, const std::vector<int> &dilations,
const std::vector<int> &strides,
const std::vector<int> &paddings, Tensor *vol) const;
};
/*
 * \brief Functor for the reverse of Vol2ColFunctor: column data back to
 * volume data.
 *
 * Only the call operator is declared here; it is defined out of line.
 *
 * \param col        Input column tensor (read-only).
 * \param dilations  Dilation per spatial axis.
 * \param strides    Stride per spatial axis.
 * \param paddings   Padding per spatial axis.
 * \param vol        Output volume tensor, written through the pointer.
 */
template <typename DeviceType, typename T> class Col2VolFunctor {
public:
void operator()(const Tensor &col,
const std::vector<int> &dilations,
const std::vector<int> &strides,
const std::vector<int> &paddings,
Tensor *vol) const;
};
} // namespace math
} // namespace operators
} // namespace math
} // namespace operators
} // namespace paddle_mobile
......@@ -19,28 +19,27 @@ SOFTWARE.
#include "op_param.h"
namespace paddle_mobile {
namespace operators {
/**
 * Writes a human-readable summary of a convolution parameter set.
 *
 * Strides, paddings and dilations are printed as " (a, b, ...) ". Every
 * element is emitted and elements are comma-separated, so values such as
 * (1, 12) and (11, 2) are no longer rendered identically, and attribute
 * vectors of any length are printed without indexing out of range.
 *
 * \param printer     Destination printer.
 * \param conv_param  Parameters to describe.
 * \return            The same printer, to allow chaining.
 */
Print &operator<<(Print &printer, const ConvParam &conv_param) {
    // Shared formatter for the three integer-vector attributes.
    auto print_ints = [&printer](const std::vector<int> &values) {
        printer << " (";
        for (size_t i = 0; i < values.size(); ++i) {
            if (i != 0) {
                printer << ", ";
            }
            printer << values[i];
        }
        printer << ") ";
    };

    printer << "parameter of conv: "
            << "\n";
    printer << " stride: ";
    print_ints(conv_param.Strides());
    printer << "\n";
    printer << " paddings: ";
    print_ints(conv_param.Paddings());
    printer << "\n";
    printer << " dilations: ";
    print_ints(conv_param.Dilations());
    printer << "\n";
    printer << " groups: " << conv_param.Groups() << "\n";
    printer << " input dims: " << conv_param.Input()->dims() << "\n";
    printer << " filter dims: " << conv_param.Filter()->dims() << "\n";
    printer << " output dims: " << conv_param.Output()->dims();
    return printer;
}
} // namespace operators
namespace operators {
/**
 * Writes a human-readable summary of a convolution parameter set.
 *
 * Strides, paddings and dilations are printed as " (a, b, ...) ". Every
 * element is emitted and elements are comma-separated, so values such as
 * (1, 12) and (11, 2) are no longer rendered identically, and attribute
 * vectors of any length are printed without indexing out of range.
 *
 * \param printer     Destination printer.
 * \param conv_param  Parameters to describe.
 * \return            The same printer, to allow chaining.
 */
Print &operator<<(Print &printer, const ConvParam &conv_param) {
    // Shared formatter for the three integer-vector attributes.
    auto print_ints = [&printer](const std::vector<int> &values) {
        printer << " (";
        for (size_t i = 0; i < values.size(); ++i) {
            if (i != 0) {
                printer << ", ";
            }
            printer << values[i];
        }
        printer << ") ";
    };

    printer << "parameter of conv: "
            << "\n";
    printer << " stride: ";
    print_ints(conv_param.Strides());
    printer << "\n";
    printer << " paddings: ";
    print_ints(conv_param.Paddings());
    printer << "\n";
    printer << " dilations: ";
    print_ints(conv_param.Dilations());
    printer << "\n";
    printer << " groups: " << conv_param.Groups() << "\n";
    printer << " input dims: " << conv_param.Input()->dims() << "\n";
    printer << " filter dims: " << conv_param.Filter()->dims() << "\n";
    printer << " output dims: " << conv_param.Output()->dims();
    return printer;
}
} // namespace operators
} // namespace paddle_mobile
......@@ -26,86 +26,211 @@ SOFTWARE.
#include "framework/variable.h"
namespace paddle_mobile {
namespace operators {
using namespace framework;
class OpParam : PaddleMobileObject {
public:
protected:
template <typename T>
static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
return GetVarValue<T>("Input", inputs, scope);
}
template <typename T>
static T *OutputFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<T>("Output", outputs, scope);
}
template <typename T>
static T *FilterFrom(const VariableNameMap &inputs, const Scope &scope) {
return GetVarValue<T>("Filter", inputs, scope);
}
template <typename T>
static const T GetAttr(std::string key, const AttributeMap &map) {
return ((Attribute)map.at(key)).Get<T>();
}
template <typename T>
static T *GetVarValue(std::string key, const VariableNameMap &var_map,
const Scope &scope) {
auto var_vec = var_map.at(key);
if (var_vec.size()) {
// std::cout << " get var value -- " << var_vec[0] << std::endl;
auto var = scope.FindVar(var_vec[0]);
return var->GetMutable<T>();
} else {
return nullptr;
}
}
};
/**
 * Parameter bundle for the convolution operator.
 *
 * Resolves the "Filter", "Input" and "Output" variables from the scope and
 * reads the "strides", "paddings", "dilations" and "groups" attributes.
 */
class ConvParam : OpParam {
  public:
    ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const framework::AttributeMap &attrs,
              const framework::Scope &scope)
        : filter_(FilterFrom<LoDTensor>(inputs, scope)),
          input_(InputFrom<Tensor>(inputs, scope)),
          output_(OutputFrom<Tensor>(outputs, scope)),
          strides_(GetAttr<std::vector<int>>("strides", attrs)),
          paddings_(GetAttr<std::vector<int>>("paddings", attrs)),
          dilations_(GetAttr<std::vector<int>>("dilations", attrs)),
          groups_(GetAttr<int>("groups", attrs)) {}

    const Tensor *Input() const { return input_; }

    const LoDTensor *Filter() const { return filter_; }

    Tensor *Output() const { return output_; }

    const std::vector<int> &Strides() const { return strides_; }

    const std::vector<int> &Paddings() const { return paddings_; }

    const std::vector<int> &Dilations() const { return dilations_; }

    const int &Groups() const { return groups_; }

  private:
    // Declared in the order the constructor resolves them, so member
    // initialisation keeps the lookup order: filter, input, output.
    LoDTensor *filter_;
    Tensor *input_;
    Tensor *output_;
    std::vector<int> strides_;
    std::vector<int> paddings_;
    std::vector<int> dilations_;
    int groups_;
};
Print &operator<<(Print &printer, const ConvParam &conv_param);
} // namespace operators
namespace operators {
using namespace framework;
class OpParam : PaddleMobileObject {
public:
protected:
template <typename T>
static T *InputFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetVarValue<T>("Input", inputs, scope);
}
template <typename T>
static T *InputXFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetVarValue<T>("X", inputs, scope);
}
template <typename T>
static T *InputYFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetVarValue<T>("Y", inputs, scope);
}
template <typename T>
static std::vector<T *>
InputMultiFrom(const VariableNameMap &inputs, const Scope &scope) {
return GetMultiVarValue<T>("Input", inputs, scope);
}
template <typename T>
static T *OutputFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetVarValue<T>("Output", outputs, scope);
}
template <typename T>
static T *OutFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetVarValue<T>("Out", outputs, scope);
}
template <typename T>
static T *FilterFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetVarValue<T>("Filter", inputs, scope);
}
template <typename T>
static const T GetAttr(std::string key, const AttributeMap &map) {
return ((Attribute)map.at(key)).Get<T>();
}
template <typename T>
static T *GetVarValue(std::string key,
const VariableNameMap &var_map,
const Scope &scope) {
auto var_vec = var_map.at(key);
if (var_vec.size()) {
// std::cout << " get var value -- " << var_vec[0] <<
// std::endl;
auto var = scope.FindVar(var_vec[0]);
return var->GetMutable<T>();
} else {
return nullptr;
}
}
template <typename T>
static std::vector<T *>
GetMultiVarValue(std::string key, const VariableNameMap &var_map,
const Scope &scope) {
auto var_vecs = var_map.at(key);
assert(var_vecs.size() > 1);
std::vector<T *> var_res;
for (auto &var_vec : var_vecs) {
auto var = scope.FindVar(var_vec);
var_res.push_back(var->GetMutable<T>());
}
return var_res;
}
};
/**
 * Parameter bundle for the convolution operator.
 *
 * Resolves the "Filter", "Input" and "Output" variables from the scope and
 * reads the "strides", "paddings", "dilations" and "groups" attributes.
 */
class ConvParam : OpParam {
  public:
    ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const framework::AttributeMap &attrs,
              const framework::Scope &scope)
        : filter_(FilterFrom<LoDTensor>(inputs, scope)),
          input_(InputFrom<Tensor>(inputs, scope)),
          output_(OutputFrom<Tensor>(outputs, scope)),
          strides_(GetAttr<std::vector<int>>("strides", attrs)),
          paddings_(GetAttr<std::vector<int>>("paddings", attrs)),
          dilations_(GetAttr<std::vector<int>>("dilations", attrs)),
          groups_(GetAttr<int>("groups", attrs)) {}

    const Tensor *Input() const { return input_; }

    const LoDTensor *Filter() const { return filter_; }

    Tensor *Output() const { return output_; }

    const std::vector<int> &Strides() const { return strides_; }

    const std::vector<int> &Paddings() const { return paddings_; }

    const std::vector<int> &Dilations() const { return dilations_; }

    const int &Groups() const { return groups_; }

  private:
    // Declared in the order the constructor resolves them, so member
    // initialisation keeps the lookup order: filter, input, output.
    LoDTensor *filter_;
    Tensor *input_;
    Tensor *output_;
    std::vector<int> strides_;
    std::vector<int> paddings_;
    std::vector<int> dilations_;
    int groups_;
};
Print &operator<<(Print &printer, const ConvParam &conv_param);
/**
 * Parameter bundle for the elementwise-add operator.
 *
 * Resolves the "X" and "Y" inputs and the "Out" output from the scope and
 * reads the integer "axis" attribute.
 */
class ElementwiseAddParam : OpParam {
  public:
    ElementwiseAddParam(const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
                        const framework::Scope &scope)
        : input_x_(InputXFrom<Tensor>(inputs, scope)),
          input_y_(InputYFrom<Tensor>(inputs, scope)),
          out_(OutFrom<Tensor>(outputs, scope)),
          axis_(GetAttr<int>("axis", attrs)) {}

    const Tensor *InputX() const { return input_x_; }

    const Tensor *InputY() const { return input_y_; }

    Tensor *Out() const { return out_; }

    const int &Axis() const { return axis_; }

  private:
    // Initialised in declaration order: X, Y, Out, axis.
    Tensor *input_x_;
    Tensor *input_y_;
    Tensor *out_;
    int axis_;
};
/**
 * Parameter bundle for the mul operator.
 *
 * Resolves the "X" and "Y" inputs and the "Out" output from the scope and
 * reads the integer "x_num_col_dims" and "y_num_col_dims" attributes.
 */
class MulParam : OpParam {
  public:
    MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const framework::AttributeMap &attrs,
             const framework::Scope &scope)
        : input_x_(InputXFrom<Tensor>(inputs, scope)),
          input_y_(InputYFrom<Tensor>(inputs, scope)),
          out_(OutFrom<Tensor>(outputs, scope)),
          x_num_col_dims_(GetAttr<int>("x_num_col_dims", attrs)),
          y_num_col_dims_(GetAttr<int>("y_num_col_dims", attrs)) {}

    const Tensor *InputX() const { return input_x_; }

    const Tensor *InputY() const { return input_y_; }

    Tensor *Out() const { return out_; }

    const int &XNumColDims() const { return x_num_col_dims_; }

    const int &YNumColDims() const { return y_num_col_dims_; }

  private:
    // Initialised in declaration order: X, Y, Out, then the two attributes.
    Tensor *input_x_;
    Tensor *input_y_;
    Tensor *out_;
    int x_num_col_dims_;
    int y_num_col_dims_;
};
/**
 * Parameter bundle for the concat operator.
 *
 * Resolves every variable of the "Input" slot and the "Out" output from the
 * scope and reads the integer "axis" attribute.
 */
class ConcatParam : public OpParam {
  public:
    ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
                const framework::AttributeMap &attrs,
                const framework::Scope &scope)
        : inputs_(InputMultiFrom<Tensor>(inputs, scope)),
          out_(OutFrom<Tensor>(outputs, scope)),
          axis_(GetAttr<int>("axis", attrs)) {}

    // Returns a copy of the pointer list (the tensors are not copied).
    std::vector<Tensor *> Inputs() const { return inputs_; }

    Tensor *Out() const { return out_; }

    const int &Axis() const { return axis_; }

  private:
    // Initialised in declaration order: inputs, Out, axis.
    std::vector<Tensor *> inputs_;
    Tensor *out_;
    int axis_;
};
} // namespace operators
} // namespace paddle_mobile
......@@ -19,106 +19,107 @@ limitations under the License. */
#include <cstdio>
#include <cstdlib>
#include <typeindex>
namespace paddle_mobile {
namespace framework {
namespace framework {
/**
 * Maps a C++ element type, given as a std::type_index, to the matching
 * proto::VarType::Type.
 *
 * Supported types: float, double, int, int64_t and bool. FP16 is disabled
 * (see the commented-out branch).
 *
 * Any other type is treated as a programming error: a diagnostic is written
 * to stderr and the process aborts. The previous version flowed off the end
 * of this non-void function in that case, which is undefined behaviour.
 */
inline proto::VarType::Type ToDataType(std::type_index type) {
    /*if (type == std::type_index(typeid(platform::float16))) {
      return proto::VarType::FP16;
    } */
    // Compare type_index values directly: hash_code() is only guaranteed
    // to be equal for equal types, not distinct for different ones.
    // typeid(T) and typeid(const T) denote the same type, so plain T is used.
    if (type == std::type_index(typeid(float))) {
        return proto::VarType::FP32;
    }
    if (type == std::type_index(typeid(double))) {
        return proto::VarType::FP64;
    }
    if (type == std::type_index(typeid(int))) {
        return proto::VarType::INT32;
    }
    if (type == std::type_index(typeid(int64_t))) {
        return proto::VarType::INT64;
    }
    if (type == std::type_index(typeid(bool))) {
        return proto::VarType::BOOL;
    }
    // PADDLE_THROW("Not supported");
    // stderr is used so the message is not lost in a buffer on abort.
    std::fprintf(stderr, "Not supported\n");
    std::abort();
}
/**
 * Maps a C++ element type, given as a std::type_index, to the matching
 * proto::VarType::Type.
 *
 * Supported types: float, double, int, int64_t and bool. FP16 is disabled
 * (see the commented-out branch).
 *
 * Any other type is treated as a programming error: a diagnostic is written
 * to stderr and the process aborts. The previous version flowed off the end
 * of this non-void function in that case, which is undefined behaviour.
 */
inline proto::VarType::Type ToDataType(std::type_index type) {
    /*if (type == std::type_index(typeid(platform::float16))) {
      return proto::VarType::FP16;
    } */
    // Compare type_index values directly: hash_code() is only guaranteed
    // to be equal for equal types, not distinct for different ones.
    // typeid(T) and typeid(const T) denote the same type, so plain T is used.
    if (type == std::type_index(typeid(float))) {
        return proto::VarType::FP32;
    }
    if (type == std::type_index(typeid(double))) {
        return proto::VarType::FP64;
    }
    if (type == std::type_index(typeid(int))) {
        return proto::VarType::INT32;
    }
    if (type == std::type_index(typeid(int64_t))) {
        return proto::VarType::INT64;
    }
    if (type == std::type_index(typeid(bool))) {
        return proto::VarType::BOOL;
    }
    // PADDLE_THROW("Not supported");
    // stderr is used so the message is not lost in a buffer on abort.
    std::fprintf(stderr, "Not supported\n");
    std::abort();
}
/**
 * Maps a proto::VarType::Type to the std::type_index of its C++ element
 * type (FP32 -> float, FP64 -> double, INT32 -> int, INT64 -> int64_t,
 * BOOL -> bool). FP16 is disabled.
 *
 * Any other value is treated as a programming error: a diagnostic is
 * written to stderr and the process aborts. The previous version printed a
 * message and then flowed off the end of this non-void function, which is
 * undefined behaviour.
 */
inline std::type_index ToTypeIndex(proto::VarType::Type type) {
    switch (type) {
    // case proto::VarType::FP16:
    //   return typeid(platform::float16);
    case proto::VarType::FP32:
        return typeid(float);
    case proto::VarType::FP64:
        return typeid(double);
    case proto::VarType::INT32:
        return typeid(int);
    case proto::VarType::INT64:
        return typeid(int64_t);
    case proto::VarType::BOOL:
        return typeid(bool);
    default:
        break;
    }
    // PADDLE_THROW("Not support type %d", type);
    // stderr is used so the message is not lost in a buffer on abort.
    std::fprintf(stderr, "Not support type %d\n", static_cast<int>(type));
    std::abort();
}
/**
 * Maps a proto::VarType::Type to the std::type_index of its C++ element
 * type (FP32 -> float, FP64 -> double, INT32 -> int, INT64 -> int64_t,
 * BOOL -> bool). FP16 is disabled.
 *
 * Any other value is treated as a programming error: a diagnostic is
 * written to stderr and the process aborts. The previous version printed a
 * message and then flowed off the end of this non-void function, which is
 * undefined behaviour.
 */
inline std::type_index ToTypeIndex(proto::VarType::Type type) {
    switch (type) {
    // case proto::VarType::FP16:
    //   return typeid(platform::float16);
    case proto::VarType::FP32:
        return typeid(float);
    case proto::VarType::FP64:
        return typeid(double);
    case proto::VarType::INT32:
        return typeid(int);
    case proto::VarType::INT64:
        return typeid(int64_t);
    case proto::VarType::BOOL:
        return typeid(bool);
    default:
        break;
    }
    // PADDLE_THROW("Not support type %d", type);
    // stderr is used so the message is not lost in a buffer on abort.
    std::fprintf(stderr, "Not support type %d\n", static_cast<int>(type));
    std::abort();
}
/**
 * Invokes `visitor.operator()<T>()` with T chosen from `type`:
 * FP32 -> float, FP64 -> double, INT32 -> int, INT64 -> int64_t,
 * BOOL -> bool. FP16 is disabled. For any other value "Not supported" is
 * printed and the visitor is not called.
 */
template <typename Visitor>
inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
    // case proto::VarType::FP16:
    //   visitor.template operator()<platform::float16>();
    if (type == proto::VarType::FP32) {
        visitor.template operator()<float>();
        return;
    }
    if (type == proto::VarType::FP64) {
        visitor.template operator()<double>();
        return;
    }
    if (type == proto::VarType::INT32) {
        visitor.template operator()<int>();
        return;
    }
    if (type == proto::VarType::INT64) {
        visitor.template operator()<int64_t>();
        return;
    }
    if (type == proto::VarType::BOOL) {
        visitor.template operator()<bool>();
        return;
    }
    // PADDLE_THROW("Not supported");
    printf("Not supported");
}
/**
 * Invokes `visitor.operator()<T>()` with T chosen from `type`:
 * FP32 -> float, FP64 -> double, INT32 -> int, INT64 -> int64_t,
 * BOOL -> bool. FP16 is disabled. For any other value "Not supported" is
 * printed and the visitor is not called.
 */
template <typename Visitor>
inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
    // case proto::VarType::FP16:
    //   visitor.template operator()<platform::float16>();
    if (type == proto::VarType::FP32) {
        visitor.template operator()<float>();
        return;
    }
    if (type == proto::VarType::FP64) {
        visitor.template operator()<double>();
        return;
    }
    if (type == proto::VarType::INT32) {
        visitor.template operator()<int>();
        return;
    }
    if (type == proto::VarType::INT64) {
        visitor.template operator()<int64_t>();
        return;
    }
    if (type == proto::VarType::BOOL) {
        visitor.template operator()<bool>();
        return;
    }
    // PADDLE_THROW("Not supported");
    printf("Not supported");
}
/**
 * Returns the textual name of a proto::VarType::Type ("float16",
 * "float32", "float64", "int16", "int32", "int64" or "bool").
 *
 * For any other value the diagnostic is still printed and "unknown" is
 * returned. The previous version flowed off the end of this non-void
 * function in that case, which is undefined behaviour. The fallback is
 * non-fatal because this function backs the stream output operator for
 * proto::VarType::Type.
 */
inline std::string DataTypeToString(const proto::VarType::Type type) {
    switch (type) {
    case proto::VarType::FP16:
        return "float16";
    case proto::VarType::FP32:
        return "float32";
    case proto::VarType::FP64:
        return "float64";
    case proto::VarType::INT16:
        return "int16";
    case proto::VarType::INT32:
        return "int32";
    case proto::VarType::INT64:
        return "int64";
    case proto::VarType::BOOL:
        return "bool";
    default:
        break;
    }
    // PADDLE_THROW("Not support type %d", type);
    printf("Not support type %d", static_cast<int>(type));
    return "unknown";
}
/**
 * Returns the textual name of a proto::VarType::Type ("float16",
 * "float32", "float64", "int16", "int32", "int64" or "bool").
 *
 * For any other value the diagnostic is still printed and "unknown" is
 * returned. The previous version flowed off the end of this non-void
 * function in that case, which is undefined behaviour. The fallback is
 * non-fatal because this function backs the stream output operator for
 * proto::VarType::Type.
 */
inline std::string DataTypeToString(const proto::VarType::Type type) {
    switch (type) {
    case proto::VarType::FP16:
        return "float16";
    case proto::VarType::FP32:
        return "float32";
    case proto::VarType::FP64:
        return "float64";
    case proto::VarType::INT16:
        return "int16";
    case proto::VarType::INT32:
        return "int32";
    case proto::VarType::INT64:
        return "int64";
    case proto::VarType::BOOL:
        return "bool";
    default:
        break;
    }
    // PADDLE_THROW("Not support type %d", type);
    printf("Not support type %d", static_cast<int>(type));
    return "unknown";
}
/// Streams the textual name of `type`, as produced by DataTypeToString.
inline std::ostream &operator<<(std::ostream &out,
                                const proto::VarType::Type &type) {
    return out << DataTypeToString(type);
}
/// Streams the textual name of `type`, as produced by DataTypeToString.
inline std::ostream &operator<<(std::ostream &out,
                                const proto::VarType::Type &type) {
    return out << DataTypeToString(type);
}
} // namespace framework
} // namespace framework
} // namespace paddle_mobile
......@@ -17,9 +17,9 @@ limitations under the License. */
// Disables copying and moving for a class: expands to deleted copy/move
// constructors and deleted copy/move assignment operators for `classname`.
// The expansion begins with `private:`, so any member declared after the
// macro inside the class is private unless another access specifier follows.
// The last expanded declaration has no trailing semicolon; the user writes
// it after the macro invocation.
// NOTE(review): the macro body below appears twice (two indentation
// variants of the same five lines); confirm that only one copy is intended.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname &) = delete; \
classname(classname &&) = delete; \
classname &operator=(const classname &) = delete; \
classname &operator=(classname &&) = delete
private: \
classname(const classname &) = delete; \
classname(classname &&) = delete; \
classname &operator=(const classname &) = delete; \
classname &operator=(classname &&) = delete
#endif
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#pragma once
#include "operators/elementwise_add_op.h"
#include "test_include.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype> class TestElementwiseAddOp {
public:
TestElementwiseAddOp(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// std::cout << " **block size " << blocks.size() << std::endl;
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops =
block_desc->Ops();
// std::cout << " ops " << ops.size() << std::endl;
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op = ops[j];
if (op->Type() == "elementwise_add") {
if (op->GetAttrMap().at("axis").Get<int>() != -1) {
std::cout
<< "attr: axis = "
<< op->GetAttrMap().at("axis").Get<int>()
<< std::endl;
}
}
std::cout << "op:" << op->Type() << std::endl;
if (op->Type() == "elementwise_add" &&
op->Input("X")[0] == "batch_norm_2.tmp_2") {
std::cout << " elementwise_add attr size: "
<< op->GetAttrMap().size() << std::endl;
std::cout
<< " inputs size: " << op->GetInputs().size()
<< std::endl;
std::cout
<< " outputs size: " << op->GetOutputs().size()
<< std::endl;
std::cout << " Input X is : " << op->Input("X")[0]
<< std::endl;
std::cout << " Input Y is : " << op->Input("Y")[0]
<< std::endl;
std::cout
<< " Output Out is : " << op->Output("Out")[0]
<< std::endl;
Attribute axis_attr = op->GetAttrMap().at("axis");
int axis = axis_attr.Get<int>();
std::cout << " Attr axis is : " << axis
<< std::endl;
std::shared_ptr<
operators::ElementwiseAddOp<Dtype, float>>
add = std::make_shared<
operators::ElementwiseAddOp<Dtype, float>>(
op->Type(), op->GetInputs(),
op->GetOutputs(), op->GetAttrMap(),
program_.scope);
ops_of_block_[*block_desc.get()].push_back(add);
}
}
}
}
std::shared_ptr<Tensor> predict_add(Tensor &t1, Tensor &t2) {
// feed
auto scope = program_.scope;
Variable *x_feed_value = scope->Var("batch_norm_2.tmp_2");
auto tensor_x = x_feed_value->GetMutable<Tensor>();
tensor_x->ShareDataWith(t1);
Variable *y_feed_value = scope->Var("batch_norm_0.tmp_3");
auto tensor_y = y_feed_value->GetMutable<Tensor>();
tensor_y->ShareDataWith(t2);
Variable *con_output = scope->Var("elementwise_add_0.tmp_0");
Tensor *output_tensor = con_output->GetMutable<Tensor>();
output_tensor->mutable_data<float>({1, 3, 224, 224});
// std::cout << typeid(output_tensor).name() << std::endl;
// std::cout << "output_tensor dims: " << output_tensor->dims()
// <<
// std::endl;
std::shared_ptr<Tensor> out_tensor =
std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict_add(t1, t2, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
void predict_add(const Tensor &t1, const Tensor &t2, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0;
j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
std::cout << "op -> run()" << std::endl;
op->Run();
}
}
};
template class TestElementwiseAddOp<CPU>;
} // namespace framework
namespace test {
void testElementwiseAdd() {
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(
std::string("../../test/models/"
"image_classification_resnet.inference.model"));
/// input x (1,3,224,224)
paddle_mobile::framework::Tensor inputx;
SetupTensor<float>(&inputx, {1, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1));
float *inputx_ptr = inputx.data<float>();
/// input y (224,)
paddle_mobile::framework::Tensor inputy;
SetupTensor<float>(&inputy, {224}, static_cast<float>(0),
static_cast<float>(1));
float *inputy_ptr = inputy.data<float>();
paddle_mobile::framework::TestElementwiseAddOp<paddle_mobile::CPU>
testElementwiseAddOp(program);
auto output_add = testElementwiseAddOp.predict_add(inputx, inputy);
float *output_add_ptr = output_add->data<float>();
for (int j = 0; j < output_add->numel(); ++j) {
// std::cout << "value of output: " << output_add_ptr[j] <<
// std::endl;
}
/// output (1,3,224,224)
std::cout << "output memory size : " << output_add->memory_size()
<< std::endl;
std::cout << "output numel : " << output_add->numel() << std::endl;
std::cout << inputx_ptr[226] << " + " << inputy_ptr[2] << " = "
<< output_add_ptr[226] << std::endl;
}
} // namespace test
} // namespace paddle_mobile
......@@ -16,6 +16,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include "elementwise_add_op_test.h"
#include "framework/executor.h"
#include "io.h"
#include "test_helper.h"
......@@ -36,45 +37,44 @@ SOFTWARE.
//}
// Test entry point. Loads the ResNet image-classification model, builds an
// Executor for it, runs one prediction on a randomly filled 1x3x32x32 float
// tensor and finally runs the elementwise_add operator test.
// NOTE(review): several statements below appear twice (for example
// `data_set`, `loader`, `executor`, `input`, `output`, `return 0`); this
// looks like two revisions of the body merged together - confirm which
// copy is intended.
int main() {
std::string data_set = "cifar10";
//
// if (data_set == "cifar10") {
// SetupTensor<float>(&input, {FLAGS_batch_size, 3, 32, 32},
// static_cast<float>(0), static_cast<float>(1));
// } else if (data_set == "imagenet") {
// SetupTensor<float>(&input, {FLAGS_batch_size, 3, 224, 224},
// static_cast<float>(0), static_cast<float>(1));
// } else {
// LOG(FATAL) << "Only cifar10 or imagenet is supported.";
// }
std::string data_set = "cifar10";
//
// if (data_set == "cifar10") {
// SetupTensor<float>(&input, {FLAGS_batch_size, 3, 32, 32},
// static_cast<float>(0), static_cast<float>(1));
// } else if (data_set == "imagenet") {
// SetupTensor<float>(&input, {FLAGS_batch_size, 3, 224, 224},
// static_cast<float>(0), static_cast<float>(1));
// } else {
// LOG(FATAL) << "Only cifar10 or imagenet is supported.";
// }
// Load the serialized inference model from a path relative to the
// working directory.
paddle_mobile::Loader<paddle_mobile::CPU> loader;
//../../test/models/image_classification_resnet.inference.model
auto program = loader.Load(std::string(
"../../test/models/image_classification_resnet.inference.model"));
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(
"../../test/models/image_classification_resnet.inference.model"));
paddle_mobile::framework::Executor<paddle_mobile::CPU> executor(program);
paddle_mobile::framework::Executor<paddle_mobile::CPU> executor(program);
// Random input with values drawn from [0, 1).
paddle_mobile::framework::Tensor input;
SetupTensor<float>(&input, {1, 3, 32, 32}, static_cast<float>(0),
static_cast<float>(1));
float *input_ptr = input.data<float>();
for (int i = 0; i < input.numel(); ++i) {
// std::cout << input_ptr[i] << std::endl;
}
paddle_mobile::framework::Tensor input;
SetupTensor<float>(&input, {1, 3, 32, 32}, static_cast<float>(0),
static_cast<float>(1));
float *input_ptr = input.data<float>();
for (int i = 0; i < input.numel(); ++i) {
// std::cout << input_ptr[i] << std::endl;
}
// std::cout << "input: " << input.memory_size() << std::endl;
// std::cout << "input: " << input.numel() << std::endl;
// std::cout << "input: " << input.memory_size() << std::endl;
// std::cout << "input: " << input.numel() << std::endl;
auto output = executor.predict(input);
auto output = executor.predict(input);
// std::cout << "output: " << output->memory_size() << std::endl;
// std::cout << "output: " << output->numel() << std::endl;
// std::cout << "output: " << output->memory_size() << std::endl;
// std::cout << "output: " << output->numel() << std::endl;
// float* output_ptr = output->data<float>();
// for (int j = 0; j < output->numel(); ++j) {
// std::cout << " value of output: " << output_ptr[j] << std::endl;
// }
return 0;
// float* output_ptr = output->data<float>();
// for (int j = 0; j < output->numel(); ++j) {
// std::cout << " value of output: " << output_ptr[j] << std::endl;
//
// Run the standalone elementwise_add operator test.
paddle_mobile::test::testElementwiseAdd();
return 0;
}
......@@ -15,20 +15,21 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
#include <random>
#pragma once
#include "framework/ddim.h"
#include "framework/tensor.h"
#include <random>
// Fills `input` with pseudo-random values of type T.
//
// The buffer is obtained with input->mutable_data<T>(dims). Each element is
// a uniform sample from [0, 1) scaled into [lower, upper). The generator is
// a std::mt19937 seeded from a function-local static counter that starts at
// 100 and is incremented on every call, so the values are reproducible per
// call order and differ between successive calls.
// NOTE(review): the signature line and the body statements below appear
// twice (two formatting variants of the same code); confirm that only one
// copy is intended.
template <typename T>
void SetupTensor(paddle_mobile::framework::Tensor* input,
void SetupTensor(paddle_mobile::framework::Tensor *input,
paddle_mobile::framework::DDim dims, T lower, T upper) {
static unsigned int seed = 100;
std::mt19937 rng(seed++);
std::uniform_real_distribution<double> uniform_dist(0, 1);
static unsigned int seed = 100;
std::mt19937 rng(seed++);
std::uniform_real_distribution<double> uniform_dist(0, 1);
T* input_ptr = input->mutable_data<T>(dims);
for (int i = 0; i < input->numel(); ++i) {
input_ptr[i] = static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
}
T *input_ptr = input->mutable_data<T>(dims);
for (int i = 0; i < input->numel(); ++i) {
input_ptr[i] =
static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
}
}
#include "framework/block_desc.h"
#include "framework/framework.pb.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/program.h"
#include "framework/program_desc.h"
#include "framework/scope.h"
#include "framework/tensor.h"
#include "framework/variable.h"
#include "framework/variable.h"
#include "io.h"
#include "test_helper.h"
#include <map>
#include <string>
#include <vector>
\ No newline at end of file
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import io, re
import sys, os
import subprocess
import platform
COPYRIGHT = '''
Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
# Module-level settings shared by the functions below.
LANG_COMMENT_MARK = None

NEW_LINE_MARK = None

COPYRIGHT_HEADER = None

# Line separator used when the header is assembled for insertion.
if platform.system() == "Windows":
    NEW_LINE_MARK = "\r\n"
else:
    NEW_LINE_MARK = '\n'

# The first non-empty line of the template carries the copyright year.
# splitlines() is used instead of split(NEW_LINE_MARK): the template literal
# always contains "\n" newlines, so splitting on "\r\n" yields one element
# and indexing [1] fails.
COPYRIGHT_HEADER = COPYRIGHT.splitlines()[1]

# Raw string: '\d' in a normal string literal is an invalid escape sequence.
p = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0)

# Replace the template year with the current year reported by `date`.
process = subprocess.Popen(["date", "+%Y"], stdout=subprocess.PIPE)
date, err = process.communicate()
date = date.decode("utf-8").strip()
COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date)
def generate_copyright(template, lang='C'):
    """Render the license template as a block of line comments.

    The first emitted line is COPYRIGHT_HEADER. The template's lines 0 and 1
    and its last line are skipped; every other line is prefixed with the
    comment marker ('#' for Python, '//' otherwise), followed by a space
    unless the line is empty. The result ends with one extra "\\n".
    """
    comment_mark = '#' if lang == 'Python' else "//"
    template_lines = template.split(NEW_LINE_MARK)
    last_index = len(template_lines) - 1

    pieces = [comment_mark + " " + COPYRIGHT_HEADER + NEW_LINE_MARK]
    for index, text in enumerate(template_lines):
        if index in (0, 1, last_index):
            continue
        separator = " " if len(text) != 0 else ""
        pieces.append(comment_mark + separator + text + NEW_LINE_MARK)

    return "".join(pieces) + "\n"
def lang_type(filename):
    """Classify a file by its extension for comment-style selection.

    Returns "Python" for ``.py`` files and "C" for ``.h``, ``.c``, ``.hpp``,
    ``.cc``, ``.cpp``, ``.cu``, ``.cuh``, ``.go`` and ``.proto`` files.
    For any other extension a message naming the file is printed and the
    process exits with status 0.
    """
    c_style_suffixes = (".h", ".c", ".hpp", ".cc", ".cpp", ".cu", ".cuh",
                        ".go", ".proto")
    if filename.endswith(".py"):
        return "Python"
    if filename.endswith(c_style_suffixes):
        return "C"
    # Apply the format; passing filename as a second print() argument left
    # the literal "%s" in the output.
    print("Unsupported filetype %s" % filename)
    sys.exit(0)
PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
def main(argv=None):
    """Prepend the copyright header to every listed file that lacks one.

    A file is left untouched when its first line already contains
    "COPYRIGHT (C)" (case-insensitive), when it starts with a shebang, or when
    either of its first two lines is an encoding declaration.

    Args:
        argv: Argument list to parse; None lets argparse use the process
            arguments.

    Returns:
        1 if at least one file was rewritten, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description='Checker for copyright declaration.')
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        # One handle, closed on every path (including the early `continue`s).
        with io.open(filename, encoding="utf-8") as fd:
            first_line = fd.readline()
            second_line = fd.readline()
            if "COPYRIGHT (C)" in first_line.upper():
                continue
            if (first_line.startswith("#!")
                    or PYTHON_ENCODE.match(second_line) is not None
                    or PYTHON_ENCODE.match(first_line) is not None):
                continue
            fd.seek(0)
            original_contents = fd.read()

        new_contents = generate_copyright(
            COPYRIGHT, lang_type(filename)) + original_contents
        print('Auto Insert Copyright Header {}'.format(filename))
        retv = 1
        # Write back with the same encoding the file was read with; the locale
        # default may be unable to encode non-ASCII source text.
        with io.open(filename, 'w', encoding="utf-8") as output_file:
            output_file.write(new_contents)
    return retv
# Script entry point: the process exit status is main()'s return value.
# SystemExit is raised directly because exit() is a helper installed by the
# site module for interactive sessions and is not guaranteed to exist.
if __name__ == '__main__':
    raise SystemExit(main())
#!/bin/bash
# Pre-commit wrapper around clang-format: verifies that the installed
# clang-format reports a version string containing "version 3." and then
# forwards all arguments to it.
set -e

readonly VERSION="version 3."

version=$(clang-format -version)

if ! [[ $version == *"$VERSION"* ]]; then
    echo "clang-format version check failed."
    echo "a version contains '$VERSION' is needed, but get '$version'"
    echo "you can install the right version, and make an soft-link to '\$PATH' env"
    # Exit statuses are 0-255; use a plain non-zero value instead of -1.
    exit 1
fi

# Quote the argument list so paths containing spaces stay single arguments.
clang-format "$@"
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import io, re
import sys, os
import subprocess
import platform
# Licence template. The literal opens with a newline, so line 0 is empty and
# line 1 carries the copyright notice.
COPYRIGHT = '''
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==============================================================================*/
'''

# Module-level placeholder for the comment prefix; generate_copyright() picks
# its own local value per language.
LANG_COMMENT_MARK = None

# Line terminator used when emitting the generated header.
if platform.system() == "Windows":
    NEW_LINE_MARK = "\r\n"
else:
    NEW_LINE_MARK = '\n'

# Newlines inside a Python string literal are always "\n", whatever the
# platform, so split on "\n": splitting on "\r\n" returns a single element and
# the [1] lookup raises IndexError on Windows.
COPYRIGHT_HEADER = COPYRIGHT.split("\n")[1]

# Replace the four-digit year in the notice with the current year. The year is
# taken from the standard library instead of spawning the external `date`
# program, which does not exist on Windows.
import datetime
p = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0)
date = "{:d}".format(datetime.date.today().year)
COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date)
def generate_copyright(template, lang='C'):
    """Render a licence template as a block of single-line comments.

    The result starts with the module-level COPYRIGHT_HEADER line, followed by
    every template line except the first two and the last one, each prefixed
    with the comment marker for ``lang``. Lines are terminated with the
    module-level NEW_LINE_MARK.

    Args:
        template: Multi-line licence text. Lines 0 and 1 and the final line
            are skipped; COPYRIGHT_HEADER is emitted in their place.
        lang: 'Python' selects '#' as the comment marker; any other value
            selects '//'.

    Returns:
        The commented header followed by one extra newline character.
    """
    comment_mark = '#' if lang == 'Python' else "//"

    # Split on "\n" after normalizing CRLF. The template is a Python string
    # literal, so its lines are separated by "\n" even when NEW_LINE_MARK is
    # "\r\n"; splitting on NEW_LINE_MARK would yield one element and drop the
    # whole licence body.
    lines = template.replace("\r\n", "\n").split("\n")

    ans = comment_mark + " " + COPYRIGHT_HEADER + NEW_LINE_MARK
    for line in lines[2:-1]:
        # Empty lines get the bare marker so no trailing whitespace is emitted.
        separator = " " if line else ""
        ans += comment_mark + separator + line + NEW_LINE_MARK
    return ans + "\n"
def lang_type(filename):
    """Classify a file by the comment style its header must use.

    Args:
        filename: Path or name of the file; only its suffix is inspected.

    Returns:
        "Python" for ``.py`` files, "C" for ``.h``, ``.c``, ``.hpp``, ``.cc``,
        ``.cpp``, ``.cu``, ``.cuh``, ``.go`` and ``.proto`` files.

    Raises:
        SystemExit: with status 0 when the suffix is not supported, after
            printing a message naming the file.
    """
    c_style_suffixes = (".h", ".c", ".hpp", ".cc", ".cpp", ".cu", ".cuh",
                        ".go", ".proto")
    if filename.endswith(".py"):
        return "Python"
    if filename.endswith(c_style_suffixes):
        return "C"
    # Interpolate the name; passing it as a second print() argument left the
    # literal "%s" in the output.
    print("Unsupported filetype %s" % filename)
    # Same status as before, without relying on the site-provided exit().
    raise SystemExit(0)
# Matches a source-encoding declaration comment: optional leading whitespace,
# a '#', then "coding" followed by ':' or '=' and the encoding name (captured
# as group 1).
PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
def main(argv=None):
    """Prepend the copyright header to every listed file that lacks one.

    A file is left untouched when its first line already contains
    "COPYRIGHT " (case-insensitive), when it starts with a shebang, or when
    either of its first two lines is an encoding declaration.

    Args:
        argv: Argument list to parse; None lets argparse use the process
            arguments.

    Returns:
        1 if at least one file was rewritten, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description='Checker for copyright declaration.')
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        # One handle, closed on every path (including the early `continue`s).
        with io.open(filename, encoding="utf-8") as fd:
            first_line = fd.readline()
            second_line = fd.readline()
            if "COPYRIGHT " in first_line.upper():
                continue
            if (first_line.startswith("#!")
                    or PYTHON_ENCODE.match(second_line) is not None
                    or PYTHON_ENCODE.match(first_line) is not None):
                continue
            fd.seek(0)
            original_contents = fd.read()

        new_contents = generate_copyright(
            COPYRIGHT, lang_type(filename)) + original_contents
        print('Auto Insert Copyright Header {}'.format(filename))
        retv = 1
        # Write back with the same encoding the file was read with; the locale
        # default may be unable to encode non-ASCII source text.
        with io.open(filename, 'w', encoding="utf-8") as output_file:
            output_file.write(new_contents)
    return retv
# Script entry point: the process exit status is main()'s return value.
# SystemExit is raised directly because exit() is a helper installed by the
# site module for interactive sessions and is not guaranteed to exist.
if __name__ == '__main__':
    raise SystemExit(main())
#!/bin/bash
# Pre-commit hook: run cpplint on every staged, non-deleted file whose path
# contains neither ".pb." nor "third-party/", and exit non-zero if any cpplint
# run reported a failure.

TOTAL_ERRORS=0

#iclang-tidy *.[ch]pp -checks=*

# --diff-filter=d excludes deleted paths and --name-only prints the current
# path of renamed files. Paths are read one per line so that names containing
# spaces reach cpplint as a single argument; -F makes grep treat the patterns
# as literal text rather than regular expressions.
while IFS= read -r file; do
    # Keep cpplint from consuming the file list on the loop's stdin.
    cpplint "$file" < /dev/null
    TOTAL_ERRORS=$((TOTAL_ERRORS + $?))
done < <(git diff --cached --name-only --diff-filter=d \
             | grep -vF ".pb." | grep -vF "third-party/")

# Exit statuses wrap modulo 256, so an unclamped total of 256 would be
# reported as success.
if [ "$TOTAL_ERRORS" -gt 255 ]; then
    exit 255
fi
exit "$TOTAL_ERRORS"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册