Commit 4d49f1d8 authored by: Q qijun

merge baidu/develop

@@ -137,7 +137,8 @@ if(WITH_GPU)
endif(WITH_GPU)
if(USE_NNPACK)
-  list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt")
+  include(external/nnpack)
+  list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK)
add_subdirectory(proto)
......
@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
  set(NNPACK_FOUND ON)
  INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
set(NNPACK_LIBS)
list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
if (NNPACK_UKERNELS_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
endif()
if (NNPACK_CPUFEATURES_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
endif()
if(NOT ANDROID)
list(APPEND NNPACK_LIBS "rt")
endif()
else()
  message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()
@@ -117,6 +117,8 @@ int DDim::operator[](int idx) const {
  return boost::apply_visitor(DynamicConstIndexer(idx), var);
}
ssize_t DDim::size() const { return arity(*this); }
bool DDim::operator==(DDim d) const {
  if (var.which() != d.getVar().which()) {
    return false;
......
@@ -65,6 +65,8 @@ struct DDim {
  DDimVar getVar() { return var; }
ssize_t size() const;
  bool operator==(DDim d) const;
  bool operator!=(DDim d) const;
......
@@ -49,6 +49,7 @@ TEST(DDim, Equality) {
  // arity of a DDim
  EXPECT_EQ(paddle::framework::arity(ddim), 3);
EXPECT_EQ(ddim.size(), 3);
  // product of a DDim
  EXPECT_EQ(paddle::framework::product(vddim), 45);
......
#pragma once
#include <algorithm>
#include <atomic>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
@@ -214,11 +215,14 @@ class OpRegistry {
  }
  static OperatorPtr CreateOp(const OpDesc& op_desc) {
//! Create a OpPtr by type.
    std::string op_type = op_desc.type();
    OperatorPtr op(creators().at(op_type)());
//! Fill op's data member. Not use constructor because it will be noising
//! for Op developer.
    const OpProto& op_proto = protos().at(op_type);
-    // set op's inputs_ from desc.
    op->type_ = op_desc.type();
+    // set op's inputs_ from desc.
    op->inputs_.reserve((size_t)op_desc.inputs_size());
    std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
              std::back_inserter(op->inputs_));
@@ -226,13 +230,20 @@ class OpRegistry {
    op->outputs_.reserve((size_t)op_desc.outputs_size());
    std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
              std::back_inserter(op->outputs_));
-    // set op's attr;
+    //! Fill attrs, and validate attrs.
    for (auto& attr : op_desc.attrs()) {
      op->attrs_[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
    }
    op_checkers().at(op_type).Check(op->attrs_);
//! Convert Temporary variable name to an unique variable name.
GenerateTempVariableName(op.get());
    // set argument offsets stored in op.
    CreateInOutOffsetMap(op, op_proto);
//! Other op's custom Init for a complex Op. For simple Op, the Init
//! method do nothing.
    op->Init();
    return op;
  }
@@ -248,6 +259,17 @@ class OpRegistry {
  };
 private:
static void GenerateTempVariableName(OperatorBase* op) {
static std::atomic<size_t> gUniqId(0UL);
for (auto& outname : op->outputs_) {
if (outname == OperatorBase::TMP_VAR_NAME()) {
outname += op->type_;
outname += "@";
outname += std::to_string(gUniqId.fetch_add(1));
}
}
}
  static std::unordered_map<std::string, OpCreator>& creators() {
    static std::unordered_map<std::string, OpCreator> creators_;
    return creators_;
......
@@ -91,23 +91,21 @@ std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
std::string OperatorBase::DebugString() const {
  std::stringstream ss;
-  ss << "=================\n";
-  ss << "type = " << type_ << "\n";
-  ss << "inputs = [";
-  for (auto& ipt : inputs_) {
-    ss << ipt << ", ";
-  }
-  ss << "]\n";
-  ss << "outputs = [";
-  for (auto& opt : outputs_) {
-    ss << opt << ", ";
-  }
-  ss << "]\n";
-  ss << "attr_keys = [";
-  for (auto& attr : attrs_) {
-    ss << attr.first << ", ";
-  }
-  ss << "]\n";
+  ss << "Op(" << type_ << "), inputs:(";
+  for (size_t i = 0; i < inputs_.size(); ++i) {
+    ss << inputs_[i];
+    if (i != inputs_.size() - 1) {
+      ss << ", ";
+    }
+  }
+  ss << "), outputs:(";
+  for (size_t i = 0; i < outputs_.size(); ++i) {
+    ss << outputs_[i];
+    if (i != outputs_.size() - 1) {
+      ss << ", ";
+    }
+  }
+  ss << ").";
  return ss.str();
}
......
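For reference, a minimal sketch of the new one-line format as seen from Python, assuming the add_two operator is registered and the op_creations helpers introduced later in this change are importable:

    import paddle.v2.framework.create_op_creation_methods as creation

    # build an add_two op with inputs a, b and output z
    add_op = creation.op_creations.add_two(X="a", Y="b", Out="z")
    print(str(add_op))  # -> Op(add_two), inputs:(a, b), outputs:(z).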
@@ -56,6 +56,13 @@ using OperatorPtr = std::shared_ptr<OperatorBase>;
 */
class OperatorBase {
 public:
/// If a variable is a empty variable, that name will be used.
static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
/// If a variable is a temporary variable, that name will be set in Python,
/// but it will be convert to a unique name in scope after OpCreator.
static std::string TMP_VAR_NAME() { return "@TEMP@"; }
  virtual ~OperatorBase() {}
  template <typename T>
......
@@ -29,9 +29,7 @@ namespace framework {
class Tensor {
 public:
-  Tensor() : numel_(0), offset_(0) {}
-  Tensor& operator=(const Tensor& src) = delete;
+  Tensor() : offset_(0) {}
  template <typename T>
  const T* data() const {
@@ -48,34 +46,33 @@ class Tensor {
  }
  template <typename T>
-  T* mutable_data(DDim dims, paddle::platform::Place place) {
+  T* mutable_data(DDim dims, platform::Place place) {
    set_dims(dims);
    return mutable_data<T>(place);
  }
  template <typename T>
-  T* mutable_data(paddle::platform::Place place) {
-    PADDLE_ENFORCE(numel_ > 0,
-                   "Tensor::numel_ must be larger than zero to call "
+  T* mutable_data(platform::Place place) {
+    PADDLE_ENFORCE(product(dims_) > 0,
+                   "Tensor's numel must be larger than zero to call "
                   "Tensor::mutable_data. Call Tensor::set_dim first.");
    if (holder_ == nullptr ||
        !(holder_->place() ==
          place) /* some versions of boost::variant don't have operator!= */
-        || holder_->size() < numel_ * sizeof(T) + offset_) {
+        || holder_->size() < product(dims_) * sizeof(T) + offset_) {
      if (platform::is_cpu_place(place)) {
        holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
-            boost::get<platform::CPUPlace>(place), numel_ * sizeof(T)));
-      }
+            boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
+      } else if (platform::is_gpu_place(place)) {
#ifdef __CUDACC__
-      else if (platform::is_gpu_place(place)) {
        holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
-            boost::get<platform::GPUPlace>(place), numel_ * sizeof(T)));
-      }
+            boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
#else
-      else if (platform::is_gpu_place(place)) {
-        PADDLE_ENFORCE(true, "GPU not support!");
-      }
+        PADDLE_ENFORCE(true, "'GPUPlace' is not supported in CPU only device.");
#endif
+      } else {
+        PADDLE_ENFORCE(true, "Unknown 'place'.");
+      }
      offset_ = 0;
    }
    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
@@ -98,7 +95,7 @@ class Tensor {
  // flat to rank = 1
  template <typename T>
  typename TTypes<T>::Flat flat() {
-    return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
  }
  // to TensorType Vec
@@ -129,7 +126,7 @@ class Tensor {
  template <typename T>
  typename TTypes<T>::ConstFlat flat() const {
-    return shaped<T, 1>(make_ddim({static_cast<int>(numel_)}));
+    return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
  }
  template <typename T>
@@ -151,12 +148,12 @@ class Tensor {
  }
  template <typename T>
-  void CopyFrom(const Tensor& src, paddle::platform::Place dst_place) {
+  void CopyFrom(const Tensor& src, platform::Place dst_place) {
    PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
                       platform::is_cpu_place(dst_place),
                   "Tensor::CopyFrom only support CPU now.");
    src.CheckDims<T>();
-    size_t size = src.numel_ * sizeof(T);
+    size_t size = product(src.dims_) * sizeof(T);
    set_dims(src.dims());
    const void* src_ptr = static_cast<const void*>(src.data<T>());
    void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
@@ -190,7 +187,6 @@ class Tensor {
      return;
    }
    dims_ = dims;
-    numel_ = product(dims_);
  }
  DDim dims() const { return dims_; }
@@ -201,7 +197,7 @@ class Tensor {
  struct Placeholder {
    virtual ~Placeholder() {}
    virtual void* ptr() const = 0;
-    virtual paddle::platform::Place place() const = 0;
+    virtual platform::Place place() const = 0;
    virtual size_t size() const = 0;
  };
@@ -212,9 +208,7 @@ class Tensor {
  class Deleter {
   public:
    Deleter(PType place) : place_(place) {}
-    void operator()(T* ptr) {
-      paddle::memory::Free(place_, static_cast<void*>(ptr));
-    }
+    void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }
   private:
    PType place_;
@@ -222,17 +216,17 @@ class Tensor {
   public:
    PlaceholderImpl(PlaceType place, size_t size)
-        : ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
+        : ptr_(static_cast<T*>(memory::Alloc(place, size)),
               Deleter<PlaceType>(place)),
          place_(place),
          size_(size) {}
    virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
    virtual size_t size() const { return size_; }
-    virtual paddle::platform::Place place() const { return place_; }
+    virtual platform::Place place() const { return place_; }
    std::unique_ptr<T, Deleter<PlaceType>> ptr_;
-    paddle::platform::Place place_;  // record the place of ptr_.
+    platform::Place place_;  // record the place of ptr_.
    size_t size_;  // size of the memory block.
  };
@@ -240,14 +234,13 @@ class Tensor {
  inline void CheckDims() const {
    PADDLE_ENFORCE(holder_ != nullptr,
                   "Tenosr holds no memory. Call Tensor::mutable_data first.");
-    PADDLE_ENFORCE(holder_->size() >= numel_ * sizeof(T) + offset_,
+    PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
                   "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
                   "first to re-allocate memory.");
  }
  std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
  DDim dims_;
-  size_t numel_;   // cache of `product(dims_)`
  size_t offset_;  // marks the begin of tensor data area.
};
......
@@ -47,7 +47,7 @@ TEST(Tensor, DataAssert) {
/* following tests are not available at present
   because Memory::Alloc() and Memory::Free() have not been ready.
*/
TEST(Tensor, MutableData) {
  using namespace paddle::framework;
  using namespace paddle::platform;
@@ -72,7 +72,7 @@ TEST(Tensor, MutableData) {
    p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), CPUPlace());
    EXPECT_EQ(p1, p2);
  }
#ifdef __CUDACC__
  {
    Tensor src_tensor;
    float* p1 = nullptr;
@@ -94,6 +94,7 @@ TEST(Tensor, MutableData) {
    p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), GPUPlace());
    EXPECT_EQ(p1, p2);
  }
#endif
}
TEST(Tensor, ShareDataFrom) {
@@ -108,9 +109,11 @@ TEST(Tensor, ShareDataFrom) {
      dst_tensor.ShareDataFrom<float>(src_tensor);
    } catch (EnforceNotMet err) {
      caught = true;
std::string msg = "Tenosr holds no memory. Call Tensor::mutable_data std::string msg =
first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); "Tenosr holds no memory. Call Tensor::mutable_data first.";
++i) { ASSERT_EQ(what[i], msg[i]); const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
} }
} }
ASSERT_TRUE(caught); ASSERT_TRUE(caught);
@@ -120,6 +123,7 @@
    ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
  }
#ifdef __CUDACC__
  {
    Tensor src_tensor;
    Tensor dst_tensor;
@@ -127,6 +131,7 @@
    dst_tensor.ShareDataFrom<int>(src_tensor);
    ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
  }
#endif
}
TEST(Tensor, Slice) {
@@ -155,6 +160,7 @@ TEST(Tensor, Slice) {
    EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
  }
#ifdef __CUDACC__
  {
    Tensor src_tensor;
    src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
@@ -176,6 +182,7 @@ TEST(Tensor, Slice) {
    EXPECT_EQ(slice_data_address, slice_mutable_data_address);
    EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address);
  }
#endif
}
TEST(Tensor, CopyFrom) {
@@ -203,4 +210,3 @@ TEST(Tensor, CopyFrom) {
    EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
  }
}
-*/
\ No newline at end of file
@@ -11,7 +11,6 @@ if(WITH_GPU)
endif()
if(USE_NNPACK)
-    include(nnpack/nnpack.cmake)
    list(APPEND cpp_files nnpack/NNPACKConvOp.cpp)
    if(WITH_TESTING)
        add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp)
......
@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/function/ConvOp.h"
DEFINE_bool(nnpack_allocate_outside,
-            false,
+            true,
            "Allocate and free workspace memory outside the NNPACK interface.");
DEFINE_int32(nnpack_num_threads,
             0,
@@ -58,18 +58,10 @@ public:
    workspaceBuffer_ = nullptr;
    workspaceSize_ = 0;
-    threadpool_ = nullptr;
-    if (FLAGS_nnpack_num_threads) {
-      threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
-      VLOG(3) << "Number of threads "
-              << pthreadpool_get_threads_count(threadpool_);
-    }
+    create_nnpack_threadpool();
  }
  ~NNPACKConvFunction() {
-    if (threadpool_) {
-      pthreadpool_destroy(threadpool_);
-    }
    if (workspaceBuffer_) {
      free(workspaceBuffer_);
    }
@@ -225,14 +217,25 @@ public:
    }
  }
static void create_nnpack_threadpool() {
if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
}
private:
  nnp_convolution_algorithm algorithm_;
  nnp_convolution_transform_strategy transform_strategy_;
  void* workspaceBuffer_;
  size_t workspaceSize_;
-  pthreadpool_t threadpool_;
+  static pthreadpool_t threadpool_;
};
template <DeviceType Device>
pthreadpool_t NNPACKConvFunction<Device>::threadpool_ = nullptr;
REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);
}  // namespace paddle
@@ -63,6 +63,23 @@ All parameter, weight, gradient are variables in Paddle.
        }
        return ret_values;
      });
m.def_submodule(
"var_names",
"The module will return special predefined variable name in Paddle")
.def("empty", pd::OperatorBase::EMPTY_VAR_NAME)
.def("temp", pd::OperatorBase::TMP_VAR_NAME);
py::class_<pd::OperatorBase, pd::OperatorPtr>(m, "Operator")
.def("__str__", &pd::OperatorBase::DebugString)
.def_static("create", [](const std::string& protobin) {
pd::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
return pd::OpRegistry::CreateOp(desc);
});
  return m.ptr();
}
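A minimal usage sketch for these new bindings, assuming the extension module is importable as paddle.v2.framework.core and that an add_two operator (two plain inputs, one output, no attributes) is registered; the OpDesc is built by hand here only for illustration:

    import paddle.v2.framework.core as core
    import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2

    desc = op_desc_pb2.OpDesc()
    desc.type = "add_two"
    desc.inputs.extend(["a", "b"])
    desc.outputs.extend(["z"])
    op = core.Operator.create(desc.SerializeToString())   # parsed and validated in C++
    print(str(op))                                        # uses the __str__ binding -> DebugString()
    print(core.var_names.temp(), core.var_names.empty())  # "@TEMP@", "@EMPTY@"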
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2
import cStringIO
def get_all_op_protos():
"""
Get all registered op proto from Paddle C++
:return: list of OpProto
"""
    protostrs = core.get_all_op_protos()
    ret_values = []
    for pbstr in protostrs:
        op_proto = op_proto_pb2.OpProto.FromString(str(pbstr))
        ret_values.append(op_proto)
    return ret_values
class OpDescCreationMethod(object):
"""
A Functor object to convert user input(use key word args) to OpDesc based on
OpProto.
:param op_proto: The OpProto object.
:type op_proto: op_proto_pb2.OpProto
"""
def __init__(self, op_proto):
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Argument should be OpProto")
self.__op_proto__ = op_proto
def __call__(self, *args, **kwargs):
"""
Convert user input to OpDesc. Only key-word args are supported.
:return: OpDesc based on user input
:rtype: op_desc_pb2.OpDesc
"""
if len(args) != 0:
raise ValueError("Only keyword arguments is supported by Paddle")
op_desc = op_desc_pb2.OpDesc()
# Inputs
ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output(
"input", kwargs, self.__op_proto__.inputs)
op_desc.inputs.extend(ipts)
if ipt_format is not None:
op_desc.attrs.extend([ipt_format])
# Outputs
outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output(
"output", kwargs, self.__op_proto__.outputs)
op_desc.outputs.extend(outs)
if out_format is not None:
op_desc.attrs.extend([out_format])
if len(tmp_index) != 0:
tmp_index_attr = op_desc.attrs.add()
tmp_index_attr.type = attr_type_pb2.INTS
tmp_index_attr.name = "temporary_index"
tmp_index_attr.ints.extend(tmp_index)
# Types
op_desc.type = self.__op_proto__.type
# Attrs
for attr in self.__op_proto__.attrs:
if attr.generated:
continue
user_defined_attr = kwargs.get(attr.name, None)
if user_defined_attr is not None:
new_attr = op_desc.attrs.add()
new_attr.name = attr.name
new_attr.type = attr.type
if attr.type == attr_type_pb2.INT:
new_attr.i = user_defined_attr
elif attr.type == attr_type_pb2.FLOAT:
new_attr.f = user_defined_attr
elif attr.type == attr_type_pb2.STRING:
new_attr.s = user_defined_attr
elif attr.type == attr_type_pb2.INTS:
new_attr.ints.extend(user_defined_attr)
elif attr.type == attr_type_pb2.FLOATS:
new_attr.floats.extend(user_defined_attr)
elif attr.type == attr_type_pb2.STRINGS:
new_attr.strings.extend(user_defined_attr)
else:
raise NotImplementedError("Not support attribute type " +
attr.type)
return op_desc
@staticmethod
def extract_input_or_output(in_out, kwargs, meta):
"""
Extract input variable names or output variable names from key-word
arguments, which base on VarProtos.
:param in_out: "input" or "output"
:param kwargs: key-word arguments that user inputted.
:param meta: a list of VarProto
:return: The three object will be return. The variable names. The
input_format or output_format attribute(None if the input or output is
not multiple). The temporary variable index list.
"""
multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta))
tmp_index = []
retv = []
if multiple:
var_format = op_desc_pb2.AttrDesc()
var_format.type = attr_type_pb2.INTS
var_format.name = "%s_format" % in_out
var_format.ints.append(0)
for var in meta:
var_name = var.name
if var.temporary:
var_name = [core.var_names.temp()]
tmp_index.append(len(retv))
else:
var_name = kwargs.get(var_name, [])
if not isinstance(var_name, list):
var_name = [var_name]
retv.extend(var_name)
var_format.ints.append(len(var_name) + var_format.ints[-1])
return retv, var_format, tmp_index
else:
for var in meta:
if var.temporary:
retv.append(kwargs.get(var.name, core.var_names.temp()))
tmp_index.append(len(retv))
else:
retv.append(kwargs.get(var.name, core.var_names.empty()))
return retv, None, tmp_index
@staticmethod
def any_is_true(generator):
"""
Reduce a bool array to one. If any of them is True, then return True.
"""
for flag in generator:
if flag:
return True
return False
def get_docstring_from_op_proto(op_proto):
"""
Generate docstring from a OpProto
:param op_proto: a OpProto instance.
:type op_proto: op_proto_pb2.OpProto
:return: docstring
"""
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Input must be OpProto")
f = cStringIO.StringIO()
f.write(op_proto.comment)
f.write("\n")
def __append_param__(name, comment, type):
# Maybe replace the following line with template engine is better.
f.write(":param ")
f.write(name)
f.write(": ")
f.write(comment)
f.write("\n")
f.write(":type ")
f.write(name)
f.write(": ")
f.write(type)
f.write("\n")
for ipt in op_proto.inputs:
__append_param__(ipt.name, ipt.comment, "list | basestr"
if ipt.multiple else "basestr")
temp_var_prefix = \
"This is a temporary variable. It does not have to set by user. "
for opt in op_proto.outputs:
__append_param__(opt.name, opt.comment if not opt.temporary else
temp_var_prefix + opt.comment, "list | basestr"
if opt.multiple else "basestr")
for attr in op_proto.attrs:
attr_type = None
if attr.type == attr_type_pb2.INT:
attr_type = "int"
elif attr.type == attr_type_pb2.FLOAT:
attr_type = "float"
elif attr.type == attr_type_pb2.STRING:
attr_type = "basestr"
elif attr.type == attr_type_pb2.INTS:
attr_type = "list of int"
elif attr.type == attr_type_pb2.FLOATS:
attr_type = "list of float"
elif attr.type == attr_type_pb2.STRINGS:
attr_type = "list of basestr"
if attr_type is None:
raise RuntimeError("Not supported attribute type " + attr.type)
__append_param__(attr.name, attr.comment, attr_type)
return f.getvalue()
def create_op_creation_method(op_proto):
"""
Generate op creation method for an OpProto
"""
method = OpDescCreationMethod(op_proto)
def __impl__(*args, **kwargs):
opdesc = method(*args, **kwargs)
return core.Operator.create(opdesc.SerializeToString())
__impl__.__doc__ = get_docstring_from_op_proto(op_proto)
return __impl__
class OpCreationsHolder(object):
"""
A object will holds all op creation methods.
Use `op_creations.xxx_op` to access them.
"""
pass
op_creations = OpCreationsHolder()
def __bootstrap__():
"""
Bootstrap function for this module. It will dynamic create all op creation
methods in runtime.
"""
for op_proto in get_all_op_protos():
func = create_op_creation_method(op_proto)
func.__name__ = str(op_proto.type)
setattr(op_creations, func.__name__, func)
__bootstrap__()
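To make the input_format bookkeeping above concrete, a small sketch assuming an fc OpProto whose inputs X and W are multiple and b is single (mirroring the unit test below); fc_proto is a hypothetical name for that proto object:

    method = OpDescCreationMethod(fc_proto)
    desc = method(X=["x1", "x2", "x3"], W=["w1", "w2", "w3"], b="b", Y="y")
    # desc.inputs            -> ["x1", "x2", "x3", "w1", "w2", "w3", "b"]
    # input_format attribute -> [0, 3, 6, 7]: X fills slots [0, 3), W fills [3, 6), b fills [6, 7)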
import unittest
import paddle.v2.framework.create_op_creation_methods as creation
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2
-class TestOpCreationsMethods(unittest.TestCase):
-    def test_all_protos(self):
+class TestGetAllProtos(unittest.TestCase):
+    def test_all(self):
        all_protos = creation.get_all_op_protos()
        self.assertNotEqual(0, len(all_protos))
@@ -11,5 +15,240 @@ class TestOpCreationsMethods(unittest.TestCase):
            self.assertTrue(each.IsInitialized())
class TestOpDescCreationMethod(unittest.TestCase):
def test_plain_input_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
ipt.name = "X"
ipt.comment = "not matter"
ipt = op.inputs.add()
ipt.name = "Y"
ipt.comment = "not matter"
opt = op.outputs.add()
opt.name = "Z"
opt.comment = "not matter"
op.comment = "not matter"
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
output = method(X="a", Y="b", Z="c")
expected = op_desc_pb2.OpDesc()
expected.type = "test"
expected.inputs.extend(["a", "b"])
expected.outputs.append("c")
self.assertEqual(expected, output)
def test_multiple_input_plain_output(self):
op = op_proto_pb2.OpProto()
op.type = "fc"
ipt = op.inputs.add()
ipt.name = "X"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt.name = "W"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt.name = "b"
ipt.comment = ""
out = op.outputs.add()
out.name = "Y"
out.comment = ""
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
generated1 = method(X="x", W="w", b="b", Y="y")
expected1 = op_desc_pb2.OpDesc()
expected1.inputs.extend(['x', 'w', 'b'])
expected1.outputs.extend(['y'])
expected1.type = 'fc'
attr = expected1.attrs.add()
attr.name = 'input_format'
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 1, 2, 3])
self.assertEqual(expected1, generated1)
generated2 = method(
X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y')
expected2 = op_desc_pb2.OpDesc()
expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b'])
expected2.outputs.extend(['y'])
expected2.type = 'fc'
attr = expected2.attrs.add()
attr.name = 'input_format'
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 3, 6, 7])
self.assertEqual(expected2, generated2)
def test_attrs(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
ipt.name = 'X'
ipt.comment = ""
def __add_attr__(name, type):
attr = op.attrs.add()
attr.name = name
attr.comment = ""
attr.type = type
__add_attr__("int_attr", attr_type_pb2.INT)
__add_attr__("float_attr", attr_type_pb2.FLOAT)
__add_attr__("string_attr", attr_type_pb2.STRING)
__add_attr__("ints_attr", attr_type_pb2.INTS)
__add_attr__("floats_attr", attr_type_pb2.FLOATS)
__add_attr__("strings_attr", attr_type_pb2.STRINGS)
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
generated = method(
X="a",
int_attr=10,
float_attr=3.2,
string_attr="test_str",
ints_attr=[0, 1, 2, 3, 4],
floats_attr=[0.2, 3.2, 4.5],
strings_attr=["a", "b", "c"])
expected = op_desc_pb2.OpDesc()
expected.type = "test"
expected.inputs.extend(['a'])
attr = expected.attrs.add()
attr.name = "int_attr"
attr.type = attr_type_pb2.INT
attr.i = 10
attr = expected.attrs.add()
attr.name = "float_attr"
attr.type = attr_type_pb2.FLOAT
attr.f = 3.2
attr = expected.attrs.add()
attr.name = "string_attr"
attr.type = attr_type_pb2.STRING
attr.s = "test_str"
attr = expected.attrs.add()
attr.name = "ints_attr"
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 1, 2, 3, 4])
attr = expected.attrs.add()
attr.name = 'floats_attr'
attr.type = attr_type_pb2.FLOATS
attr.floats.extend([0.2, 3.2, 4.5])
attr = expected.attrs.add()
attr.name = 'strings_attr'
attr.type = attr_type_pb2.STRINGS
attr.strings.extend(['a', 'b', 'c'])
self.assertEqual(expected, generated)
def test_input_temporary_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
out = op.outputs.add()
out.name = "OUT"
out.comment = ""
out = op.outputs.add()
out.name = "TMP"
out.comment = ""
out.temporary = True
out = op.outputs.add()
out.name = "OUT2"
out.comment = ""
op.comment = ""
method = creation.OpDescCreationMethod(op)
generated = method(OUT="a", OUT2="b")
desc = op_desc_pb2.OpDesc()
desc.outputs.extend(["a", core.var_names.temp(), "b"])
desc.type = "test"
attr = desc.attrs.add()
attr.name = "temporary_index"
attr.type = attr_type_pb2.INTS
attr.ints.append(2)
self.assertEqual(generated, desc)
class TestOpCreationDocStr(unittest.TestCase):
def test_all(self):
op = op_proto_pb2.OpProto()
op.type = "test"
op.comment = """Test Op.
This op is used for unit test, not a real op.
"""
a = op.inputs.add()
a.name = "a"
a.comment = "Input a for test op"
a.multiple = True
b = op.inputs.add()
b.name = "b"
b.comment = "Input b for test op"
self.assertTrue(op.IsInitialized())
o1 = op.outputs.add()
o1.name = "output"
o1.comment = "The output of test op"
o2 = op.outputs.add()
o2.name = "temp output"
o2.comment = "The temporary output of test op"
o2.temporary = True
test_str = op.attrs.add()
test_str.name = "str_attr"
test_str.type = attr_type_pb2.STRING
test_str.comment = "A string attribute for test op"
actual = creation.get_docstring_from_op_proto(op)
expected_docstring = '''Test Op.
This op is used for unit test, not a real op.
:param a: Input a for test op
:type a: list | basestr
:param b: Input b for test op
:type b: basestr
:param output: The output of test op
:type output: basestr
:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op
:type temp output: basestr
:param str_attr: A string attribute for test op
:type str_attr: basestr
'''
self.assertEqual(expected_docstring, actual)
class TestOpCreations(unittest.TestCase):
def test_all(self):
add_op = creation.op_creations.add_two(X="a", Y="b", Out="z")
self.assertIsNotNone(add_op)
# Invoke C++ DebugString()
self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
str(add_op))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
-import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers
"""
@@ -17,6 +16,7 @@ __all__ = [
class Optimizer(object):
    def __init__(self, **kwargs):
import py_paddle.swig_paddle as swig_api
        if 'batch_size' in kwargs:
            del kwargs['batch_size']  # not important for python library.
@@ -35,18 +35,22 @@ class Optimizer(object):
        For each optimizer(SGD, Adam), GradientMachine should enable different
        buffers.
        """
import py_paddle.swig_paddle as swig_api
        tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__)
        assert isinstance(tmp, swig_api.ParameterOptimizer)
        return tmp.getParameterTypes()

    def __create_local_updater__(self):
import py_paddle.swig_paddle as swig_api
        return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__)

    def __create_remote_updater__(self, pass_num, use_sparse_updater):
import py_paddle.swig_paddle as swig_api
        return swig_api.ParameterUpdater.createRemoteUpdater(
            self.__opt_conf__, pass_num, use_sparse_updater)

    def __create_new_remote_updater__(self, pserver_spec, use_etcd):
import py_paddle.swig_paddle as swig_api
        return swig_api.ParameterUpdater.createNewRemoteUpdater(
            self.__opt_conf__, pserver_spec, use_etcd)
......