提交 31de6095 编写于 作者: P peizhilin

Merge branch 'windows/build' into windows/online

test=develop
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
| kexinzhao | Ke-Xin Zhao | | kexinzhao | Ke-Xin Zhao |
| kuke | Yi-Bing Liu | | kuke | Yi-Bing Liu |
| lcy-seso | Ying Cao | | lcy-seso | Ying Cao |
| cjld | Dun Liang |
| lipeng-unisound | Peng Li | | lipeng-unisound | Peng Li |
| liuyuan | Yuan Liu | | liuyuan | Yuan Liu |
| livc | Zhao Li | | livc | Zhao Li |
......
...@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME) ...@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS) set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WIN32)
list(APPEND win32_deps shlwapi)
if("${cc_test_DEPS};" MATCHES "python;")
list(REMOVE_ITEM cc_test_DEPS python)
list(APPEND win32_deps ${PYTHON_LIBRARIES})
endif()
endif(WIN32)
add_executable(${TARGET_NAME} ${cc_test_SRCS}) add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
if(WIN32) if(WIN32)
target_link_libraries(${TARGET_NAME} shlwapi) target_link_libraries(${TARGET_NAME} ${win32_deps})
endif(WIN32) endif(WIN32)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
...@@ -679,7 +686,7 @@ function(py_test TARGET_NAME) ...@@ -679,7 +686,7 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS) set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true FLAGS_cpu_deterministic=true
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
......
...@@ -103,6 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's ...@@ -103,6 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)) paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None))
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False)) paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
......
...@@ -23,7 +23,7 @@ namespace paddle { ...@@ -23,7 +23,7 @@ namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
const std::vector<Scope *> &local_scopes, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
...@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() { ...@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
} }
if (platform::is_gpu_place(lod_tensors[0]->place())) { if (platform::is_gpu_place(lod_tensors[0]->place())) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr."); PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
int dtype = -1; int dtype = -1;
size_t numel = 0; size_t numel = 0;
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -29,7 +29,7 @@ namespace framework { ...@@ -29,7 +29,7 @@ namespace framework {
namespace details { namespace details {
struct AllReduceOpHandle : public OpHandleBase { struct AllReduceOpHandle : public OpHandleBase {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes, AllReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
const platform::NCCLContextMap *ctxs); const platform::NCCLContextMap *ctxs);
...@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase { ...@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
private: private:
std::vector<Scope *> local_scopes_; std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_; std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const platform::NCCLContextMap *nccl_ctxs_; const platform::NCCLContextMap *nccl_ctxs_;
#endif #endif
}; };
......
...@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar( ...@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
}); });
} }
} else { } else {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
VarHandle *out_handle = nullptr; VarHandle *out_handle = nullptr;
int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device; int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
std::vector<std::function<void()>> broadcast_calls; std::vector<std::function<void()>> broadcast_calls;
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -34,7 +34,7 @@ namespace details { ...@@ -34,7 +34,7 @@ namespace details {
struct BroadcastOpHandle : public OpHandleBase { struct BroadcastOpHandle : public OpHandleBase {
public: public:
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
BroadcastOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes, BroadcastOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
const platform::NCCLContextMap *nccl_ctxs) const platform::NCCLContextMap *nccl_ctxs)
...@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase { ...@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
std::vector<Scope *> local_scopes_; std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_; std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const platform::NCCLContextMap *nccl_ctxs_; const platform::NCCLContextMap *nccl_ctxs_;
#endif #endif
......
...@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle { ...@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
std::vector<std::unique_ptr<ir::Node>> nodes_; std::vector<std::unique_ptr<ir::Node>> nodes_;
std::vector<p::Place> place_list_; std::vector<p::Place> place_list_;
bool use_gpu_; bool use_gpu_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_; std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif #endif
...@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle { ...@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
for (size_t j = 0; j < ctxs_.size(); ++j) { for (size_t j = 0; j < ctxs_.size(); ++j) {
ctxs_[j]->Wait(); ctxs_[j]->Wait();
} }
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (nccl_ctxs_) { if (nccl_ctxs_) {
nccl_ctxs_->WaitAll(); nccl_ctxs_->WaitAll();
} }
...@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle { ...@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
void InitCtxOnGpu(bool use_gpu) { void InitCtxOnGpu(bool use_gpu) {
use_gpu_ = use_gpu; use_gpu_ = use_gpu;
if (use_gpu_) { if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int count = p::GetCUDADeviceCount(); int count = p::GetCUDADeviceCount();
if (count <= 1) { if (count <= 1) {
LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA " LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
...@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle { ...@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
place_list_.push_back(p); place_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p)); ctxs_.emplace_back(new p::CPUDeviceContext(p));
} }
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_.reset(nullptr); nccl_ctxs_.reset(nullptr);
#endif #endif
} }
...@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle { ...@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
nodes_.emplace_back( nodes_.emplace_back(
ir::CreateNodeForTest("node0", ir::Node::Type::kOperation)); ir::CreateNodeForTest("node0", ir::Node::Type::kOperation));
if (use_gpu_) { if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_, op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
place_list_, nccl_ctxs_.get()); place_list_, nccl_ctxs_.get());
#else #else
PADDLE_THROW("CUDA is not support."); PADDLE_THROW("CUDA is not support.");
#endif #endif
} else { } else {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_, op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
place_list_, nccl_ctxs_.get()); place_list_, nccl_ctxs_.get());
#else #else
......
...@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply( ...@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
const std::string &loss_var_name, const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names, const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes, const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const { const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
#else #else
const bool use_cuda) const { const bool use_cuda) const {
...@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply( ...@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass->Erase("local_scopes"); pass->Erase("local_scopes");
pass->SetNotOwned<const std::vector<Scope *>>("local_scopes", pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
&local_scopes); &local_scopes);
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr; platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
pass->Erase("nccl_ctxs"); pass->Erase("nccl_ctxs");
pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx); pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -98,7 +98,7 @@ struct BuildStrategy { ...@@ -98,7 +98,7 @@ struct BuildStrategy {
const std::string &loss_var_name, const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names, const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes, const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const; const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
#else #else
const bool use_cuda) const; const bool use_cuda) const;
......
...@@ -20,7 +20,7 @@ namespace paddle { ...@@ -20,7 +20,7 @@ namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle::DataBalanceOpHandle( DataBalanceOpHandle::DataBalanceOpHandle(
ir::Node *node, const std::vector<Scope *> &local_scopes, ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -29,7 +29,7 @@ namespace details { ...@@ -29,7 +29,7 @@ namespace details {
struct DataBalanceOpHandle : public OpHandleBase { struct DataBalanceOpHandle : public OpHandleBase {
public: public:
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes, DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
const platform::NCCLContextMap *ctxs); const platform::NCCLContextMap *ctxs);
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -35,7 +35,7 @@ namespace details { ...@@ -35,7 +35,7 @@ namespace details {
struct FusedBroadcastOpHandle : public BroadcastOpHandle { struct FusedBroadcastOpHandle : public BroadcastOpHandle {
public: public:
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
FusedBroadcastOpHandle(ir::Node *node, FusedBroadcastOpHandle(ir::Node *node,
const std::vector<Scope *> local_scopes, const std::vector<Scope *> local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
......
...@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { ...@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
nodes_.emplace_back( nodes_.emplace_back(
ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation)); ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation));
if (use_gpu_) { if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new FusedBroadcastOpHandle( op_handle_ = new FusedBroadcastOpHandle(
nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get()); nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
#else #else
PADDLE_THROW("CUDA is not supported."); PADDLE_THROW("CUDA is not supported.");
#endif #endif
} else { } else {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new FusedBroadcastOpHandle( op_handle_ = new FusedBroadcastOpHandle(
nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get()); nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
#else #else
......
...@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const { ...@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
places_ = Get<const std::vector<platform::Place>>(kPlaces); places_ = Get<const std::vector<platform::Place>>(kPlaces);
local_scopes_ = Get<const std::vector<Scope *>>(kLocalScopes); local_scopes_ = Get<const std::vector<Scope *>>(kLocalScopes);
strategy_ = Get<const BuildStrategy>(kStrategy); strategy_ = Get<const BuildStrategy>(kStrategy);
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_ = &Get<platform::NCCLContextMap>("nccl_ctxs"); nccl_ctxs_ = &Get<platform::NCCLContextMap>("nccl_ctxs");
#endif #endif
...@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl( ...@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
} }
} }
bool use_gpu = false; bool use_gpu = false;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
use_gpu = nccl_ctxs_ != nullptr; use_gpu = nccl_ctxs_ != nullptr;
#endif #endif
...@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const { ...@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
void MultiDevSSAGraphBuilder::SetCommunicationContext( void MultiDevSSAGraphBuilder::SetCommunicationContext(
OpHandleBase *op_handle, const platform::Place &p) const { OpHandleBase *op_handle, const platform::Place &p) const {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (nccl_ctxs_ == nullptr) { if (nccl_ctxs_ == nullptr) {
op_handle->SetDeviceContext(p, op_handle->SetDeviceContext(p,
platform::DeviceContextPool::Instance().Get(p)); platform::DeviceContextPool::Instance().Get(p));
...@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext( ...@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result, void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
const std::string &p_name, const std::string &p_name,
size_t src_dev_id) const { size_t src_dev_id) const {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto *op_handle = new BroadcastOpHandle( auto *op_handle = new BroadcastOpHandle(
result->CreateEmptyNode("broadcast", ir::Node::Type::kOperation), result->CreateEmptyNode("broadcast", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_); local_scopes_, places_, nccl_ctxs_);
...@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result, ...@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
void MultiDevSSAGraphBuilder::CreateFusedBroadcastOp( void MultiDevSSAGraphBuilder::CreateFusedBroadcastOp(
ir::Graph *result, ir::Graph *result,
const std::vector<std::unordered_set<std::string>> &bcast_varnames) const { const std::vector<std::unordered_set<std::string>> &bcast_varnames) const {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto *op_handle = new FusedBroadcastOpHandle( auto *op_handle = new FusedBroadcastOpHandle(
result->CreateEmptyNode("fused_broadcast", ir::Node::Type::kOperation), result->CreateEmptyNode("fused_broadcast", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_); local_scopes_, places_, nccl_ctxs_);
...@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result, ...@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result, void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
const std::string &og) const { const std::string &og) const {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle( result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation), result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_)); local_scopes_, places_, nccl_ctxs_));
...@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result, ...@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
void MultiDevSSAGraphBuilder::InsertDataBalanceOp( void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
ir::Graph *result, const std::vector<std::string> &datas) const { ir::Graph *result, const std::vector<std::string> &datas) const {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new DataBalanceOpHandle( result->Get<GraphOps>(kGraphOps).emplace_back(new DataBalanceOpHandle(
result->CreateEmptyNode("data_balance", ir::Node::Type::kOperation), result->CreateEmptyNode("data_balance", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_)); local_scopes_, places_, nccl_ctxs_));
...@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result, ...@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result, VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
const std::string &og, const std::string &og,
int dst_dev_id) const { int dst_dev_id) const {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new ReduceOpHandle( result->Get<GraphOps>(kGraphOps).emplace_back(new ReduceOpHandle(
result->CreateEmptyNode("reduce", ir::Node::Type::kOperation), result->CreateEmptyNode("reduce", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_)); local_scopes_, places_, nccl_ctxs_));
......
...@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass { ...@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
size_t device_id) const; size_t device_id) const;
void Init() const; void Init() const;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
mutable platform::NCCLContextMap *nccl_ctxs_; mutable platform::NCCLContextMap *nccl_ctxs_;
#endif #endif
......
...@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() { ...@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
} }
}); });
} else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) { } else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto pre_in = pre_in_var->Get<framework::LoDTensor>(); auto pre_in = pre_in_var->Get<framework::LoDTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var); VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data( VariableVisitor::GetMutableTensor(out_var).mutable_data(
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
...@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase { ...@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
std::vector<Scope *> local_scopes_; std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_; std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const platform::NCCLContextMap *nccl_ctxs_; const platform::NCCLContextMap *nccl_ctxs_;
ReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes, ReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places, const std::vector<platform::Place> &places,
......
...@@ -35,7 +35,7 @@ struct TestReduceOpHandle { ...@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
std::vector<p::Place> gpu_list_; std::vector<p::Place> gpu_list_;
std::vector<std::unique_ptr<p::DeviceContext>> ctxs_; std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_; std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif #endif
...@@ -43,7 +43,7 @@ struct TestReduceOpHandle { ...@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
for (size_t j = 0; j < ctxs_.size(); ++j) { for (size_t j = 0; j < ctxs_.size(); ++j) {
ctxs_[j]->Wait(); ctxs_[j]->Wait();
} }
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (nccl_ctxs_) { if (nccl_ctxs_) {
nccl_ctxs_->WaitAll(); nccl_ctxs_->WaitAll();
} }
...@@ -53,7 +53,7 @@ struct TestReduceOpHandle { ...@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
void InitCtxOnGpu(bool use_gpu) { void InitCtxOnGpu(bool use_gpu) {
use_gpu_ = use_gpu; use_gpu_ = use_gpu;
if (use_gpu) { if (use_gpu) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int count = p::GetCUDADeviceCount(); int count = p::GetCUDADeviceCount();
if (count <= 1) { if (count <= 1) {
LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA " LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
...@@ -77,7 +77,7 @@ struct TestReduceOpHandle { ...@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
gpu_list_.push_back(p); gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p)); ctxs_.emplace_back(new p::CPUDeviceContext(p));
} }
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_.reset(nullptr); nccl_ctxs_.reset(nullptr);
#endif #endif
} }
...@@ -99,14 +99,14 @@ struct TestReduceOpHandle { ...@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
nodes.emplace_back(new ir::Node("node")); nodes.emplace_back(new ir::Node("node"));
if (use_gpu_) { if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_, op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
gpu_list_, nccl_ctxs_.get())); gpu_list_, nccl_ctxs_.get()));
#else #else
PADDLE_THROW("CUDA is not support."); PADDLE_THROW("CUDA is not support.");
#endif #endif
} else { } else {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_, op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
gpu_list_, nccl_ctxs_.get())); gpu_list_, nccl_ctxs_.get()));
#else #else
......
...@@ -15,7 +15,10 @@ ...@@ -15,7 +15,10 @@
#include "paddle/fluid/framework/ir/is_test_pass.h" #include "paddle/fluid/framework/ir/is_test_pass.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#ifdef _WIN32
#undef FALSE
#undef TRUE
#endif
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace ir { namespace ir {
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
...@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) { ...@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) { for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) {
LOG(INFO) << "data: " LOG(INFO) << "data: "
<< static_cast<float*>(outputs.front().data.data())[i]; << static_cast<float*>(outputs.front().data.data())[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
......
...@@ -15,10 +15,6 @@ ...@@ -15,10 +15,6 @@
#pragma once #pragma once
#include <glog/logging.h> #include <glog/logging.h>
#if !defined(_WIN32)
#include <sys/time.h>
#else
#endif
#include <algorithm> #include <algorithm>
#include <chrono> // NOLINT #include <chrono> // NOLINT
...@@ -28,6 +24,7 @@ ...@@ -28,6 +24,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/printf.h"
namespace paddle { namespace paddle {
......
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <sys/time.h>
#include <time.h> #include <time.h>
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <sys/time.h>
#include <time.h> #include <time.h>
#include <fstream> #include <fstream>
#include <thread> // NOLINT #include <thread> // NOLINT
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h" #include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DECLARE_bool(use_mkldnn); DECLARE_bool(use_mkldnn);
......
...@@ -30,23 +30,23 @@ using std::endl; ...@@ -30,23 +30,23 @@ using std::endl;
void CreateInput(LoDTensor* ids, LoDTensor* scores) { void CreateInput(LoDTensor* ids, LoDTensor* scores) {
LoD lod; LoD lod;
vector<size_t> level0({0, 2, 4}); vector<size_t> level0{0, 2, 4};
vector<size_t> level1({0, 1, 2, 3, 4}); vector<size_t> level1{0, 1, 2, 3, 4};
lod.push_back(level0); lod.push_back(level0);
lod.push_back(level1); lod.push_back(level1);
ids->set_lod(lod); ids->set_lod(lod);
scores->set_lod(lod); scores->set_lod(lod);
auto dims = framework::make_ddim(vector<int64_t>({4, 3})); auto dims = framework::make_ddim(vector<int64_t>{4, 3});
ids->Resize(dims); ids->Resize(dims);
scores->Resize(dims); scores->Resize(dims);
CPUPlace place; CPUPlace place;
auto* ids_data = ids->mutable_data<int64_t>(place); auto* ids_data = ids->mutable_data<int64_t>(place);
auto* scores_data = scores->mutable_data<float>(place); auto* scores_data = scores->mutable_data<float>(place);
vector<int64_t> _ids({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1}); vector<int64_t> _ids{4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1};
vector<float> _scores( vector<float> _scores{0.5f, 0.3f, 0.2f, 0.6f, 0.3f, 0.1f,
{0.5, 0.3, 0.2, 0.6, 0.3, 0.1, 0.9, 0.5, 0.1, 0.7, 0.5, 0.1}); 0.9f, 0.5f, 0.1f, 0.7f, 0.5f, 0.1f};
for (int i = 0; i < 12; i++) { for (int i = 0; i < 12; i++) {
ids_data[i] = _ids[i]; ids_data[i] = _ids[i];
...@@ -79,8 +79,8 @@ TEST(DISABLED_beam_search_op, run) { ...@@ -79,8 +79,8 @@ TEST(DISABLED_beam_search_op, run) {
ASSERT_EQ(sids.lod(), sscores.lod()); ASSERT_EQ(sids.lod(), sscores.lod());
vector<int> tids({4, 2, 3, 8}); vector<int> tids{4, 2, 3, 8};
vector<float> tscores({0.5, 0.6, 0.9, 0.7}); vector<float> tscores{0.5f, 0.6f, 0.9f, 0.7f};
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
ASSERT_EQ(tids[i], sids.data<int64_t>()[i]); ASSERT_EQ(tids[i], sids.data<int64_t>()[i]);
......
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <sys/time.h>
#include <limits> #include <limits>
#include "glog/logging.h" // For VLOG #include "glog/logging.h" // For VLOG
...@@ -20,6 +19,7 @@ limitations under the License. */ ...@@ -20,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_client.h" #include "paddle/fluid/operators/distributed/grpc_client.h"
#include "paddle/fluid/operators/distributed/grpc_serde.h" #include "paddle/fluid/operators/distributed/grpc_serde.h"
#include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
namespace paddle { namespace paddle {
......
...@@ -15,7 +15,6 @@ limitations under the License. */ ...@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include <nccl.h> #include <nccl.h>
#endif #endif
#include <sys/time.h>
#include <thread> // NOLINT #include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h" #include "google/protobuf/io/coded_stream.h"
...@@ -26,6 +25,7 @@ limitations under the License. */ ...@@ -26,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_variable_response.h" #include "paddle/fluid/operators/distributed/grpc_variable_response.h"
#include "paddle/fluid/operators/distributed/proto_encoder_helper.h" #include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
namespace paddle { namespace paddle {
......
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <sys/time.h>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h" #include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h" #include "paddle/fluid/operators/distributed/send_recv.pb.h"
......
...@@ -15,12 +15,12 @@ limitations under the License. */ ...@@ -15,12 +15,12 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include <nccl.h> #include <nccl.h>
#endif #endif
#include <sys/time.h>
#include <thread> // NOLINT #include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/variable_response.h" #include "paddle/fluid/operators/distributed/variable_response.h"
#include "paddle/fluid/platform/port.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <sys/time.h>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -24,6 +23,7 @@ limitations under the License. */ ...@@ -24,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h" #include "paddle/fluid/operators/distributed/send_recv.pb.h"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/group_norm_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DataLayout = framework::DataLayout;
class GroupNormOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of GroupNormOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"),
"Output(Y) of GroupNormOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Mean"),
"Output(Mean) of GroupNormOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Variance"),
"Output(Variance) of GroupNormOp should not be null.");
auto x_dim = ctx->GetInputDim("X");
auto channel_num = x_dim[1];
auto batch_size = x_dim[0];
auto groups = ctx->Attrs().Get<int>("groups");
PADDLE_ENFORCE_LE(
groups, channel_num,
"'groups' must be less equal than the number of channels.");
PADDLE_ENFORCE_GE(groups, 1, "'groups' must be greater equal than 1.");
if (ctx->HasInput("Scale")) {
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], channel_num);
}
if (ctx->HasInput("Bias")) {
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], channel_num);
}
ctx->SetOutputDim("Y", ctx->GetInputDim("X"));
ctx->SetOutputDim("Mean", {batch_size, groups});
ctx->SetOutputDim("Variance", {batch_size, groups});
ctx->ShareLoD("X", "Y");
}
};
class GroupNormOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "The input tensor.");
AddInput("Scale",
"Scale is a 1-dimensional tensor of size C"
"that is applied to the output.")
.AsDispensable();
AddInput("Bias",
"Bias is a 1-dimensional tensor of size C "
"that is applied to the output")
.AsDispensable();
AddOutput("Y", "Result after normalization.");
AddOutput("Mean", "Mean of each group.").AsIntermediate();
AddOutput("Variance", "Variance of each group.").AsIntermediate();
AddAttr<float>("epsilon",
"Constant for numerical stability [default 1e-5].")
.SetDefault(1e-5)
.AddCustomChecker([](const float &epsilon) {
PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 1.0f,
"'epsilon' should be between 0.0 and 1.0.");
});
AddAttr<int>("groups", "The number of groups that divided from channels.")
.AddCustomChecker([](const int &groups) {
PADDLE_ENFORCE_GT(groups, 0, "'groups' should be greater than zero.");
});
AddComment(R"DOC(
Group Normalization
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_
)DOC");
}
};
class GroupNormGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
// check input
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of GroupNormOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Mean"),
"Input(Mean) of GroupNormOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Variance"),
"Input(Variance) of GroupNormOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
"Input(Y@GRAD) of GroupNormOp should not be null.");
// check output
if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
if (ctx->HasOutput(framework::GradVarName("Scale"))) {
ctx->SetOutputDim(framework::GradVarName("Scale"),
ctx->GetInputDim("Scale"));
}
if (ctx->HasOutput(framework::GradVarName("Bias"))) {
ctx->SetOutputDim(framework::GradVarName("Bias"),
ctx->GetInputDim("Bias"));
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
const auto *var = ctx.InputVar(framework::GradVarName("Y"));
if (var == nullptr) {
PADDLE_THROW("can't find Y@GRAD");
}
const Tensor *t = nullptr;
if (var->IsType<Tensor>()) {
t = &var->Get<Tensor>();
} else if (var->IsType<LoDTensor>()) {
t = &var->Get<LoDTensor>();
}
if (t == nullptr) {
PADDLE_THROW("can't find Y@GRAD");
}
return framework::OpKernelType(framework::ToDataType(t->type()),
ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(group_norm, ops::GroupNormOp, ops::GroupNormOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(group_norm_grad, ops::GroupNormGradOp);
REGISTER_OP_CPU_KERNEL(
group_norm, ops::GroupNormKernel<paddle::platform::CPUDeviceContext, float>,
ops::GroupNormKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
group_norm_grad,
ops::GroupNormGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GroupNormGradKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cub/cub.cuh>
#include "paddle/fluid/operators/group_norm_op.h"
namespace paddle {
namespace operators {
template <typename T>
__global__ void GroupNormForwardGetMeanAndVar(const T* x, int N, int C,
int imsize, int groups,
int group_size, T* mean, T* var) {
int gid = blockIdx.y;
int cid = blockIdx.x;
int bid = blockIdx.z;
int number = min(group_size, static_cast<int>(C - gid * group_size));
int ccid = gid * group_size + cid;
if (ccid >= C) return;
T x_mean = 0, x_var = 0;
for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) {
T val = x[(bid * C + ccid) * imsize + imid];
x_mean += val;
x_var += val * val;
}
x_mean /= number * imsize;
x_var /= number * imsize;
__shared__ T s_mem[2];
if (threadIdx.x == 0) {
s_mem[0] = s_mem[1] = 0;
}
__syncthreads();
paddle::platform::CudaAtomicAdd(&s_mem[0], x_mean);
paddle::platform::CudaAtomicAdd(&s_mem[1], x_var);
__syncthreads();
if (threadIdx.x == 0) {
paddle::platform::CudaAtomicAdd(&mean[bid * groups + gid], s_mem[0]);
paddle::platform::CudaAtomicAdd(&var[bid * groups + gid], s_mem[1]);
}
}
template <typename T>
__global__ void GroupNormForward(const T* x, const T* mean, const T* var,
const T* scale, const T* bias, int N, int C,
int imsize, int groups, int group_size,
T epsilon, T* y, T* real_var) {
int gid = blockIdx.y;
int cid = blockIdx.x;
int bid = blockIdx.z;
int ccid = gid * group_size + cid;
if (ccid >= C) return;
T x_mean = mean[bid * groups + gid];
T x_var = var[bid * groups + gid];
x_var = x_var - x_mean * x_mean;
T var_inv = 1.0 / sqrt(x_var + epsilon);
if (cid == 0 && threadIdx.x == 0) real_var[bid * groups + gid] = x_var;
for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) {
T val = x[(bid * C + ccid) * imsize + imid];
val = (val - x_mean) * var_inv;
if (scale) val *= scale[gid * group_size + cid];
if (bias) val += bias[gid * group_size + cid];
y[(bid * C + ccid) * imsize + imid] = val;
}
}
template <typename T>
class GroupNormKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const float epsilon = ctx.Attr<float>("epsilon");
auto* scale = ctx.Input<Tensor>("Scale");
auto* bias = ctx.Input<Tensor>("Bias");
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Output<Tensor>("Y");
auto* mean = ctx.Output<Tensor>("Mean");
auto* var = ctx.Output<Tensor>("Variance");
const auto groups = ctx.Attr<int>("groups");
const auto x_dims = x->dims();
const int group_size = (x_dims[1] - 1) / groups + 1;
y->mutable_data<T>(ctx.GetPlace());
mean->mutable_data<T>(ctx.GetPlace());
var->mutable_data<T>(ctx.GetPlace());
math::SetConstant<platform::CUDADeviceContext, T> set_zero;
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
Tensor temp_var;
temp_var.mutable_data<T>(var->dims(), ctx.GetPlace());
set_zero(dev_ctx, mean, static_cast<T>(0));
set_zero(dev_ctx, &temp_var, static_cast<T>(0));
auto* x_data = x->data<T>();
auto* y_data = y->data<T>();
auto* mean_data = mean->data<T>();
auto* var_data = var->data<T>();
auto* temp_var_data = temp_var.data<T>();
const T* scale_data = nullptr;
if (scale) scale_data = scale->data<T>();
const T* bias_data = nullptr;
if (bias) bias_data = bias->data<T>();
int imsize = x_dims[2] * x_dims[3];
int block_size = std::min(512, imsize);
dim3 grid(group_size, groups, x_dims[0]);
dim3 threads(block_size, 1, 1);
GroupNormForwardGetMeanAndVar<T><<<grid, threads, 0, dev_ctx.stream()>>>(
x_data, x_dims[0], x_dims[1], imsize, groups, group_size, mean_data,
temp_var_data);
GroupNormForward<T><<<grid, threads, 0, dev_ctx.stream()>>>(
x_data, mean_data, temp_var_data, scale_data, bias_data, x_dims[0],
x_dims[1], imsize, groups, group_size, epsilon, y_data, var_data);
}
};
template <typename T>
__global__ void GroupNormBackwardGetMeanAndVar(
const T* x, const T* mean, const T* var, const T* scale, const T* d_y,
int N, int C, int imsize, int groups, int group_size, T epsilon, T* d_x,
T* d_mean, T* d_var, T* d_scale, T* d_bias) {
int gid = blockIdx.y;
int cid = blockIdx.x;
int bid = blockIdx.z;
int number = min(group_size, static_cast<int>(C - gid * group_size));
int ccid = gid * group_size + cid;
if (ccid >= C) return;
T x_mean = mean[bid * groups + gid];
T x_var = var[bid * groups + gid];
T var_inv = 1.0 / sqrt(x_var + epsilon);
T d_var_inv = 0, d_x_mean = 0;
T d_mean_data = 0, d_var_data = 0, d_scale_data = 0, d_bias_data = 0;
for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) {
T tmp = x[(bid * C + ccid) * imsize + imid];
T val = (tmp - x_mean) * var_inv;
T dval = d_y[(bid * C + ccid) * imsize + imid];
if (d_bias) d_bias_data += dval;
if (d_scale) d_scale_data += val * dval;
if (scale) dval = dval * scale[ccid];
d_var_data += (tmp - x_mean) * dval;
T d_tmp = dval * var_inv;
if (d_x) d_x[(bid * C + ccid) * imsize + imid] = d_tmp;
d_mean_data -= d_tmp;
}
__shared__ T s_mem[4];
if (threadIdx.x == 0) {
s_mem[0] = s_mem[1] = 0;
if (d_scale) s_mem[2] = 0;
if (d_bias) s_mem[3] = 0;
}
__syncthreads();
paddle::platform::CudaAtomicAdd(&s_mem[0], d_mean_data);
paddle::platform::CudaAtomicAdd(&s_mem[1], d_var_data);
if (d_scale) paddle::platform::CudaAtomicAdd(&s_mem[2], d_scale_data);
if (d_bias) paddle::platform::CudaAtomicAdd(&s_mem[3], d_bias_data);
__syncthreads();
if (threadIdx.x == 0) {
paddle::platform::CudaAtomicAdd(&d_mean[bid * groups + gid], s_mem[0]);
paddle::platform::CudaAtomicAdd(&d_var[bid * groups + gid], s_mem[1]);
if (d_scale) paddle::platform::CudaAtomicAdd(&d_scale[ccid], s_mem[2]);
if (d_bias) paddle::platform::CudaAtomicAdd(&d_bias[ccid], s_mem[3]);
}
}
template <typename T>
__global__ void GroupNormBackward(const T* x, const T* mean, const T* var,
const T* d_mean, const T* d_var, int N, int C,
int imsize, int groups, int group_size,
T epsilon, T* d_x) {
int gid = blockIdx.y;
int cid = blockIdx.x;
int bid = blockIdx.z;
int number = min(group_size, static_cast<int>(C - gid * group_size));
int ccid = gid * group_size + cid;
if (ccid >= C) return;
T x_mean = mean[bid * groups + gid];
T x_var = var[bid * groups + gid];
T d_x_mean = d_mean[bid * groups + gid];
T d_var_inv = d_var[bid * groups + gid];
T d_x_var =
-1.0 / (2 * (x_var + epsilon) * sqrt(x_var + epsilon)) * d_var_inv;
d_x_mean -= 2 * d_x_var * x_mean;
d_x_var /= number * imsize;
d_x_mean /= number * imsize;
for (int imid = threadIdx.x; imid < imsize; imid += blockDim.x) {
T tmp = x[(bid * C + ccid) * imsize + imid];
if (d_x)
d_x[(bid * C + ccid) * imsize + imid] += d_x_mean + tmp * 2 * d_x_var;
}
}
template <typename T>
class GroupNormGradKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const float epsilon = ctx.Attr<float>("epsilon");
auto* x = ctx.Input<Tensor>("X");
auto* mean = ctx.Input<Tensor>("Mean");
auto* var = ctx.Input<Tensor>("Variance");
auto* scale = ctx.Input<Tensor>("Scale");
auto* d_y = ctx.Input<Tensor>(framework::GradVarName("Y"));
const auto groups = ctx.Attr<int>("groups");
// init output
auto* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* d_scale = ctx.Output<Tensor>(framework::GradVarName("Scale"));
auto* d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
const auto& x_dims = x->dims();
const int group_size = (x_dims[1] - 1) / groups + 1;
T* d_x_data = nullptr;
if (d_x) {
d_x->mutable_data<T>(ctx.GetPlace());
d_x_data = d_x->data<T>();
}
math::SetConstant<platform::CUDADeviceContext, T> set_zero;
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
Tensor temp_var;
temp_var.mutable_data<T>(var->dims(), ctx.GetPlace());
set_zero(dev_ctx, &temp_var, static_cast<T>(0));
T* temp_var_data = temp_var.data<T>();
Tensor temp_mean;
temp_mean.mutable_data<T>(var->dims(), ctx.GetPlace());
set_zero(dev_ctx, &temp_mean, static_cast<T>(0));
T* temp_mean_data = temp_mean.data<T>();
auto* x_data = x->data<T>();
auto* y_data = d_y->data<T>();
auto* mean_data = mean->data<T>();
auto* var_data = var->data<T>();
T* d_scale_data = nullptr;
if (d_scale) {
d_scale->mutable_data<T>(ctx.GetPlace());
set_zero(dev_ctx, d_scale, static_cast<T>(0));
d_scale_data = d_scale->data<T>();
}
T* d_bias_data = nullptr;
if (d_bias) {
d_bias->mutable_data<T>(ctx.GetPlace());
set_zero(dev_ctx, d_bias, static_cast<T>(0));
d_bias_data = d_bias->data<T>();
}
const T* scale_data = nullptr;
if (scale) scale_data = scale->data<T>();
int imsize = x_dims[2] * x_dims[3];
int block_size = std::min(512, imsize);
dim3 grid(group_size, groups, x_dims[0]);
dim3 threads(block_size, 1, 1);
GroupNormBackwardGetMeanAndVar<T><<<grid, threads, 0, dev_ctx.stream()>>>(
x_data, mean_data, var_data, scale_data, y_data, x_dims[0], x_dims[1],
imsize, groups, group_size, epsilon, d_x_data, temp_mean_data,
temp_var_data, d_scale_data, d_bias_data);
GroupNormBackward<T><<<grid, threads, 0, dev_ctx.stream()>>>(
x_data, mean_data, var_data, temp_mean_data, temp_var_data, x_dims[0],
x_dims[1], imsize, groups, group_size, epsilon, d_x_data);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
group_norm,
ops::GroupNormKernel<paddle::platform::CUDADeviceContext, float>,
ops::GroupNormKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
group_norm_grad,
ops::GroupNormGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::GroupNormGradKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DataLayout = framework::DataLayout;
template <typename DeviceContext, typename T>
class GroupNormKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const float epsilon = ctx.Attr<float>("epsilon");
auto* scale = ctx.Input<Tensor>("Scale");
auto* bias = ctx.Input<Tensor>("Bias");
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Output<Tensor>("Y");
auto* mean = ctx.Output<Tensor>("Mean");
auto* var = ctx.Output<Tensor>("Variance");
const auto groups = ctx.Attr<int>("groups");
const auto x_dims = x->dims();
const int group_size = (x_dims[1] - 1) / groups + 1;
y->mutable_data<T>(ctx.GetPlace());
mean->mutable_data<T>(ctx.GetPlace());
var->mutable_data<T>(ctx.GetPlace());
auto* x_data = x->data<T>();
auto* y_data = y->data<T>();
auto* mean_data = mean->data<T>();
auto* var_data = var->data<T>();
const T* scale_data = nullptr;
if (scale) scale_data = scale->data<T>();
const T* bias_data = nullptr;
if (bias) bias_data = bias->data<T>();
int imsize = x_dims[2] * x_dims[3];
auto* iter_x_data = x_data;
auto* iter_y_data = y_data;
for (int bid = 0; bid < x_dims[0]; bid++)
for (int gid = 0; gid < groups; gid++) {
T x_mean = 0, x_var = 0;
int number = std::min(group_size,
static_cast<int>(x_dims[1] - gid * group_size));
auto* tmp = iter_x_data;
for (int cid = 0; cid < number; cid++) {
for (int imid = 0; imid < imsize; imid++, iter_x_data++) {
x_mean += iter_x_data[0];
x_var += iter_x_data[0] * iter_x_data[0];
}
}
x_mean /= number * imsize;
x_var /= number * imsize;
x_var = x_var - x_mean * x_mean;
T var_inv = 1.0 / sqrt(x_var + epsilon);
mean_data[bid * groups + gid] = x_mean;
var_data[bid * groups + gid] = x_var;
for (int cid = 0; cid < number; cid++) {
for (int imid = 0; imid < imsize; imid++, tmp++, iter_y_data++) {
T val = (tmp[0] - x_mean) * var_inv;
if (scale_data) val *= scale_data[gid * group_size + cid];
if (bias_data) val += bias_data[gid * group_size + cid];
iter_y_data[0] = val;
}
}
}
}
};
template <typename DeviceContext, typename T>
class GroupNormGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const float epsilon = ctx.Attr<float>("epsilon");
auto* x = ctx.Input<Tensor>("X");
auto* mean = ctx.Input<Tensor>("Mean");
auto* var = ctx.Input<Tensor>("Variance");
auto* scale = ctx.Input<Tensor>("Scale");
auto* d_y = ctx.Input<Tensor>(framework::GradVarName("Y"));
const auto groups = ctx.Attr<int>("groups");
// init output
auto* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* d_scale = ctx.Output<Tensor>(framework::GradVarName("Scale"));
auto* d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
const auto& x_dims = x->dims();
const int group_size = (x_dims[1] - 1) / groups + 1;
// TODO(liangdun): need to check d_x is null
math::SetConstant<DeviceContext, T> set_zero;
auto& dev_ctx = ctx.template device_context<DeviceContext>();
T* d_x_data = nullptr;
if (d_x) {
d_x->mutable_data<T>(ctx.GetPlace());
set_zero(dev_ctx, d_x, static_cast<T>(0));
d_x_data = d_x->data<T>();
}
auto* x_data = x->data<T>();
auto* y_data = d_y->data<T>();
auto* mean_data = mean->data<T>();
auto* var_data = var->data<T>();
T* d_scale_data = nullptr;
if (d_scale) {
d_scale->mutable_data<T>(ctx.GetPlace());
set_zero(dev_ctx, d_scale, static_cast<T>(0));
d_scale_data = d_scale->data<T>();
}
T* d_bias_data = nullptr;
if (d_bias) {
d_bias->mutable_data<T>(ctx.GetPlace());
set_zero(dev_ctx, d_bias, static_cast<T>(0));
d_bias_data = d_bias->data<T>();
}
const T* scale_data = nullptr;
if (scale) scale_data = scale->data<T>();
int imsize = x_dims[2] * x_dims[3];
auto* iter_x_data = x_data;
auto* iter_d_x_data = d_x_data;
auto* iter_y_data = y_data;
for (int bid = 0; bid < x_dims[0]; bid++)
for (int gid = 0; gid < groups; gid++) {
T x_mean = mean_data[bid * groups + gid];
T x_var = var_data[bid * groups + gid];
T var_inv = 1.0 / sqrt(x_var + epsilon);
int number = std::min(group_size,
static_cast<int>(x_dims[1] - gid * group_size));
auto* tmp = iter_x_data;
auto* tmp2 = iter_d_x_data;
T d_var_inv = 0, d_x_mean = 0;
for (int cid = 0; cid < number; cid++) {
for (int imid = 0; imid < imsize;
imid++, tmp++, iter_y_data++, iter_d_x_data++) {
T val = (tmp[0] - x_mean) * var_inv;
T dval = iter_y_data[0];
if (d_bias_data) d_bias_data[gid * group_size + cid] += dval;
if (d_scale_data)
d_scale_data[gid * group_size + cid] += val * dval;
if (scale_data) dval = scale_data[gid * group_size + cid] * dval;
d_var_inv += (tmp[0] - x_mean) * dval;
T d_tmp = dval * var_inv;
if (d_x_data) iter_d_x_data[0] += d_tmp;
d_x_mean -= d_tmp;
}
}
T d_x_var =
-1.0 / (2 * (x_var + epsilon) * sqrt(x_var + epsilon)) * d_var_inv;
d_x_mean -= 2 * d_x_var * x_mean;
d_x_var /= number * imsize;
d_x_mean /= number * imsize;
iter_d_x_data = tmp2;
if (d_x_data) {
for (int cid = 0; cid < number; cid++) {
for (int imid = 0; imid < imsize;
imid++, iter_x_data++, iter_d_x_data++) {
iter_d_x_data[0] += d_x_mean;
iter_d_x_data[0] += iter_x_data[0] * 2 * d_x_var;
}
}
}
}
}
};
} // namespace operators
} // namespace paddle
...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <sys/time.h>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <random> #include <random>
...@@ -22,6 +21,7 @@ limitations under the License. */ ...@@ -22,6 +21,7 @@ limitations under the License. */
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/port.h"
inline double GetCurrentUS() { inline double GetCurrentUS() {
struct timeval time; struct timeval time;
......
...@@ -14,9 +14,9 @@ limitations under the License. */ ...@@ -14,9 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/math/im2col.h" #include "paddle/fluid/operators/math/im2col.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <sys/time.h>
#include <vector> #include <vector>
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h" #include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
#include "paddle/fluid/platform/port.h"
template <typename DeviceContext, typename Place> template <typename DeviceContext, typename Place>
void testIm2col() { void testIm2col() {
......
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/math/jit_kernel.h" #include "paddle/fluid/operators/math/jit_kernel.h"
#include <sys/time.h>
#include <cmath> // for exp #include <cmath> // for exp
#include <cstring> // for memcpy #include <cstring> // for memcpy
#include <random> #include <random>
...@@ -22,6 +21,7 @@ limitations under the License. */ ...@@ -22,6 +21,7 @@ limitations under the License. */
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h" #include "glog/logging.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/fluid/platform/dynload/mklml.h"
......
...@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) { ...@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
#define CUDNN_ENFORCE(condition) \ #define CUDNN_ENFORCE(condition) \
do { \ do { \
cudnnStatus_t status = condition; \ auto status = condition; \
if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \ if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \
PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \ PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
} \ } \
......
...@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name); ...@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#else #else
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
inline cudnnStatus_t operator()(Args... args) { \ inline auto operator()(Args... args) { \
return ::__name(args...); \ return ::__name(args...); \
} \ } \
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#endif #endif
......
...@@ -19,7 +19,10 @@ limitations under the License. */ ...@@ -19,7 +19,10 @@ limitations under the License. */
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
DEFINE_double(fraction_of_gpu_memory_to_use, 0.92, // fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle
DEFINE_double(fraction_of_gpu_memory_to_use, 0.5,
"Allocate a trunk of gpu memory that is this fraction of the " "Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated " "total gpu memory size. Future memory usage will be allocated "
"from the trunk. If the trunk doesn't have enough gpu memory, " "from the trunk. If the trunk doesn't have enough gpu memory, "
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include "ThreadPool.h" #include <ThreadPool.h>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
......
...@@ -37,6 +37,7 @@ limitations under the License. */ ...@@ -37,6 +37,7 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
...@@ -86,6 +87,9 @@ bool IsCompiledWithDIST() { ...@@ -86,6 +87,9 @@ bool IsCompiledWithDIST() {
} }
PYBIND11_PLUGIN(core) { PYBIND11_PLUGIN(core) {
// Not used, just make sure cpu_info.cc is linked.
paddle::platform::CpuTotalPhysicalMemory();
paddle::memory::allocation::UseAllocatorStrategyGFlag(); paddle::memory::allocation::UseAllocatorStrategyGFlag();
py::module m("core", "C++ core of PaddlePaddle"); py::module m("core", "C++ core of PaddlePaddle");
......
...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef _WIN32
#ifndef HL_WARPCTC_WRAP_H_ #ifndef HL_WARPCTC_WRAP_H_
#define HL_WARPCTC_WRAP_H_ #define HL_WARPCTC_WRAP_H_
#include "ctc.h" #include "ctc.h"
#include "hl_base.h" #include "hl_base.h"
...@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths, ...@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
size_t* bytes); size_t* bytes);
#endif // HL_WARPCTC_WRAP_H_ #endif // HL_WARPCTC_WRAP_H_
#endif
...@@ -132,11 +132,15 @@ inline pid_t gettid() { ...@@ -132,11 +132,15 @@ inline pid_t gettid() {
uint64_t tid; uint64_t tid;
pthread_threadid_np(NULL, &tid); pthread_threadid_np(NULL, &tid);
#else #else
#ifndef _WIN32
#ifndef __NR_gettid #ifndef __NR_gettid
#define __NR_gettid 224 #define __NR_gettid 224
#endif #endif
pid_t tid = syscall(__NR_gettid); pid_t tid = syscall(__NR_gettid);
#endif #endif
#else // _WIN32
pid_t tid = _getpid();
#endif // _WIN32
CHECK_NE((int)tid, -1); CHECK_NE((int)tid, -1);
return tid; return tid;
} }
......
...@@ -14,10 +14,12 @@ limitations under the License. */ ...@@ -14,10 +14,12 @@ limitations under the License. */
#pragma once #pragma once
#ifndef _WIN32
#include <pthread.h> #include <pthread.h>
#include <sys/syscall.h> #include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#endif
#include <sys/types.h>
#include <map> #include <map>
#include <mutex> #include <mutex>
#include <random> #include <random>
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#ifndef _WIN32
#include <sys/syscall.h> // for syscall() #include <sys/syscall.h> // for syscall()
#endif
#include <sys/types.h> #include <sys/types.h>
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
...@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) { ...@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
} }
#endif #endif
#ifdef _WIN32
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#include <windows.h>
template <typename T>
inline int __builtin_clz(const T& value) {
DWORD leadning_zero = 0;
if (_BitScanReverse(&leadning_zero, value)) {
return static_cast<int>(sizeof(T) * 8 - leadning_zero);
} else {
return static_cast<int>(0);
}
}
inline int __builtin_clzl(const unsigned long& value) {
return __builtin_clz(value);
}
inline int __builtin_clzll(const unsigned long long& value) {
return __builtin_clz(value);
}
#define pid_t int
#endif
/** /**
* Loop over the elements in a container * Loop over the elements in a container
* TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach, * TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach,
......
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
if(WITH_TESTING) if(WITH_TESTING)
add_library(paddle_test_main STATIC TestMain.cpp) add_library(paddle_test_main STATIC TestMain.cpp)
add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies}) add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies})
add_library(paddle_test_util STATIC TestUtil.cpp) if(NOT WIN32)
add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) add_library(paddle_test_util STATIC TestUtil.cpp)
add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
endif(NOT WIN32)
if(NOT MOBILE_INFERENCE) if(NOT MOBILE_INFERENCE)
cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags) cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags)
endif() endif()
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from . import hdfs_utils
from .hdfs_utils import *
__all__ = hdfs_utils.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HDFS Utils"""
import os
import subprocess
import multiprocessing
from datetime import datetime
import re
import copy
import errno
import logging
__all__ = ["HDFSClient", "multi_download"]
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
_logger = logging.getLogger("hdfs_utils")
_logger.setLevel(logging.INFO)
class HDFSClient(object):
def __init__(self, hadoop_home, configs):
self.pre_commands = []
hadoop_bin = '%s/bin/hadoop' % hadoop_home
self.pre_commands.append(hadoop_bin)
dfs = 'fs'
self.pre_commands.append(dfs)
for k, v in configs.iteritems():
config_command = '-D%s=%s' % (k, v)
self.pre_commands.append(config_command)
def __run_hdfs_cmd(self, commands, retry_times=5):
whole_commands = copy.deepcopy(self.pre_commands)
whole_commands.extend(commands)
print('Running system command: {0}'.format(' '.join(whole_commands)))
ret_code = 0
ret_out = None
ret_err = None
for x in range(retry_times + 1):
proc = subprocess.Popen(
whole_commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(output, errors) = proc.communicate()
ret_code, ret_out, ret_err = proc.returncode, output, errors
if ret_code:
_logger.warn(
'Times: %d, Error running command: %s. Return code: %d, Error: %s'
% (x, ' '.join(whole_commands), proc.returncode, errors))
else:
break
return ret_code, ret_out, ret_err
def upload(self, hdfs_path, local_path, overwrite=False, retry_times=5):
"""
upload the local file to hdfs
args:
local_file_path: the local file path
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
return:
True or False
"""
assert hdfs_path is not None
assert local_path is not None and os.path.exists(local_path)
if os.path.isdir(local_path):
_logger.warn(
"The Local path: {} is dir and I will support it later, return".
format(local_path))
return
base = os.path.basename(local_path)
if not self.is_exist(hdfs_path):
self.makedirs(hdfs_path)
else:
if self.is_exist(os.path.join(hdfs_path, base)):
if overwrite:
_logger.error(
"The HDFS path: {} is exist and overwrite is True, delete it".
format(hdfs_path))
self.delete(hdfs_path)
else:
_logger.error(
"The HDFS path: {} is exist and overwrite is False, return".
format(hdfs_path))
return False
put_commands = ["-put", local_path, hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(put_commands,
retry_times)
if returncode:
_logger.error("Put local path: {} to HDFS path: {} failed".format(
local_path, hdfs_path))
return False
else:
_logger.info("Put local path: {} to HDFS path: {} successfully".
format(local_path, hdfs_path))
return True
def download(self, hdfs_path, local_path, overwrite=False, unzip=False):
"""
download from hdfs
args:
local_file_path: the local file path
remote_file_path: remote dir on hdfs
return:
True or False
"""
_logger.info('Downloading %r to %r.', hdfs_path, local_path)
_logger.info('Download of %s to %r complete.', hdfs_path, local_path)
if not self.is_exist(hdfs_path):
print("HDFS path: {} do not exist".format(hdfs_path))
return False
if self.is_dir(hdfs_path):
_logger.error(
"The HDFS path: {} is dir and I will support it later, return".
format(hdfs_path))
if os.path.exists(local_path):
base = os.path.basename(hdfs_path)
local_file = os.path.join(local_path, base)
if os.path.exists(local_file):
if overwrite:
os.remove(local_file)
else:
_logger.error(
"The Local path: {} is exist and overwrite is False, return".
format(local_file))
return False
self.make_local_dirs(local_path)
download_commands = ["-get", hdfs_path, local_path]
returncode, output, errors = self.__run_hdfs_cmd(download_commands)
if returncode:
_logger.error("Get local path: {} from HDFS path: {} failed".format(
local_path, hdfs_path))
return False
else:
_logger.info("Get local path: {} from HDFS path: {} successfully".
format(local_path, hdfs_path))
return True
def is_exist(self, hdfs_path=None):
"""
whether the remote hdfs path exists?
args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
return:
True or False
"""
exist_cmd = ['-test', '-e', hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(
exist_cmd, retry_times=1)
if returncode:
_logger.error("HDFS is_exist HDFS path: {} failed".format(
hdfs_path))
return False
else:
_logger.info("HDFS is_exist HDFS path: {} successfully".format(
hdfs_path))
return True
def is_dir(self, hdfs_path=None):
"""
whether the remote hdfs path exists?
args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
return:
True or False
"""
if not self.is_exist(hdfs_path):
return False
dir_cmd = ['-test', '-d', hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(dir_cmd, retry_times=1)
if returncode:
_logger.error("HDFS path: {} failed is not a directory".format(
hdfs_path))
return False
else:
_logger.info("HDFS path: {} successfully is a directory".format(
hdfs_path))
return True
def delete(self, hdfs_path):
"""Remove a file or directory from HDFS.
:param hdfs_path: HDFS path.
:param recursive: Recursively delete files and directories. By default,
this method will raise an :class:`HdfsError` if trying to delete a
non-empty directory.
This function returns `True` if the deletion was successful and `False` if
no file or directory previously existed at `hdfs_path`.
"""
_logger.info('Deleting %r.', hdfs_path)
if not self.is_exist(hdfs_path):
_logger.warn("HDFS path: {} do not exist".format(hdfs_path))
return True
if self.is_dir(hdfs_path):
del_cmd = ['-rmr', hdfs_path]
else:
del_cmd = ['-rm', hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(del_cmd, retry_times=0)
if returncode:
_logger.error("HDFS path: {} delete files failure".format(
hdfs_path))
return False
else:
_logger.info("HDFS path: {} delete files successfully".format(
hdfs_path))
return True
def rename(self, hdfs_src_path, hdfs_dst_path, overwrite=False):
"""Move a file or folder.
:param hdfs_src_path: Source path.
:param hdfs_dst_path: Destination path. If the path already exists and is
a directory, the source will be moved into it. If the path exists and is
a file, or if a parent destination directory is missing, this method will
raise an :class:`HdfsError`.
"""
assert hdfs_src_path is not None
assert hdfs_dst_path is not None
if not self.is_exist(hdfs_src_path):
_logger.info("HDFS path do not exist: {}".format(hdfs_src_path))
if self.is_exist(hdfs_dst_path) and not overwrite:
_logger.error("HDFS path is exist: {} and overwrite=False".format(
hdfs_dst_path))
rename_command = ['-mv', hdfs_src_path, hdfs_dst_path]
returncode, output, errors = self.__run_hdfs_cmd(
rename_command, retry_times=1)
if returncode:
_logger.error("HDFS rename path: {} to {} failed".format(
hdfs_src_path, hdfs_dst_path))
return False
else:
_logger.info("HDFS rename path: {} to {} successfully".format(
hdfs_src_path, hdfs_dst_path))
return True
@staticmethod
def make_local_dirs(local_path):
try:
os.makedirs(local_path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
def makedirs(self, hdfs_path):
"""Create a remote directory, recursively if necessary.
:param hdfs_path: Remote path. Intermediate directories will be created
appropriately.
"""
_logger.info('Creating directories to %r.', hdfs_path)
assert hdfs_path is not None
if self.is_exist(hdfs_path):
return
mkdirs_commands = ['-mkdir', hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(
mkdirs_commands, retry_times=1)
if returncode:
_logger.error("HDFS mkdir path: {} failed".format(hdfs_path))
return False
else:
_logger.error("HDFS mkdir path: {} successfully".format(hdfs_path))
return True
def ls(self, hdfs_path):
assert hdfs_path is not None
if not self.is_exist(hdfs_path):
return []
ls_commands = ['-ls', hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(
ls_commands, retry_times=1)
if returncode:
_logger.error("HDFS list path: {} failed".format(hdfs_path))
return []
else:
_logger.info("HDFS list path: {} successfully".format(hdfs_path))
ret_lines = []
regex = re.compile('\s+')
out_lines = output.strip().split("\n")
for line in out_lines:
re_line = regex.split(line)
if len(re_line) == 8:
ret_lines.append(re_line[7])
return ret_lines
def lsr(self, hdfs_path, only_file=True, sort=True):
def sort_by_time(v1, v2):
v1_time = datetime.strptime(v1[1], '%Y-%m-%d %H:%M')
v2_time = datetime.strptime(v2[1], '%Y-%m-%d %H:%M')
return v1_time > v2_time
assert hdfs_path is not None
if not self.is_exist(hdfs_path):
return []
ls_commands = ['-lsr', hdfs_path]
returncode, output, errors = self.__run_hdfs_cmd(
ls_commands, retry_times=1)
if returncode:
_logger.error("HDFS list all files: {} failed".format(hdfs_path))
return []
else:
_logger.info("HDFS list all files: {} successfully".format(
hdfs_path))
lines = []
regex = re.compile('\s+')
out_lines = output.strip().split("\n")
for line in out_lines:
re_line = regex.split(line)
if len(re_line) == 8:
if only_file and re_line[0][0] == "d":
continue
else:
lines.append(
(re_line[7], re_line[5] + " " + re_line[6]))
if sort:
sorted(lines, cmp=sort_by_time)
ret_lines = [ret[0] for ret in lines]
return ret_lines
def multi_upload(client,
hdfs_path,
local_path,
multi_processes=5,
overwrite=False):
"""
:param overwrite: will overwrite hdfs file or not
:param multi_processes: the upload data process at the same time, default=5
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
:return:
"""
def __subprocess_upload(datas):
for data in datas:
re_path = os.path.relpath(os.path.dirname(data), local_path)
hdfs_re_path = os.path.join(hdfs_path, re_path)
client.upload(hdfs_re_path, data, overwrite, retry_times=5)
def get_local_files(path):
rlist = []
if not os.path.isdir(path):
return rlist
for dirname, folder, files in os.walk(path):
for i in files:
t = os.path.join(dirname, i)
rlist.append(t)
return rlist
assert isinstance(client, HDFSClient)
all_files = get_local_files(local_path)
if not all_files:
_logger.info("there are nothing need to upload, exit")
return
_logger.info("Start {} multi process to upload datas".format(
multi_processes))
procs = []
for i in range(multi_processes):
process_datas = all_files[i::multi_processes]
p = multiprocessing.Process(
target=__subprocess_upload, args=(process_datas, ))
procs.append(p)
p.start()
# complete the processes
for proc in procs:
proc.join()
_logger.info("Finish {} multi process to upload datas".format(
multi_processes))
def multi_download(client,
hdfs_path,
local_path,
trainer_id,
trainers,
multi_processes=5):
"""
multi_download
:param client: instance of HDFSClient
:param hdfs_path: path on hdfs
:param local_path: path on local
:param trainer_id: current trainer id
:param trainers: all trainers number
:param multi_processes: the download data process at the same time, default=5
:return: None
"""
def __subprocess_download(datas):
for data in datas:
re_path = os.path.relpath(os.path.dirname(data), hdfs_path)
local_re_path = os.path.join(local_path, re_path)
client.download(data, local_re_path)
assert isinstance(client, HDFSClient)
client.make_local_dirs(local_path)
_logger.info("Make local dir {} successfully".format(local_path))
all_need_download = client.lsr(hdfs_path, sort=True)
need_download = all_need_download[trainer_id::trainers]
_logger.info("Get {} files From all {} files need to be download from {}".
format(len(need_download), len(all_need_download), hdfs_path))
_logger.info("Start {} multi process to download datas".format(
multi_processes))
procs = []
for i in range(multi_processes):
process_datas = need_download[i::multi_processes]
p = multiprocessing.Process(
target=__subprocess_download, args=(process_datas, ))
procs.append(p)
p.start()
# complete the processes
for proc in procs:
proc.join()
_logger.info("Finish {} multi process to download datas".format(
multi_processes))
local_downloads = []
for data in need_download:
data_name = os.path.basename(data)
re_path = os.path.relpath(os.path.dirname(data), hdfs_path)
local_re_path = os.path.join(local_path, re_path, data_name)
local_downloads.append(local_re_path)
return local_downloads
if __name__ == "__main__":
hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
"fs.default.name": "hdfs://xxx.hadoop.com:54310",
"hadoop.job.ugi": "hello,hello123"
}
client = HDFSClient(hadoop_home, configs)
client.ls("/user/com/train-25")
files = client.lsr("/user/com/train-25/models")
downloads = multi_download(
client,
"/user/com/train-25/model",
"/home/xx/data1",
1,
5,
multi_processes=5)
multi_upload(client, "/user/com/train-25/model", "/home/xx/data1")
...@@ -85,6 +85,7 @@ __all__ = [ ...@@ -85,6 +85,7 @@ __all__ = [
'row_conv', 'row_conv',
'multiplex', 'multiplex',
'layer_norm', 'layer_norm',
'group_norm',
'softmax_with_cross_entropy', 'softmax_with_cross_entropy',
'smooth_l1', 'smooth_l1',
'one_hot', 'one_hot',
...@@ -2547,6 +2548,84 @@ def layer_norm(input, ...@@ -2547,6 +2548,84 @@ def layer_norm(input,
return helper.append_activation(layer_norm_out) return helper.append_activation(layer_norm_out)
@templatedoc()
def group_norm(input,
groups,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
act=None,
data_layout='NCHW',
name=None):
"""
**Group Normalization Layer**
Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`
Args:
input(Variable): The input tensor variable.
groups(int): The number of groups that divided from channels.
epsilon(float): The small value added to the variance to prevent
division by zero.
param_attr(ParamAttr|None): The parameter attribute for the learnable
scale :math:`g`. If it is set to False, no scale will be added to the output units.
If it is set to None, the bias is initialized one. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act(str): Activation to be applied to the output of group normalizaiton.
data_layout(string|NCHW): Only NCHW is supported.
name (str): The name of this layer. It is optional.
Returns:
Variable: A tensor variable which is the result after applying group normalization on the input.
Examples:
>>> data = fluid.layers.data(name='data', shape=[8, 32, 32],
>>> dtype='float32')
>>> x = fluid.layers.group_norm(input=data, groups=4)
"""
helper = LayerHelper('group_norm', **locals())
dtype = helper.input_dtype()
# create intput and parameters
inputs = {'X': input}
input_shape = input.shape
if data_layout != 'NCHW':
raise ValueError("unsupported data layout:" + data_layout)
param_shape = [input_shape[1]]
if param_attr:
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0))
inputs['Scale'] = scale
if bias_attr:
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
inputs['Bias'] = bias
# create output
mean_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
variance_out = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
group_norm_out = helper.create_tmp_variable(dtype)
helper.append_op(
type="group_norm",
inputs=inputs,
outputs={
"Y": group_norm_out,
"Mean": mean_out,
"Variance": variance_out,
},
attrs={"epsilon": epsilon,
"groups": groups})
return helper.append_activation(group_norm_out)
def conv2d_transpose(input, def conv2d_transpose(input,
num_filters, num_filters,
output_size=None, output_size=None,
......
...@@ -46,8 +46,8 @@ def _is_numpy_(var): ...@@ -46,8 +46,8 @@ def _is_numpy_(var):
def _is_number_(var): def _is_number_(var):
return isinstance(var, int) or isinstance(var, float) or (isinstance( return isinstance(var, int) or isinstance(var, np.int64) or isinstance(
var, np.ndarray) and var.shape == (1, )) var, float) or (isinstance(var, np.ndarray) and var.shape == (1, ))
def _is_number_or_matrix_(var): def _is_number_or_matrix_(var):
......
...@@ -23,7 +23,9 @@ if(NOT WITH_DISTRIBUTE) ...@@ -23,7 +23,9 @@ if(NOT WITH_DISTRIBUTE)
LIST(REMOVE_ITEM TEST_OPS test_dist_text_classification) LIST(REMOVE_ITEM TEST_OPS test_dist_text_classification)
endif(NOT WITH_DISTRIBUTE) endif(NOT WITH_DISTRIBUTE)
if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7) if (NOT ${WITH_GPU})
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
elseif(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
endif() endif()
...@@ -79,10 +81,12 @@ list(REMOVE_ITEM TEST_OPS test_dist_se_resnext) ...@@ -79,10 +81,12 @@ list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
list(REMOVE_ITEM TEST_OPS test_dist_transformer) list(REMOVE_ITEM TEST_OPS test_dist_transformer)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer) list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
list(REMOVE_ITEM TEST_OPS test_interpolate_op)
foreach(TEST_OP ${TEST_OPS}) foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP}) py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP) endforeach(TEST_OP)
py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL) py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
py_test_modules(test_interpolate_op MODULES test_interpolate_op SERIAL)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
py_test_modules(test_dist_train MODULES test_dist_train SERIAL) py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20) set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
......
...@@ -381,8 +381,8 @@ class OpTest(unittest.TestCase): ...@@ -381,8 +381,8 @@ class OpTest(unittest.TestCase):
outs.sort(key=len) outs.sort(key=len)
checker(outs) checker(outs)
def __assert_is_close(self, numeric_grads, analytic_grads, names, def _assert_is_close(self, numeric_grads, analytic_grads, names,
max_relative_error, msg_prefix): max_relative_error, msg_prefix):
for a, b, name in six.moves.zip(numeric_grads, analytic_grads, names): for a, b, name in six.moves.zip(numeric_grads, analytic_grads, names):
abs_a = np.abs(a) abs_a = np.abs(a)
...@@ -451,9 +451,9 @@ class OpTest(unittest.TestCase): ...@@ -451,9 +451,9 @@ class OpTest(unittest.TestCase):
analytic_grads = self._get_gradient(inputs_to_check, place, analytic_grads = self._get_gradient(inputs_to_check, place,
output_names, no_grad_set) output_names, no_grad_set)
self.__assert_is_close(numeric_grads, analytic_grads, inputs_to_check, self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
max_relative_error, max_relative_error,
"Gradient Check On %s" % str(place)) "Gradient Check On %s" % str(place))
@staticmethod @staticmethod
def _numpy_to_lod_tensor(np_value, lod, place): def _numpy_to_lod_tensor(np_value, lod, place):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from operator import mul
import paddle.fluid.core as core
import paddle.fluid as fluid
from op_test import OpTest
from testsuite import create_op
def group_norm_naive(x, scale, bias, epsilon, groups):
N, C, H, W = x.shape
G = groups
x = x.reshape((N * G, -1))
mean = np.mean(x, axis=1, keepdims=True)
var = np.var(x, axis=1, keepdims=True)
output = (x - mean) / np.sqrt(var + epsilon)
output = output.reshape((N, C, H, W)) * scale.reshape(
(-1, 1, 1)) + bias.reshape((-1, 1, 1))
return output, mean.reshape((N, G)), var.reshape((N, G))
class TestGroupNormOp(OpTest):
def setUp(self):
self.op_type = "group_norm"
self.data_format = "NCHW"
self.dtype = np.float32
self.shape = (2, 4, 3, 3)
self.attrs = {'epsilon': 1e-5, 'groups': 2}
self.compare_between_place = False
self.init_test_case()
input = np.random.random(self.shape).astype(self.dtype)
scale = np.random.random([self.shape[1]]).astype(self.dtype)
bias = np.random.random([self.shape[1]]).astype(self.dtype)
output, mean, var = group_norm_naive(
input, scale, bias, self.attrs['epsilon'], self.attrs['groups'])
self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(input),
'Scale': OpTest.np_dtype_to_fluid_dtype(scale),
'Bias': OpTest.np_dtype_to_fluid_dtype(bias)
}
self.outputs = {'Y': output, 'Mean': mean, 'Variance': var}
def test_check_output(self):
atol = 1e-4
place = core.CPUPlace()
self.check_output_with_place(place, atol=atol)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=atol)
def do_compare_between_place(self):
if not core.is_compiled_with_cuda(): return
place = core.CPUPlace()
place2 = core.CUDAPlace(0)
self.scope = core.Scope()
op_inputs = self.inputs if hasattr(self, "inputs") else dict()
op_outputs = self.outputs if hasattr(self, "outputs") else dict()
op_attrs = self.attrs if hasattr(self, "attrs") else dict()
self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs,
op_attrs)
inputs_to_check = set(['X', 'Scale', 'Bias'])
output_names = 'Y'
cpu_grads = self._get_gradient(inputs_to_check, place, output_names,
None)
gpu_grads = self._get_gradient(inputs_to_check, place2, output_names,
None)
self._assert_is_close(cpu_grads, gpu_grads, inputs_to_check, 0.005,
"Gradient Check On %s" % str(place))
def test_check_grad(self):
if self.compare_between_place:
self.do_compare_between_place()
return
place = core.CPUPlace()
self.check_grad_with_place(
place, set(['X', 'Scale', 'Bias']), 'Y', max_relative_error=0.01)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['X', 'Scale', 'Bias']),
'Y',
max_relative_error=0.01)
def init_test_case(self):
pass
class TestGroupNormOp1(TestGroupNormOp):
def init_test_case(self):
self.attrs['groups'] = 1
class TestGroupNormOp2(TestGroupNormOp):
def init_test_case(self):
self.attrs['groups'] = 4
class TestGroupNormOpBigEps1(TestGroupNormOp):
def init_test_case(self):
self.attrs['groups'] = 1
self.attrs['epsilon'] = 0.5
class TestGroupNormOpBigEps2(TestGroupNormOp):
def init_test_case(self):
self.attrs['groups'] = 4
self.attrs['epsilon'] = 0.5
class TestGroupNormOpBigEps3(TestGroupNormOp):
def init_test_case(self):
self.attrs['epsilon'] = 0.5
class TestGroupNormOpLargeData(TestGroupNormOp):
def init_test_case(self):
self.shape = (2, 32, 64, 64)
self.attrs['groups'] = 8
self.compare_between_place = True
if __name__ == '__main__':
unittest.main()
...@@ -36,17 +36,21 @@ RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf ...@@ -36,17 +36,21 @@ RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf
tar xzf protobuf-cpp-3.1.0.tar.gz && \ tar xzf protobuf-cpp-3.1.0.tar.gz && \
cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz
RUN wget -O /root/requirements.txt https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt
RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install -r /root/requirements.txt && \ RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install -r /root/requirements.txt && \ LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install -r /root/requirements.txt && \ LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.6.0/bin/pip3 install -r /root/requirements.txt && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install -r /root/requirements.txt && \
go get github.com/Masterminds/glide && \ go get github.com/Masterminds/glide && \
rm -rf /root/requirements.txt rm -rf /root/requirements.txt
RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \ RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \ LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python LD_LIBRARY_PATH=/opt/_internal/cpython-3.5.1/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.5.1/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.6.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.6.0/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python && \
LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install pre-commit 'ipython==5.3.0' opencv-python
RUN wget -O /opt/swig-2.0.12.tar.gz https://cytranet.dl.sourceforge.net/project/swig/swig/swig-2.0.12/swig-2.0.12.tar.gz && \ RUN wget -O /opt/swig-2.0.12.tar.gz https://cytranet.dl.sourceforge.net/project/swig/swig/swig-2.0.12/swig-2.0.12.tar.gz && \
cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz
......
...@@ -9,12 +9,12 @@ set -ex ...@@ -9,12 +9,12 @@ set -ex
# remove others to expedite build and reduce docker image size. The original # remove others to expedite build and reduce docker image size. The original
# manylinux docker image project builds many python versions. # manylinux docker image project builds many python versions.
# NOTE We added back 3.5.1, since auditwheel requires python 3.3+ # NOTE We added back 3.5.1, since auditwheel requires python 3.3+
CPYTHON_VERSIONS="2.7.11 3.5.1" CPYTHON_VERSIONS="3.7.0 3.6.0 3.5.1 2.7.11"
# openssl version to build, with expected sha256 hash of .tar.gz # openssl version to build, with expected sha256 hash of .tar.gz
# archive # archive
OPENSSL_ROOT=openssl-1.0.2l OPENSSL_ROOT=openssl-1.1.0i
OPENSSL_HASH=ce07195b659e75f4e1db43552860070061f156a98bb37b672b101ba6e3ddf30c OPENSSL_HASH=ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99
EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
...@@ -25,7 +25,7 @@ AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969 ...@@ -25,7 +25,7 @@ AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
# Dependencies for compiling Python that we want to remove from # Dependencies for compiling Python that we want to remove from
# the final image after compiling Python # the final image after compiling Python
PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel" PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel"
# Libraries that are allowed as part of the manylinux1 profile # Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel freetype-devel libpng-devel" MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel freetype-devel libpng-devel"
...@@ -61,7 +61,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \ ...@@ -61,7 +61,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \
wget -q https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz && tar xzf cmake-3.5.2.tar.gz && \ wget -q https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz && tar xzf cmake-3.5.2.tar.gz && \
cd cmake-3.5.2 && ./bootstrap && \ cd cmake-3.5.2 && ./bootstrap && \
make -j4 && make install && cd .. && rm cmake-3.5.2.tar.gz make -j8 && make install && cd .. && rm cmake-3.5.2.tar.gz
# Install newest autoconf # Install newest autoconf
...@@ -77,11 +77,13 @@ mkdir -p /opt/python ...@@ -77,11 +77,13 @@ mkdir -p /opt/python
build_cpythons $CPYTHON_VERSIONS build_cpythons $CPYTHON_VERSIONS
PY35_BIN=/opt/python/cp35-cp35m/bin PY35_BIN=/opt/python/cp35-cp35m/bin
PY36_BIN=/opt/python/cp36-cp36m/bin
PY37_BIN=/opt/python/cp37-cp37m/bin
# NOTE Since our custom manylinux image builds pythons with shared # NOTE Since our custom manylinux image builds pythons with shared
# libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running # libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running
# python. # python.
ORIGINAL_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" ORIGINAL_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib:$(dirname ${PY36_BIN})/lib:$(dirname ${PY37_BIN})/lib"
# Our openssl doesn't know how to find the system CA trust store # Our openssl doesn't know how to find the system CA trust store
# (https://github.com/pypa/manylinux/issues/53) # (https://github.com/pypa/manylinux/issues/53)
...@@ -119,9 +121,8 @@ ln -s $PY35_BIN/auditwheel /usr/local/bin/auditwheel ...@@ -119,9 +121,8 @@ ln -s $PY35_BIN/auditwheel /usr/local/bin/auditwheel
# final image # final image
yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \ yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
avahi freetype bitstream-vera-fonts \ avahi freetype bitstream-vera-fonts \
${PYTHON_COMPILE_DEPS} > /dev/null 2>&1 ${PYTHON_COMPILE_DEPS} > /dev/null 2>&1 || true
yum -y install ${MANYLINUX1_DEPS} yum -y install ${MANYLINUX1_DEPS} && yum -y clean all > /dev/null 2>&1 || true
yum -y clean all > /dev/null 2>&1
yum list installed yum list installed
# we don't need libpython*.a, and they're many megabytes # we don't need libpython*.a, and they're many megabytes
find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
......
...@@ -52,9 +52,17 @@ function do_cpython_build { ...@@ -52,9 +52,17 @@ function do_cpython_build {
# NOTE --enable-shared for generating libpython shared library needed for # NOTE --enable-shared for generating libpython shared library needed for
# linking of some of the nupic.core test executables. # linking of some of the nupic.core test executables.
CFLAGS="-Wformat" ./configure --prefix=${prefix} --enable-shared $unicode_flags > /dev/null if [ $(lex_pyver $py_ver) -ge $(lex_pyver 3.7) ]; then
make -j2 > /dev/null # NOTE python 3.7 should be installed via make altinstall rather than
make install > /dev/null # make install, and we should specify the location of ssl
CFLAGS="-Wformat" ./configure --prefix=${prefix} --with-openssl=/usr/local/ssl --enable-shared $unicode_flags > /dev/null
make -j8 > /dev/null
make altinstall > /dev/null
else
CFLAGS="-Wformat" ./configure --prefix=${prefix} --enable-shared $unicode_flags > /dev/null
make -j8 > /dev/null
make install > /dev/null
fi
popd popd
echo "ZZZ looking for libpython" echo "ZZZ looking for libpython"
find / -name 'libpython*.so*' find / -name 'libpython*.so*'
...@@ -64,6 +72,9 @@ function do_cpython_build { ...@@ -64,6 +72,9 @@ function do_cpython_build {
if [ -e ${prefix}/bin/python3 ]; then if [ -e ${prefix}/bin/python3 ]; then
ln -s python3 ${prefix}/bin/python ln -s python3 ${prefix}/bin/python
fi fi
if [ -e ${prefix}/bin/python3.7 ]; then
ln -s python3.7 ${prefix}/bin/python
fi
# NOTE Make libpython shared library visible to python calls below # NOTE Make libpython shared library visible to python calls below
LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python get-pip.py LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python get-pip.py
LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/pip install wheel LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/pip install wheel
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册