提交 7c8c9dc9 编写于 作者: P peizhilin

fix unit test cases

上级 bef475c9
......@@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WIN32)
list(APPEND win32_deps shlwapi)
if("${cc_test_DEPS};" MATCHES "python;")
list(REMOVE_ITEM cc_test_DEPS python)
list(APPEND win32_deps ${PYTHON_LIBRARIES})
endif()
endif(WIN32)
add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
if(WIN32)
target_link_libraries(${TARGET_NAME} shlwapi)
target_link_libraries(${TARGET_NAME} ${win32_deps})
endif(WIN32)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(NAME ${TARGET_NAME}
......@@ -679,7 +686,7 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME}
COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true
PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
......
......@@ -23,7 +23,7 @@ namespace paddle {
namespace framework {
namespace details {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
......@@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
}
if (platform::is_gpu_place(lod_tensors[0]->place())) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
int dtype = -1;
size_t numel = 0;
......
......@@ -20,7 +20,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
......@@ -29,7 +29,7 @@ namespace framework {
namespace details {
struct AllReduceOpHandle : public OpHandleBase {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
AllReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
const platform::NCCLContextMap *ctxs);
......@@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
private:
std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const platform::NCCLContextMap *nccl_ctxs_;
#endif
};
......
......@@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
});
}
} else {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
VarHandle *out_handle = nullptr;
int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
std::vector<std::function<void()>> broadcast_calls;
......
......@@ -24,7 +24,7 @@
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
......@@ -34,7 +34,7 @@ namespace details {
struct BroadcastOpHandle : public OpHandleBase {
public:
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
BroadcastOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
const platform::NCCLContextMap *nccl_ctxs)
......@@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {
std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const platform::NCCLContextMap *nccl_ctxs_;
#endif
......
......@@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
std::vector<std::unique_ptr<ir::Node>> nodes_;
std::vector<p::Place> place_list_;
bool use_gpu_;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif
......@@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
for (size_t j = 0; j < ctxs_.size(); ++j) {
ctxs_[j]->Wait();
}
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (nccl_ctxs_) {
nccl_ctxs_->WaitAll();
}
......@@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
void InitCtxOnGpu(bool use_gpu) {
use_gpu_ = use_gpu;
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int count = p::GetCUDADeviceCount();
if (count <= 1) {
LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
......@@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
place_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p));
}
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_.reset(nullptr);
#endif
}
......@@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
nodes_.emplace_back(
ir::CreateNodeForTest("node0", ir::Node::Type::kOperation));
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
place_list_, nccl_ctxs_.get());
#else
PADDLE_THROW("CUDA is not support.");
#endif
} else {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
place_list_, nccl_ctxs_.get());
#else
......
......@@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
#else
const bool use_cuda) const {
......@@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass->Erase("local_scopes");
pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
&local_scopes);
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
pass->Erase("nccl_ctxs");
pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
......
......@@ -23,7 +23,7 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
......@@ -98,7 +98,7 @@ struct BuildStrategy {
const std::string &loss_var_name,
const std::unordered_set<std::string> &param_names,
const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
#else
const bool use_cuda) const;
......
......@@ -20,7 +20,7 @@ namespace paddle {
namespace framework {
namespace details {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle::DataBalanceOpHandle(
ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
......
......@@ -19,7 +19,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
......@@ -29,7 +29,7 @@ namespace details {
struct DataBalanceOpHandle : public OpHandleBase {
public:
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
const platform::NCCLContextMap *ctxs);
......
......@@ -25,7 +25,7 @@
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
......@@ -35,7 +35,7 @@ namespace details {
struct FusedBroadcastOpHandle : public BroadcastOpHandle {
public:
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
FusedBroadcastOpHandle(ir::Node *node,
const std::vector<Scope *> local_scopes,
const std::vector<platform::Place> &places,
......
......@@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
nodes_.emplace_back(
ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation));
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new FusedBroadcastOpHandle(
nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
#else
PADDLE_THROW("CUDA is not supported.");
#endif
} else {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_ = new FusedBroadcastOpHandle(
nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
#else
......
......@@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
places_ = Get<const std::vector<platform::Place>>(kPlaces);
local_scopes_ = Get<const std::vector<Scope *>>(kLocalScopes);
strategy_ = Get<const BuildStrategy>(kStrategy);
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_ = &Get<platform::NCCLContextMap>("nccl_ctxs");
#endif
......@@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
}
}
bool use_gpu = false;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
use_gpu = nccl_ctxs_ != nullptr;
#endif
......@@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
void MultiDevSSAGraphBuilder::SetCommunicationContext(
OpHandleBase *op_handle, const platform::Place &p) const {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (nccl_ctxs_ == nullptr) {
op_handle->SetDeviceContext(p,
platform::DeviceContextPool::Instance().Get(p));
......@@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
const std::string &p_name,
size_t src_dev_id) const {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto *op_handle = new BroadcastOpHandle(
result->CreateEmptyNode("broadcast", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_);
......@@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
void MultiDevSSAGraphBuilder::CreateFusedBroadcastOp(
ir::Graph *result,
const std::vector<std::unordered_set<std::string>> &bcast_varnames) const {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto *op_handle = new FusedBroadcastOpHandle(
result->CreateEmptyNode("fused_broadcast", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_);
......@@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,
void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
const std::string &og) const {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_));
......@@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
ir::Graph *result, const std::vector<std::string> &datas) const {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new DataBalanceOpHandle(
result->CreateEmptyNode("data_balance", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_));
......@@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
const std::string &og,
int dst_dev_id) const {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
result->Get<GraphOps>(kGraphOps).emplace_back(new ReduceOpHandle(
result->CreateEmptyNode("reduce", ir::Node::Type::kOperation),
local_scopes_, places_, nccl_ctxs_));
......
......@@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
size_t device_id) const;
void Init() const;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
mutable platform::NCCLContextMap *nccl_ctxs_;
#endif
......
......@@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
}
});
} else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto pre_in = pre_in_var->Get<framework::LoDTensor>();
VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
VariableVisitor::GetMutableTensor(out_var).mutable_data(
......
......@@ -23,7 +23,7 @@
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
......@@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
std::vector<Scope *> local_scopes_;
std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const platform::NCCLContextMap *nccl_ctxs_;
ReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
......
......@@ -35,7 +35,7 @@ struct TestReduceOpHandle {
std::vector<p::Place> gpu_list_;
std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif
......@@ -43,7 +43,7 @@ struct TestReduceOpHandle {
for (size_t j = 0; j < ctxs_.size(); ++j) {
ctxs_[j]->Wait();
}
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (nccl_ctxs_) {
nccl_ctxs_->WaitAll();
}
......@@ -53,7 +53,7 @@ struct TestReduceOpHandle {
void InitCtxOnGpu(bool use_gpu) {
use_gpu_ = use_gpu;
if (use_gpu) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int count = p::GetCUDADeviceCount();
if (count <= 1) {
LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
......@@ -77,7 +77,7 @@ struct TestReduceOpHandle {
gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p));
}
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
nccl_ctxs_.reset(nullptr);
#endif
}
......@@ -99,14 +99,14 @@ struct TestReduceOpHandle {
nodes.emplace_back(new ir::Node("node"));
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
gpu_list_, nccl_ctxs_.get()));
#else
PADDLE_THROW("CUDA is not support.");
#endif
} else {
#ifdef PADDLE_WITH_CUDA
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
gpu_list_, nccl_ctxs_.get()));
#else
......
......@@ -15,7 +15,10 @@
#include "paddle/fluid/framework/ir/is_test_pass.h"
#include <gtest/gtest.h>
#ifdef _WIN32
#undef FALSE
#undef TRUE
#endif
namespace paddle {
namespace framework {
namespace ir {
......
......@@ -19,6 +19,7 @@
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/platform/port.h"
namespace paddle {
namespace inference {
......@@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) {
LOG(INFO) << "data: "
<< static_cast<float*>(outputs.front().data.data())[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
......
......@@ -15,10 +15,6 @@
#pragma once
#include <glog/logging.h>
#if !defined(_WIN32)
#include <sys/time.h>
#else
#endif
#include <algorithm>
#include <chrono> // NOLINT
......@@ -28,6 +24,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
namespace paddle {
......
......@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <sys/time.h>
#include <time.h>
#include <algorithm>
#include <fstream>
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <time.h>
#include <fstream>
#include <thread> // NOLINT
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(use_mkldnn);
......
......@@ -30,23 +30,23 @@ using std::endl;
void CreateInput(LoDTensor* ids, LoDTensor* scores) {
LoD lod;
vector<size_t> level0({0, 2, 4});
vector<size_t> level1({0, 1, 2, 3, 4});
vector<size_t> level0{0, 2, 4};
vector<size_t> level1{0, 1, 2, 3, 4};
lod.push_back(level0);
lod.push_back(level1);
ids->set_lod(lod);
scores->set_lod(lod);
auto dims = framework::make_ddim(vector<int64_t>({4, 3}));
auto dims = framework::make_ddim(vector<int64_t>{4, 3});
ids->Resize(dims);
scores->Resize(dims);
CPUPlace place;
auto* ids_data = ids->mutable_data<int64_t>(place);
auto* scores_data = scores->mutable_data<float>(place);
vector<int64_t> _ids({4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1});
vector<float> _scores(
{0.5, 0.3, 0.2, 0.6, 0.3, 0.1, 0.9, 0.5, 0.1, 0.7, 0.5, 0.1});
vector<int64_t> _ids{4, 2, 5, 2, 1, 3, 3, 5, 2, 8, 2, 1};
vector<float> _scores{0.5f, 0.3f, 0.2f, 0.6f, 0.3f, 0.1f,
0.9f, 0.5f, 0.1f, 0.7f, 0.5f, 0.1f};
for (int i = 0; i < 12; i++) {
ids_data[i] = _ids[i];
......@@ -79,8 +79,8 @@ TEST(DISABLED_beam_search_op, run) {
ASSERT_EQ(sids.lod(), sscores.lod());
vector<int> tids({4, 2, 3, 8});
vector<float> tscores({0.5, 0.6, 0.9, 0.7});
vector<int> tids{4, 2, 3, 8};
vector<float> tscores{0.5f, 0.6f, 0.9f, 0.7f};
for (int i = 0; i < 4; i++) {
ASSERT_EQ(tids[i], sids.data<int64_t>()[i]);
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <limits>
#include "glog/logging.h" // For VLOG
......@@ -20,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_client.h"
#include "paddle/fluid/operators/distributed/grpc_serde.h"
#include "paddle/fluid/operators/distributed/request_handler.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#endif
#include <sys/time.h>
#include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h"
......@@ -26,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/operators/distributed/grpc_variable_response.h"
#include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <sys/time.h>
#include <iostream>
#include <string>
#include <vector>
......@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
......
......@@ -15,12 +15,12 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#endif
#include <sys/time.h>
#include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
#include "paddle/fluid/operators/distributed/variable_response.h"
#include "paddle/fluid/platform/port.h"
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <sys/time.h>
#include <iostream>
#include <string>
#include <vector>
......@@ -24,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <cmath>
#include <cstring>
#include <random>
......@@ -22,6 +21,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/port.h"
inline double GetCurrentUS() {
struct timeval time;
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/math/im2col.h"
#include <gtest/gtest.h>
#include <sys/time.h>
#include <vector>
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
#include "paddle/fluid/platform/port.h"
template <typename DeviceContext, typename Place>
void testIm2col() {
......
......@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/jit_kernel.h"
#include <sys/time.h>
#include <cmath> // for exp
#include <cstring> // for memcpy
#include <random>
......@@ -22,6 +21,7 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
......
......@@ -62,7 +62,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
#define CUDNN_ENFORCE(condition) \
do { \
cudnnStatus_t status = condition; \
auto status = condition; \
if (UNLIKELY(status != CUDNN_STATUS_SUCCESS)) { \
PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
} \
......
......@@ -48,13 +48,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#else
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline cudnnStatus_t operator()(Args... args) { \
return ::__name(args...); \
} \
}; \
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline auto operator()(Args... args) { \
return ::__name(args...); \
} \
}; \
extern DynLoad__##__name __name
#endif
......
......@@ -19,7 +19,10 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
// fraction_of_gpu_memory_to_use cannot be too high on Windows,
// because the Win32 graphics subsystem may occupy some GPU memory,
// which can leave insufficient memory for Paddle.
DEFINE_double(fraction_of_gpu_memory_to_use, 0.5,
"Allocate a trunk of gpu memory that is this fraction of the "
"total gpu memory size. Future memory usage will be allocated "
"from the trunk. If the trunk doesn't have enough gpu memory, "
......
......@@ -18,7 +18,7 @@
#include <cuda_runtime.h>
#include <functional>
#include <memory>
#include "ThreadPool.h"
#include <ThreadPool.h>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#ifndef HL_WARPCTC_WRAP_H_
#define HL_WARPCTC_WRAP_H_
#include "ctc.h"
#include "hl_base.h"
......@@ -91,3 +91,4 @@ extern void hl_warpctc_get_workspace_size(const int* cpuLabelLengths,
size_t* bytes);
#endif // HL_WARPCTC_WRAP_H_
#endif
......@@ -132,11 +132,15 @@ inline pid_t gettid() {
uint64_t tid;
pthread_threadid_np(NULL, &tid);
#else
#ifndef _WIN32
#ifndef __NR_gettid
#define __NR_gettid 224
#endif
pid_t tid = syscall(__NR_gettid);
#endif
#else // _WIN32
pid_t tid = _getpid();
#endif // _WIN32
CHECK_NE((int)tid, -1);
return tid;
}
......
......@@ -14,10 +14,12 @@ limitations under the License. */
#pragma once
#ifndef _WIN32
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <sys/types.h>
#include <map>
#include <mutex>
#include <random>
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#ifndef _WIN32
#include <sys/syscall.h> // for syscall()
#endif
#include <sys/types.h>
#include <algorithm>
#include <cmath>
......@@ -40,6 +42,31 @@ inline int rand_r(unsigned int* seedp) {
}
#endif
#ifdef _WIN32
// MSVC does not provide the GCC-style count-leading-zeros builtins, so they
// are emulated here on top of the _BitScanReverse intrinsic.
#ifndef NOMINMAX
#define NOMINMAX  // msvc max/min macro conflict with std::min/max
#endif
#include <windows.h>

/**
 * Count the leading zero bits of an integer value.
 *
 * @tparam T    integer type of the argument; the count is relative to the
 *              width of T (sizeof(T) * 8 bits).
 * @param value the value to inspect.
 * @return the number of zero bits above the highest set bit of `value`.
 *         Returns 0 when `value` is 0 (GCC leaves that case undefined, so
 *         callers must not rely on it).
 */
template <typename T>
inline int __builtin_clz(const T& value) {
  const int bit_width = static_cast<int>(sizeof(T) * 8);
  // Widen to 64 bits, then mask back to the width of T so that a negative
  // signed input is not sign-extended into the upper bits.
  const unsigned long long bits =
      static_cast<unsigned long long>(value) & (~0ULL >> (64 - bit_width));

  // _BitScanReverse only accepts a 32-bit mask, so scan the upper half first
  // and fall back to the lower half. It reports the *index* of the highest
  // set bit, hence leading zeros = bit_width - 1 - index.
  DWORD msb_index = 0;
  const unsigned long high_half = static_cast<unsigned long>(bits >> 32);
  if (_BitScanReverse(&msb_index, high_half)) {
    return bit_width - 1 - static_cast<int>(msb_index + 32);
  }
  const unsigned long low_half =
      static_cast<unsigned long>(bits & 0xFFFFFFFFULL);
  if (_BitScanReverse(&msb_index, low_half)) {
    return bit_width - 1 - static_cast<int>(msb_index);
  }
  return 0;  // no bit set
}

/** Leading-zero count for `unsigned long`; forwards to the generic shim. */
inline int __builtin_clzl(const unsigned long& value) {
  return __builtin_clz(value);
}

/** Leading-zero count for `unsigned long long`; forwards to the generic shim. */
inline int __builtin_clzll(const unsigned long long& value) {
  return __builtin_clz(value);
}

// Alias pid_t to int for the Windows build.
#define pid_t int
#endif
/**
* Loop over the elements in a container
* TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach,
......
......@@ -3,8 +3,10 @@
if(WITH_TESTING)
add_library(paddle_test_main STATIC TestMain.cpp)
add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies})
add_library(paddle_test_util STATIC TestUtil.cpp)
add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
if(NOT WIN32)
add_library(paddle_test_util STATIC TestUtil.cpp)
add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies})
endif(NOT WIN32)
if(NOT MOBILE_INFERENCE)
cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags)
endif()
......
......@@ -46,8 +46,8 @@ def _is_numpy_(var):
def _is_number_(var):
return isinstance(var, int) or isinstance(var, float) or (isinstance(
var, np.ndarray) and var.shape == (1, ))
return isinstance(var, int) or isinstance(var, np.int64) or isinstance(
var, float) or (isinstance(var, np.ndarray) and var.shape == (1, ))
def _is_number_or_matrix_(var):
......
......@@ -23,9 +23,11 @@ if(NOT WITH_DISTRIBUTE)
LIST(REMOVE_ITEM TEST_OPS test_dist_text_classification)
endif(NOT WITH_DISTRIBUTE)
if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
endif()
if(WITH_GPU)
if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
endif()
endif(WITH_GPU)
list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册