You need to sign in or sign up before continuing.
提交 85f8dd1c 编写于 作者: D dzhwinter

debug version

上级 e1999538
...@@ -172,7 +172,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF) ...@@ -172,7 +172,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if (NOT WIN32) # windows msvc2015 support c++11 natively. if (NOT WIN32) # windows msvc2015 support c++11 natively.
# -std=c++11 -fPIC not recoginize by msvc # -std=c++11 -fPIC not recoginize by msvc
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") list(APPEND CUDA_NVCC_FLAGS "-w" "-Xcompiler -fPIC" "-Xcompiler /w")
endif(NOT WIN32) endif(NOT WIN32)
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math") list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
......
...@@ -150,7 +150,7 @@ set(COMMON_FLAGS ...@@ -150,7 +150,7 @@ set(COMMON_FLAGS
"/w") #disable all warnings. "/w") #disable all warnings.
set(GPU_COMMON_FLAGS set(GPU_COMMON_FLAGS
"") #disable all warnings "/w") #disable all warnings
endif(NOT WIN32) endif(NOT WIN32)
...@@ -177,12 +177,22 @@ endif(UNIX AND NOT APPLE) ...@@ -177,12 +177,22 @@ endif(UNIX AND NOT APPLE)
foreach(flag ${COMMON_FLAGS}) foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach() endforeach()
foreach(flag ${GPU_COMMON_FLAGS}) foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag}) safe_set_nvflag(${flag})
endforeach() endforeach()
if(MSVC) if(WIN32)
safe_set_static_flag() safe_set_static_flag()
endif(MSVC) foreach(flag_var
\ No newline at end of file CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/W3")
string(REGEX REPLACE "/W3" "/w" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/W3")
endforeach(flag_var)
endif(WIN32)
...@@ -243,6 +243,7 @@ function(cc_library TARGET_NAME) ...@@ -243,6 +243,7 @@ function(cc_library TARGET_NAME)
# add libxxx.lib prefix in windows # add libxxx.lib prefix in windows
set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
endif(WIN32) endif(WIN32)
message("flags" ${CMAKE_CXX_FLAGS})
if(cc_library_SRCS) if(cc_library_SRCS)
if(cc_library_SHARED OR cc_library_shared) # build *.so if(cc_library_SHARED OR cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS}) add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
...@@ -305,7 +306,7 @@ function(cc_test TARGET_NAME) ...@@ -305,7 +306,7 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS) set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS}) add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog shlwapi openblas)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS} COMMAND ${TARGET_NAME} ${cc_test_ARGS}
...@@ -375,7 +376,7 @@ function(nv_test TARGET_NAME) ...@@ -375,7 +376,7 @@ function(nv_test TARGET_NAME)
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog shlwapi)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL) if (nv_test_SERIAL)
......
...@@ -149,8 +149,10 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -149,8 +149,10 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::SetDeviceId(dev_id); platform::SetDeviceId(dev_id);
#endif #endif
} }
VLOG(3) << "start pool";
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place)); platform::RecordEvent record_event(Type(), pool.Get(place));
VLOG(3) << "start RunImpl";
RunImpl(scope, place); RunImpl(scope, place);
VLOG(3) << place << " " << DebugStringEx(&scope); VLOG(3) << place << " " << DebugStringEx(&scope);
} }
...@@ -660,12 +662,16 @@ static void CheckTensorNANOrInf(const std::string& name, ...@@ -660,12 +662,16 @@ static void CheckTensorNANOrInf(const std::string& name,
void OperatorWithKernel::RunImpl(const Scope& scope, void OperatorWithKernel::RunImpl(const Scope& scope,
const platform::Place& place) const { const platform::Place& place) const {
RuntimeInferShapeContext infer_shape_ctx(*this, scope); RuntimeInferShapeContext infer_shape_ctx(*this, scope);
VLOG(3) << "start Infershape";
this->InferShape(&infer_shape_ctx); this->InferShape(&infer_shape_ctx);
VLOG(3) << "Infershape Pass";
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place); auto* dev_ctx = pool.Get(place);
// check if op[type] has kernel registered. // check if op[type] has kernel registered.
VLOG(3) << "Start Kernels";
auto& all_op_kernels = AllOpKernels(); auto& all_op_kernels = AllOpKernels();
VLOG(3) << "Kernel map finish";
auto kernels_iter = all_op_kernels.find(type_); auto kernels_iter = all_op_kernels.find(type_);
if (kernels_iter == all_op_kernels.end()) { if (kernels_iter == all_op_kernels.end()) {
PADDLE_THROW( PADDLE_THROW(
......
...@@ -20,6 +20,8 @@ limitations under the License. */ ...@@ -20,6 +20,8 @@ limitations under the License. */
#include <tuple> #include <tuple>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "glog/logging.h" // For VLOG #include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/attribute.h"
......
...@@ -114,7 +114,9 @@ if(WITH_GPU) ...@@ -114,7 +114,9 @@ if(WITH_GPU)
if(NOT WIN32) if(NOT WIN32)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
else() else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX}) set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
endif() endif()
endif() endif()
......
...@@ -186,7 +186,12 @@ void Main(bool use_gpu) { ...@@ -186,7 +186,12 @@ void Main(bool use_gpu) {
std::cout << "begin to process data" << std::endl; std::cout << "begin to process data" << std::endl;
// Just a single batch of data. // Just a single batch of data.
std::string line; std::string line;
std::cout << "data : " << std::endl;
std::ifstream file(DATA); std::ifstream file(DATA);
if(!file.is_open()) {
std::cout << "failed open data" << DATA << std::endl;
exit(0);
}
std::getline(file, line); std::getline(file, line);
auto record = ProcessALine(line); auto record = ProcessALine(line);
file.close(); file.close();
...@@ -207,6 +212,7 @@ void Main(bool use_gpu) { ...@@ -207,6 +212,7 @@ void Main(bool use_gpu) {
std::cout << "output: " << SummaryTensor(tensor) << std::endl; std::cout << "output: " << SummaryTensor(tensor) << std::endl;
// compare with reference result // compare with reference result
std::cout << "refer result : " << REFER << std::endl;
CheckOutput(REFER, tensor); CheckOutput(REFER, tensor);
} }
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. #include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include "paddle/fluid/inference/demo_ci/utils.h" //#include "paddle/fluid/inference/demo_ci/utils.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -36,6 +36,47 @@ DEFINE_bool(use_gpu, false, "Whether use gpu."); ...@@ -36,6 +36,47 @@ DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle { namespace paddle {
namespace demo { namespace demo {
/*
 * Break `str` into fields delimited by `sep` and store them in `*pieces`.
 *
 * `*pieces` is always cleared first. Every separator found terminates one
 * field, so a leading separator or two adjacent separators produce an empty
 * field. Text remaining after the last separator is appended only when it is
 * non-empty, which means a trailing separator does not add an empty field and
 * an empty `str` yields no fields at all.
 */
static void split(const std::string& str, char sep,
                  std::vector<std::string>* pieces) {
  pieces->clear();

  size_t start = 0;
  for (size_t hit = str.find(sep); hit != std::string::npos;
       hit = str.find(sep, start)) {
    pieces->push_back(str.substr(start, hit - start));
    start = hit + 1;  // resume scanning just past the separator
  }

  // Anything left after the final separator is the last field.
  if (start < str.size()) {
    pieces->push_back(str.substr(start));
  }
}
/*
 * Render a short textual preview of a PaddleTensor's contents.
 *
 * The element count is the byte length of `tensor.data` divided by the size
 * reported by PaddleDtypeSize for the tensor's dtype. At most the first 10
 * elements are printed, each followed by a single space, after the fixed
 * "data[:10]\t" prefix. Only INT64 and FLOAT32 tensors have their elements
 * printed; for any other dtype the result is just the prefix.
 */
static std::string SummaryTensor(const PaddleTensor& tensor) {
  const int total = tensor.data.length() / PaddleDtypeSize(tensor.dtype);
  const int shown = std::min(total, 10);  // never print more than 10 values

  std::stringstream out;
  out << "data[:10]\t";

  if (tensor.dtype == PaddleDType::INT64) {
    int64_t* values = static_cast<int64_t*>(tensor.data.data());
    for (int idx = 0; idx < shown; ++idx) {
      out << values[idx] << " ";
    }
  } else if (tensor.dtype == PaddleDType::FLOAT32) {
    float* values = static_cast<float*>(tensor.data.data());
    for (int idx = 0; idx < shown; ++idx) {
      out << values[idx] << " ";
    }
  }

  return out.str();
}
struct Record { struct Record {
std::vector<float> data; std::vector<float> data;
......
...@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/operators/conv_op.h"
...@@ -35,6 +38,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -35,6 +38,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasOutput("Output"), PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of ConvOp should not be null."); "Output(Output) of ConvOp should not be null.");
VLOG(3) << "Conv op infershape";
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
...@@ -42,32 +46,51 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -42,32 +46,51 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int groups = ctx->Attrs().Get<int>("groups"); int groups = ctx->Attrs().Get<int>("groups");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations"); std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
VLOG(3) << "Conv op Before check";
PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, in_dims.size() == 4 || in_dims.size() == 5;
"Conv intput should be 4-D or 5-D tensor."); //PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
PADDLE_ENFORCE_EQ( // "Conv intput should be 4-D or 5-D tensor.");
in_dims.size(), filter_dims.size(), VLOG(3) << "check0";
"Conv input dimension and filter dimension should be the same.");
//PADDLE_ENFORCE_EQ(
// in_dims.size(), filter_dims.size(),
// "Conv input dimension and filter dimension should be the same.");
in_dims.size() == filter_dims.size();
VLOG(3) << "enforce check0";
PADDLE_ENFORCE( PADDLE_ENFORCE(
in_dims.size() - strides.size() == 2U, in_dims.size() - strides.size() == 2U,
"Conv input dimension and strides dimension should be consistent."); "Conv input dimension and strides dimension should be consistent.");
VLOG(3) << "check1";
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
paddings.size(), strides.size(), paddings.size(), strides.size(),
"Conv paddings dimension and Conv strides dimension should be the same."); "Conv paddings dimension and Conv strides dimension should be the same.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups, VLOG(3) << "check2";
"The number of input channels should be equal to filter " //in_dims[1] == filter_dims[1] * groups;
"channels * groups."); //PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups,
PADDLE_ENFORCE_EQ( // "The number of input channels should be equal to filter "
filter_dims[0] % groups, 0, // "channels * groups.");
"The number of output channels should be divided by groups."); VLOG(3) << "check3";
//filter_dims[0] % groups == 0 ;
//PADDLE_ENFORCE_EQ(
// filter_dims[0] % groups, 0,
// "The number of output channels should be divided by groups.");
VLOG(3) << "filter" << filter_dims.size();
VLOG(3) << "filter" << filter_dims[0];
VLOG(3) << "check4";
VLOG(3) << "filter" << filter_dims[1];
VLOG(3) << "dims" << in_dims[0];
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
VLOG(3) << "output shape";
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
VLOG(3) << "check5";
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i], dilations[i], paddings[i],
strides[i])); strides[i]));
VLOG(3) << "check pass";
} }
VLOG(3) << "Conv InferShape Pass";
ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
ctx->ShareLoD("Input", "Output"); ctx->ShareLoD("Input", "Output");
} }
......
...@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/cudnn_helper.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
......
...@@ -40,18 +40,20 @@ DeviceContextPool::DeviceContextPool( ...@@ -40,18 +40,20 @@ DeviceContextPool::DeviceContextPool(
for (auto& p : places) { for (auto& p : places) {
set.insert(p); set.insert(p);
} }
VLOG(3) << "pool start";
for (auto& p : set) { for (auto& p : set) {
if (platform::is_cpu_place(p)) { if (platform::is_cpu_place(p)) {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
device_contexts_.emplace( device_contexts_.emplace(
p, PtrType(new MKLDNNDeviceContext(boost::get<CPUPlace>(p)))); p, PtrType(new MKLDNNDeviceContext(boost::get<CPUPlace>(p))));
#else #else
VLOG(3) << "cpu context start";
device_contexts_.emplace( device_contexts_.emplace(
p, PtrType(new CPUDeviceContext(boost::get<CPUPlace>(p)))); p, PtrType(new CPUDeviceContext(boost::get<CPUPlace>(p))));
#endif #endif
} else if (platform::is_gpu_place(p)) { } else if (platform::is_gpu_place(p)) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
VLOG(3) << "gpu context start";
device_contexts_.emplace( device_contexts_.emplace(
p, PtrType(new CUDADeviceContext(boost::get<CUDAPlace>(p)))); p, PtrType(new CUDADeviceContext(boost::get<CUDAPlace>(p))));
#else #else
...@@ -61,6 +63,7 @@ DeviceContextPool::DeviceContextPool( ...@@ -61,6 +63,7 @@ DeviceContextPool::DeviceContextPool(
#endif #endif
} else if (platform::is_cuda_pinned_place(p)) { } else if (platform::is_cuda_pinned_place(p)) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
VLOG(3) << "gpu pin start";
device_contexts_.emplace( device_contexts_.emplace(
p, p,
PtrType(new CUDAPinnedDeviceContext(boost::get<CUDAPinnedPlace>(p)))); PtrType(new CUDAPinnedDeviceContext(boost::get<CUDAPinnedPlace>(p))));
...@@ -70,6 +73,7 @@ DeviceContextPool::DeviceContextPool( ...@@ -70,6 +73,7 @@ DeviceContextPool::DeviceContextPool(
"option"); "option");
#endif #endif
} }
VLOG(3) << "pool finish";
} }
} }
...@@ -147,18 +151,28 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) { ...@@ -147,18 +151,28 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) {
compute_capability = GetCUDAComputeCapability(place_.device); compute_capability = GetCUDAComputeCapability(place_.device);
multi_process = GetCUDAMultiProcessors(place_.device); multi_process = GetCUDAMultiProcessors(place_.device);
max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device); max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
VLOG(3) << "cuda info pass";
PADDLE_ENFORCE(cudaStreamCreate(&stream_)); PADDLE_ENFORCE(cudaStreamCreate(&stream_));
VLOG(3) << "cuda stream pass";
eigen_stream_.reset(new EigenCudaStreamDevice()); eigen_stream_.reset(new EigenCudaStreamDevice());
eigen_stream_->Reinitialize(&stream_, place); eigen_stream_->Reinitialize(&stream_, place);
eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_)); VLOG(3) << "eigen pass";
if (dynload::HasCUDNN()) { if (dynload::HasCUDNN()) {
VLOG(3) << "cudnn start";
PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
VLOG(3) << "cudnn create pass";
PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_)); PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_));
} else { } else {
cudnn_handle_ = nullptr; cudnn_handle_ = nullptr;
} }
VLOG(3) << "cudnn pass";
PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
VLOG(3) << "cublas pass";
PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_));
VLOG(3) << "cublas pass";
} }
CUDADeviceContext::~CUDADeviceContext() { CUDADeviceContext::~CUDADeviceContext() {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include <iostream>
#include <vector> #include <vector>
#include "glog/logging.h" #include "glog/logging.h"
...@@ -23,6 +24,7 @@ TEST(Device, Init) { ...@@ -23,6 +24,7 @@ TEST(Device, Init) {
using paddle::platform::CUDADeviceContext; using paddle::platform::CUDADeviceContext;
using paddle::platform::CUDAPlace; using paddle::platform::CUDAPlace;
VLOG(3) << "before Init";
int count = paddle::platform::GetCUDADeviceCount(); int count = paddle::platform::GetCUDADeviceCount();
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i)); CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i));
...@@ -30,20 +32,25 @@ TEST(Device, Init) { ...@@ -30,20 +32,25 @@ TEST(Device, Init) {
ASSERT_NE(nullptr, gpu_device); ASSERT_NE(nullptr, gpu_device);
delete device_context; delete device_context;
} }
VLOG(3) << "eigen pass";
} }
TEST(Device, CUDADeviceContext) { TEST(Device, CUDADeviceContext) {
using paddle::platform::CUDADeviceContext; using paddle::platform::CUDADeviceContext;
using paddle::platform::CUDAPlace; using paddle::platform::CUDAPlace;
VLOG(3) << "cudnn start";
int count = paddle::platform::GetCUDADeviceCount(); int count = paddle::platform::GetCUDADeviceCount();
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i)); CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i));
VLOG(3) << "device context start";
Eigen::GpuDevice* gpu_device = device_context->eigen_device(); Eigen::GpuDevice* gpu_device = device_context->eigen_device();
ASSERT_NE(nullptr, gpu_device); ASSERT_NE(nullptr, gpu_device);
cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
VLOG(3) << "cudnn pass";
ASSERT_NE(nullptr, cudnn_handle); ASSERT_NE(nullptr, cudnn_handle);
cublasHandle_t cublas_handle = device_context->cublas_handle(); cublasHandle_t cublas_handle = device_context->cublas_handle();
VLOG(3) << "cublas pass";
ASSERT_NE(nullptr, cublas_handle); ASSERT_NE(nullptr, cublas_handle);
ASSERT_NE(nullptr, device_context->stream()); ASSERT_NE(nullptr, device_context->stream());
delete device_context; delete device_context;
...@@ -57,7 +64,9 @@ TEST(Device, DeviceContextPool) { ...@@ -57,7 +64,9 @@ TEST(Device, DeviceContextPool) {
using paddle::platform::CPUPlace; using paddle::platform::CPUPlace;
using paddle::platform::CUDAPlace; using paddle::platform::CUDAPlace;
VLOG(3) << "before instance";
DeviceContextPool& pool = DeviceContextPool::Instance(); DeviceContextPool& pool = DeviceContextPool::Instance();
VLOG(3) << "after instance";
auto cpu_dev_ctx1 = pool.Get(CPUPlace()); auto cpu_dev_ctx1 = pool.Get(CPUPlace());
auto cpu_dev_ctx2 = pool.Get(CPUPlace()); auto cpu_dev_ctx2 = pool.Get(CPUPlace());
ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1); ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1);
......
...@@ -55,7 +55,7 @@ extern void *cublas_dso_handle; ...@@ -55,7 +55,7 @@ extern void *cublas_dso_handle;
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
inline cublasStatus_t operator()(Args... args) { \ inline cublasStatus_t operator()(Args... args) { \
return __name(args...); \ return ::__name(args...); \
} \ } \
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <cudnn.h> #include <cudnn.h>
#include <mutex> // NOLINT #include <mutex> // NOLINT
...@@ -51,7 +54,8 @@ extern void EnforceCUDNNLoaded(const char* fn_name); ...@@ -51,7 +54,8 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
inline cudnnStatus_t operator()(Args... args) { \ inline cudnnStatus_t operator()(Args... args) { \
return __name(args...); \ VLOG(3) << "cudnn call"; \
return ::__name(args...); \
} \ } \
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
......
...@@ -44,7 +44,7 @@ extern void *curand_dso_handle; ...@@ -44,7 +44,7 @@ extern void *curand_dso_handle;
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
curandStatus_t operator()(Args... args) { \ curandStatus_t operator()(Args... args) { \
return __name(args...); \ return ::__name(args...); \
} \ } \
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
......
...@@ -295,7 +295,7 @@ inline void throw_on_error(T e) { ...@@ -295,7 +295,7 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example: * extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/ */
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \ #define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \ #define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
...@@ -309,7 +309,7 @@ inline void throw_on_error(T e) { ...@@ -309,7 +309,7 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#if !defined(_WIN32)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ #define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \ do { \
if (UNLIKELY(nullptr == (__VAL))) { \ if (UNLIKELY(nullptr == (__VAL))) { \
...@@ -330,6 +330,13 @@ inline void throw_on_error(T e) { ...@@ -330,6 +330,13 @@ inline void throw_on_error(T e) {
} \ } \
} while (0) } while (0)
#else #else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0)==(__VAL1))
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0)!=(__VAL1))
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0)>(__VAL1))
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0)>=(__VAL1))
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0)<(__VAL1))
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0)<=(__VAL1))
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \ do { \
if (!((__VAL0)__CMP(__VAL1))) { \ if (!((__VAL0)__CMP(__VAL1))) { \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册