Commit 85f8dd1c authored by D dzhwinter

debug version

Parent e1999538
......@@ -172,7 +172,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if (NOT WIN32) # windows msvc2015 support c++11 natively.
# -std=c++11 -fPIC not recognized by msvc
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
list(APPEND CUDA_NVCC_FLAGS "-w" "-Xcompiler -fPIC" "-Xcompiler /w")
endif(NOT WIN32)
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
......
......@@ -150,7 +150,7 @@ set(COMMON_FLAGS
"/w") #disable all warnings.
set(GPU_COMMON_FLAGS
"") #disable all warnings
"/w") #disable all warnings
endif(NOT WIN32)
......@@ -177,12 +177,22 @@ endif(UNIX AND NOT APPLE)
foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach()
foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag})
endforeach()
if(MSVC)
if(WIN32)
safe_set_static_flag()
endif(MSVC)
\ No newline at end of file
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/W3")
string(REGEX REPLACE "/W3" "/w" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/W3")
endforeach(flag_var)
endif(WIN32)
......@@ -243,6 +243,7 @@ function(cc_library TARGET_NAME)
# add libxxx.lib prefix in windows
set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
endif(WIN32)
message("flags" ${CMAKE_CXX_FLAGS})
if(cc_library_SRCS)
if(cc_library_SHARED OR cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
......@@ -305,7 +306,7 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog shlwapi openblas)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS}
......@@ -375,7 +376,7 @@ function(nv_test TARGET_NAME)
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog shlwapi)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL)
......
......@@ -149,8 +149,10 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::SetDeviceId(dev_id);
#endif
}
VLOG(3) << "start pool";
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
VLOG(3) << "start RunImpl";
RunImpl(scope, place);
VLOG(3) << place << " " << DebugStringEx(&scope);
}
......@@ -660,12 +662,16 @@ static void CheckTensorNANOrInf(const std::string& name,
void OperatorWithKernel::RunImpl(const Scope& scope,
const platform::Place& place) const {
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
VLOG(3) << "start Infershape";
this->InferShape(&infer_shape_ctx);
VLOG(3) << "Infershape Pass";
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
// check if op[type] has kernel registered.
VLOG(3) << "Start Kernels";
auto& all_op_kernels = AllOpKernels();
VLOG(3) << "Kernel map finish";
auto kernels_iter = all_op_kernels.find(type_);
if (kernels_iter == all_op_kernels.end()) {
PADDLE_THROW(
......
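The hunk above brackets the kernel dispatch with VLOG(3) markers: InferShape, then a device-context lookup, then a search of the per-type kernel registry. A minimal sketch of that registry lookup, with illustrative names (KernelFn, Registry) rather than Paddle's actual OpKernelMap types:

#include <stdexcept>
#include <string>
#include <unordered_map>

using KernelFn = void (*)();

// Stand-in for AllOpKernels(): one static map keyed by operator type.
static std::unordered_map<std::string, KernelFn>& Registry() {
  static std::unordered_map<std::string, KernelFn> registry;
  return registry;
}

void RunKernel(const std::string& type) {
  auto it = Registry().find(type);  // the lookup after "Kernel map finish"
  if (it == Registry().end()) {
    throw std::runtime_error("no kernel registered for op " + type);
  }
  it->second();
}

int main() {
  Registry()["fake_op"] = [] { /* kernel body */ };
  RunKernel("fake_op");
  return 0;
}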
......@@ -20,6 +20,8 @@ limitations under the License. */
#include <tuple>
#include <unordered_map>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/attribute.h"
......
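Defining GLOG_NO_ABBREVIATED_SEVERITIES before the glog include is the standard Windows workaround: windows.h defines an ERROR macro that collides with glog's abbreviated severity name, and GOOGLE_GLOG_DLL_DECL is defined empty so glog symbols are not decorated with __declspec(dllimport) when glog is linked statically. The order matters; a minimal illustration:

// Both macros must precede the first glog include in the translation unit.
#define GLOG_NO_ABBREVIATED_SEVERITIES  // avoid windows.h's ERROR macro clash
#define GOOGLE_GLOG_DLL_DECL            // empty: static glog, no dllimport
#include "glog/logging.h"

int main(int argc, char** argv) {
  google::InitGoogleLogging(argv[0]);
  VLOG(3) << "logging works without the Windows ERROR collision";
  return 0;
}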
......@@ -114,7 +114,9 @@ if(WITH_GPU)
if(NOT WIN32)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
endif()
endif()
......
......@@ -186,7 +186,12 @@ void Main(bool use_gpu) {
std::cout << "begin to process data" << std::endl;
// Just a single batch of data.
std::string line;
std::cout << "data : " << std::endl;
std::ifstream file(DATA);
if(!file.is_open()) {
std::cout << "failed open data" << DATA << std::endl;
exit(0);
}
std::getline(file, line);
auto record = ProcessALine(line);
file.close();
......@@ -207,6 +212,7 @@ void Main(bool use_gpu) {
std::cout << "output: " << SummaryTensor(tensor) << std::endl;
// compare with reference result
std::cout << "refer result : " << REFER << std::endl;
CheckOutput(REFER, tensor);
}
......
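One caveat in the file-open check added above: exit(0) reports success to the shell even though opening DATA failed, so a scripted run cannot tell the two cases apart. A sketch of the same check with a non-zero exit status (DATA replaced by a placeholder path):

#include <fstream>
#include <iostream>
#include <string>

int main() {
  const std::string data_path = "data.txt";  // stands in for the DATA macro
  std::ifstream file(data_path);
  if (!file.is_open()) {
    std::cerr << "failed to open data file: " << data_path << std::endl;
    return 1;  // non-zero so CI and shell scripts can detect the failure
  }
  std::string line;
  std::getline(file, line);
  return 0;
}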
......@@ -20,7 +20,7 @@ limitations under the License. */
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/demo_ci/utils.h"
//#include "paddle/fluid/inference/demo_ci/utils.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA
......@@ -36,6 +36,47 @@ DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle {
namespace demo {
static void split(const std::string& str, char sep,
std::vector<std::string>* pieces) {
pieces->clear();
if (str.empty()) {
return;
}
size_t pos = 0;
size_t next = str.find(sep, pos);
while (next != std::string::npos) {
pieces->push_back(str.substr(pos, next - pos));
pos = next + 1;
next = str.find(sep, pos);
}
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}
/*
* Get a summary of a PaddleTensor content.
*/
static std::string SummaryTensor(const PaddleTensor& tensor) {
std::stringstream ss;
int num_elems = tensor.data.length() / PaddleDtypeSize(tensor.dtype);
ss << "data[:10]\t";
switch (tensor.dtype) {
case PaddleDType::INT64: {
for (int i = 0; i < std::min(num_elems, 10); i++) {
ss << static_cast<int64_t*>(tensor.data.data())[i] << " ";
}
break;
}
case PaddleDType::FLOAT32:
for (int i = 0; i < std::min(num_elems, 10); i++) {
ss << static_cast<float*>(tensor.data.data())[i] << " ";
}
break;
}
return ss.str();
}
struct Record {
std::vector<float> data;
......
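With the utils.h include commented out, the file above now carries its own split and SummaryTensor helpers. One subtlety of split: a trailing separator produces no empty tail piece, because the final substring is pushed only when non-empty. A standalone usage sketch (split duplicated so the snippet compiles on its own):

#include <cassert>
#include <string>
#include <vector>

static void split(const std::string& str, char sep,
                  std::vector<std::string>* pieces) {
  pieces->clear();
  if (str.empty()) return;
  size_t pos = 0;
  size_t next = str.find(sep, pos);
  while (next != std::string::npos) {
    pieces->push_back(str.substr(pos, next - pos));
    pos = next + 1;
    next = str.find(sep, pos);
  }
  if (!str.substr(pos).empty()) pieces->push_back(str.substr(pos));
}

int main() {
  std::vector<std::string> pieces;
  split("1 2 3", ' ', &pieces);
  assert(pieces.size() == 3 && pieces[2] == "3");
  split("1 2 ", ' ', &pieces);  // trailing separator: empty tail is dropped
  assert(pieces.size() == 2);
  return 0;
}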
......@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include "paddle/fluid/operators/conv_op.h"
......@@ -35,6 +38,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of ConvOp should not be null.");
VLOG(3) << "Conv op infershape";
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
......@@ -42,32 +46,51 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int groups = ctx->Attrs().Get<int>("groups");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
"Conv intput should be 4-D or 5-D tensor.");
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
"Conv input dimension and filter dimension should be the same.");
VLOG(3) << "Conv op Before check";
in_dims.size() == 4 || in_dims.size() == 5;
//PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
// "Conv intput should be 4-D or 5-D tensor.");
VLOG(3) << "check0";
//PADDLE_ENFORCE_EQ(
// in_dims.size(), filter_dims.size(),
// "Conv input dimension and filter dimension should be the same.");
in_dims.size() == filter_dims.size();
VLOG(3) << "enforce check0";
PADDLE_ENFORCE(
in_dims.size() - strides.size() == 2U,
"Conv input dimension and strides dimension should be consistent.");
VLOG(3) << "check1";
PADDLE_ENFORCE_EQ(
paddings.size(), strides.size(),
"Conv paddings dimension and Conv strides dimension should be the same.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups,
"The number of input channels should be equal to filter "
"channels * groups.");
PADDLE_ENFORCE_EQ(
filter_dims[0] % groups, 0,
"The number of output channels should be divided by groups.");
VLOG(3) << "check2";
//in_dims[1] == filter_dims[1] * groups;
//PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups,
// "The number of input channels should be equal to filter "
// "channels * groups.");
VLOG(3) << "check3";
//filter_dims[0] % groups == 0 ;
//PADDLE_ENFORCE_EQ(
// filter_dims[0] % groups, 0,
// "The number of output channels should be divided by groups.");
VLOG(3) << "filter" << filter_dims.size();
VLOG(3) << "filter" << filter_dims[0];
VLOG(3) << "check4";
VLOG(3) << "filter" << filter_dims[1];
VLOG(3) << "dims" << in_dims[0];
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
VLOG(3) << "output shape";
for (size_t i = 0; i < strides.size(); ++i) {
VLOG(3) << "check5";
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i],
strides[i]));
VLOG(3) << "check pass";
}
VLOG(3) << "Conv InferShape Pass";
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
ctx->ShareLoD("Input", "Output");
}
......
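The InferShape hunk above swaps most PADDLE_ENFORCE checks for bare comparison statements (e.g. in_dims.size() == 4 || in_dims.size() == 5;) whose results are discarded, so validation is effectively disabled while the VLOG markers narrow down where the Windows build stops. The surviving output-shape loop calls ConvOutputSize per spatial dimension; a sketch of the standard convolution arithmetic it implements (an illustration, not Paddle's exact conv_op.h code):

#include <cassert>

inline int ConvOutputSize(int input, int filter, int dilation, int padding,
                          int stride) {
  const int dkernel = dilation * (filter - 1) + 1;  // effective filter extent
  return (input + 2 * padding - dkernel) / stride + 1;
}

int main() {
  // 32x32 input, 3x3 filter, padding 1, stride 1: size is preserved.
  assert(ConvOutputSize(32, 3, 1, 1, 1) == 32);
  // Stride 2 halves it, rounding down: (32 + 2 - 3) / 2 + 1 = 16.
  assert(ConvOutputSize(32, 3, 1, 1, 2) == 16);
  return 0;
}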
......@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/platform/cudnn_helper.h"
#include <gtest/gtest.h>
......
......@@ -40,18 +40,20 @@ DeviceContextPool::DeviceContextPool(
for (auto& p : places) {
set.insert(p);
}
VLOG(3) << "pool start";
for (auto& p : set) {
if (platform::is_cpu_place(p)) {
#ifdef PADDLE_WITH_MKLDNN
device_contexts_.emplace(
p, PtrType(new MKLDNNDeviceContext(boost::get<CPUPlace>(p))));
#else
VLOG(3) << "cpu context start";
device_contexts_.emplace(
p, PtrType(new CPUDeviceContext(boost::get<CPUPlace>(p))));
#endif
} else if (platform::is_gpu_place(p)) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "gpu context start";
device_contexts_.emplace(
p, PtrType(new CUDADeviceContext(boost::get<CUDAPlace>(p))));
#else
......@@ -61,6 +63,7 @@ DeviceContextPool::DeviceContextPool(
#endif
} else if (platform::is_cuda_pinned_place(p)) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "gpu pin start";
device_contexts_.emplace(
p,
PtrType(new CUDAPinnedDeviceContext(boost::get<CUDAPinnedPlace>(p))));
......@@ -70,6 +73,7 @@ DeviceContextPool::DeviceContextPool(
"option");
#endif
}
VLOG(3) << "pool finish";
}
}
......@@ -147,18 +151,28 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) {
compute_capability = GetCUDAComputeCapability(place_.device);
multi_process = GetCUDAMultiProcessors(place_.device);
max_threads_per_mp = GetCUDAMaxThreadsPerMultiProcessor(place_.device);
VLOG(3) << "cuda info pass";
PADDLE_ENFORCE(cudaStreamCreate(&stream_));
VLOG(3) << "cuda stream pass";
eigen_stream_.reset(new EigenCudaStreamDevice());
eigen_stream_->Reinitialize(&stream_, place);
eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_));
if (dynload::HasCUDNN()) {
VLOG(3) << "eigen pass";
if (dynload::HasCUDNN()) {
VLOG(3) << "cudnn start";
PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
VLOG(3) << "cudnn create pass";
PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_));
} else {
cudnn_handle_ = nullptr;
}
VLOG(3) << "cudnn pass";
PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_));
VLOG(3) << "cublas pass";
PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_));
VLOG(3) << "cublas pass";
}
CUDADeviceContext::~CUDADeviceContext() {
......
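Besides the logging, the constructor hunk above reorders initialization: stream, then the Eigen device, then the cuDNN handle, and only then cuBLAS (previously created before the cuDNN block). A condensed, stubbed-out view of the resulting order; the Create* functions below are stand-ins for cudaStreamCreate / cudnnCreate / cublasCreate, not real CUDA calls:

#include <iostream>

bool CreateStream()      { std::cout << "cuda stream pass\n";  return true; }
bool CreateEigenDevice() { std::cout << "eigen pass\n";        return true; }
bool HasCUDNN()          { return true; }
bool CreateCudnn()       { std::cout << "cudnn create pass\n"; return true; }
bool CreateCublas()      { std::cout << "cublas pass\n";       return true; }

int main() {
  CreateStream();                  // 1. the stream every handle binds to
  CreateEigenDevice();             // 2. Eigen device wrapping the stream
  if (HasCUDNN()) CreateCudnn();   // 3. optional cuDNN handle
  CreateCublas();                  // 4. cuBLAS handle, now after cuDNN
  return 0;
}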
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#include <iostream>
#include <vector>
#include "glog/logging.h"
......@@ -23,6 +24,7 @@ TEST(Device, Init) {
using paddle::platform::CUDADeviceContext;
using paddle::platform::CUDAPlace;
VLOG(3) << "before Init";
int count = paddle::platform::GetCUDADeviceCount();
for (int i = 0; i < count; i++) {
CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i));
......@@ -30,20 +32,25 @@ TEST(Device, Init) {
ASSERT_NE(nullptr, gpu_device);
delete device_context;
}
VLOG(3) << "eigen pass";
}
TEST(Device, CUDADeviceContext) {
using paddle::platform::CUDADeviceContext;
using paddle::platform::CUDAPlace;
VLOG(3) << "cudnn start";
int count = paddle::platform::GetCUDADeviceCount();
for (int i = 0; i < count; i++) {
CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i));
VLOG(3) << "device context start";
Eigen::GpuDevice* gpu_device = device_context->eigen_device();
ASSERT_NE(nullptr, gpu_device);
cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
VLOG(3) << "cudnn pass";
ASSERT_NE(nullptr, cudnn_handle);
cublasHandle_t cublas_handle = device_context->cublas_handle();
VLOG(3) << "cublas pass";
ASSERT_NE(nullptr, cublas_handle);
ASSERT_NE(nullptr, device_context->stream());
delete device_context;
......@@ -57,7 +64,9 @@ TEST(Device, DeviceContextPool) {
using paddle::platform::CPUPlace;
using paddle::platform::CUDAPlace;
VLOG(3) << "before instance";
DeviceContextPool& pool = DeviceContextPool::Instance();
VLOG(3) << "after instance";
auto cpu_dev_ctx1 = pool.Get(CPUPlace());
auto cpu_dev_ctx2 = pool.Get(CPUPlace());
ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1);
......
......@@ -55,7 +55,7 @@ extern void *cublas_dso_handle;
struct DynLoad__##__name { \
template <typename... Args> \
inline cublasStatus_t operator()(Args... args) { \
return __name(args...); \
return ::__name(args...); \
} \
}; \
extern DynLoad__##__name __name
......
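The one-character change above (__name to ::__name) matters on MSVC: its late template name lookup can resolve the unqualified call at instantiation time to the functor object of the same name in the enclosing namespace, recursing into operator() instead of calling the CUDA symbol. The :: qualifier pins the global function. A minimal reproduction of the pattern, with illustrative names rather than real cuBLAS symbols:

#include <iostream>

int cuApiCall(int x) { return x + 1; }  // stand-in for a global CUDA symbol

namespace dynload {
struct DynLoad__cuApiCall {
  template <typename... Args>
  int operator()(Args... args) {
    return ::cuApiCall(args...);  // global function, never the functor below
  }
};
DynLoad__cuApiCall cuApiCall;  // same name as the API, as in the macro
}  // namespace dynload

int main() {
  std::cout << dynload::cuApiCall(41) << std::endl;  // prints 42
  return 0;
}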
......@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <cudnn.h>
#include <mutex> // NOLINT
......@@ -51,7 +54,8 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
struct DynLoad__##__name { \
template <typename... Args> \
inline cudnnStatus_t operator()(Args... args) { \
return __name(args...); \
VLOG(3) << "cudnn call"; \
return ::__name(args...); \
} \
}; \
extern DynLoad__##__name __name
......
......@@ -44,7 +44,7 @@ extern void *curand_dso_handle;
struct DynLoad__##__name { \
template <typename... Args> \
curandStatus_t operator()(Args... args) { \
return __name(args...); \
return ::__name(args...); \
} \
}; \
extern DynLoad__##__name __name
......
......@@ -295,7 +295,7 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
......@@ -309,7 +309,7 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#if !defined(_WIN32)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
......@@ -330,6 +330,13 @@ inline void throw_on_error(T e) {
} \
} while (0)
#else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0)==(__VAL1))
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0)!=(__VAL1))
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0)>(__VAL1))
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0)>=(__VAL1))
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0)<(__VAL1))
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0)<=(__VAL1))
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (!((__VAL0)__CMP(__VAL1))) { \
......
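On the Windows branch added above, PADDLE_ENFORCE_EQ and friends collapse to bare comparison expressions whose results are discarded, so a failed check is silently ignored; acceptable for a debug build, but worth flagging. A hypothetical fallback that keeps the check cheap yet fails loudly (not part of this commit):

#include <cstdio>
#include <cstdlib>

#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...)                               \
  do {                                                                       \
    if (!((__VAL0) == (__VAL1))) {                                           \
      std::fprintf(stderr, "enforce failed at %s:%d\n", __FILE__, __LINE__); \
      std::abort();                                                          \
    }                                                                        \
  } while (0)

int main() {
  PADDLE_ENFORCE_EQ(2 + 2, 4, "sanity");  // passes silently
  // PADDLE_ENFORCE_EQ(2 + 2, 5, "boom");  // would print the location and abort
  return 0;
}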