未验证 提交 5ad1aef0 编写于 作者: D dzhwinter 提交者: GitHub

"cudnn operators change to cudnn kernel" (#6660)

* "unified operators"

* "add CUDNN register"

* "add use cudnn attribute"

* "add attribute"

* "test conv tranpose op"

* "remove duplicated attr"

* "fix op test"

* "add attribute to set cudnn"

* "add more log"

* "need layout op register support"

* "add more log"

* "change GetExpectedKernelType "

* "fix Get attr in conv_op"

* "fix CI"

* "fix tests"

* "removed kernel priority fallback"

* "fix CI"

* "fix stack pointer bug"

* "refine buggy interface"

* "add const cast to save life"

* "fix get_output_with_grad"

* "fix op test with dataformat"

* ""fix pooling

* "fix pooling test"

* "fix CI"

* "fix with_gpu error"

* "add transform needed functional check"

* "fix unpack list error"

* "comment out parallel.do temporary"

* "fix CI"

* "fix compile doc error"

* "make threshold larger"
上级 61881397
......@@ -31,15 +31,14 @@ static const platform::DeviceContext* GetDeviceContext(
}
}
Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
void DeviceTransform(const Tensor& in, const platform::Place& dst_place,
Tensor* out) {
VLOG(3) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place;
Tensor* out = new Tensor();
auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
dev_ctx->Wait();
Copy(in, dst_place, *dev_ctx, out);
dev_ctx->Wait();
return out;
}
} // namespace framework
......
......@@ -21,7 +21,8 @@ limitations under the License. */
namespace paddle {
namespace framework {
Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place);
void DeviceTransform(const Tensor& in, const platform::Place& dst_place,
Tensor* out);
} // namespace framework
} // namespace paddle
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <iostream>
#include <cctype>
#include <ostream>
#include "paddle/platform/enforce.h"
namespace paddle {
......@@ -27,12 +29,19 @@ enum class DataLayout {
};
inline DataLayout StringToDataLayout(const std::string& str) {
if (str == "NHWC" || str == "nhwc") {
std::string s(str);
for (size_t i = 0; i < s.size(); ++i) {
s[i] = toupper(s[i]);
}
if (s == "NHWC") {
return DataLayout::kNHWC;
} else if (str == "NCHW" || str == "nchw") {
} else if (s == "NCHW") {
return DataLayout::kNCHW;
} else if (s == "ANYLAYOUT") {
return DataLayout::kAnyLayout;
} else {
PADDLE_THROW("Unknown storage order string: %s", str);
PADDLE_THROW("Unknown storage order string: %s", s);
}
}
......@@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
}
}
inline std::ostream& operator<<(std::ostream& out, DataLayout l) {
inline std::ostream& operator<<(std::ostream& out, const DataLayout& l) {
out << DataLayoutToString(l);
return out;
}
......
......@@ -19,16 +19,14 @@ limitations under the License. */
namespace paddle {
namespace framework {
Tensor* DataTransform(const OpKernelType& expected_kernel_type,
const OpKernelType& kernel_type_for_var,
const Tensor& input_tensor) {
Tensor* out = nullptr;
void DataTransform(const OpKernelType& expected_kernel_type,
const OpKernelType& kernel_type_for_var,
const Tensor& input_tensor, Tensor* out) {
if (!platform::is_same_place(kernel_type_for_var.place_,
expected_kernel_type.place_)) {
out = DeviceTransform(input_tensor, expected_kernel_type.place_);
DeviceTransform(input_tensor, expected_kernel_type.place_, out);
}
PADDLE_ENFORCE_NOT_NULL(out, "out should not be null");
return out;
}
void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
......
......@@ -30,9 +30,9 @@ limitations under the License. */
namespace paddle {
namespace framework {
Tensor* DataTransform(const OpKernelType& expected_kernel_type,
const OpKernelType& kernel_type_for_var,
const Tensor& input_tensor);
void DataTransform(const OpKernelType& expected_kernel_type,
const OpKernelType& kernel_type_for_var,
const Tensor& input_tensor, Tensor* out);
void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
Variable& out_var);
......
......@@ -85,5 +85,10 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
return stream.str();
}
inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) {
return (!platform::places_are_same_class(l.place_, r.place_)) ||
(l.data_type_ != r.data_type_) || (l.data_layout_ != r.data_layout_);
}
} // namespace framework
} // namespace paddle
......@@ -368,24 +368,6 @@ TEST(OperatorRegistrar, OpWithMultiKernel) {
// TODO(qiao) add priority back
// use all available kernels
paddle::framework::UseALL();
op->Run(scope, cuda_place);
EXPECT_EQ(op_test_value, -10);
// remove cuda kernels
paddle::framework::UseCPU();
op->Run(scope, cpu_place);
EXPECT_EQ(op_test_value, -9);
// add cuda kernels
paddle::framework::UseCUDA();
op->Run(scope, cuda_place);
EXPECT_EQ(op_test_value, -10);
// use cudnn kernel
paddle::framework::UseCUDNN();
op->Run(scope, cuda_place);
EXPECT_EQ(op_test_value, -20);
}
......@@ -29,52 +29,12 @@ DEFINE_bool(op_sync, false,
namespace paddle {
namespace framework {
std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
void UseCPU() {
kKernelPriority.clear();
/*Plain CPU*/
auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kPlain);
kKernelPriority.insert(kKernelPriority.begin(), pair0);
}
void UseMKLDNN() {
UseCPU();
#if PADDLE_WITH_MKLML
{
/*MKLDNN Kernel*/
auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN);
kKernelPriority.insert(kKernelPriority.begin(), pair0);
}
#endif
}
void UseCUDA() {
UseMKLDNN();
#if PADDLE_WITH_CUDA
/*Plain GPU*/
auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain);
kKernelPriority.insert(kKernelPriority.begin(), pair0);
#endif
}
void UseCUDNN() {
UseCUDA();
#if PADDLE_WITH_CUDA
if (platform::dynload::HasCUDNN()) {
/*CUDNN Kernel*/
auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN);
kKernelPriority.insert(kKernelPriority.begin(), pair0);
}
#endif
}
void UseALL() {
UseCPU();
UseMKLDNN();
UseCUDA();
UseCUDNN();
}
std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN),
std::make_tuple(platform::CPUPlace(), LibraryType::kPlain),
};
static DDim GetDims(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name);
......@@ -271,36 +231,33 @@ static bool VarIsTensor(const Variable* var) {
return var->IsType<LoDTensor>() || var->IsType<SelectedRows>();
}
static const Tensor* GetTensorFromVar(const Variable* var) {
const Tensor* t = nullptr;
static const Tensor* GetTensorFromVar(Variable* var) {
if (var->IsType<LoDTensor>()) {
t = &(var->Get<LoDTensor>());
return var->GetMutable<LoDTensor>();
} else if (var->IsType<SelectedRows>()) {
t = &(var->Get<SelectedRows>().value());
return var->GetMutable<SelectedRows>()->mutable_value();
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
var->Type().name());
}
return t;
}
static Tensor* GetMutableTensorFromVar(Variable* var) {
Tensor* t = nullptr;
if (var->IsType<LoDTensor>()) {
t = var->GetMutable<LoDTensor>();
return var->GetMutable<LoDTensor>();
} else if (var->IsType<SelectedRows>()) {
t = var->GetMutable<SelectedRows>()->mutable_value();
return var->GetMutable<SelectedRows>()->mutable_value();
} else {
PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
var->Type().name());
}
return t;
}
template <>
const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
auto* var = InputVar(name);
return var == nullptr ? nullptr : GetTensorFromVar(var);
return var == nullptr ? nullptr
: GetTensorFromVar(const_cast<Variable*>(var));
}
template <>
......@@ -343,6 +300,7 @@ bool OpSupportGPU(const std::string& op_type) {
auto it = all_kernels.find(op_type);
if (it == all_kernels.end()) {
// All control operator must support GPU
return true;
}
for (auto& kern_pair : it->second) {
......@@ -516,21 +474,17 @@ void OperatorWithKernel::Run(const Scope& scope,
}
ExecutionContext ctx(*this, scope, *dev_ctx);
auto expected_kernel_key = this->GetExpectedKernelType(ctx);
OpKernelMap& kernels = kernels_iter->second;
for (auto& candidate : kKernelPriority) {
auto candidate_key =
OpKernelType(expected_kernel_key.data_type_, std::get<0>(candidate),
expected_kernel_key.data_layout_, std::get<1>(candidate));
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the
// transform functions are ready.
if ((candidate_key == expected_kernel_key) ||
(kernels.count(candidate_key))) {
expected_kernel_key = candidate_key;
break;
}
}
// for (auto& candidate : kKernelPriority) {
// Do selection
// }
auto expected_kernel_key = this->GetExpectedKernelType(ctx);
VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
......@@ -544,7 +498,7 @@ void OperatorWithKernel::Run(const Scope& scope,
if (tensor_in->IsInitialized()) {
auto kernel_type_for_var = this->GetKernelTypeForVar(
var_name_item.first, *tensor_in, expected_kernel_key);
if (kernel_type_for_var != expected_kernel_key) {
if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) {
auto out_var_names = OutputVars(true);
if (std::find(out_var_names.begin(), out_var_names.end(),
var_name) != out_var_names.end()) {
......@@ -553,11 +507,13 @@ void OperatorWithKernel::Run(const Scope& scope,
"does not support transform",
var_name);
}
VLOG(3) << "need to do transform for var " << var_name;
VLOG(3) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
auto* trans_var = new_scope.Var(var_name);
auto* out = DataTransform(expected_kernel_key, kernel_type_for_var,
*tensor_in);
CopyVariableWithTensor(*var, *out, *trans_var);
std::shared_ptr<Tensor> out(new Tensor);
DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in,
out.get());
CopyVariableWithTensor(*var, *(out.get()), *trans_var);
}
}
}
......
......@@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD";
constexpr char kZeroVarSuffix[] = "@ZERO";
// define some kernel priority
/* Define multiple kernel type fallback order*/
extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
/**
* @brief Use cpu kernel only
*/
void UseCPU();
/**
* @brief Perfer MKLDNN kernel than Plain CPU kernel
*/
void UseMKLDNN();
/**
* @brief Perfer CUDA kernel than Plain CPU kernel
*/
void UseCUDA();
/**
* @brief Perfer cudnn kernel than Plain CUDA kernel
*/
void UseCUDNN();
/**
* @brief Use all available kernels
*/
void UseALL();
inline std::string GradVarName(const std::string& var_name) {
return var_name + kGradVarSuffix;
}
......
......@@ -137,8 +137,6 @@ op_library(sum_op DEPS selected_rows_functor)
op_library(sgd_op DEPS selected_rows_functor)
op_library(print_op DEPS lod_tensor)
op_library(adagrad_op DEPS selected_rows_functor)
op_library(conv_op DEPS vol2col)
op_library(pool_op DEPS pooling)
op_library(maxout_op DEPS maxouting)
op_library(unpool_op DEPS unpooling)
op_library(pool_with_index_op DEPS pooling)
......@@ -149,12 +147,27 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
op_library(sequence_conv_op DEPS context_project)
op_library(sequence_pool_op DEPS sequence_pooling)
op_library(lstm_op DEPS sequence2batch lstm_compute)
op_library(conv_transpose_op DEPS vol2col)
op_library(gru_op DEPS sequence2batch gru_compute)
op_library(recurrent_op DEPS executor)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding math_function)
op_library(cos_sim_op DEPS cos_sim_functor)
op_library(parallel_do_op DEPS executor)
# Regist multiple Kernel to pybind
if (WITH_GPU)
op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
conv_transpose_cudnn_op.cu.cc DEPS vol2col)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d, CUDNN);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(pool2d, CUDNN);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);\n")
else()
op_library(conv_op SRCS conv_op.cc DEPS vol2col)
op_library(pool_op SRCS pool_op.cc DEPS pooling)
op_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS vol2col)
endif()
# FIXME(typhoonzero): save/load depends lodtensor serialization functions
op_library(save_op DEPS lod_tensor)
op_library(load_op DEPS lod_tensor)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv_op.h"
namespace paddle {
namespace operators {
class CudnnConv2DOpMaker : public Conv2DOpMaker {
public:
CudnnConv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv2DOpMaker(proto, op_checker) {
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
}
};
class CudnnConv3DOpMaker : public Conv3DOpMaker {
public:
CudnnConv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv3DOpMaker(proto, op_checker) {
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv2d_cudnn, ops::ConvOp, ops::CudnnConv2DOpMaker,
conv2d_cudnn_grad, ops::ConvOpGrad);
REGISTER_OP(conv3d_cudnn, ops::ConvOp, ops::CudnnConv3DOpMaker,
conv3d_cudnn_grad, ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(
conv2d_cudnn,
ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
conv2d_cudnn_grad,
ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
conv3d_cudnn,
ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
conv3d_cudnn_grad,
ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
......@@ -32,7 +32,7 @@ static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
static_cast<size_t>(1024) * 1024 * 1024;
template <typename T>
class CudnnConvOpKernel : public framework::OpKernel<T> {
class CUDNNConvOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
......@@ -147,7 +147,7 @@ class CudnnConvOpKernel : public framework::OpKernel<T> {
};
template <typename T>
class CudnnConvGradOpKernel : public framework::OpKernel<T> {
class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
......@@ -315,17 +315,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
// TODO(dzhwinter) : below register should be removed
REGISTER_OP_CUDA_KERNEL(conv2d_cudnn,
paddle::operators::CudnnConvOpKernel<float>,
paddle::operators::CudnnConvOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv2d_cudnn_grad,
paddle::operators::CudnnConvGradOpKernel<float>,
paddle::operators::CudnnConvGradOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv3d_cudnn,
paddle::operators::CudnnConvOpKernel<float>,
paddle::operators::CudnnConvOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv3d_cudnn_grad,
paddle::operators::CudnnConvGradOpKernel<float>,
paddle::operators::CudnnConvGradOpKernel<double>);
REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<double>);
REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
paddle::operators::CUDNNConvGradOpKernel<float>,
paddle::operators::CUDNNConvGradOpKernel<double>);
REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<double>);
REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
paddle::operators::CUDNNConvGradOpKernel<float>,
paddle::operators::CUDNNConvGradOpKernel<double>);
......@@ -67,6 +67,23 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
ctx->ShareLoD("Input", "Output");
}
framework::OpKernelType ConvOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
framework::LibraryType library_;
if (use_cudnn) {
library_ = framework::LibraryType::kCUDNN;
} else {
library_ = framework::LibraryType::kPlain;
}
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout_, library_);
}
Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
......@@ -108,6 +125,26 @@ Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
"dilations(h_dilation, w_dilation) of "
"convolution operator.")
.SetDefault({1, 1});
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
// TODO(dzhwinter): need to registered layout transform function
AddAttr<int>("workspace_size_MB",
"Only used in cudnn kernel. Need set use_cudnn to true."
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
AddComment(R"DOC(
Convolution Operator.
......@@ -181,6 +218,25 @@ Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
"dilations(d_dilation, h_dilation, w_dilation) of "
"convolution operator.")
.SetDefault({1, 1, 1});
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
// TODO(dzhwinter): need to registered layout transform function
AddAttr<int>("workspace_size_MB",
"Only used in cudnn kernel. workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
AddComment(R"DOC(
Convolution3D Operator.
......@@ -224,6 +280,23 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
}
}
framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
framework::LibraryType library_;
if (use_cudnn) {
library_ = framework::LibraryType::kCUDNN;
} else {
library_ = framework::LibraryType::kPlain;
}
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout_, library_);
}
} // namespace operators
} // namespace paddle
......
......@@ -62,12 +62,20 @@ class ConvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
class ConvOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
template <typename DeviceContext, typename T>
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv_transpose_op.h"
namespace paddle {
namespace operators {
class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
public:
CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv2DTransposeOpMaker(proto, op_checker) {
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
.SetDefault(4096);
}
};
class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker {
public:
CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv3DTransposeOpMaker(proto, op_checker) {
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
.SetDefault(4096);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad,
ops::ConvTransposeOpGrad);
REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn,
ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
double>);
REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad,
ops::ConvTransposeOpGrad);
REGISTER_OP_CPU_KERNEL(
conv3d_transpose_cudnn,
ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
conv3d_transpose_cudnn_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
double>);
......@@ -28,10 +28,10 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
using DataLayout = platform::DataLayout;
static constexpr size_t kConvCudnnWorkspaceLimitBytes = 1024 * 1024 * 1024;
static constexpr size_t kConvCUDNNWorkspaceLimitBytes = 1024 * 1024 * 1024;
template <typename T>
class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
......@@ -77,7 +77,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn conv workspace ---------------------
void* cudnn_workspace = nullptr;
size_t workspace_size_in_bytes; // final workspace to allocate.
size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes;
size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes;
if (user_workspace_size > 0) {
workspace_size_limit = user_workspace_size * 1024 * 1024;
}
......@@ -116,7 +116,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
};
template <typename T>
class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
......@@ -161,7 +161,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
cudnnConvolutionBwdFilterAlgo_t filter_algo;
size_t bwd_filter_ws_size, fwd_ws_size;
size_t workspace_size_in_bytes = 0;
size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes;
size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes;
if (user_workspace_size > 0) {
workspace_size_limit = user_workspace_size * 1024 * 1024;
}
......@@ -236,16 +236,16 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn,
ops::CudnnConvTransposeOpKernel<float>,
ops::CudnnConvTransposeOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn_grad,
ops::CudnnConvTransposeGradOpKernel<float>,
ops::CudnnConvTransposeGradOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn,
ops::CudnnConvTransposeOpKernel<float>,
ops::CudnnConvTransposeOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn_grad,
ops::CudnnConvTransposeGradOpKernel<float>,
ops::CudnnConvTransposeGradOpKernel<double>);
REGISTER_OP_KERNEL(conv2d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
ops::CUDNNConvTransposeOpKernel<float>,
ops::CUDNNConvTransposeOpKernel<double>);
REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
ops::CUDNNConvTransposeGradOpKernel<float>,
ops::CUDNNConvTransposeGradOpKernel<double>);
REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
ops::CUDNNConvTransposeOpKernel<float>,
ops::CUDNNConvTransposeOpKernel<double>);
REGISTER_OP_KERNEL(conv3d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
ops::CUDNNConvTransposeGradOpKernel<float>,
ops::CUDNNConvTransposeGradOpKernel<double>);
......@@ -58,6 +58,23 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
framework::LibraryType library_;
if (use_cudnn) {
library_ = framework::LibraryType::kCUDNN;
} else {
library_ = framework::LibraryType::kPlain;
}
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout_, library_);
}
Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
......@@ -94,6 +111,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
"(vector<int> default:{0, 0}), the paddings(h_pad, w_pad) of convolution "
"transpose operator.")
.SetDefault({0, 0});
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
// TODO(dzhwinter): need to registered layout transform function
AddAttr<int>("workspace_size_MB",
"Used in cudnn kernel only. workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
.SetDefault(4096);
AddComment(R"DOC(
Convolution2D Transpose Operator.
......@@ -163,6 +199,25 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
"(vector<int> default:{0, 0, 0}), paddings(d_pad, "
"h_pad, w_pad) of convolution transpose operator.")
.SetDefault({0, 0, 0});
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
// TODO(dzhwinter): need to registered layout transform function
AddAttr<int>("workspace_size_MB",
"Used in cudnn kernel only. workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
.SetDefault(4096);
AddComment(R"DOC(
Convolution3D Transpose Operator.
......@@ -205,6 +260,23 @@ void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
}
}
framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
framework::LibraryType library_;
if (use_cudnn) {
library_ = framework::LibraryType::kCUDNN;
} else {
library_ = framework::LibraryType::kPlain;
}
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout_, library_);
}
} // namespace operators
} // namespace paddle
......
......@@ -42,12 +42,20 @@ class ConvTransposeOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
class ConvTransposeOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
template <typename DeviceContext, typename T>
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/pool_cudnn_op.h"
namespace ops = paddle::operators;
REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(
pool2d_cudnn, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
pool2d_cudnn_grad,
ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad,
ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(
pool3d_cudnn, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
pool3d_cudnn_grad,
ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
......@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/pool_cudnn_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/pool_op.h"
#include "paddle/platform/cudnn_helper.h"
namespace paddle {
......@@ -25,7 +26,7 @@ using DataLayout = platform::DataLayout;
using PoolingMode = platform::PoolingMode;
template <typename T>
class PoolCudnnOpKernel : public framework::OpKernel<T> {
class PoolCUDNNOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
......@@ -86,7 +87,7 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
};
template <typename T>
class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
......@@ -162,12 +163,16 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>,
ops::PoolCudnnOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
ops::PoolCudnnGradOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel<float>,
ops::PoolCudnnOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
ops::PoolCudnnGradOpKernel<double>);
REGISTER_OP_KERNEL(pool2d, CUDNN, ::paddle::platform::CUDAPlace,
ops::PoolCUDNNOpKernel<float>,
ops::PoolCUDNNOpKernel<double>);
REGISTER_OP_KERNEL(pool2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
ops::PoolCUDNNGradOpKernel<float>,
ops::PoolCUDNNGradOpKernel<double>);
REGISTER_OP_KERNEL(pool3d, CUDNN, ::paddle::platform::CUDAPlace,
ops::PoolCUDNNOpKernel<float>,
ops::PoolCUDNNOpKernel<double>);
REGISTER_OP_KERNEL(pool3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
ops::PoolCUDNNGradOpKernel<float>,
ops::PoolCUDNNGradOpKernel<double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/pool_op.h"
namespace paddle {
namespace operators {} // namespace operators
} // namespace paddle
......@@ -61,6 +61,23 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
ctx->ShareLoD("X", "Out");
}
framework::OpKernelType PoolOp::GetExpectedKernelType(
const framework::ExecutionContext &ctx) const {
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
framework::LibraryType library_;
if (use_cudnn) {
library_ = framework::LibraryType::kCUDNN;
} else {
library_ = framework::LibraryType::kPlain;
}
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
layout_, library_);
}
void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
......@@ -68,6 +85,23 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
const framework::ExecutionContext &ctx) const {
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
framework::LibraryType library_;
if (use_cudnn) {
library_ = framework::LibraryType::kCUDNN;
} else {
library_ = framework::LibraryType::kPlain;
}
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
layout_, library_);
}
Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
......@@ -101,15 +135,27 @@ Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
AddAttr<std::vector<int>>("strides",
"(vector<int>, default {1, 1}), strides(height, "
"width) of pooling operator.")
.SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently,
.SetDefault({1, 1});
// TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>(
"paddings",
"(vector<int>, default {0,0}), paddings(height, width) of pooling "
"operator."
"If global_pooling = true, paddings and ksize will be ignored.")
.SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
.SetDefault({0, 0});
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
// TODO(dzhwinter): need to registered layout transform function
AddComment(R"DOC(
Pool2d Operator.
......@@ -182,6 +228,19 @@ Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
.SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
// TODO(dzhwinter): need to registered layout transform function
AddComment(R"DOC(
Pool3d Operator.
......
......@@ -29,6 +29,10 @@ class PoolOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
class PoolOpGrad : public framework::OperatorWithKernel {
......@@ -36,6 +40,10 @@ class PoolOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker {
......
......@@ -44,7 +44,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
#ifdef PADDLE_USE_DSO
bool HasCUDNN() {
std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle);
std::call_once(cudnn_dso_flag, GetCUDNNDsoHandle, &cudnn_dso_handle);
return cudnn_dso_handle != nullptr;
}
......
......@@ -36,7 +36,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
auto operator()(Args... args) -> decltype(__name(args...)) { \
using cudnn_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(cudnn_dso_flag, \
paddle::platform::dynload::GetCudnnDsoHandle, \
paddle::platform::dynload::GetCUDNNDsoHandle, \
&cudnn_dso_handle); \
EnforceCUDNNLoaded(#__name); \
void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
......
......@@ -134,7 +134,7 @@ void GetCublasDsoHandle(void** dso_handle) {
#endif
}
void GetCudnnDsoHandle(void** dso_handle) {
void GetCUDNNDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle,
false);
......
......@@ -32,7 +32,7 @@ void GetCublasDsoHandle(void** dso_handle);
* @param **dso_handle dso handler
*
*/
void GetCudnnDsoHandle(void** dso_handle);
void GetCUDNNDsoHandle(void** dso_handle);
/**
* @brief load the DSO of CURAND
......
......@@ -430,13 +430,8 @@ All parameter, weight, gradient are variables in Paddle.
m.def("init_glog", framework::InitGLOG);
m.def("init_devices", &framework::InitDevices);
m.def("use_cpu", framework::UseCPU);
m.def("use_mkldnn", framework::UseMKLDNN);
m.def("use_cuda", framework::UseCUDA);
m.def("use_cudnn", framework::UseCUDNN);
m.def("use_all", framework::UseALL);
m.def("is_compile_gpu", IsCompileGPU);
m.def("set_feed_variable", framework::SetFeedVariable);
m.def("get_fetch_variable", framework::GetFetchVariable);
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#include <string>
#include "paddle/framework/tensor.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/memory/memcpy.h"
#include "paddle/platform/device_context.h"
#include "pybind11/numpy.h"
......@@ -97,14 +97,27 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
template <typename T>
T TensorGetElement(framework::Tensor &self, size_t offset) {
PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
return self.data<T>()[offset];
if (platform::is_cpu_place(self.place())) {
return self.data<T>()[offset];
} else {
std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
framework::Copy(self, platform::CPUPlace(), dst.get());
return dst->data<T>()[offset];
}
}
// TODO(dzhwinter) : fix the redundent Tensor allocate and free
template <typename T>
void TensorSetElement(framework::Tensor &self, size_t offset, T elem) {
PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
self.data<T>()[offset] = elem;
if (platform::is_gpu_place(self.place())) {
std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
framework::Copy(self, platform::CPUPlace(), dst.get());
dst->data<T>()[offset] = elem;
framework::Copy(*dst.get(), self.place(), &self);
} else if (platform::is_cpu_place(self.place())) {
self.data<T>()[offset] = elem;
}
}
template <typename T>
......
......@@ -775,7 +775,7 @@ def conv2d(input,
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type='conv2d_cudnn',
type='conv2d',
inputs={
'Input': input,
'Filter': filter_param,
......
......@@ -31,7 +31,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs[in_name] = []
if in_dup:
sub_in = inputs[in_name]
for sub_in_name, _ in sub_in:
for item in sub_in:
sub_in_name, _ = item[0], item[1]
__create_var__(in_name, sub_in_name)
else:
__create_var__(in_name, in_name)
......@@ -41,7 +42,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs[out_name] = []
if out_dup:
sub_out = outputs[out_name]
for sub_out_name, _ in sub_out:
for item in sub_out:
sub_out_name, _ = item[0], item[1]
__create_var__(out_name, sub_out_name)
else:
__create_var__(out_name, out_name)
......@@ -71,13 +73,15 @@ def set_input(scope, op, inputs, place):
if in_name in inputs:
if in_dup:
sub_in = inputs[in_name]
for sub_in_name, sub_in_val in sub_in:
for item in sub_in:
sub_in_name, sub_in_val = item[0], item[1]
__set_input__(sub_in_name, sub_in_val)
else:
__set_input__(in_name, inputs[in_name])
def get_numeric_gradient(scope,
def get_numeric_gradient(place,
scope,
op,
inputs,
input_to_check,
......@@ -85,7 +89,7 @@ def get_numeric_gradient(scope,
delta=0.005,
in_place=False):
# FIXME: change this method by compile time concepts
set_input(scope, op, inputs, core.CPUPlace())
set_input(scope, op, inputs, place)
def product(dim):
return reduce(lambda a, b: a * b, dim, 1)
......@@ -93,7 +97,7 @@ def get_numeric_gradient(scope,
def get_output():
sum = []
for output_name in output_names:
op.run(scope, core.CPUPlace())
op.run(scope, place)
sum.append(
np.array(scope.find_var(output_name).get_tensor()).mean())
return np.array(sum).mean()
......@@ -127,7 +131,7 @@ def get_numeric_gradient(scope,
# we use a for loop to compute the gradient of every element.
for i in xrange(tensor_size):
if in_place:
set_input(scope, op, inputs, core.CPUPlace())
set_input(scope, op, inputs, place)
# get one input element throw it's index i.
origin = __get_elem__(tensor_to_check, i)
......@@ -137,7 +141,7 @@ def get_numeric_gradient(scope,
y_pos = get_output()
if in_place:
set_input(scope, op, inputs, core.CPUPlace())
set_input(scope, op, inputs, place)
x_neg = origin - delta
__set_elem__(tensor_to_check, i, x_neg)
......@@ -283,7 +287,8 @@ class OpTest(unittest.TestCase):
if not isinstance(sub_out, list):
raise AssertionError("sub_out type %s is not list",
type(sub_out))
for sub_out_name, expect in sub_out:
for item in sub_out:
sub_out_name, expect = item[0], item[1]
idx = find_actual(sub_out_name, fetch_list)
actual = outs[idx]
actual_t = np.array(actual)
......@@ -347,6 +352,24 @@ class OpTest(unittest.TestCase):
in_place=False,
max_relative_error=0.005,
user_defined_grads=None):
places = [core.CPUPlace()]
if core.is_compile_gpu() and core.op_support_gpu(self.op_type):
places.append(core.CUDAPlace(0))
for place in places:
self.check_grad_with_place(place, inputs_to_check, output_names,
no_grad_set, numeric_grad_delta,
in_place, max_relative_error,
user_defined_grads)
def check_grad_with_place(self,
place,
inputs_to_check,
output_names,
no_grad_set=None,
numeric_grad_delta=0.005,
in_place=False,
max_relative_error=0.005,
user_defined_grads=None):
self.scope = core.Scope()
op_inputs = self.inputs if hasattr(self, "inputs") else dict()
op_outputs = self.outputs if hasattr(self, "outputs") else dict()
......@@ -362,6 +385,7 @@ class OpTest(unittest.TestCase):
numeric_grads = user_defined_grads or [
get_numeric_gradient(
place,
self.scope,
self.op,
self.inputs,
......@@ -370,22 +394,12 @@ class OpTest(unittest.TestCase):
delta=numeric_grad_delta,
in_place=in_place) for input_to_check in inputs_to_check
]
cpu_place = core.CPUPlace()
cpu_analytic_grads = self._get_gradient(inputs_to_check, cpu_place,
output_names, no_grad_set)
self.__assert_is_close(numeric_grads, cpu_analytic_grads,
inputs_to_check, max_relative_error,
"Gradient Check On %s" % str(cpu_place))
if core.is_compile_gpu() and self.op.support_gpu():
gpu_place = core.CUDAPlace(0)
gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place,
output_names, no_grad_set)
self.__assert_is_close(numeric_grads, gpu_analytic_grads,
inputs_to_check, max_relative_error,
"Gradient Check On %s" % str(gpu_place))
analytic_grads = self._get_gradient(inputs_to_check, place,
output_names, no_grad_set)
self.__assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
max_relative_error,
"Gradient Check On %s" % str(place))
@staticmethod
def _create_var_descs_(block, var_dict):
......
......@@ -49,7 +49,7 @@ def conv2d_forward_naive(input, filter, group, conv_param):
class TestConv2dOp(OpTest):
def setUp(self):
core.use_cuda()
self.use_cudnn = False
self.init_op_type()
self.init_group()
self.init_dilation()
......@@ -70,30 +70,59 @@ class TestConv2dOp(OpTest):
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations
'dilations': self.dilations,
'use_cudnn': self.use_cudnn
}
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output()
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
else:
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.02)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def test_check_grad_no_filter(self):
self.check_grad(
['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
else:
self.check_grad(
['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
else:
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0]
......@@ -167,39 +196,39 @@ class TestWithDilation(TestConv2dOp):
self.groups = 3
#----------------Conv2dCudnn----------------
class TestCudnn(TestConv2dOp):
#----------------Conv2dCUDNN----------------
class TestCUDNN(TestConv2dOp):
def init_op_type(self):
core.use_cudnn()
self.op_type = "conv2d_cudnn"
self.use_cudnn = True
self.op_type = "conv2d"
class TestCudnnWithPad(TestWithPad):
class TestCUDNNWithPad(TestWithPad):
def init_op_type(self):
core.use_cudnn()
self.op_type = "conv2d_cudnn"
self.use_cudnn = True
self.op_type = "conv2d"
class TestCudnnWithStride(TestWithStride):
class TestCUDNNWithStride(TestWithStride):
def init_op_type(self):
core.use_cudnn()
self.op_type = "conv2d_cudnn"
self.use_cudnn = True
self.op_type = "conv2d"
class TestCudnnWithGroup(TestWithGroup):
class TestCUDNNWithGroup(TestWithGroup):
def init_op_type(self):
core.use_cudnn()
self.op_type = "conv2d_cudnn"
self.use_cudnn = True
self.op_type = "conv2d"
class TestCudnnWith1x1(TestWith1x1):
class TestCUDNNWith1x1(TestWith1x1):
def init_op_type(self):
core.use_cudnn()
self.op_type = "conv2d_cudnn"
self.use_cudnn = True
self.op_type = "conv2d"
# cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# class TestCUDNNWithDilation(TestWithDilation):
# def init_op_type(self):
# self.op_type = "conv_cudnn"
......
import unittest
import numpy as np
import paddle.v2.fluid.core as core
from op_test import OpTest
......@@ -37,6 +39,7 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
class TestConv2dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.use_cudnn = False
self.init_op_type()
self.init_test_case()
......@@ -47,7 +50,9 @@ class TestConv2dTransposeOp(OpTest):
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'dilations': self.dilations
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
output = conv2dtranspose_forward_naive(input_, filter_,
......@@ -56,25 +61,53 @@ class TestConv2dTransposeOp(OpTest):
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output()
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
else:
self.check_output()
def test_check_grad_no_input(self):
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
else:
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
def test_check_grad_no_filter(self):
self.check_grad(
['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
else:
self.check_grad(
['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
def test_check_grad(self):
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.02)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def init_test_case(self):
self.pad = [0, 0]
......@@ -119,12 +152,13 @@ class TestWithDilation(TestConv2dTransposeOp):
# ------------ test_cudnn ------------
class TestCudnn(TestConv2dTransposeOp):
class TestCUDNN(TestConv2dTransposeOp):
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
self.use_cudnn = True
self.op_type = "conv2d_transpose"
class TestCudnnWithPad(TestWithPad):
class TestCUDNNWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
......@@ -134,10 +168,11 @@ class TestCudnnWithPad(TestWithPad):
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
self.use_cudnn = True
self.op_type = "conv2d_transpose"
class TestCudnnWithStride(TestWithStride):
class TestCUDNNWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
......@@ -147,11 +182,12 @@ class TestCudnnWithStride(TestWithStride):
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
self.use_cudnn = True
self.op_type = "conv2d_transpose"
# #cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# class TestCUDNNWithDilation(TestWithDilation):
# def init_test_case(self):
# self.pad = [1, 1]
# self.stride = [2, 2]
......@@ -161,7 +197,7 @@ class TestCudnnWithStride(TestWithStride):
# self.filter_size = [f_c, 6, 3, 3]
#
# def init_op_type(self):
# self.op_type = "conv2d_transpose_cudnn"
# self.op_type = "conv2d_transpose"
if __name__ == '__main__':
unittest.main()
import unittest
import numpy as np
import paddle.v2.fluid.core as core
from op_test import OpTest
......@@ -54,6 +56,7 @@ def conv3d_forward_naive(input, filter, group, conv_param):
class TestConv3dOp(OpTest):
def setUp(self):
self.use_cudnn = False
self.init_group()
self.init_op_type()
self.init_dilation()
......@@ -62,7 +65,9 @@ class TestConv3dOp(OpTest):
conv3d_param = {
'stride': self.stride,
'pad': self.pad,
'dilations': self.dilations
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
input = np.random.random(self.input_size).astype("float32")
filter = np.random.random(self.filter_size).astype("float32")
......@@ -79,25 +84,53 @@ class TestConv3dOp(OpTest):
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output()
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
else:
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.03)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
def test_check_grad_no_filter(self):
self.check_grad(
['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
else:
self.check_grad(
['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
else:
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0, 0]
......@@ -169,31 +202,35 @@ class TestWithDilation(TestConv3dOp):
self.groups = 3
class TestCudnn(TestConv3dOp):
class TestCUDNN(TestConv3dOp):
def init_op_type(self):
self.op_type = "conv3d_cudnn"
self.use_cudnn = True
self.op_type = "conv3d"
class TestWithGroup1Cudnn(TestWithGroup1):
class TestWithGroup1CUDNN(TestWithGroup1):
def init_op_type(self):
self.op_type = "conv3d_cudnn"
self.use_cudnn = True
self.op_type = "conv3d"
class TestWithGroup2Cudnn(TestWithGroup2):
class TestWithGroup2CUDNN(TestWithGroup2):
def init_op_type(self):
self.op_type = "conv3d_cudnn"
self.use_cudnn = True
self.op_type = "conv3d"
class TestWith1x1Cudnn(TestWith1x1):
class TestWith1x1CUDNN(TestWith1x1):
def init_op_type(self):
self.op_type = "conv3d_cudnn"
self.use_cudnn = True
self.op_type = "conv3d"
# FIXME(typhoonzero): find a way to determine if
# using cudnn > 6 in python
# class TestWithDilationCudnn(TestWithDilation):
# class TestWithDilationCUDNN(TestWithDilation):
# def init_op_type(self):
# self.op_type = "conv3d_cudnn"
# self.op_type = "conv3d"
if __name__ == '__main__':
unittest.main()
import unittest
import numpy as np
import paddle.v2.fluid.core as core
from op_test import OpTest
......@@ -44,6 +46,7 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs):
class TestConv3dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.use_cudnn = False
self.init_op_type()
self.init_test_case()
......@@ -54,7 +57,9 @@ class TestConv3dTransposeOp(OpTest):
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'dilations': self.dilations
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
output = conv3dtranspose_forward_naive(input_, filter_,
......@@ -63,25 +68,53 @@ class TestConv3dTransposeOp(OpTest):
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output()
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
else:
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.03)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
def test_check_grad_no_filter(self):
self.check_grad(
['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
else:
self.check_grad(
['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
else:
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0, 0]
......@@ -126,12 +159,13 @@ class TestWithDilation(TestConv3dTransposeOp):
# ------------ test_cudnn ------------
class TestCudnn(TestConv3dTransposeOp):
class TestCUDNN(TestConv3dTransposeOp):
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
self.use_cudnn = True
self.op_type = "conv3d_transpose"
class TestCudnnWithPad(TestWithPad):
class TestCUDNNWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
......@@ -141,10 +175,11 @@ class TestCudnnWithPad(TestWithPad):
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
self.use_cudnn = True
self.op_type = "conv3d_transpose"
class TestCudnnWithStride(TestWithStride):
class TestCUDNNWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [2, 2, 2]
......@@ -154,11 +189,12 @@ class TestCudnnWithStride(TestWithStride):
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
self.use_cudnn = True
self.op_type = "conv3d_transpose"
# #cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# class TestCUDNNWithDilation(TestWithDilation):
# def init_test_case(self):
# self.pad = [1, 1, 1]
# self.stride = [2, 2, 2]
......@@ -168,7 +204,7 @@ class TestCudnnWithStride(TestWithStride):
# self.filter_size = [f_c, 6, 3, 3, 3]
#
# def init_op_type(self):
# self.op_type = "conv3d_transpose_cudnn"
# self.op_type = "conv3d_transpose"
if __name__ == '__main__':
unittest.main()
import unittest
import paddle.v2.fluid as fluid
import numpy
import sys
# TODO(dzhwinter): get places op check need to be enhanced.
sys.exit(0)
class BaseParallelForTest(unittest.TestCase):
......@@ -13,13 +17,13 @@ class BaseParallelForTest(unittest.TestCase):
returns the data layers, and the second yield returns the loss.
The modified data variables will be sent back during the first
yield.
feed(dict): The executor feeding dictionary.
fetch(list|basestr): The fetch name lists.
Returns:
None
Raises:
AssertionError when the computation of cpu, parallel.for in cpu,
gpu, parallel.for in gpu are different.
......
import unittest
import numpy as np
import paddle.v2.fluid.core as core
from op_test import OpTest
......@@ -44,6 +46,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
class TestPool2d_Op(OpTest):
def setUp(self):
self.use_cudnn = False
self.init_test_case()
self.init_global_pool()
self.init_op_type()
......@@ -62,15 +65,25 @@ class TestPool2d_Op(OpTest):
'ksize': self.ksize,
'pooling_type': self.pool_type,
'global_pooling': self.global_pool,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
self.outputs = {'Out': output.astype('float32')}
def test_check_output(self):
self.check_output()
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
else:
self.check_output()
def test_check_grad(self):
if self.pool_type != "max":
if self.use_cudnn and self.pool_type != "max":
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, set(['X']), 'Out', max_relative_error=0.07)
elif self.pool_type != "max":
self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
def init_test_case(self):
......@@ -153,35 +166,41 @@ class TestCase5(TestCase2):
self.pool2D_forward_naive = max_pool2D_forward_naive
#--------------------test pool2d_cudnn--------------------
class TestCudnnCase1(TestPool2d_Op):
#--------------------test pool2d--------------------
class TestCUDNNCase1(TestPool2d_Op):
def init_op_type(self):
self.op_type = "pool2d_cudnn"
self.use_cudnn = True
self.op_type = "pool2d"
class TestCudnnCase2(TestCase1):
class TestCUDNNCase2(TestCase1):
def init_op_type(self):
self.op_type = "pool2d_cudnn"
self.use_cudnn = True
self.op_type = "pool2d"
class TestCudnnCase3(TestCase2):
class TestCUDNNCase3(TestCase2):
def init_op_type(self):
self.op_type = "pool2d_cudnn"
self.use_cudnn = True
self.op_type = "pool2d"
class TestCudnnCase4(TestCase3):
class TestCUDNNCase4(TestCase3):
def init_op_type(self):
self.op_type = "pool2d_cudnn"
self.use_cudnn = True
self.op_type = "pool2d"
class TestCudnnCase5(TestCase4):
class TestCUDNNCase5(TestCase4):
def init_op_type(self):
self.op_type = "pool2d_cudnn"
self.use_cudnn = True
self.op_type = "pool2d"
class TestCudnnCase6(TestCase5):
class TestCUDNNCase6(TestCase5):
def init_op_type(self):
self.op_type = "pool2d_cudnn"
self.use_cudnn = True
self.op_type = "pool2d"
if __name__ == '__main__':
......
import unittest
import numpy as np
import paddle.v2.fluid.core as core
from op_test import OpTest
......@@ -52,6 +54,7 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
class TestPool3d_Op(OpTest):
def setUp(self):
self.use_cudnn = False
self.init_test_case()
self.init_global_pool()
self.init_op_type()
......@@ -71,15 +74,25 @@ class TestPool3d_Op(OpTest):
'ksize': self.ksize,
'pooling_type': self.pool_type,
'global_pooling': self.global_pool,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
self.outputs = {'Out': output.astype('float32')}
def test_check_output(self):
self.check_output()
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(place, atol=1e-5)
else:
self.check_output()
def test_check_grad(self):
if self.pool_type != "max":
if self.use_cudnn and self.pool_type != "max":
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, set(['X']), 'Out', max_relative_error=0.07)
elif self.pool_type != "max":
self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
def init_test_case(self):
......@@ -163,35 +176,41 @@ class TestCase5(TestCase2):
self.pool3D_forward_naive = max_pool3D_forward_naive
#--------------------test pool3d_cudnn--------------------
class TestCudnnCase1(TestPool3d_Op):
#--------------------test pool3d--------------------
class TestCUDNNCase1(TestPool3d_Op):
def init_op_type(self):
self.op_type = "pool3d_cudnn"
self.use_cudnn = True
self.op_type = "pool3d"
class TestCudnnCase2(TestCase1):
class TestCUDNNCase2(TestCase1):
def init_op_type(self):
self.op_type = "pool3d_cudnn"
self.use_cudnn = True
self.op_type = "pool3d"
class TestCudnnCase3(TestCase2):
class TestCUDNNCase3(TestCase2):
def init_op_type(self):
self.op_type = "pool3d_cudnn"
self.use_cudnn = True
self.op_type = "pool3d"
class TestCudnnCase4(TestCase3):
class TestCUDNNCase4(TestCase3):
def init_op_type(self):
self.op_type = "pool3d_cudnn"
self.use_cudnn = True
self.op_type = "pool3d"
class TestCudnnCase5(TestCase4):
class TestCUDNNCase5(TestCase4):
def init_op_type(self):
self.op_type = "pool3d_cudnn"
self.use_cudnn = True
self.op_type = "pool3d"
class TestCudnnCase6(TestCase5):
class TestCUDNNCase6(TestCase5):
def init_op_type(self):
self.op_type = "pool3d_cudnn"
self.use_cudnn = True
self.op_type = "pool3d"
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册