Commit 4d33bfbd authored by sweetsky0901

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into cross_channel_norm

...@@ -29,7 +29,7 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc DEPS glog)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
-cc_library(data_transform SRCS data_transform.cc DEPS tensor framework_proto)
+cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor framework_proto)
cc_test(data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context)
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
......
...@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/framework/data_transform.h"
#include "paddle/framework/lod_tensor.h"
+#include "paddle/platform/device_context.h"
namespace paddle {
namespace framework {
...@@ -23,5 +24,83 @@ DataTransformFnMap& DataTransformFnMap::Instance() {
return data_transform_map;
}
auto KernelFP32 = OpKernelType(proto::DataType::FP32, platform::CPUPlace(),
DataLayout::kNHWC, LibraryType::kPlain);
auto KernelFP64 = OpKernelType(proto::DataType::FP64, platform::CPUPlace(),
DataLayout::kNHWC, LibraryType::kPlain);
auto KernelNHWC = OpKernelType(proto::DataType::FP64, platform::CPUPlace(),
DataLayout::kNHWC, LibraryType::kPlain);
auto KernelNCHW = OpKernelType(proto::DataType::FP64, platform::CPUPlace(),
DataLayout::kNCHW, LibraryType::kPlain);
void TransDataType(const platform::DeviceContext* ctx,
const KernelTypePair& kernel_pair, const Variable& in,
Variable* out) {
PADDLE_ENFORCE(in.IsType<Tensor>(), "Only Support Tensor transform!.");
PADDLE_ENFORCE(
platform::places_are_same_class(kernel_pair.first.place_,
kernel_pair.second.place_),
"TransDataType Only Support DataType transform on same place!");
auto src = in.Get<Tensor>();
auto* dst = out->GetMutable<Tensor>();
auto dims = src.dims();
dst->Resize(dims);
auto dst_type = kernel_pair.second.data_type_;
auto src_type = kernel_pair.first.data_type_;
switch (src_type) {
case proto::DataType::FP32:
framework::VisitDataType(dst_type, CastDataType<float>(src, dst, ctx));
break;
case proto::DataType::FP64:
framework::VisitDataType(dst_type, CastDataType<double>(src, dst, ctx));
break;
case proto::DataType::INT32:
framework::VisitDataType(dst_type, CastDataType<int>(src, dst, ctx));
break;
case proto::DataType::INT64:
framework::VisitDataType(dst_type, CastDataType<int64_t>(src, dst, ctx));
break;
case proto::DataType::BOOL:
framework::VisitDataType(dst_type, CastDataType<bool>(src, dst, ctx));
break;
default:
PADDLE_THROW("Not support type %d", src_type);
}
}
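TransDataType above does a two-level dispatch: the switch on the runtime source type fixes the compile-time input type, and VisitDataType then picks the output type for the CastDataType functor. A minimal standalone sketch of that pattern, with illustrative enum and visitor names rather than Paddle's:

// Sketch only: maps a runtime type tag to a compile-time type and forwards
// to a templated visitor, mirroring the switch + VisitDataType combination.
#include <cstdio>
#include <vector>

enum class DType { FP32, FP64, INT32 };

template <typename Visitor>
void VisitDType(DType t, Visitor vis) {
  switch (t) {
    case DType::FP32:  vis.template apply<float>();  break;
    case DType::FP64:  vis.template apply<double>(); break;
    case DType::INT32: vis.template apply<int>();    break;
  }
}

template <typename InType>
struct CastTo {
  const std::vector<InType>& in;
  std::vector<double>* out;  // widened storage, just for the demo
  template <typename OutType>
  void apply() {
    out->clear();
    // Cast each element to the visited OutType, then store it for printing.
    for (InType v : in) out->push_back(static_cast<double>(static_cast<OutType>(v)));
  }
};

int main() {
  std::vector<int> src = {1, 2, 3};
  std::vector<double> dst;
  VisitDType(DType::FP64, CastTo<int>{src, &dst});  // destination enum picks OutType
  std::printf("dst[2] = %f\n", dst[2]);
  return 0;
}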
void TransDataLayout(const platform::DeviceContext* ctx,
const KernelTypePair& kernel_pair, const Variable& in,
Variable* out) {
PADDLE_ENFORCE(in.IsType<Tensor>(), "Only Support Tensor transform!.");
PADDLE_ENFORCE(
platform::places_are_same_class(kernel_pair.first.place_,
kernel_pair.second.place_),
"TransDataType Only Support DataType transform on same place!");
auto src = in.Get<Tensor>();
auto* dst = out->GetMutable<Tensor>();
PADDLE_ENFORCE(arity(src.dims()) == 4, "Input Arity Only Suppport 4!");
dst->Resize(src.dims());
auto place = kernel_pair.second.place_;
CopyFrom(src, place, *ctx, dst);
const std::vector<int> axis = {0, 2, 3, 1};
auto src_type = kernel_pair.first.data_type_;
framework::VisitDataType(src_type, CastDataLayout(src, dst, ctx, axis));
dst->set_layout(kernel_pair.second.data_layout_);
}
} // namespace framework
} // namespace paddle
namespace f = paddle::framework;
REGISTER_DATA_TRANSFORM_FN(f::KernelFP32, f::KernelFP64, f::TransDataType);
REGISTER_DATA_TRANSFORM_FN(f::KernelNHWC, f::KernelNCHW, f::TransDataLayout);
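The two REGISTER_DATA_TRANSFORM_FN lines key each transform function on a (source kernel type, destination kernel type) pair held in a singleton map, so the framework can look up the right conversion at run time. A minimal standalone sketch of that registry idea, using illustrative names rather than Paddle's API:

// Sketch only: a function registry keyed by a (from, to) pair, analogous to
// DataTransformFnMap plus REGISTER_DATA_TRANSFORM_FN.
#include <cstdio>
#include <functional>
#include <map>
#include <utility>
#include <vector>

enum class Kind { FP32, FP64 };                  // stand-in for an OpKernelType
using Key = std::pair<Kind, Kind>;               // (source kernel, destination kernel)
using TransformFn =
    std::function<void(const std::vector<float>&, std::vector<double>*)>;

std::map<Key, TransformFn>& Registry() {
  static std::map<Key, TransformFn> instance;    // singleton map of transforms
  return instance;
}

int main() {
  // Register an FP32 -> FP64 transform, loosely analogous to TransDataType.
  Registry()[{Kind::FP32, Kind::FP64}] = [](const std::vector<float>& in,
                                            std::vector<double>* out) {
    out->assign(in.begin(), in.end());           // element-wise widening cast
  };

  std::vector<float> src = {1.f, 2.f, 3.f};
  std::vector<double> dst;
  Registry().at({Kind::FP32, Kind::FP64})(src, &dst);  // lookup, then invoke
  std::printf("%zu values transformed, dst[2] = %f\n", dst.size(), dst[2]);
  return 0;
}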
...@@ -21,16 +21,20 @@ limitations under the License. */
#include "paddle/framework/op_kernel_type.h"
#include "paddle/framework/tensor.h"
#include "paddle/framework/variable.h"
+#include "paddle/operators/math/math_function.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/macros.h"
+#include "paddle/platform/transform.h"
namespace paddle {
namespace framework {
-using DataTransformFn = std::function<void(const platform::DeviceContext* ctx,
-const Variable& in, Variable* out)>;
using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
+using DataTransformFn =
+    std::function<void(const platform::DeviceContext*, const KernelTypePair&,
+                       const Variable&, Variable*)>;
struct KernelTypePairHash {
static void HashCombine(const OpKernelType& t, std::size_t* seed) {
OpKernelType::Hash kernel_type_hasher;
...@@ -45,6 +49,65 @@ struct KernelTypePairHash {
}
};
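KernelTypePairHash combines the hashes of the two OpKernelTypes so that a std::pair can key the unordered_map of transforms. A standalone sketch of the same boost-style hash_combine idea, with plain int keys and illustrative names:

// Sketch only: pair hashing via hash_combine so std::pair can key an
// std::unordered_map, mirroring KernelTypePairHash.
#include <cstddef>
#include <cstdio>
#include <functional>
#include <string>
#include <unordered_map>
#include <utility>

struct PairHash {
  static void HashCombine(std::size_t h, std::size_t* seed) {
    // Boost-style mixing: golden-ratio constant plus shifted copies of the seed.
    *seed ^= h + 0x9e3779b9 + (*seed << 6) + (*seed >> 2);
  }
  std::size_t operator()(const std::pair<int, int>& p) const {
    std::size_t seed = 0;
    HashCombine(std::hash<int>()(p.first), &seed);
    HashCombine(std::hash<int>()(p.second), &seed);
    return seed;
  }
};

int main() {
  std::unordered_map<std::pair<int, int>, std::string, PairHash> m;
  m[{0, 1}] = "fp32->fp64";                      // pair key works once a hasher exists
  std::printf("%s\n", m.at({0, 1}).c_str());
  return 0;
}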
template <typename InType, typename OutType>
struct CastDataTypeFunctor {
HOSTDEVICE inline OutType operator()(InType in) const {
return static_cast<OutType>(in);
}
};
template <typename InType>
struct CastDataType {
CastDataType(const framework::Tensor& in, framework::Tensor* out,
const platform::DeviceContext* ctx)
: in_(in), out_(out), ctx_(ctx) {}
const framework::Tensor in_;
framework::Tensor* out_;
const platform::DeviceContext* ctx_;
template <typename OutType>
void operator()() {
auto place = ctx_->GetPlace();
auto* in_begin = in_.data<InType>();
auto numel = in_.numel();
auto* in_end = in_begin + numel;
auto* out_begin = out_->mutable_data<OutType>(place);
if (platform::is_cpu_place(place)) {
platform::Transform<platform::CPUDeviceContext> trans;
auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>());
} else {
// TODO(dzhwinter): enhance CopyFrom CPU<->GPU with different data type?
PADDLE_THROW("Unsupport CPU <-> GPU!");
}
}
};
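On CPUPlace, CastDataType boils down to applying a per-element static_cast functor across the source buffer with platform::Transform. A standard-library-only sketch of that pattern (an assumed simplification with no Paddle types):

// Sketch only: a cast functor applied over a buffer with std::transform,
// which is what the CPU branch of CastDataType::operator() amounts to.
#include <algorithm>
#include <cassert>
#include <vector>

template <typename InType, typename OutType>
struct CastFunctor {
  OutType operator()(InType in) const { return static_cast<OutType>(in); }
};

template <typename InType, typename OutType>
void CastBuffer(const std::vector<InType>& in, std::vector<OutType>* out) {
  out->resize(in.size());  // analogous to dst->Resize(src.dims()) + mutable_data
  std::transform(in.begin(), in.end(), out->begin(),
                 CastFunctor<InType, OutType>());
}

int main() {
  std::vector<float> src = {0.5f, 1.5f, 2.5f};
  std::vector<double> dst;
  CastBuffer(src, &dst);
  assert(dst.size() == 3 && dst[1] == 1.5);
  return 0;
}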
struct CastDataLayout {
CastDataLayout(const framework::Tensor& in, framework::Tensor* out,
const platform::DeviceContext* ctx,
const std::vector<int>& axis)
: in_(in), out_(out), ctx_(ctx), axis_(axis) {}
const framework::Tensor in_;
framework::Tensor* out_;
const platform::DeviceContext* ctx_;
const std::vector<int> axis_;
template <typename T>
void operator()() {
auto place = ctx_->GetPlace();
if (platform::is_cpu_place(place)) {
operators::math::Transpose<platform::CPUDeviceContext, T, 4> trans4;
auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
trans4(*context, in_, out_, axis_);
} else {
PADDLE_THROW("Unsupport CPU <-> GPU!");
}
}
};
using DataTransformMap =
std::unordered_map<KernelTypePair, DataTransformFn, KernelTypePairHash>;
......
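CastDataLayout delegates the actual permutation to operators::math::Transpose with an axis vector. Below is a standalone sketch of a 4-D axis-permutation transpose on a flat buffer; here axis[i] names the input dimension that feeds output dimension i, so {0, 3, 1, 2} maps NHWC to NCHW under this convention (Paddle's Transpose may use a different axis convention, so treat this purely as an illustration of the indexing):

// Sketch only: explicit index arithmetic for a 4-D permutation transpose.
#include <cassert>
#include <vector>

std::vector<float> Permute4D(const std::vector<float>& in,
                             const std::vector<int>& dims,   // input dims, e.g. {N,H,W,C}
                             const std::vector<int>& axis) { // output dim i <- input dim axis[i]
  std::vector<int> out_dims(4);
  for (int i = 0; i < 4; ++i) out_dims[i] = dims[axis[i]];
  std::vector<float> out(in.size());
  std::vector<int> idx(4);  // current input index
  for (idx[0] = 0; idx[0] < dims[0]; ++idx[0])
    for (idx[1] = 0; idx[1] < dims[1]; ++idx[1])
      for (idx[2] = 0; idx[2] < dims[2]; ++idx[2])
        for (idx[3] = 0; idx[3] < dims[3]; ++idx[3]) {
          int in_off = ((idx[0] * dims[1] + idx[1]) * dims[2] + idx[2]) * dims[3] + idx[3];
          int out_off = ((idx[axis[0]] * out_dims[1] + idx[axis[1]]) * out_dims[2] +
                         idx[axis[2]]) * out_dims[3] + idx[axis[3]];
          out[out_off] = in[in_off];
        }
  return out;
}

int main() {
  // A 1x2x2x3 NHWC tensor whose values encode (h, w, c) as 100*h + 10*w + c.
  std::vector<float> nhwc = {0, 1, 2, 10, 11, 12, 100, 101, 102, 110, 111, 112};
  std::vector<float> nchw = Permute4D(nhwc, {1, 2, 2, 3}, {0, 3, 1, 2});
  // In NCHW layout, the element at (n=0, c=2, h=1, w=0) should be 102.
  assert(nchw[(2 * 2 + 1) * 2 + 0] == 102.f);
  return 0;
}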
...@@ -17,6 +17,7 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/framework/data_transform.h"
+#include "paddle/platform/device_context.h"
namespace paddle {
namespace framework {
...@@ -31,16 +32,18 @@ using namespace platform;
* 1111 -> FP64, GPUPlace, kNCHW, kMKLDNN
*/
-std::array<proto::DataType, 2> kDataType = {
-{proto::DataType::FP32, proto::DataType::FP64}};
+std::array<proto::DataType, 2> kDataType = {proto::DataType::FP32,
+proto::DataType::FP64};
-std::array<Place, 2> kPlace = {{CPUPlace(), CUDAPlace(0)}};
+std::array<Place, 2> kPlace = {CPUPlace(), CUDAPlace(0)};
std::array<DataLayout, 2> kDataLayout = {
-{DataLayout::kNHWC, DataLayout::kNCHW}};
+DataLayout::kNHWC, DataLayout::kNCHW,
+};
std::array<LibraryType, 2> kLibraryType = {
-{LibraryType::kPlain, LibraryType::kMKLDNN}};
+LibraryType::kPlain, LibraryType::kMKLDNN,
+};
OpKernelType GenFromBit(const std::vector<bool> bits) {
return OpKernelType(kDataType[bits[0]], kPlace[bits[1]], kDataLayout[bits[2]],
...@@ -54,17 +57,20 @@ auto kernel1 = GenFromBit({0, 0, 0, 1});
auto kernel2 = GenFromBit({0, 0, 1, 0});
auto kernel3 = GenFromBit({0, 0, 1, 1});
-void TransDataType_t(const platform::DeviceContext* ctx, const Variable& in,
+void TransDataType_t(const platform::DeviceContext* ctx,
+const KernelTypePair& p, const Variable& in,
Variable* out) {
test_value++;
}
-void TransDataLayout_t(const platform::DeviceContext* ctx, const Variable& in,
+void TransDataLayout_t(const platform::DeviceContext* ctx,
+const KernelTypePair& p, const Variable& in,
Variable* out) {
test_value--;
}
-void TransLibraryType_t(const platform::DeviceContext* ctx, const Variable& in,
+void TransLibraryType_t(const platform::DeviceContext* ctx,
+const KernelTypePair& p, const Variable& in,
Variable* out) {
test_value += 2;
}
...@@ -83,17 +89,68 @@ TEST(DataTransform, Register) {
using namespace paddle::platform;
auto& instance = DataTransformFnMap::Instance();
-ASSERT_EQ(instance.Map().size(), 3UL);
-DeviceContext* ctx = nullptr;
paddle::framework::Variable in;
paddle::framework::Variable out;
-instance.Get(std::make_pair(frw::kernel0, frw::kernel1))(ctx, in, &out);
+DeviceContext* ctx = new CPUDeviceContext();
+auto pair0 = std::make_pair(frw::kernel0, frw::kernel1);
+instance.Get(pair0)(ctx, pair0, in, &out);
ASSERT_EQ(test_value, 1);
-instance.Get(std::make_pair(frw::kernel1, frw::kernel2))(ctx, in, &out);
+auto pair1 = std::make_pair(frw::kernel1, frw::kernel2);
+instance.Get(pair1)(ctx, pair1, in, &out);
ASSERT_EQ(test_value, 0);
-instance.Get(std::make_pair(frw::kernel0, frw::kernel2))(ctx, in, &out);
+auto pair3 = std::make_pair(frw::kernel0, frw::kernel2);
+instance.Get(pair3)(ctx, pair3, in, &out);
ASSERT_EQ(test_value, 2);
}
TEST(DataTransform, Layout) {
using namespace paddle::framework;
using namespace paddle::platform;
auto& instance = DataTransformFnMap::Instance();
Variable in;
Variable out;
Tensor* src = in.GetMutable<Tensor>();
src->mutable_data<double>(make_ddim({2, 3, 1, 2}), CPUPlace());
src->set_layout(DataLayout::kNHWC);
DeviceContext* ctx = new CPUDeviceContext();
{
auto kernel1 = GenFromBit({1, 0, 0, 0});
auto kernel2 = GenFromBit({1, 0, 1, 0});
auto pair0 = std::make_pair(kernel1, kernel2);
instance.Get(pair0)(ctx, pair0, in, &out);
}
Tensor dst = out.Get<Tensor>();
EXPECT_TRUE(dst.layout() != src->layout());
}
TEST(DataTransform, DataType) {
using namespace paddle::framework;
using namespace paddle::platform;
auto& instance = DataTransformFnMap::Instance();
DeviceContext* ctx = new CPUDeviceContext();
Variable in;
Variable out;
Tensor* src = in.GetMutable<Tensor>();
float* ptr = src->mutable_data<float>(make_ddim({2, 3}), CPUPlace());
for (int i = 0; i < 6; ++i) {
ptr[i] = i / 3;
}
{
auto kernel1 = GenFromBit({0, 0, 0, 0});
auto kernel2 = GenFromBit({1, 0, 0, 0});
auto pair0 = std::make_pair(kernel1, kernel2);
instance.Get(pair0)(ctx, pair0, in, &out);
}
Tensor dst = out.Get<Tensor>();
EXPECT_TRUE(dst.data<double>() != nullptr);
}
...@@ -461,7 +461,7 @@ void OperatorWithKernel::Run(const Scope& scope, ...@@ -461,7 +461,7 @@ void OperatorWithKernel::Run(const Scope& scope,
dev_ctx->Wait(); dev_ctx->Wait();
for (auto var_name : need_trans) { for (auto var_name : need_trans) {
(*trans_fun)(trans_dev_ctx, *(scope.FindVar(var_name)), (*trans_fun)(trans_dev_ctx, kernel_pair, *(scope.FindVar(var_name)),
scope.FindVar(var_name + framework::KernelTypeToString( scope.FindVar(var_name + framework::KernelTypeToString(
expected_kernel_key))); expected_kernel_key)));
} }
......
...@@ -247,7 +247,10 @@ template struct SetConstant<platform::CPUDeviceContext, bool>;
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUDeviceContext, float, RANK>; \
-template struct Transpose<platform::CPUDeviceContext, double, RANK>;
+template struct Transpose<platform::CPUDeviceContext, double, RANK>; \
+template struct Transpose<platform::CPUDeviceContext, int, RANK>; \
+template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \
+template struct Transpose<platform::CPUDeviceContext, bool, RANK>;
DEFINE_CPU_TRANS(1);
DEFINE_CPU_TRANS(2);
......
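The DEFINE_CPU_TRANS change stamps out additional explicit instantiations of Transpose so that int, int64_t, and bool tensors can be transposed even though the template's body lives in a .cc file. A small self-contained sketch of that explicit-instantiation-via-macro pattern, with illustrative types rather than Paddle's:

// Sketch only: a macro that emits explicit instantiation definitions for a
// fixed set of element types at a given rank.
#include <cstdint>
#include <cstdio>

template <typename T, int Rank>
struct Transpose {
  void operator()() const { std::printf("Transpose<T, %d>\n", Rank); }
};

#define DEFINE_TRANS(RANK)                  \
  template struct Transpose<float, RANK>;   \
  template struct Transpose<double, RANK>;  \
  template struct Transpose<int, RANK>;     \
  template struct Transpose<int64_t, RANK>; \
  template struct Transpose<bool, RANK>;

DEFINE_TRANS(1);
DEFINE_TRANS(2);

int main() {
  Transpose<int, 2>()();  // uses one of the instantiations stamped above
  return 0;
}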