From 15da2f9a0d555edbddacb3e5f4c747f1059602df Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 13 Feb 2019 14:00:31 +0000 Subject: [PATCH] add embseqpool jitkernel refer code, test and benchmark test=develop --- paddle/fluid/operators/jit/benchmark.cc | 36 ++++++++++ paddle/fluid/operators/jit/helper.cc | 1 + paddle/fluid/operators/jit/helper.h | 9 +++ paddle/fluid/operators/jit/kernel_base.h | 66 +++++++++++++------ paddle/fluid/operators/jit/kernel_key.cc | 5 ++ .../fluid/operators/jit/refer/CMakeLists.txt | 1 + paddle/fluid/operators/jit/refer/refer.cc | 2 + paddle/fluid/operators/jit/refer/refer.h | 34 ++++++++++ paddle/fluid/operators/jit/test.cc | 65 ++++++++++++++++++ 9 files changed, 200 insertions(+), 19 deletions(-) diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 97ddf223aef..9831b6ef922 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -301,6 +301,37 @@ void BenchSeqPoolKernel() { } } +template +void BenchEmbSeqPoolKernel() { + std::vector pool_types = {jit::SeqPoolType::kSum}; + int64_t tbl_h = 1e4; + for (int tbl_w : {10, 16, 256}) { + Tensor table; + table.Resize({tbl_h, tbl_w}); + RandomVec(tbl_h * tbl_w, table.mutable_data(PlaceType()), -2.f, 2.f); + const T* table_data = table.data(); + for (auto type : pool_types) { + for (int idx_w : {1, 2, 10, 16}) { + for (int idx_h : {1, 2, 10, 16}) { + int64_t out_w = tbl_w * idx_w; + jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, + type); + Tensor idx, out; + idx.Resize({idx_h, idx_w}); + out.Resize({out_w}); + RandomVec(idx_h * idx_w, + idx.mutable_data(PlaceType()), 0, + tbl_h - 1); + const int64_t* idx_data = idx.data(); + T* o_data = out.mutable_data(PlaceType()); + BenchAllImpls, PlaceType>( + attr, table_data, idx_data, o_data, &attr); + } + } + } + } +} + template void BenchMatMulKernel() { for (int m : {1, 2, 3, 4}) { @@ -376,6 +407,11 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel(); } // seq pool function BENCH_FP32_CPU(kSeqPool) { BenchSeqPoolKernel(); } +// embedding seq pool function +BENCH_FP32_CPU(kEmbSeqPool) { + BenchEmbSeqPoolKernel(); +} + // matmul BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel(); } diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index e7292fe2bd8..a7665361328 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -54,6 +54,7 @@ const char* to_string(KernelType kt) { ONE_CASE(kHMax); ONE_CASE(kHSum); ONE_CASE(kSoftmax); + ONE_CASE(kEmbSeqPool); default: PADDLE_THROW("Not support type: %d, or forget to add it.", kt); return "NOT JITKernel"; diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index d5773d65940..07998588a5a 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -172,6 +172,15 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) { return os; } +inline std::ostream& operator<<(std::ostream& os, + const emb_seq_pool_attr_t& attr) { + os << "table_height[" << attr.table_height << "],table_width[" + << attr.table_width << "],index_height[" << attr.index_height + << "],index_width[" << attr.index_width << "],output_width[" + << attr.out_width << "],pool_type[" << to_string(attr.pool_type) << "]"; + return os; +} + inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) { os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]"; return os; diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index 4a8f61146a1..20b6a32bef9 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -13,6 +13,7 @@ * limitations under the License. */ #pragma once +#include #include "paddle/fluid/operators/jit/macro.h" #include "paddle/fluid/platform/macros.h" @@ -20,34 +21,35 @@ namespace paddle { namespace operators { namespace jit { -// TODO(TJ): reorder by alphabet typedef enum { kNone = 0, - kVMul = 1, - kVAdd = 2, - kVAddRelu, - kVSub, - kVScal, - kVAddBias, - kVRelu, - kVIdentity, - kVSquare, - kVExp, - kVSigmoid, - kVTanh, - kLSTMCtHt, - kLSTMC1H1, + // sort by alphabet + kCRFDecoding = 1, + kEmbSeqPool = 2, kGRUH1, kGRUHtPart1, kGRUHtPart2, - kCRFDecoding, + kHSum, // horizontal max + kHMax, // horizontal sum + kLSTMCtHt, + kLSTMC1H1, kLayerNorm, + kMatMul, kNCHW16CMulNC, kSeqPool, - kMatMul, - kHSum, // horizontal max - kHMax, // horizontal sum kSoftmax, + kVAdd, + kVAddBias, + kVAddRelu, + kVExp, + kVIdentity, + kVMul, + kVRelu, + kVScal, + kVSigmoid, + kVSquare, + kVSub, + kVTanh, } KernelType; typedef enum { @@ -145,6 +147,32 @@ struct SeqPoolTuples { typedef void (*func_type)(const T*, T*, const seq_pool_attr_t*); }; +typedef struct emb_seq_pool_attr_s { + int64_t table_height, table_width; + int64_t index_height, index_width; + int64_t out_width; + SeqPoolType pool_type; + emb_seq_pool_attr_s() = default; + explicit emb_seq_pool_attr_s(int64_t tbl_height, int64_t tbl_width, + int64_t idx_height, int64_t idx_width, + int64_t output_width, + SeqPoolType seqpool_type = SeqPoolType::kSum) + : table_height(tbl_height), + table_width(tbl_width), + index_height(idx_height), + index_width(idx_width), + out_width(output_width), + pool_type(seqpool_type) {} +} emb_seq_pool_attr_t; + +template +struct EmbSeqPoolTuples { + typedef T data_type; + typedef emb_seq_pool_attr_t attr_type; + typedef void (*func_type)(const T*, const int64_t*, T*, + const emb_seq_pool_attr_t*); +}; + typedef struct matmul_attr_s { int m, n, k; void* packed_weight{nullptr}; diff --git a/paddle/fluid/operators/jit/kernel_key.cc b/paddle/fluid/operators/jit/kernel_key.cc index 1e4a8884e78..e659c6d2543 100644 --- a/paddle/fluid/operators/jit/kernel_key.cc +++ b/paddle/fluid/operators/jit/kernel_key.cc @@ -56,6 +56,11 @@ size_t JitCodeKey(const matmul_attr_t& attr) { return (key << shift * 2) + ((static_cast(attr.n)) << shift) + attr.k; } +template <> +size_t JitCodeKey(const emb_seq_pool_attr_t& attr) { + return attr.table_width; +} + } // namespace jit } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index 9f2935828ca..218d801c084 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -32,3 +32,4 @@ USE_JITKERNEL_REFER(kVSquare) USE_JITKERNEL_REFER(kHSum) USE_JITKERNEL_REFER(kHMax) USE_JITKERNEL_REFER(kSoftmax) +USE_JITKERNEL_REFER(kEmbSeqPool) diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index b8adb40ec7e..7e7dd6960b6 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -57,4 +57,6 @@ REGISTER_REFER_KERNEL(kHSum, HSum); REGISTER_REFER_KERNEL(kSoftmax, Softmax); +REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool); + #undef REGISTER_REFER_KERNEL diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 0c4a985f8e8..fd1193aa41e 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -16,6 +16,7 @@ #include #include +#include #include "paddle/fluid/operators/jit/helper.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/platform/enforce.h" @@ -414,6 +415,37 @@ void Softmax(const T* x, T* y, int n, int bs = 1) { } } +// embedding seq pool +// table is a matrix with (tbl_h, tbl_w) +// idx is a matrix with (idx_h, idx_w) +// output is a vector with length tbl_w * idx_w +template +void EmbSeqPool(const T* table, const int64_t* idx, T* out, + const emb_seq_pool_attr_t* attr) { + PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + + auto check_idx_value_valid = [&](int64_t i) { + PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", + idx[i], i); + PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + }; + + for (int64_t w = 0; w != attr->index_width; ++w) { + check_idx_value_valid(w); + std::memcpy(out + w * attr->table_width, table + idx[w] * attr->table_width, + attr->table_width * sizeof(T)); + } + + for (int64_t h = 1; h < attr->index_height; ++h) { + for (int64_t w = 0; w < attr->index_width; ++w) { + int64_t i = h * attr->index_width + w; + check_idx_value_valid(i); + VAdd(table + idx[i] * attr->table_width, out + w * attr->table_width, + out + w * attr->table_width, attr->table_width); + } + } +} + #define DECLARE_REFER_KERNEL(name, tuples) \ template \ class name##Kernel : public ReferKernel> { \ @@ -462,6 +494,8 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples); DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples); +DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples); + #undef DECLARE_REFER_KERNEL } // namespace refer diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 237e588d35c..c35b6aef232 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -270,6 +270,32 @@ struct TestFuncWithRefer, std::vector, std::vector, } }; +template +struct TestFuncWithRefer, std::vector, + std::vector, std::vector, + typename jit::EmbSeqPoolTuples::attr_type> { + void operator()(const typename jit::EmbSeqPoolTuples::func_type tgt, + const std::vector& table, const std::vector& idx, + const std::vector& oref, + const typename jit::EmbSeqPoolTuples::attr_type& attr) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(table.size(), + static_cast(attr.table_height * attr.table_width)); + EXPECT_EQ(idx.size(), + static_cast(attr.index_height * attr.index_width)); + EXPECT_EQ(oref.size(), + static_cast(attr.table_width * attr.index_width)); + const T* table_data = table.data(); + const int64_t* idx_data = idx.data(); + const T* oref_data = oref.data(); + int o_w = oref.size(); + std::vector out(o_w); + T* o_data = out.data(); + tgt(table_data, idx_data, o_data, &attr); + ExpectEQ(o_data, oref_data, o_w); + } +}; + template struct TestFuncWithRefer, std::vector, std::vector, std::vector, @@ -587,6 +613,40 @@ void TestSoftmaxKernel() { } } +template +void TestEmbSeqPoolKernel() { + VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); + int64_t tbl_h = 1e4; + std::vector pool_types = { + jit::SeqPoolType::kSum}; // only support sum yet + for (int tbl_w : TestSizes()) { + std::vector table(tbl_h * tbl_w); + RandomVec(tbl_h * tbl_w, table.data(), -2.f, 2.f); + const T* table_data = table.data(); + for (auto type : pool_types) { + for (int idx_w : {1, 2, 10, 16}) { + for (int idx_h : {1, 2, 10, 16}) { + auto ref = jit::GetRefer>(); + EXPECT_TRUE(ref != nullptr); + std::vector idx(idx_h * idx_w); + RandomVec(idx_h * idx_w, idx.data(), 0, tbl_h - 1); + int64_t out_w = tbl_w * idx_w; + std::vector oref(out_w); + const int64_t* idx_data = idx.data(); + T* o_data = oref.data(); + jit::emb_seq_pool_attr_t attr(tbl_h, tbl_w, idx_h, idx_w, out_w, + type); + ref(table_data, idx_data, o_data, &attr); + + TestAllImpls, PlaceType, std::vector, + std::vector, std::vector>(attr, table, idx, + oref, attr); + } + } + } + } +} + template void TestNCHW16CMulNCKernel() { VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); @@ -756,6 +816,11 @@ TEST(JITKernel, kSoftmax) { TestSoftmaxKernel(); } +TEST(JITKernel, kEmbSeqPool) { + TestEmbSeqPoolKernel(); + TestEmbSeqPoolKernel(); +} + TEST(JITKernel, kNCHW16CMulNC) { TestNCHW16CMulNCKernel(); TestNCHW16CMulNCKernel(); -- GitLab