Commit e9216e82 authored by tensor-tang

add refer vscal, vaddbias and test and benchmark

Parent a3703888
......@@ -37,10 +37,12 @@ PaddlePaddle/Paddle/paddle/fluid/
## Test
- Logic test
All implementations must be compared against the refer code and must meet the accuracy requirement
All implementations must be compared against the refer code and must meet the accuracy requirement, covering both the float and double data types (a comparison sketch follows this list)
- Performance test
Compare the performance of all implementations, including against the final `jit::Get` method; the function obtained from `jit::Get` must deliver the best performance.
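A minimal sketch of what the logic test amounts to, assuming gtest's `EXPECT_NEAR` and the `XYZNTuples` function signature used throughout this commit (the helper name `CheckAgainstRefer` and the tolerance are illustrative, not part of the patch):

```cpp
#include <vector>
#include "gtest/gtest.h"

// Run a candidate kernel and the refer kernel on the same inputs and
// require element-wise closeness; instantiate with T = float and double.
template <typename T, typename KernelTuples>
void CheckAgainstRefer(const typename KernelTuples::func_type tgt,
                       const typename KernelTuples::func_type ref,
                       const std::vector<T>& x, const std::vector<T>& y) {
  const int d = static_cast<int>(x.size());
  std::vector<T> ztgt(d), zref(d);
  tgt(x.data(), y.data(), ztgt.data(), d);  // candidate result
  ref(x.data(), y.data(), zref.data(), d);  // reference result
  for (int i = 0; i < d; ++i) {
    EXPECT_NEAR(ztgt[i], zref[i], static_cast<T>(1e-5));
  }
}
```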
# How to add a new operator
- Add `your_key` to `KernelType`
- Implement the Reference logic; a Reference implementation is mandatory for every jitkernel and must not depend on any third-party library. Also add `USE_JITKERNEL_REFER(your_key)` in `refer/CMakeLists.txt`
- Add `your_key` to `KernelType`.
- Implement the Reference logic; a Reference implementation is mandatory for every jitkernel and must not depend on any third-party library. Also add `USE_JITKERNEL_REFER(your_key)` in `refer/CMakeLists.txt`.
- Add a new `KernelTuples` when necessary; `XYZNTuples` can serve as a reference (these steps are sketched below).
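For a concrete picture, here is a condensed sketch of these steps as carried out by this very commit for `vaddbias`:

```cpp
// 1. kernel_base.h: add the new key to KernelType.
//      typedef enum { vmul = 0, vadd = 1, vaddrelu, vsub, vscal,
//                     vaddbias, vexp } KernelType;

// 2. refer/refer.h: the mandatory reference implementation,
//    a plain loop with no third-party dependencies.
template <typename T>
void VAddBias(const T* a, const T* x, T* y, int n) {
  for (int i = 0; i < n; ++i) {
    y[i] = a[0] + x[i];
  }
}

// 3. refer/refer.h and refer/refer.cc: declare and register the kernel.
//      DECLARE_REFER_KERNEL(VAddBias, AXYNTuples);
//      REGISTER_REFER_KERNEL(vaddbias, VAddBias);

// 4. refer/CMakeLists.txt: make the refer kernel usable by name.
//      USE_JITKERNEL_REFER(vaddbias)
```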
......@@ -53,9 +53,9 @@ std::vector<int> TestSizes() {
// return this function avg time
template <typename T, typename KernelTuples>
double BenchTartgetFunc(const typename KernelTuples::func_type tgt,
const std::vector<T>& x, const std::vector<T>& y,
std::vector<T>& z) { // NOLINT
double BenchXYZNFunc(const typename KernelTuples::func_type tgt,
const std::vector<T>& x, const std::vector<T>& y,
std::vector<T>& z) { // NOLINT
const T* x_data = x.data();
const T* y_data = y.data();
const int d = z.size();
......@@ -83,14 +83,14 @@ void BenchXYZNKernel() {
// refer
auto refer = jit::GetRefer<KT, jit::XYZNTuples<T>>();
if (refer) {
auto res = BenchTartgetFunc<T, jit::XYZNTuples<T>>(refer, x, y, z);
auto res = BenchXYZNFunc<T, jit::XYZNTuples<T>>(refer, x, y, z);
infos.push_back(std::make_pair("Refer", res));
}
// test jitcode
auto jitcode = jit::GetJitCode<KT, jit::XYZNTuples<T>, PlaceType>(d);
if (jitcode) {
auto res = BenchTartgetFunc<T, jit::XYZNTuples<T>>(jitcode, x, y, z);
auto res = BenchXYZNFunc<T, jit::XYZNTuples<T>>(jitcode, x, y, z);
infos.push_back(std::make_pair("JitCode", res));
}
......@@ -105,7 +105,7 @@ void BenchXYZNKernel() {
impl.get());
if (i && i->UseMe(d)) {
auto more = i->GetFunc();
auto res = BenchTartgetFunc<T, jit::XYZNTuples<T>>(more, x, y, z);
auto res = BenchXYZNFunc<T, jit::XYZNTuples<T>>(more, x, y, z);
infos.push_back(std::make_pair("More", res));
}
}
......@@ -116,7 +116,7 @@ void BenchXYZNKernel() {
if (!tgt) {
LOG(ERROR) << "Target can not be empty!";
}
auto res = BenchTartgetFunc<T, jit::XYZNTuples<T>>(tgt, x, y, z);
auto res = BenchXYZNFunc<T, jit::XYZNTuples<T>>(tgt, x, y, z);
infos.push_back(std::make_pair("Target", res));
// print
......@@ -129,6 +129,78 @@ void BenchXYZNKernel() {
}
}
// return this function avg time
template <typename T, typename KernelTuples>
double BenchAXYNFunc(const typename KernelTuples::func_type tgt, const T a,
const std::vector<T>& x,
std::vector<T>& y) { // NOLINT
const T* x_data = x.data();
T* y_data = y.data();
const int d = y.size();
for (int i = 0; i < FLAGS_burning; ++i) {
tgt(&a, x_data, y_data, d);
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeat; ++i) {
tgt(&a, x_data, y_data, d);
}
auto end = GetCurrentUS();
return (end - start) / FLAGS_repeat;
}
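// Note: BenchAXYNFunc above warms up for FLAGS_burning iterations before
// averaging FLAGS_repeat timed runs. GetCurrentUS() is not shown in this
// diff; a plausible sketch of such a microsecond wall-clock timer (an
// assumption, not the repository's actual implementation):
#include <sys/time.h>
inline double GetCurrentUS() {
  struct timeval time;
  gettimeofday(&time, nullptr);
  return 1e+6 * time.tv_sec + time.tv_usec;
}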
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void BenchAXYNKernel() {
namespace jit = paddle::operators::jit;
for (int d : TestSizes()) {
std::vector<std::pair<std::string, double>> infos;
const T a = static_cast<T>(3);
std::vector<T> x(d), y(d);
RandomVec<T>(d, x.data());
// test refer
auto refer = jit::GetRefer<KT, jit::AXYNTuples<T>>();
if (refer) {
auto res = BenchAXYNFunc<T, jit::AXYNTuples<T>>(refer, a, x, y);
infos.push_back(std::make_pair("Refer", res));
}
// test jitcode
auto jitcode = jit::GetJitCode<KT, jit::AXYNTuples<T>, PlaceType>(d);
if (jitcode) {
auto res = BenchAXYNFunc<T, jit::AXYNTuples<T>>(jitcode, a, x, y);
infos.push_back(std::make_pair("JitCode", res));
}
// test all impls in more
jit::KernelKey kkey(KT, PlaceType());
auto& pool = jit::KernelPool().Instance().AllKernels();
auto iter = pool.find(kkey);
if (iter != pool.end()) {
auto& impls = iter->second;
for (auto& impl : impls) {
auto i = dynamic_cast<const jit::KernelImpl<jit::AXYNTuples<T>>*>(
impl.get());
if (i && i->UseMe(d)) {
auto more = i->GetFunc();
auto res = BenchAXYNFunc<T, jit::AXYNTuples<T>>(more, a, x, y);
infos.push_back(std::make_pair("More", res));
}
}
}
// Test result from Get function
auto tgt = jit::Get<KT, jit::AXYNTuples<T>, PlaceType>(d);
if (!tgt) {
LOG(ERROR) << "Target can not be empty!";
}
auto res = BenchAXYNFunc<T, jit::AXYNTuples<T>>(tgt, a, x, y);
infos.push_back(std::make_pair("Target", res));
// print
std::ostringstream loginfos;
loginfos << "Kernel Type: " << jit::to_string(KT) << ", size " << d << ": ";
for (auto pair : infos) {
loginfos << pair.first << " takes " << pair.second << " us; ";
}
LOG(INFO) << loginfos.str();
}
}
// Benchmark all jit kernels including jitcode, mkl and refer.
// To use this tool, run command: ./benchmark [options...]
// Options:
......@@ -147,4 +219,7 @@ int main(int argc, char* argv[]) {
BenchXYZNKernel<jit::vadd, T, PlaceType>();
BenchXYZNKernel<jit::vaddrelu, T, PlaceType>();
BenchXYZNKernel<jit::vsub, T, PlaceType>();
BenchAXYNKernel<jit::vscal, T, PlaceType>();
BenchAXYNKernel<jit::vaddbias, T, PlaceType>();
}
......@@ -13,6 +13,7 @@
* limitations under the License. */
#include "paddle/fluid/operators/jit/helper.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
......@@ -32,7 +33,10 @@ const char* to_string(KernelType kt) {
return "vscal";
case vexp:
return "vexp";
case vaddbias:
return "vaddbias";
default:
PADDLE_THROW("Not support type: %d", kt);
return "NOT JITKernel";
}
return nullptr;
......
......@@ -19,7 +19,15 @@ namespace paddle {
namespace operators {
namespace jit {
typedef enum { vmul = 0, vadd = 1, vaddrelu, vsub, vscal, vexp } KernelType;
typedef enum {
vmul = 0,
vadd = 1,
vaddrelu,
vsub,
vscal,
vaddbias,
vexp
} KernelType;
template <typename T>
struct XYZNTuples {
......@@ -28,6 +36,9 @@ struct XYZNTuples {
typedef void (*func_type)(const T*, const T*, T*, int);
};
template <typename T>
struct AXYNTuples : public XYZNTuples<T> {};
// Just for adding to kernel pool without template
class Kernel {
public:
......
......@@ -8,3 +8,8 @@ endfunction()
# use refer kernel by name
USE_JITKERNEL_REFER(vmul)
USE_JITKERNEL_REFER(vadd)
USE_JITKERNEL_REFER(vaddrelu)
USE_JITKERNEL_REFER(vsub)
USE_JITKERNEL_REFER(vscal)
USE_JITKERNEL_REFER(vaddbias)
......@@ -26,4 +26,7 @@ REGISTER_REFER_KERNEL(vadd, VAdd);
REGISTER_REFER_KERNEL(vaddrelu, VAddRelu);
REGISTER_REFER_KERNEL(vsub, VSub);
REGISTER_REFER_KERNEL(vscal, VScal);
REGISTER_REFER_KERNEL(vaddbias, VAddBias);
#undef REGISTER_REFER_KERNEL
......@@ -59,6 +59,13 @@ void VScal(const T* a, const T* x, T* y, int n) {
}
}
template <typename T>
void VAddBias(const T* a, const T* x, T* y, int n) {
for (int i = 0; i < n; ++i) {
y[i] = a[0] + x[i];
}
}
#define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \
......@@ -66,11 +73,16 @@ void VScal(const T* a, const T* x, T* y, int n) {
name##Kernel() { this->func = name<T>; } \
}
// const T* x, const T* y, T* z, int n
DECLARE_REFER_KERNEL(VMul, XYZNTuples);
DECLARE_REFER_KERNEL(VAdd, XYZNTuples);
DECLARE_REFER_KERNEL(VAddRelu, XYZNTuples);
DECLARE_REFER_KERNEL(VSub, XYZNTuples);
// const T* a, const T* x, T* y, int n
DECLARE_REFER_KERNEL(VScal, AXYNTuples);
DECLARE_REFER_KERNEL(VAddBias, AXYNTuples);
#undef DECLARE_REFER_KERNEL
} // namespace refer
......
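// A tiny usage sketch of the refer VAddBias added above (the values and the
// include path are illustrative assumptions; the namespace follows this file):
#include <vector>
#include "paddle/fluid/operators/jit/refer/refer.h"

int main() {
  const float a = 2.f;
  std::vector<float> x = {1.f, -1.f, 0.5f};
  std::vector<float> y(x.size());
  // Computes y[i] = a + x[i] for each element -> {3.f, 1.f, 2.5f}.
  paddle::operators::jit::refer::VAddBias<float>(
      &a, x.data(), y.data(), static_cast<int>(x.size()));
  return 0;
}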
......@@ -12,7 +12,6 @@
* See the License for the specific language governing permissions and
* limitations under the License. */
#include <cstring> // for memcpy
#include <random>
#include <string>
#include <vector>
......@@ -59,9 +58,9 @@ std::vector<int> TestSizes() {
}
template <typename T, typename KernelTuples>
void TestTartgetFunc(const typename KernelTuples::func_type tgt,
const std::vector<T>& x, const std::vector<T>& y,
const std::vector<T>& zref) {
void TestXYZNFunc(const typename KernelTuples::func_type tgt,
const std::vector<T>& x, const std::vector<T>& y,
const std::vector<T>& zref) {
EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(zref.size(), x.size());
EXPECT_EQ(zref.size(), y.size());
......@@ -88,9 +87,8 @@ void TestTartgetFunc(const typename KernelTuples::func_type tgt,
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestXYZNKernel() {
namespace jit = paddle::operators::jit;
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT)
<< ", size: " << d;
auto ref = jit::GetRefer<KT, jit::XYZNTuples<T>>();
EXPECT_TRUE(ref != nullptr);
......@@ -119,7 +117,7 @@ void TestXYZNKernel() {
auto jitcode = jit::GetJitCode<KT, jit::XYZNTuples<T>, PlaceType>(d);
if (jitcode) {
VLOG(10) << "Test Jitcode Kernel, size: " << d;
TestTartgetFunc<T, jit::XYZNTuples<T>>(jitcode, x, y, zref);
TestXYZNFunc<T, jit::XYZNTuples<T>>(jitcode, x, y, zref);
}
// test all impls in more
......@@ -134,14 +132,14 @@ void TestXYZNKernel() {
if (i && i->UseMe(d)) {
auto more = i->GetFunc();
VLOG(10) << "Test More Kernel, size: " << d;
TestTartgetFunc<T, jit::XYZNTuples<T>>(more, x, y, zref);
TestXYZNFunc<T, jit::XYZNTuples<T>>(more, x, y, zref);
}
}
}
// Test result from Get function
VLOG(10) << "Test Get function, size: " << d;
auto tgt = jit::Get<KT, jit::XYZNTuples<T>, PlaceType>(d);
TestTartgetFunc<T, jit::XYZNTuples<T>>(tgt, x, y, zref);
TestXYZNFunc<T, jit::XYZNTuples<T>>(tgt, x, y, zref);
}
}
......@@ -169,4 +167,89 @@ TEST(JITKernel, vsub) {
TestXYZNKernel<jit::vsub, double, paddle::platform::CPUPlace>();
}
TEST(JITKernel, pool) {}
template <typename T, typename KernelTuples>
void TestAXYNFunc(const typename KernelTuples::func_type tgt, const T a,
const std::vector<T>& x, const std::vector<T>& yref) {
EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(yref.size(), x.size());
const T* x_data = x.data();
const T* yref_data = yref.data();
const int d = yref.size();
std::vector<T> ytgt(d);
T* ytgt_data = ytgt.data();
// test normal
tgt(&a, x_data, ytgt_data, d);
ExpectEQ<T>(ytgt_data, yref_data, d);
// test inplace x
std::copy(x.begin(), x.end(), ytgt.begin());
tgt(&a, ytgt_data, ytgt_data, d);
ExpectEQ<T>(ytgt_data, yref_data, d);
}
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestAXYNKernel() {
namespace jit = paddle::operators::jit;
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::AXYNTuples<T>>();
EXPECT_TRUE(ref != nullptr);
const T a = static_cast<T>(3);
std::vector<T> x(d), yref(d);
std::vector<T> xinp(d); // inplace test
RandomVec<T>(d, x.data());
std::copy(x.begin(), x.end(), xinp.begin());
const T* x_data = x.data();
T* yref_data = yref.data();
T* xinp_data = xinp.data();
// test refer code inplace
ref(&a, x_data, yref_data, d);
ref(&a, xinp_data, xinp_data, d);
ExpectEQ<T>(xinp_data, yref_data, d);
// test jitcode
auto jitcode = jit::GetJitCode<KT, jit::AXYNTuples<T>, PlaceType>(d);
if (jitcode) {
VLOG(10) << "Test Jitcode Kernel, size: " << d;
TestAXYNFunc<T, jit::AXYNTuples<T>>(jitcode, a, x, yref);
}
// test all impls in more
jit::KernelKey kkey(KT, PlaceType());
auto& pool = jit::KernelPool().Instance().AllKernels();
auto iter = pool.find(kkey);
if (iter != pool.end()) {
auto& impls = iter->second;
for (auto& impl : impls) {
auto i = dynamic_cast<const jit::KernelImpl<jit::AXYNTuples<T>>*>(
impl.get());
if (i && i->UseMe(d)) {
auto more = i->GetFunc();
VLOG(10) << "Test More Kernel, size: " << d;
TestAXYNFunc<T, jit::AXYNTuples<T>>(more, a, x, yref);
}
}
}
// Test result from Get function
VLOG(10) << "Test Get function, size: " << d;
auto tgt = jit::Get<KT, jit::AXYNTuples<T>, PlaceType>(d);
TestAXYNFunc<T, jit::AXYNTuples<T>>(tgt, a, x, yref);
}
}
TEST(JITKernel, vscal) {
namespace jit = paddle::operators::jit;
TestAXYNKernel<jit::vscal, float, paddle::platform::CPUPlace>();
TestAXYNKernel<jit::vscal, double, paddle::platform::CPUPlace>();
}
TEST(JITKernel, vaddbias) {
namespace jit = paddle::operators::jit;
TestAXYNKernel<jit::vaddbias, float, paddle::platform::CPUPlace>();
TestAXYNKernel<jit::vaddbias, double, paddle::platform::CPUPlace>();
}
TEST(JITKernel, pool) {
// TODO(TJ): add some test
}
......@@ -24,13 +24,6 @@ namespace math {
namespace jitkernel {
namespace refer {
template <typename T>
void VAddBias(const T* a, const T* x, T* y, int n) {
for (int i = 0; i < n; ++i) {
y[i] = a[0] + x[i];
}
}
template <typename T>
void VRelu(const T* x, T* y, int n) {
for (int i = 0; i < n; ++i) {
......