提交 564833a5 编写于 作者: L Liangliang He

Merge branch 'refactor_ops_code_style' into 'master'

Refactor ops code style

See merge request !311
...@@ -23,7 +23,7 @@ static void ReluBenchmark( ...@@ -23,7 +23,7 @@ static void ReluBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluBM") OpDefBuilder("Activation", "ReluBM")
...@@ -83,7 +83,7 @@ static void ReluxBenchmark( ...@@ -83,7 +83,7 @@ static void ReluxBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluxBM") OpDefBuilder("Activation", "ReluxBM")
...@@ -146,9 +146,9 @@ static void PreluBenchmark( ...@@ -146,9 +146,9 @@ static void PreluBenchmark(
net.AddRandomInput<D, float>("Alpha", {channels}); net.AddRandomInput<D, float>("Alpha", {channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Alpha", "AlphaImage", BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Activation", "PreluBM") OpDefBuilder("Activation", "PreluBM")
...@@ -210,7 +210,7 @@ static void TanhBenchmark( ...@@ -210,7 +210,7 @@ static void TanhBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "TanhBM") OpDefBuilder("Activation", "TanhBM")
...@@ -270,7 +270,7 @@ static void SigmoidBenchmark( ...@@ -270,7 +270,7 @@ static void SigmoidBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "SigmoidBM") OpDefBuilder("Activation", "SigmoidBM")
......
...@@ -21,7 +21,7 @@ void TestSimpleRelu() { ...@@ -21,7 +21,7 @@ void TestSimpleRelu() {
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
...@@ -34,7 +34,7 @@ void TestSimpleRelu() { ...@@ -34,7 +34,7 @@ void TestSimpleRelu() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
...@@ -67,7 +67,7 @@ void TestUnalignedSimpleRelu() { ...@@ -67,7 +67,7 @@ void TestUnalignedSimpleRelu() {
net.AddInputFromArray<D, float>("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5}); net.AddInputFromArray<D, float>("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
...@@ -80,7 +80,7 @@ void TestUnalignedSimpleRelu() { ...@@ -80,7 +80,7 @@ void TestUnalignedSimpleRelu() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluTest") OpDefBuilder("Activation", "ReluTest")
...@@ -116,7 +116,7 @@ void TestSimpleRelux() { ...@@ -116,7 +116,7 @@ void TestSimpleRelux() {
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
...@@ -130,7 +130,7 @@ void TestSimpleRelux() { ...@@ -130,7 +130,7 @@ void TestSimpleRelux() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
...@@ -166,7 +166,7 @@ void TestSimpleReluRelux() { ...@@ -166,7 +166,7 @@ void TestSimpleReluRelux() {
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
...@@ -180,7 +180,7 @@ void TestSimpleReluRelux() { ...@@ -180,7 +180,7 @@ void TestSimpleReluRelux() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "ReluxTest") OpDefBuilder("Activation", "ReluxTest")
...@@ -219,9 +219,9 @@ void TestSimplePrelu() { ...@@ -219,9 +219,9 @@ void TestSimplePrelu() {
net.AddInputFromArray<D, float>("Alpha", {2}, {2.0, 3.0}); net.AddInputFromArray<D, float>("Alpha", {2}, {2.0, 3.0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Alpha", "AlphaImage", BufferToImage<D, float>(&net, "Alpha", "AlphaImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Activation", "PreluTest") OpDefBuilder("Activation", "PreluTest")
...@@ -235,7 +235,7 @@ void TestSimplePrelu() { ...@@ -235,7 +235,7 @@ void TestSimplePrelu() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "PreluTest") OpDefBuilder("Activation", "PreluTest")
...@@ -272,7 +272,7 @@ void TestSimpleTanh() { ...@@ -272,7 +272,7 @@ void TestSimpleTanh() {
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "TanhTest") OpDefBuilder("Activation", "TanhTest")
...@@ -285,7 +285,7 @@ void TestSimpleTanh() { ...@@ -285,7 +285,7 @@ void TestSimpleTanh() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "TanhTest") OpDefBuilder("Activation", "TanhTest")
...@@ -323,7 +323,7 @@ void TestSimpleSigmoid() { ...@@ -323,7 +323,7 @@ void TestSimpleSigmoid() {
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Activation", "SigmoidTest") OpDefBuilder("Activation", "SigmoidTest")
...@@ -336,7 +336,7 @@ void TestSimpleSigmoid() { ...@@ -336,7 +336,7 @@ void TestSimpleSigmoid() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Activation", "SigmoidTest") OpDefBuilder("Activation", "SigmoidTest")
......
...@@ -24,7 +24,7 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { ...@@ -24,7 +24,7 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
for (int i = 0; i < inputs; ++i) { for (int i = 0; i < inputs; ++i) {
BufferToImage<D, T>(net, MakeString("Input", i).c_str(), BufferToImage<D, T>(&net, MakeString("Input", i).c_str(),
MakeString("InputImage", i).c_str(), MakeString("InputImage", i).c_str(),
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} }
......
...@@ -67,7 +67,7 @@ void RandomTest() { ...@@ -67,7 +67,7 @@ void RandomTest() {
static unsigned int seed = time(NULL); static unsigned int seed = time(NULL);
for (int round = 0; round < 10; ++round) { for (int round = 0; round < 10; ++round) {
// generate random input // generate random input
index_t n = 1 + (rand_r(&seed) % 5); index_t n = 1 + (rand_r(&seed) % 5);
index_t h = 1 + (rand_r(&seed) % 100); index_t h = 1 + (rand_r(&seed) % 100);
index_t w = 1 + (rand_r(&seed) % 100); index_t w = 1 + (rand_r(&seed) % 100);
...@@ -94,7 +94,7 @@ void RandomTest() { ...@@ -94,7 +94,7 @@ void RandomTest() {
// run on gpu // run on gpu
for (int i = 0; i < input_num; ++i) { for (int i = 0; i < input_num; ++i) {
BufferToImage<D, half>(net, MakeString("Input", i), BufferToImage<D, half>(&net, MakeString("Input", i),
MakeString("InputImage", i), MakeString("InputImage", i),
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} }
...@@ -110,7 +110,7 @@ void RandomTest() { ...@@ -110,7 +110,7 @@ void RandomTest() {
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.1); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.1);
......
...@@ -26,15 +26,15 @@ static void BatchNorm( ...@@ -26,15 +26,15 @@ static void BatchNorm(
net.AddRandomInput<D, T>("Var", {channels}, true); net.AddRandomInput<D, T>("Var", {channels}, true);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Scale", "ScaleImage", BufferToImage<D, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", BufferToImage<D, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage", BufferToImage<D, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage", BufferToImage<D, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormBM") OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("InputImage") .Input("InputImage")
......
...@@ -24,15 +24,15 @@ void Simple() { ...@@ -24,15 +24,15 @@ void Simple() {
net.AddInputFromArray<D, float>("Var", {1}, {11.67f}); net.AddInputFromArray<D, float>("Var", {1}, {11.67f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Scale", "ScaleImage", BufferToImage<D, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", BufferToImage<D, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Mean", "MeanImage", BufferToImage<D, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Var", "VarImage", BufferToImage<D, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -48,7 +48,7 @@ void Simple() { ...@@ -48,7 +48,7 @@ void Simple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -112,15 +112,15 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -112,15 +112,15 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -142,7 +142,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -142,7 +142,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -183,15 +183,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -183,15 +183,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -214,7 +214,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -214,7 +214,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
} }
...@@ -255,15 +255,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -255,15 +255,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Mean", "MeanImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Var", "VarImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -285,7 +285,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -285,7 +285,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -326,15 +326,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -326,15 +326,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Mean", "MeanImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Mean", "MeanImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Var", "VarImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Var", "VarImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BatchNorm", "BatchNormTest") OpDefBuilder("BatchNorm", "BatchNormTest")
...@@ -357,7 +357,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -357,7 +357,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
} }
......
...@@ -17,7 +17,7 @@ static void BMBatchToSpace( ...@@ -17,7 +17,7 @@ static void BMBatchToSpace(
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage") .Input("InputImage")
......
...@@ -22,9 +22,9 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) { ...@@ -22,9 +22,9 @@ static void BiasAdd(int iters, int batch, int channels, int height, int width) {
net.AddRandomInput<D, T>("Bias", {channels}, true); net.AddRandomInput<D, T>("Bias", {channels}, true);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddBM") OpDefBuilder("BiasAdd", "BiasAddBM")
.Input("InputImage") .Input("InputImage")
......
...@@ -21,9 +21,9 @@ void BiasAddSimple() { ...@@ -21,9 +21,9 @@ void BiasAddSimple() {
net.AddInputFromArray<D, float>("Bias", {1}, {0.5f}); net.AddInputFromArray<D, float>("Bias", {1}, {0.5f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
...@@ -35,7 +35,7 @@ void BiasAddSimple() { ...@@ -35,7 +35,7 @@ void BiasAddSimple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
...@@ -90,9 +90,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -90,9 +90,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
...@@ -105,7 +105,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { ...@@ -105,7 +105,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -139,9 +139,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -139,9 +139,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("BiasAdd", "BiasAddTest") OpDefBuilder("BiasAdd", "BiasAddTest")
...@@ -154,7 +154,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { ...@@ -154,7 +154,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
......
...@@ -21,7 +21,7 @@ static void ChannelShuffle( ...@@ -21,7 +21,7 @@ static void ChannelShuffle(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
......
...@@ -44,7 +44,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { ...@@ -44,7 +44,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
"Input", {1, 1, 2, 16}, "Input", {1, 1, 2, 16},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31});
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
...@@ -57,7 +57,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { ...@@ -57,7 +57,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
// Transfer output // Transfer output
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
// Check // Check
......
...@@ -64,9 +64,9 @@ static void OpenclConcatHelper(int iters, ...@@ -64,9 +64,9 @@ static void OpenclConcatHelper(int iters,
net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0); net.AddRandomInput<DeviceType::OPENCL, float>("Input0", shape0);
net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1); net.AddRandomInput<DeviceType::OPENCL, float>("Input1", shape1);
BufferToImage<DeviceType::OPENCL, T>(net, "Input0", "InputImage0", BufferToImage<DeviceType::OPENCL, T>(&net, "Input0", "InputImage0",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(net, "Input1", "InputImage1", BufferToImage<DeviceType::OPENCL, T>(&net, "Input1", "InputImage1",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Concat", "ConcatBM") OpDefBuilder("Concat", "ConcatBM")
.Input("InputImage0") .Input("InputImage0")
......
...@@ -27,9 +27,9 @@ TEST_F(ConcatOpTest, CPUSimpleHorizon) { ...@@ -27,9 +27,9 @@ TEST_F(ConcatOpTest, CPUSimpleHorizon) {
std::vector<index_t> input_shape = {4, 4}; std::vector<index_t> input_shape = {4, 4};
std::vector<float> input0; std::vector<float> input0;
GenerateRandomRealTypeData(input_shape, input0); GenerateRandomRealTypeData(input_shape, &input0);
std::vector<float> input1; std::vector<float> input1;
GenerateRandomRealTypeData(input_shape, input1); GenerateRandomRealTypeData(input_shape, &input1);
// Add inputs // Add inputs
net.AddInputFromArray<DeviceType::CPU, float>("Input0", input_shape, input0); net.AddInputFromArray<DeviceType::CPU, float>("Input0", input_shape, input0);
net.AddInputFromArray<DeviceType::CPU, float>("Input1", input_shape, input1); net.AddInputFromArray<DeviceType::CPU, float>("Input1", input_shape, input1);
...@@ -64,9 +64,9 @@ TEST_F(ConcatOpTest, CPUSimpleVertical) { ...@@ -64,9 +64,9 @@ TEST_F(ConcatOpTest, CPUSimpleVertical) {
std::vector<index_t> input_shape = {4, 4}; std::vector<index_t> input_shape = {4, 4};
std::vector<float> input0; std::vector<float> input0;
GenerateRandomRealTypeData(input_shape, input0); GenerateRandomRealTypeData(input_shape, &input0);
std::vector<float> input1; std::vector<float> input1;
GenerateRandomRealTypeData(input_shape, input1); GenerateRandomRealTypeData(input_shape, &input1);
// Add inputs // Add inputs
net.AddInputFromArray<DeviceType::CPU, float>("Input0", input_shape, input0); net.AddInputFromArray<DeviceType::CPU, float>("Input0", input_shape, input0);
net.AddInputFromArray<DeviceType::CPU, float>("Input1", input_shape, input1); net.AddInputFromArray<DeviceType::CPU, float>("Input1", input_shape, input1);
...@@ -107,7 +107,7 @@ TEST_F(ConcatOpTest, CPURandom) { ...@@ -107,7 +107,7 @@ TEST_F(ConcatOpTest, CPURandom) {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
std::vector<index_t> shape_data; std::vector<index_t> shape_data;
GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim); GenerateRandomIntTypeData<index_t>({dim}, &shape_data, 1, dim);
std::vector<std::vector<index_t>> input_shapes(num_inputs, shape_data); std::vector<std::vector<index_t>> input_shapes(num_inputs, shape_data);
std::vector<std::vector<float>> inputs(num_inputs, std::vector<float>()); std::vector<std::vector<float>> inputs(num_inputs, std::vector<float>());
std::vector<float *> input_ptrs(num_inputs, nullptr); std::vector<float *> input_ptrs(num_inputs, nullptr);
...@@ -115,7 +115,7 @@ TEST_F(ConcatOpTest, CPURandom) { ...@@ -115,7 +115,7 @@ TEST_F(ConcatOpTest, CPURandom) {
for (int i = 0; i < num_inputs; ++i) { for (int i = 0; i < num_inputs; ++i) {
input_shapes[i][axis] = 1 + rand_r(&seed) % dim; input_shapes[i][axis] = 1 + rand_r(&seed) % dim;
concat_axis_size += input_shapes[i][axis]; concat_axis_size += input_shapes[i][axis];
GenerateRandomRealTypeData(input_shapes[i], inputs[i]); GenerateRandomRealTypeData(input_shapes[i], &inputs[i]);
input_ptrs[i] = inputs[i].data(); input_ptrs[i] = inputs[i].data();
net.AddInputFromArray<DeviceType::CPU, float>(MakeString("Input", i), net.AddInputFromArray<DeviceType::CPU, float>(MakeString("Input", i),
input_shapes[i], inputs[i]); input_shapes[i], inputs[i]);
...@@ -157,7 +157,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -157,7 +157,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const std::string image_name = MakeString("InputImage", i); const std::string image_name = MakeString("InputImage", i);
concat_axis_size += shapes[i][axis]; concat_axis_size += shapes[i][axis];
net.AddRandomInput<DeviceType::OPENCL, float>(input_name, shapes[i]); net.AddRandomInput<DeviceType::OPENCL, float>(input_name, shapes[i]);
BufferToImage<DeviceType::OPENCL, T>(net, input_name, image_name, BufferToImage<DeviceType::OPENCL, T>(&net, input_name, image_name,
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} }
...@@ -174,7 +174,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes, ...@@ -174,7 +174,7 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
// Run // Run
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "Output", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
// Check // Check
......
...@@ -36,11 +36,11 @@ static void Conv2d(int iters, ...@@ -36,11 +36,11 @@ static void Conv2d(int iters,
net.AddRandomInput<D, float>("Bias", {output_channels}); net.AddRandomInput<D, float>("Bias", {output_channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -82,30 +82,32 @@ static void Conv2d(int iters, ...@@ -82,30 +82,32 @@ static void Conv2d(int iters,
// approximate the amortized latency. The OpenCL runtime for Mali/Adreno is // approximate the amortized latency. The OpenCL runtime for Mali/Adreno is
// in-order. // in-order.
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, DILATION, P, OC, TYPE, \ #define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, DILATION, P, OC, TYPE, \
DEVICE) \ DEVICE) \
static void \ static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE##_##DEVICE( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION\
int iters) { \ ##_##P##_##OC##_##TYPE##_##DEVICE( \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ int iters) { \
int64_t pad_h = 0, pad_w = 0; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
if (P == SAME) { \ int64_t pad_h = 0, pad_w = 0; \
pad_h = KH / 2; \ if (P == SAME) { \
pad_w = KW / 2; \ pad_h = KH / 2; \
} \ pad_w = KW / 2; \
int64_t oh = \ } \
(H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ int64_t oh = \
int64_t ow = \ (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \
(W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ int64_t ow = \
const int64_t macc = \ (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \
static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \ const int64_t macc = \
mace::testing::MaccProcessed(macc); \ static_cast<int64_t>(iters) * N * OC * oh * ow * (KH * KW * C + 1); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::MaccProcessed(macc); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, DILATION, \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::Padding::P, OC); \ Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, DILATION, \
} \ mace::Padding::P, OC); \
BENCHMARK( \ } \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE##_##DEVICE) BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION\
##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ #define BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \ BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
......
...@@ -28,11 +28,11 @@ void TestNHWCSimple3x3VALID() { ...@@ -28,11 +28,11 @@ void TestNHWCSimple3x3VALID() {
net.AddInputFromArray<D, T>("Bias", {1}, {0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -48,7 +48,7 @@ void TestNHWCSimple3x3VALID() { ...@@ -48,7 +48,7 @@ void TestNHWCSimple3x3VALID() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
...@@ -85,11 +85,11 @@ void TestNHWCSimple3x3SAME() { ...@@ -85,11 +85,11 @@ void TestNHWCSimple3x3SAME() {
net.AddInputFromArray<D, T>("Bias", {1}, {0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -105,7 +105,7 @@ void TestNHWCSimple3x3SAME() { ...@@ -105,7 +105,7 @@ void TestNHWCSimple3x3SAME() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
...@@ -154,9 +154,9 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -154,9 +154,9 @@ void TestNHWCSimple3x3WithoutBias() {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -171,7 +171,7 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -171,7 +171,7 @@ void TestNHWCSimple3x3WithoutBias() {
// Run // Run
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -220,11 +220,11 @@ static void TestNHWCCombined3x3() { ...@@ -220,11 +220,11 @@ static void TestNHWCCombined3x3() {
net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f}); net.AddInputFromArray<D, T>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
...@@ -240,7 +240,7 @@ static void TestNHWCCombined3x3() { ...@@ -240,7 +240,7 @@ static void TestNHWCCombined3x3() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
...@@ -293,11 +293,11 @@ void TestConv1x1() { ...@@ -293,11 +293,11 @@ void TestConv1x1() {
net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}); net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Filter", "FilterImage", BufferToImage<D, float>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
...@@ -312,7 +312,7 @@ void TestConv1x1() { ...@@ -312,7 +312,7 @@ void TestConv1x1() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Conv2D", "Conv2DTest") OpDefBuilder("Conv2D", "Conv2DTest")
...@@ -384,11 +384,11 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape, ...@@ -384,11 +384,11 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -404,7 +404,7 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape, ...@@ -404,7 +404,7 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -460,13 +460,13 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -460,13 +460,13 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
std::vector<float> float_input_data; std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, GenerateRandomRealTypeData({batch, height, width, input_channels},
float_input_data); &float_input_data);
std::vector<float> float_filter_data; std::vector<float> float_filter_data;
GenerateRandomRealTypeData( GenerateRandomRealTypeData(
{kernel_h, kernel_w, output_channels, input_channels}, {kernel_h, kernel_w, output_channels, input_channels},
float_filter_data); &float_filter_data);
std::vector<float> float_bias_data; std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, float_bias_data); GenerateRandomRealTypeData({output_channels}, &float_bias_data);
// Add input data // Add input data
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Input", {batch, height, width, input_channels}, float_input_data); "Input", {batch, height, width, input_channels}, float_input_data);
...@@ -482,11 +482,11 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -482,11 +482,11 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", BufferToImage<D, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(net, "Filter", "FilterImage", BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage", BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -502,7 +502,7 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -502,7 +502,7 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
...@@ -606,11 +606,11 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape, ...@@ -606,11 +606,11 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -626,7 +626,7 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape, ...@@ -626,7 +626,7 @@ static void TestDilationConvNxN(const std::vector<index_t> &shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -689,11 +689,11 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, ...@@ -689,11 +689,11 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
...@@ -708,7 +708,7 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape, ...@@ -708,7 +708,7 @@ static void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
......
...@@ -35,11 +35,11 @@ static void DepthwiseConv2d(int iters, ...@@ -35,11 +35,11 @@ static void DepthwiseConv2d(int iters,
net.AddRandomInput<D, float>("Bias", {input_channels * multiplier}); net.AddRandomInput<D, float>("Bias", {input_channels * multiplier});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); kernels::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2dTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -77,31 +77,33 @@ static void DepthwiseConv2d(int iters, ...@@ -77,31 +77,33 @@ static void DepthwiseConv2d(int iters,
} }
} }
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \ #define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \
DEVICE) \ DEVICE) \
static void \ static void \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE( \ BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_\
int iters) { \ ##P##_##M##_##TYPE##_##DEVICE( \
const int64_t dilation = 1; \ int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t dilation = 1; \
int64_t pad_h = 0, pad_w = 0; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
if (P == SAME) { \ int64_t pad_h = 0, pad_w = 0; \
pad_h = KH / 2; \ if (P == SAME) { \
pad_w = KW / 2; \ pad_h = KH / 2; \
} \ pad_w = KW / 2; \
int64_t oh = \ } \
(H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \ int64_t oh = \
int64_t ow = \ (H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \
(W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \ int64_t ow = \
const int64_t macc = \ (W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \
static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \ const int64_t macc = \
mace::testing::MaccProcessed(macc); \ static_cast<int64_t>(iters) * N * C * M * oh * ow * (KH * KW + 1); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::MaccProcessed(macc); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::Padding::P, M); \ DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
} \ mace::Padding::P, M); \
BENCHMARK( \ } \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE) BENCHMARK( \
BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_\
##P##_##M##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ #define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \ BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
......
...@@ -25,11 +25,11 @@ void SimpleValidTest() { ...@@ -25,11 +25,11 @@ void SimpleValidTest() {
"Filter", {2, 2, 2, 1}, {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f}); "Filter", {2, 2, 2, 1}, {1.0f, 2.0f, 2.0f, 4.0f, 3.0f, 6.0f, 4.0f, 8.0f});
net.AddInputFromArray<D, float>("Bias", {2}, {.1f, .2f}); net.AddInputFromArray<D, float>("Bias", {2}, {.1f, .2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); kernels::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -45,7 +45,7 @@ void SimpleValidTest() { ...@@ -45,7 +45,7 @@ void SimpleValidTest() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
...@@ -129,11 +129,11 @@ void ComplexValidTest() { ...@@ -129,11 +129,11 @@ void ComplexValidTest() {
net.AddInputFromArray<D, float>("Bias", {6}, net.AddInputFromArray<D, float>("Bias", {6},
{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); kernels::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -149,7 +149,7 @@ void ComplexValidTest() { ...@@ -149,7 +149,7 @@ void ComplexValidTest() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
...@@ -239,11 +239,11 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -239,11 +239,11 @@ void TestNxNS12(const index_t height, const index_t width) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::DW_CONV2D_FILTER); kernels::BufferType::DW_CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
.Input("InputImage") .Input("InputImage")
...@@ -259,7 +259,7 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -259,7 +259,7 @@ void TestNxNS12(const index_t height, const index_t width) {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "DeviceOutput", ImageToBuffer<D, float>(&net, "OutputImage", "DeviceOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest")
......
...@@ -24,9 +24,9 @@ static void EltwiseBenchmark( ...@@ -24,9 +24,9 @@ static void EltwiseBenchmark(
net.AddRandomInput<D, T>("Input1", {n, h, w, c}); net.AddRandomInput<D, T>("Input1", {n, h, w, c});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, half>(net, "Input0", "InputImg0", BufferToImage<D, half>(&net, "Input0", "InputImg0",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(net, "Input1", "InputImg1", BufferToImage<D, half>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest") OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg0") .Input("InputImg0")
......
...@@ -38,9 +38,9 @@ void Simple(const kernels::EltwiseType type, ...@@ -38,9 +38,9 @@ void Simple(const kernels::EltwiseType type,
// Run // Run
net.RunOp(D); net.RunOp(D);
} else { } else {
BufferToImage<D, half>(net, "Input1", "InputImg1", BufferToImage<D, half>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(net, "Input2", "InputImg2", BufferToImage<D, half>(&net, "Input2", "InputImg2",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest") OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg1") .Input("InputImg1")
...@@ -53,7 +53,7 @@ void Simple(const kernels::EltwiseType type, ...@@ -53,7 +53,7 @@ void Simple(const kernels::EltwiseType type,
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImg", "Output", ImageToBuffer<D, float>(&net, "OutputImg", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} }
...@@ -122,9 +122,9 @@ void RandomTest(const kernels::EltwiseType type, ...@@ -122,9 +122,9 @@ void RandomTest(const kernels::EltwiseType type,
// Run // Run
net.RunOp(); net.RunOp();
BufferToImage<D, T>(net, "Input1", "InputImg1", BufferToImage<D, T>(&net, "Input1", "InputImg1",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Input2", "InputImg2", BufferToImage<D, T>(&net, "Input2", "InputImg2",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Eltwise", "EltwiseTest") OpDefBuilder("Eltwise", "EltwiseTest")
.Input("InputImg1") .Input("InputImg1")
...@@ -138,7 +138,7 @@ void RandomTest(const kernels::EltwiseType type, ...@@ -138,7 +138,7 @@ void RandomTest(const kernels::EltwiseType type,
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImg", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImg", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DT_FLOAT) { if (DataTypeToEnum<T>::value == DT_FLOAT) {
......
...@@ -39,11 +39,11 @@ void Simple() { ...@@ -39,11 +39,11 @@ void Simple() {
net.AddInputFromArray<D, float>("Offset", {1}, offset); net.AddInputFromArray<D, float>("Offset", {1}, offset);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Scale", "ScaleImage", BufferToImage<D, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<D, float>(net, "Offset", "OffsetImage", BufferToImage<D, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
...@@ -56,7 +56,7 @@ void Simple() { ...@@ -56,7 +56,7 @@ void Simple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
...@@ -204,11 +204,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { ...@@ -204,11 +204,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
...@@ -222,7 +222,7 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { ...@@ -222,7 +222,7 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -258,11 +258,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -258,11 +258,11 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
...@@ -277,7 +277,7 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -277,7 +277,7 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) {
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
net.Sync(); net.Sync();
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
} }
...@@ -313,11 +313,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { ...@@ -313,11 +313,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, float>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, float>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, float>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
...@@ -330,7 +330,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { ...@@ -330,7 +330,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) {
// Run on opencl // Run on opencl
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-2);
} }
...@@ -366,11 +366,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -366,11 +366,11 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, half>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, half>(net, "Scale", "ScaleImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Scale", "ScaleImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
BufferToImage<DeviceType::OPENCL, half>(net, "Offset", "OffsetImage", BufferToImage<DeviceType::OPENCL, half>(&net, "Offset", "OffsetImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest")
...@@ -384,7 +384,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -384,7 +384,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) {
// Run on opencl // Run on opencl
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.5);
} }
......
...@@ -28,11 +28,11 @@ static void FCBenchmark( ...@@ -28,11 +28,11 @@ static void FCBenchmark(
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
const int width_size = height * width * channel; const int width_size = height * width * channel;
kernels::BufferType weight_type = kernels::BufferType::WEIGHT_WIDTH; kernels::BufferType weight_type = kernels::BufferType::WEIGHT_WIDTH;
BufferToImage<D, T>(net, "Weight", "WeightImage", BufferToImage<D, T>(&net, "Weight", "WeightImage",
weight_type); weight_type);
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FC", "FullyConnectedTest") OpDefBuilder("FC", "FullyConnectedTest")
......
...@@ -30,11 +30,11 @@ void Simple(const std::vector<index_t> &input_shape, ...@@ -30,11 +30,11 @@ void Simple(const std::vector<index_t> &input_shape,
net.AddInputFromArray<D, float>("Bias", bias_shape, bias_value); net.AddInputFromArray<D, float>("Bias", bias_shape, bias_value);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Weight", "WeightImage", BufferToImage<D, float>(&net, "Weight", "WeightImage",
kernels::BufferType::WEIGHT_HEIGHT); kernels::BufferType::WEIGHT_HEIGHT);
BufferToImage<D, float>(net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FC", "FullyConnectedTest") OpDefBuilder("FC", "FullyConnectedTest")
...@@ -48,7 +48,7 @@ void Simple(const std::vector<index_t> &input_shape, ...@@ -48,7 +48,7 @@ void Simple(const std::vector<index_t> &input_shape,
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("FC", "FullyConnectedTest") OpDefBuilder("FC", "FullyConnectedTest")
...@@ -139,11 +139,11 @@ void Complex(const index_t batch, ...@@ -139,11 +139,11 @@ void Complex(const index_t batch,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, T>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(net, "Weight", "WeightImage", BufferToImage<DeviceType::OPENCL, T>(&net, "Weight", "WeightImage",
kernels::BufferType::WEIGHT_HEIGHT); kernels::BufferType::WEIGHT_HEIGHT);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FC", "FullyConnectedTest") OpDefBuilder("FC", "FullyConnectedTest")
...@@ -158,7 +158,7 @@ void Complex(const index_t batch, ...@@ -158,7 +158,7 @@ void Complex(const index_t batch,
// Run on opencl // Run on opencl
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) { if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1);
...@@ -221,11 +221,11 @@ void TestWXFormat(const index_t batch, ...@@ -221,11 +221,11 @@ void TestWXFormat(const index_t batch,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, T>(net, "Input", "InputImage", BufferToImage<DeviceType::OPENCL, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<DeviceType::OPENCL, T>(net, "Weight", "WeightImage", BufferToImage<DeviceType::OPENCL, T>(&net, "Weight", "WeightImage",
kernels::BufferType::WEIGHT_WIDTH); kernels::BufferType::WEIGHT_WIDTH);
BufferToImage<DeviceType::OPENCL, float>(net, "Bias", "BiasImage", BufferToImage<DeviceType::OPENCL, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FC", "FullyConnectedTest") OpDefBuilder("FC", "FullyConnectedTest")
...@@ -239,7 +239,7 @@ void TestWXFormat(const index_t batch, ...@@ -239,7 +239,7 @@ void TestWXFormat(const index_t batch,
// Run on opencl // Run on opencl
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) { if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1);
......
...@@ -27,11 +27,11 @@ void TestNHWCSimple3x3VALID() { ...@@ -27,11 +27,11 @@ void TestNHWCSimple3x3VALID() {
net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -47,7 +47,7 @@ void TestNHWCSimple3x3VALID() { ...@@ -47,7 +47,7 @@ void TestNHWCSimple3x3VALID() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
...@@ -84,11 +84,11 @@ void TestNHWCSimple3x3SAME() { ...@@ -84,11 +84,11 @@ void TestNHWCSimple3x3SAME() {
net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f}); net.AddInputFromArray<D, T>("Bias", {1}, {-0.1f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
.Input("InputImage") .Input("InputImage")
...@@ -104,7 +104,7 @@ void TestNHWCSimple3x3SAME() { ...@@ -104,7 +104,7 @@ void TestNHWCSimple3x3SAME() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
...@@ -152,9 +152,9 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -152,9 +152,9 @@ void TestNHWCSimple3x3WithoutBias() {
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -169,7 +169,7 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -169,7 +169,7 @@ void TestNHWCSimple3x3WithoutBias() {
// Run // Run
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "Output", ImageToBuffer<D, T>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -221,11 +221,11 @@ void TestConv1x1() { ...@@ -221,11 +221,11 @@ void TestConv1x1() {
net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}); net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, float>(net, "Filter", "FilterImage", BufferToImage<D, float>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, float>(net, "Bias", "BiasImage", BufferToImage<D, float>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -240,7 +240,7 @@ void TestConv1x1() { ...@@ -240,7 +240,7 @@ void TestConv1x1() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -311,11 +311,11 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -311,11 +311,11 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -331,7 +331,7 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -331,7 +331,7 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -374,13 +374,13 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -374,13 +374,13 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
std::vector<float> float_input_data; std::vector<float> float_input_data;
GenerateRandomRealTypeData({batch, height, width, input_channels}, GenerateRandomRealTypeData({batch, height, width, input_channels},
float_input_data); &float_input_data);
std::vector<float> float_filter_data; std::vector<float> float_filter_data;
GenerateRandomRealTypeData( GenerateRandomRealTypeData(
{kernel_h, kernel_w, output_channels, input_channels}, {kernel_h, kernel_w, output_channels, input_channels},
float_filter_data); &float_filter_data);
std::vector<float> float_bias_data; std::vector<float> float_bias_data;
GenerateRandomRealTypeData({output_channels}, float_bias_data); GenerateRandomRealTypeData({output_channels}, &float_bias_data);
// Add input data // Add input data
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Input", {batch, height, width, input_channels}, float_input_data); "Input", {batch, height, width, input_channels}, float_input_data);
...@@ -396,11 +396,11 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -396,11 +396,11 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", BufferToImage<D, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(net, "Filter", "FilterImage", BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage", BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -416,7 +416,7 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) { ...@@ -416,7 +416,7 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.2); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.2);
...@@ -474,11 +474,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape, ...@@ -474,11 +474,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -494,7 +494,7 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape, ...@@ -494,7 +494,7 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -553,11 +553,11 @@ static void TestAtrousConvNxN(const std::vector<index_t> &shape, ...@@ -553,11 +553,11 @@ static void TestAtrousConvNxN(const std::vector<index_t> &shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", BufferToImage<D, T>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -573,7 +573,7 @@ static void TestAtrousConvNxN(const std::vector<index_t> &shape, ...@@ -573,7 +573,7 @@ static void TestAtrousConvNxN(const std::vector<index_t> &shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}; };
...@@ -640,11 +640,11 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape, ...@@ -640,11 +640,11 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// run on gpu // run on gpu
BufferToImage<D, half>(net, "Input", "InputImage", BufferToImage<D, half>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, half>(net, "Filter", "FilterImage", BufferToImage<D, half>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, half>(net, "Bias", "BiasImage", BufferToImage<D, half>(&net, "Bias", "BiasImage",
kernels::BufferType::ARGUMENT); kernels::BufferType::ARGUMENT);
OpDefBuilder("FusedConv2D", "FusedConv2dTest") OpDefBuilder("FusedConv2D", "FusedConv2dTest")
...@@ -660,7 +660,7 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape, ...@@ -660,7 +660,7 @@ static void TestGeneralHalfAtrousConv(const std::vector<index_t> &image_shape,
// Run on device // Run on device
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.7); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.7);
}; };
......
...@@ -24,8 +24,9 @@ static void MatMulBenchmark( ...@@ -24,8 +24,9 @@ static void MatMulBenchmark(
net.AddRandomInput<D, float>("B", {batch, channels, out_width, 1}); net.AddRandomInput<D, float>("B", {batch, channels, out_width, 1});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "A", "AImage", kernels::BufferType::IN_OUT_WIDTH); BufferToImage<D, T>(&net, "A", "AImage", kernels::BufferType::IN_OUT_WIDTH);
BufferToImage<D, T>(net, "B", "BImage", kernels::BufferType::IN_OUT_HEIGHT); BufferToImage<D, T>(&net, "B", "BImage",
kernels::BufferType::IN_OUT_HEIGHT);
OpDefBuilder("MatMul", "MatMulBM") OpDefBuilder("MatMul", "MatMulBM")
.Input("AImage") .Input("AImage")
......
...@@ -27,9 +27,9 @@ void Simple(const std::vector<index_t> &A_shape, ...@@ -27,9 +27,9 @@ void Simple(const std::vector<index_t> &A_shape,
net.AddInputFromArray<D, float>("B", B_shape, B_value); net.AddInputFromArray<D, float>("B", B_shape, B_value);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "A", "AImage", BufferToImage<D, float>(&net, "A", "AImage",
kernels::BufferType::IN_OUT_WIDTH); kernels::BufferType::IN_OUT_WIDTH);
BufferToImage<D, float>(net, "B", "BImage", BufferToImage<D, float>(&net, "B", "BImage",
kernels::BufferType::IN_OUT_HEIGHT); kernels::BufferType::IN_OUT_HEIGHT);
OpDefBuilder("MatMul", "MatMulTest") OpDefBuilder("MatMul", "MatMulTest")
...@@ -41,7 +41,7 @@ void Simple(const std::vector<index_t> &A_shape, ...@@ -41,7 +41,7 @@ void Simple(const std::vector<index_t> &A_shape,
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_HEIGHT); kernels::BufferType::IN_OUT_HEIGHT);
} else { } else {
OpDefBuilder("MatMul", "MatMulTest") OpDefBuilder("MatMul", "MatMulTest")
...@@ -127,9 +127,9 @@ void Complex(const index_t batch, ...@@ -127,9 +127,9 @@ void Complex(const index_t batch,
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
// Run on opencl // Run on opencl
BufferToImage<DeviceType::OPENCL, T>(net, "A", "AImage", BufferToImage<DeviceType::OPENCL, T>(&net, "A", "AImage",
kernels::BufferType::IN_OUT_WIDTH); kernels::BufferType::IN_OUT_WIDTH);
BufferToImage<DeviceType::OPENCL, T>(net, "B", "BImage", BufferToImage<DeviceType::OPENCL, T>(&net, "B", "BImage",
kernels::BufferType::IN_OUT_HEIGHT); kernels::BufferType::IN_OUT_HEIGHT);
OpDefBuilder("MatMul", "MatMulTest") OpDefBuilder("MatMul", "MatMulTest")
...@@ -142,7 +142,7 @@ void Complex(const index_t batch, ...@@ -142,7 +142,7 @@ void Complex(const index_t batch,
// Run on opencl // Run on opencl
net.RunOp(DeviceType::OPENCL); net.RunOp(DeviceType::OPENCL);
ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<DeviceType::OPENCL, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_HEIGHT); kernels::BufferType::IN_OUT_HEIGHT);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) { if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-1); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-1);
......
...@@ -5,11 +5,13 @@ ...@@ -5,11 +5,13 @@
#ifndef MACE_OPS_OPS_TEST_UTIL_H_ #ifndef MACE_OPS_OPS_TEST_UTIL_H_
#define MACE_OPS_OPS_TEST_UTIL_H_ #define MACE_OPS_OPS_TEST_UTIL_H_
#include <type_traits>
#include <limits>
#include <functional> #include <functional>
#include <vector> #include <limits>
#include <memory>
#include <string> #include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "mace/core/net.h" #include "mace/core/net.h"
...@@ -210,37 +212,41 @@ class OpsTestBase : public ::testing::Test { ...@@ -210,37 +212,41 @@ class OpsTestBase : public ::testing::Test {
template <typename T> template <typename T>
void GenerateRandomRealTypeData(const std::vector<index_t> &shape, void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
std::vector<T> &res) { std::vector<T> *res) {
MACE_CHECK_NOTNULL(res);
std::random_device rd; std::random_device rd;
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1); std::normal_distribution<float> nd(0, 1);
index_t size = std::accumulate(shape.begin(), shape.end(), 1, index_t size = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<index_t>()); std::multiplies<index_t>());
res.resize(size); res->resize(size);
if (DataTypeToEnum<T>::value == DT_HALF) { if (DataTypeToEnum<T>::value == DT_HALF) {
std::generate(res.begin(), res.end(), std::generate(res->begin(), res->end(),
[&gen, &nd] { return half_float::half_cast<half>(nd(gen)); }); [&gen, &nd] { return half_float::half_cast<half>(nd(gen)); });
} else { } else {
std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); std::generate(res->begin(), res->end(), [&gen, &nd] { return nd(gen); });
} }
} }
template <typename T> template <typename T>
void GenerateRandomIntTypeData(const std::vector<index_t> &shape, void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
std::vector<T> &res, std::vector<T> *res,
const T a = 0, const T a = 0,
const T b = std::numeric_limits<T>::max()) { const T b = std::numeric_limits<T>::max()) {
MACE_CHECK_NOTNULL(res);
std::random_device rd; std::random_device rd;
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::uniform_int_distribution<> nd(a, b); std::uniform_int_distribution<> nd(a, b);
index_t size = std::accumulate(shape.begin(), shape.end(), 1, index_t size = std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<index_t>()); std::multiplies<index_t>());
res.resize(size); res->resize(size);
std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); std::generate(res->begin(), res->end(), [&gen, &nd] { return nd(gen); });
} }
template <typename T> template <typename T>
...@@ -383,39 +389,43 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { ...@@ -383,39 +389,43 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
} }
template <DeviceType D, typename T> template <DeviceType D, typename T>
void BufferToImage(OpsTestNet &net, void BufferToImage(OpsTestNet *net,
const std::string &input_name, const std::string &input_name,
const std::string &output_name, const std::string &output_name,
const kernels::BufferType type) { const kernels::BufferType type) {
MACE_CHECK_NOTNULL(net);
OpDefBuilder("BufferToImage", "BufferToImageTest") OpDefBuilder("BufferToImage", "BufferToImageTest")
.Input(input_name) .Input(input_name)
.Output(output_name) .Output(output_name)
.AddIntArg("buffer_type", type) .AddIntArg("buffer_type", type)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net->NewOperatorDef());
// Run // Run
net.RunOp(D); net->RunOp(D);
net.Sync(); net->Sync();
} }
template <DeviceType D, typename T> template <DeviceType D, typename T>
void ImageToBuffer(OpsTestNet &net, void ImageToBuffer(OpsTestNet *net,
const std::string &input_name, const std::string &input_name,
const std::string &output_name, const std::string &output_name,
const kernels::BufferType type) { const kernels::BufferType type) {
MACE_CHECK_NOTNULL(net);
OpDefBuilder("ImageToBuffer", "ImageToBufferTest") OpDefBuilder("ImageToBuffer", "ImageToBufferTest")
.Input(input_name) .Input(input_name)
.Output(output_name) .Output(output_name)
.AddIntArg("buffer_type", type) .AddIntArg("buffer_type", type)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net->NewOperatorDef());
// Run // Run
net.RunOp(D); net->RunOp(D);
net.Sync(); net->Sync();
} }
} // namespace test } // namespace test
......
...@@ -50,18 +50,20 @@ static void Pooling(int iters, ...@@ -50,18 +50,20 @@ static void Pooling(int iters,
} }
} }
#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \ #define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \
static void \ static void \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \ BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_\
int iters) { \ ##DEVICE( \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ int iters) { \
mace::testing::MaccProcessed(tot); \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(float))); \ mace::testing::MaccProcessed(tot); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \ mace::testing::BytesProcessed(tot *(sizeof(float))); \
PoolingType::PO); \ Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
} \ PoolingType::PO); \
BENCHMARK( \ } \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE) BENCHMARK( \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_\
##DEVICE)
#define BM_POOLING(N, C, H, W, K, S, PA, PO) \ #define BM_POOLING(N, C, H, W, K, S, PA, PO) \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU);
......
...@@ -135,7 +135,7 @@ static void SimpleMaxPooling3S2() { ...@@ -135,7 +135,7 @@ static void SimpleMaxPooling3S2() {
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}); 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage") .Input("InputImage")
...@@ -147,7 +147,7 @@ static void SimpleMaxPooling3S2() { ...@@ -147,7 +147,7 @@ static void SimpleMaxPooling3S2() {
.AddIntsArg("dilations", {1, 1}) .AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
// Run // Run
...@@ -200,7 +200,7 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape, ...@@ -200,7 +200,7 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage") .Input("InputImage")
...@@ -213,7 +213,7 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape, ...@@ -213,7 +213,7 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<T>(expected, *net.GetOutput("OPENCLOutput"), 0.001); ExpectTensorNear<T>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
...@@ -285,7 +285,7 @@ static void SimpleAvgPoolingTest() { ...@@ -285,7 +285,7 @@ static void SimpleAvgPoolingTest() {
"Input", {1, 2, 8, 1}, "Input", {1, 2, 8, 1},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage") .Input("InputImage")
...@@ -298,7 +298,7 @@ static void SimpleAvgPoolingTest() { ...@@ -298,7 +298,7 @@ static void SimpleAvgPoolingTest() {
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
// Check // Check
...@@ -336,7 +336,7 @@ static void AvgPoolingTest(const std::vector<index_t> &shape, ...@@ -336,7 +336,7 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("InputImage") .Input("InputImage")
...@@ -349,7 +349,7 @@ static void AvgPoolingTest(const std::vector<index_t> &shape, ...@@ -349,7 +349,7 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, T>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float, T>(expected, *net.GetOutput("OPENCLOutput"), 0.01); ExpectTensorNear<float, T>(expected, *net.GetOutput("OPENCLOutput"), 0.01);
......
...@@ -29,7 +29,7 @@ static void ResizeBilinearBenchmark(int iters, ...@@ -29,7 +29,7 @@ static void ResizeBilinearBenchmark(int iters,
net.AddInputFromArray<D, index_t>("OutSize", {2}, net.AddInputFromArray<D, index_t>("OutSize", {2},
{output_height, output_width}); {output_height, output_width});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark") OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark")
.Input("InputImage") .Input("InputImage")
...@@ -60,18 +60,20 @@ static void ResizeBilinearBenchmark(int iters, ...@@ -60,18 +60,20 @@ static void ResizeBilinearBenchmark(int iters,
net.Sync(); net.Sync();
} }
#define BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, DEVICE) \ #define BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, DEVICE) \
static void \ static void \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE( \ BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
int iters) { \ ##DEVICE( \
const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \ int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \ const int64_t macc = static_cast<int64_t>(iters) * N * C * H1 * W1 * 3; \
mace::testing::MaccProcessed(macc); \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H0 * W0; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::MaccProcessed(macc); \
ResizeBilinearBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \ ResizeBilinearBenchmark<DEVICE, TYPE>(iters, N, C, H0, W0, H1, W1); \
BENCHMARK( \ } \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE) BENCHMARK( \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE)
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \ #define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
......
...@@ -94,7 +94,7 @@ void TestRandomResizeBilinear() { ...@@ -94,7 +94,7 @@ void TestRandomResizeBilinear() {
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
...@@ -106,7 +106,7 @@ void TestRandomResizeBilinear() { ...@@ -106,7 +106,7 @@ void TestRandomResizeBilinear() {
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "DeviceOutput", ImageToBuffer<D, float>(&net, "OutputImage", "DeviceOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
// TODO(someone): support NEON // TODO(someone): support NEON
......
...@@ -20,10 +20,12 @@ class SliceOp : public Operator<D, T> { ...@@ -20,10 +20,12 @@ class SliceOp : public Operator<D, T> {
: Operator<D, T>(op_def, ws) {} : Operator<D, T>(op_def, ws) {}
bool Run(StatsFuture *future) override { bool Run(StatsFuture *future) override {
MACE_CHECK(this->OutputSize() >= 2) << "There must be at least two outputs for slicing"; MACE_CHECK(this->OutputSize() >= 2)
<< "There must be at least two outputs for slicing";
const Tensor *input = this->Input(INPUT); const Tensor *input = this->Input(INPUT);
const std::vector<Tensor *> output_list = this->Outputs(); const std::vector<Tensor *> output_list = this->Outputs();
MACE_CHECK((input->dim(3) % this->OutputSize()) == 0) << "Outputs do not split input equally."; MACE_CHECK((input->dim(3) % this->OutputSize()) == 0)
<< "Outputs do not split input equally.";
functor_(input, output_list, future); functor_(input, output_list, future);
return true; return true;
......
...@@ -19,13 +19,16 @@ static void BMSliceHelper(int iters, ...@@ -19,13 +19,16 @@ static void BMSliceHelper(int iters,
// Construct graph // Construct graph
OpsTestNet net; OpsTestNet net;
const index_t input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<index_t>()); const index_t input_size = std::accumulate(input_shape.begin(),
input_shape.end(),
1,
std::multiplies<index_t>());
std::vector<float> input_data(input_size); std::vector<float> input_data(input_size);
GenerateRandomRealTypeData(input_shape, input_data); GenerateRandomRealTypeData(input_shape, &input_data);
net.AddInputFromArray<D, float>("Input", input_shape, input_data); net.AddInputFromArray<D, float>("Input", input_shape, input_data);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
auto builder = OpDefBuilder("Slice", "SliceTest"); auto builder = OpDefBuilder("Slice", "SliceTest");
...@@ -58,14 +61,15 @@ static void BMSliceHelper(int iters, ...@@ -58,14 +61,15 @@ static void BMSliceHelper(int iters,
} }
} }
#define BM_SLICE_MACRO(N, H, W, C, NO, TYPE, DEVICE) \ #define BM_SLICE_MACRO(N, H, W, C, NO, TYPE, DEVICE) \
static void BM_SLICE_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE(int iters) { \ static void \
const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \ BM_SLICE_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE(int iters) { \
mace::testing::MaccProcessed(tot); \ const int64_t tot = static_cast<int64_t>(iters) * N * H * W * C; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::MaccProcessed(tot); \
BMSliceHelper<DEVICE, TYPE>(iters, {N, H, W, C}, NO); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \ BMSliceHelper<DEVICE, TYPE>(iters, {N, H, W, C}, NO); \
BENCHMARK(BM_SLICE_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE) } \
BENCHMARK(BM_SLICE_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE)
#define BM_SLICE(N, H, W, C, NO) \ #define BM_SLICE(N, H, W, C, NO) \
BM_SLICE_MACRO(N, H, W, C, NO, float, CPU); \ BM_SLICE_MACRO(N, H, W, C, NO, float, CPU); \
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// //
#include <functional> #include <functional>
#include <vector>
#include "gmock/gmock.h" #include "gmock/gmock.h"
#include "mace/ops/slice.h" #include "mace/ops/slice.h"
...@@ -27,13 +28,16 @@ void RandomTest(const int num_outputs) { ...@@ -27,13 +28,16 @@ void RandomTest(const int num_outputs) {
OpsTestNet net; OpsTestNet net;
std::vector<index_t> input_shape({batch, height, width, input_channels}); std::vector<index_t> input_shape({batch, height, width, input_channels});
const index_t input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies<index_t>()); const index_t input_size = std::accumulate(input_shape.begin(),
input_shape.end(),
1,
std::multiplies<index_t>());
std::vector<float> input_data(input_size); std::vector<float> input_data(input_size);
GenerateRandomRealTypeData(input_shape, input_data); GenerateRandomRealTypeData(input_shape, &input_data);
net.AddInputFromArray<D, float>("Input", input_shape, input_data); net.AddInputFromArray<D, float>("Input", input_shape, input_data);
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
auto builder = OpDefBuilder("Slice", "SliceTest"); auto builder = OpDefBuilder("Slice", "SliceTest");
...@@ -58,15 +62,19 @@ void RandomTest(const int num_outputs) { ...@@ -58,15 +62,19 @@ void RandomTest(const int num_outputs) {
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
for (int i = 0; i < num_outputs; ++i) { for (int i = 0; i < num_outputs; ++i) {
ImageToBuffer<D, float>(net, MakeString("OutputImage", i), MakeString("Output", i), ImageToBuffer<D, float>(&net,
MakeString("OutputImage", i),
MakeString("Output", i),
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} }
} }
// Check // Check
std::vector<index_t> expected_shape({batch, height, width, output_channels}); std::vector<index_t> expected_shape({batch, height, width, output_channels});
const index_t outer_size = std::accumulate(expected_shape.begin(), expected_shape.end() - 1, const index_t outer_size = std::accumulate(expected_shape.begin(),
1, std::multiplies<index_t>()); expected_shape.end() - 1,
1,
std::multiplies<index_t>());
const float *input_ptr = input_data.data(); const float *input_ptr = input_data.data();
const float *output_ptr; const float *output_ptr;
for (int i = 0; i < num_outputs; ++i) { for (int i = 0; i < num_outputs; ++i) {
...@@ -77,7 +85,8 @@ void RandomTest(const int num_outputs) { ...@@ -77,7 +85,8 @@ void RandomTest(const int num_outputs) {
for (int outer_idx = 0; outer_idx < outer_size; ++outer_idx) { for (int outer_idx = 0; outer_idx < outer_size; ++outer_idx) {
const int idx = outer_idx * input_channels + i * output_channels; const int idx = outer_idx * input_channels + i * output_channels;
for (int j = 0; j < output_channels; ++j) { for (int j = 0; j < output_channels; ++j) {
ASSERT_NEAR(*output_ptr++, input_ptr[idx + j], 1e-2) << "with output " << i << " index " << idx + j; ASSERT_NEAR(*output_ptr++, input_ptr[idx + j], 1e-2) << "with output "
<< i << " index " << idx + j;
} }
} }
} }
......
...@@ -23,7 +23,7 @@ static void SoftmaxBenchmark( ...@@ -23,7 +23,7 @@ static void SoftmaxBenchmark(
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Softmax", "SoftmaxBM") OpDefBuilder("Softmax", "SoftmaxBM")
......
...@@ -20,7 +20,7 @@ void Simple() { ...@@ -20,7 +20,7 @@ void Simple() {
{1, 1, 1, 1, 1, 2, 3, 4}); {1, 1, 1, 1, 1, 2, 3, 4});
if (D == DeviceType::OPENCL) { if (D == DeviceType::OPENCL) {
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Softmax", "SoftmaxTest") OpDefBuilder("Softmax", "SoftmaxTest")
...@@ -32,7 +32,7 @@ void Simple() { ...@@ -32,7 +32,7 @@ void Simple() {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
} else { } else {
OpDefBuilder("Softmax", "SoftmaxTest") OpDefBuilder("Softmax", "SoftmaxTest")
...@@ -71,7 +71,7 @@ void Complex(const std::vector<index_t> &logits_shape) { ...@@ -71,7 +71,7 @@ void Complex(const std::vector<index_t> &logits_shape) {
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("Output")); expected.Copy(*net.GetOutput("Output"));
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("Softmax", "SoftmaxTest") OpDefBuilder("Softmax", "SoftmaxTest")
...@@ -83,7 +83,7 @@ void Complex(const std::vector<index_t> &logits_shape) { ...@@ -83,7 +83,7 @@ void Complex(const std::vector<index_t> &logits_shape) {
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, float>(net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, float>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5); ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 1e-5);
......
...@@ -18,7 +18,7 @@ static void BMSpaceToBatch( ...@@ -18,7 +18,7 @@ static void BMSpaceToBatch(
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
.Input("InputImage") .Input("InputImage")
......
...@@ -20,7 +20,7 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape, ...@@ -20,7 +20,7 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
OpsTestNet net; OpsTestNet net;
net.AddInputFromArray<D, float>("Input", input_shape, input_data); net.AddInputFromArray<D, float>("Input", input_shape, input_data);
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
.Input("InputImage") .Input("InputImage")
...@@ -32,7 +32,7 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape, ...@@ -32,7 +32,7 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
// Check // Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8);
...@@ -48,7 +48,7 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape, ...@@ -48,7 +48,7 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", input_shape, input_data); net.AddInputFromArray<D, float>("Input", input_shape, input_data);
BufferToImage<D, float>(net, "Input", "InputImage", BufferToImage<D, float>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
.Input("InputImage") .Input("InputImage")
...@@ -60,7 +60,7 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape, ...@@ -60,7 +60,7 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Run // Run
net.RunOp(D); net.RunOp(D);
ImageToBuffer<D, float>(net, "OutputImage", "Output", ImageToBuffer<D, float>(&net, "OutputImage", "Output",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
// Check // Check
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-8);
......
...@@ -16,8 +16,9 @@ class WinogradConvlutionTest : public OpsTestBase {}; ...@@ -16,8 +16,9 @@ class WinogradConvlutionTest : public OpsTestBase {};
void TransposeFilter(const std::vector<float> &input, void TransposeFilter(const std::vector<float> &input,
const std::vector<index_t> &input_shape, const std::vector<index_t> &input_shape,
std::vector<float> &output) { std::vector<float> *output) {
output.resize(input.size()); MACE_CHECK_NOTNULL(output);
output->resize(input.size());
const float *input_ptr = input.data(); const float *input_ptr = input.data();
for (index_t h = 0; h < input_shape[0]; ++h) { for (index_t h = 0; h < input_shape[0]; ++h) {
...@@ -27,7 +28,7 @@ void TransposeFilter(const std::vector<float> &input, ...@@ -27,7 +28,7 @@ void TransposeFilter(const std::vector<float> &input,
int offset = ((oc * input_shape[3] + ic) * input_shape[0] + h) * int offset = ((oc * input_shape[3] + ic) * input_shape[0] + h) *
input_shape[1] + input_shape[1] +
w; w;
output[offset] = *input_ptr; (*output)[offset] = *input_ptr;
++input_ptr; ++input_ptr;
} }
} }
...@@ -49,16 +50,16 @@ void WinogradConvolution(const index_t batch, ...@@ -49,16 +50,16 @@ void WinogradConvolution(const index_t batch,
// Add input data // Add input data
std::vector<float> filter_data; std::vector<float> filter_data;
std::vector<index_t> filter_shape = {3, 3, out_channels, in_channels}; std::vector<index_t> filter_shape = {3, 3, out_channels, in_channels};
GenerateRandomRealTypeData<float>(filter_shape, filter_data); GenerateRandomRealTypeData<float>(filter_shape, &filter_data);
net.AddRandomInput<D, float>("Input", {batch, height, width, in_channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, in_channels});
net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data); net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data);
net.AddRandomInput<D, T>("Bias", {out_channels}); net.AddRandomInput<D, T>("Bias", {out_channels});
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -72,7 +73,7 @@ void WinogradConvolution(const index_t batch, ...@@ -72,7 +73,7 @@ void WinogradConvolution(const index_t batch,
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "ConvOutput", ImageToBuffer<D, T>(&net, "OutputImage", "ConvOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("ConvOutput")); expected.Copy(*net.GetOutput("ConvOutput"));
...@@ -81,10 +82,10 @@ void WinogradConvolution(const index_t batch, ...@@ -81,10 +82,10 @@ void WinogradConvolution(const index_t batch,
// Winograd convolution // Winograd convolution
// transform filter // transform filter
std::vector<float> wino_filter_data; std::vector<float> wino_filter_data;
TransposeFilter(filter_data, filter_shape, wino_filter_data); TransposeFilter(filter_data, filter_shape, &wino_filter_data);
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"WinoFilterData", {out_channels, in_channels, 3, 3}, wino_filter_data); "WinoFilterData", {out_channels, in_channels, 3, 3}, wino_filter_data);
BufferToImage<D, T>(net, "WinoFilterData", "WinoFilter", BufferToImage<D, T>(&net, "WinoFilterData", "WinoFilter",
kernels::BufferType::WINOGRAD_FILTER); kernels::BufferType::WINOGRAD_FILTER);
// transform input // transform input
...@@ -122,7 +123,7 @@ void WinogradConvolution(const index_t batch, ...@@ -122,7 +123,7 @@ void WinogradConvolution(const index_t batch,
net.RunOp(D); net.RunOp(D);
net.Sync(); net.Sync();
ImageToBuffer<D, float>(net, "WinoOutputImage", "WinoOutput", ImageToBuffer<D, float>(&net, "WinoOutputImage", "WinoOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) { if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("WinoOutput"), 1e-1); ExpectTensorNear<float>(expected, *net.GetOutput("WinoOutput"), 1e-1);
...@@ -166,16 +167,16 @@ void WinogradConvolutionWithPad(const index_t batch, ...@@ -166,16 +167,16 @@ void WinogradConvolutionWithPad(const index_t batch,
// Add input data // Add input data
std::vector<float> filter_data; std::vector<float> filter_data;
std::vector<index_t> filter_shape = {3, 3, out_channels, in_channels}; std::vector<index_t> filter_shape = {3, 3, out_channels, in_channels};
GenerateRandomRealTypeData<float>(filter_shape, filter_data); GenerateRandomRealTypeData<float>(filter_shape, &filter_data);
net.AddRandomInput<D, float>("Input", {batch, height, width, in_channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, in_channels});
net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data); net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data);
net.AddRandomInput<D, T>("Bias", {out_channels}); net.AddRandomInput<D, T>("Bias", {out_channels});
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
BufferToImage<D, T>(net, "Filter", "FilterImage", BufferToImage<D, T>(&net, "Filter", "FilterImage",
kernels::BufferType::CONV2D_FILTER); kernels::BufferType::CONV2D_FILTER);
BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); BufferToImage<D, T>(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputImage") .Input("InputImage")
.Input("FilterImage") .Input("FilterImage")
...@@ -189,7 +190,7 @@ void WinogradConvolutionWithPad(const index_t batch, ...@@ -189,7 +190,7 @@ void WinogradConvolutionWithPad(const index_t batch,
net.RunOp(D); net.RunOp(D);
// Transfer output // Transfer output
ImageToBuffer<D, T>(net, "OutputImage", "ConvOutput", ImageToBuffer<D, T>(&net, "OutputImage", "ConvOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
Tensor expected; Tensor expected;
expected.Copy(*net.GetOutput("ConvOutput")); expected.Copy(*net.GetOutput("ConvOutput"));
...@@ -198,10 +199,10 @@ void WinogradConvolutionWithPad(const index_t batch, ...@@ -198,10 +199,10 @@ void WinogradConvolutionWithPad(const index_t batch,
// Winograd convolution // Winograd convolution
// transform filter // transform filter
std::vector<float> wino_filter_data; std::vector<float> wino_filter_data;
TransposeFilter(filter_data, filter_shape, wino_filter_data); TransposeFilter(filter_data, filter_shape, &wino_filter_data);
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"WinoFilterData", {out_channels, in_channels, 3, 3}, wino_filter_data); "WinoFilterData", {out_channels, in_channels, 3, 3}, wino_filter_data);
BufferToImage<D, T>(net, "WinoFilterData", "WinoFilter", BufferToImage<D, T>(&net, "WinoFilterData", "WinoFilter",
kernels::BufferType::WINOGRAD_FILTER); kernels::BufferType::WINOGRAD_FILTER);
// transform input // transform input
...@@ -239,7 +240,7 @@ void WinogradConvolutionWithPad(const index_t batch, ...@@ -239,7 +240,7 @@ void WinogradConvolutionWithPad(const index_t batch,
net.RunOp(D); net.RunOp(D);
net.Sync(); net.Sync();
ImageToBuffer<D, float>(net, "WinoOutputImage", "WinoOutput", ImageToBuffer<D, float>(&net, "WinoOutputImage", "WinoOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
if (DataTypeToEnum<T>::value == DataType::DT_HALF) { if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
ExpectTensorNear<float>(expected, *net.GetOutput("WinoOutput"), 1e-1); ExpectTensorNear<float>(expected, *net.GetOutput("WinoOutput"), 1e-1);
......
...@@ -18,7 +18,7 @@ static void BMWinogradTransform( ...@@ -18,7 +18,7 @@ static void BMWinogradTransform(
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<D, float>("Input", {batch, height, width, channels}); net.AddRandomInput<D, float>("Input", {batch, height, width, channels});
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
OpDefBuilder("WinogradTransform", "WinogradTransformTest") OpDefBuilder("WinogradTransform", "WinogradTransformTest")
.Input("InputImage") .Input("InputImage")
...@@ -65,7 +65,7 @@ static void BMWinogradInverseTransform( ...@@ -65,7 +65,7 @@ static void BMWinogradInverseTransform(
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<D, float>("Input", {16, channels, p, 1}); net.AddRandomInput<D, float>("Input", {16, channels, p, 1});
BufferToImage<D, T>(net, "Input", "InputImage", BufferToImage<D, T>(&net, "Input", "InputImage",
kernels::BufferType::IN_OUT_HEIGHT); kernels::BufferType::IN_OUT_HEIGHT);
OpDefBuilder("WinogradInverseTransform", "WinogradInverseTransformTest") OpDefBuilder("WinogradInverseTransform", "WinogradInverseTransformTest")
.Input("InputImage") .Input("InputImage")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册