Commit 4fdb724c authored by liuqi

Minor fix.

Parent f8dfff0d
@@ -54,8 +54,8 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
     kernel_.setArg(idx++, static_cast<uint32_t>(paddings[1] / 2));
   }
-  const uint32_t gws[2] = {static_cast<size_t>(out_width),
-                           static_cast<size_t>(RoundUpDiv4(input_tensor->dim(3)))};
+  const uint32_t gws[2] = {static_cast<uint32_t>(out_width),
+                           static_cast<uint32_t>(RoundUpDiv4(input_tensor->dim(3)))};
   const std::vector<uint32_t> lws = {128, 8, 1};
   std::stringstream ss;
   ss << "winograd_transform_kernel_"
@@ -126,8 +126,8 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
     kernel_.setArg(idx++, prelu_alpha_);
   }
-  const uint32_t gws[2] = {static_cast<size_t>(input_tensor->dim(2)),
-                           static_cast<size_t>(RoundUpDiv4(input_tensor->dim(1)))};
+  const uint32_t gws[2] = {static_cast<uint32_t>(input_tensor->dim(2)),
+                           static_cast<uint32_t>(RoundUpDiv4(input_tensor->dim(1)))};
   const std::vector<uint32_t> lws = {128, 8, 1};
   std::stringstream ss;
...
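Note on the two hunks above: the casts that build gws now match the array's declared uint32_t element type instead of producing size_t values that would be narrowed into a uint32_t array. RoundUpDiv4 itself is not part of this diff; the following is only a minimal sketch of what such a helper is assumed to do (divide an element count by four and round up, as used when channels are packed four-wide):

#include <cstdint>

// Assumed behaviour of the RoundUpDiv4 helper referenced above:
// ceil(value / 4), e.g. RoundUpDiv4(3) == 1, RoundUpDiv4(32) == 8.
inline uint32_t RoundUpDiv4(int64_t value) {
  return static_cast<uint32_t>((value + 3) / 4);
}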
@@ -20,6 +20,7 @@ struct ReshapeFunctor {
                   Tensor *output,
                   StatsFuture *future) {
     output->Resize(out_shape);
+    // TODO copy on write to avoid this copy.
     output->CopyBytes(input->raw_data(), input->size() * sizeof(T));
   }
 };
...
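The TODO added above points at a copy-on-write reshape: because reshape only changes shape metadata, the output could share the input's buffer and defer the byte copy until one of the tensors is actually mutated. The following is only an illustrative sketch of that idea, not MACE's Tensor API; all names here are hypothetical:

#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical copy-on-write storage: the reshaped view shares the buffer
// and clones it only on the first mutable access.
struct CowBuffer {
  std::shared_ptr<std::vector<uint8_t>> data;

  uint8_t *mutable_data() {
    if (data.use_count() > 1) {  // still shared with the source tensor
      data = std::make_shared<std::vector<uint8_t>>(*data);  // copy happens here
    }
    return data->data();
  }
  const uint8_t *raw_data() const { return data->data(); }
};

struct CowTensorView {
  CowBuffer buffer;            // shared with the source tensor
  std::vector<int64_t> shape;  // reshape only rewrites this metadata
};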
@@ -15,8 +15,8 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h,
   OpsTestNet net;
   // Add input data
-  net.AddRandomInput<D, float>("Input0", {n, h, w, c});
-  net.AddRandomInput<D, float>("Input1", {n, h, w, c});
+  net.AddRandomInput<D, T>("Input0", {n, h, w, c});
+  net.AddRandomInput<D, T>("Input1", {n, h, w, c});
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, half>(net, "Input0", "InputImg0", kernels::BufferType::IN_OUT_CHANNEL);
@@ -26,7 +26,7 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h,
        .Input("InputImg1")
        .AddIntArg("type", static_cast<int>(type))
        .AddFloatsArg("coeff", {1.2, 2.1})
-       .AddIntArg("T", static_cast<int>(DT_HALF))
+       .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
        .Output("OutputImg")
        .Finalize(net.NewOperatorDef());
   } else {
@@ -63,16 +63,17 @@ static void EltwiseBenchmark(int iters, kernels::EltwiseType type, int n, int h,
   }                                                                       \
   BENCHMARK(BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
-#define BM_ELTWISE(ELT_TYPE, N, H, W, C, TYPE)              \
-  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE, CPU);        \
-  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, TYPE, OPENCL);
+#define BM_ELTWISE(ELT_TYPE, N, H, W, C)                    \
+  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU);       \
+  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, OPENCL);    \
+  BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, OPENCL);
-BM_ELTWISE(0, 1, 256, 256, 32, float);
-BM_ELTWISE(0, 1, 128, 128, 32, float);
-BM_ELTWISE(1, 1, 128, 128, 32, float);
-BM_ELTWISE(2, 1, 128, 128, 32, float);
-BM_ELTWISE(0, 1, 240, 240, 256, float);
-BM_ELTWISE(1, 1, 240, 240, 256, float);
-BM_ELTWISE(2, 1, 240, 240, 256, float);
+BM_ELTWISE(0, 1, 256, 256, 32);
+BM_ELTWISE(0, 1, 128, 128, 32);
+BM_ELTWISE(1, 1, 128, 128, 32);
+BM_ELTWISE(2, 1, 128, 128, 32);
+BM_ELTWISE(0, 1, 240, 240, 256);
+BM_ELTWISE(1, 1, 240, 240, 256);
+BM_ELTWISE(2, 1, 240, 240, 256);
 }  // namespace mace
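With the macro change above, each shape now registers a float CPU run plus float and half OPENCL runs, and the op's "T" argument is derived from the benchmark's type parameter via DataTypeToEnum<T>::value rather than being hard-coded to DT_HALF. For readers unfamiliar with that trait, this is a minimal sketch of the pattern only; the real definition lives elsewhere in the tree and may differ, and the types and enumerators below are placeholders:

// Sketch of a compile-time type-to-enum trait in the DataTypeToEnum style.
enum DataType { DT_FLOAT, DT_HALF };      // enumerator values illustrative only

struct half { unsigned short storage; };  // placeholder for the project's half type

template <typename T>
struct DataTypeToEnum;                    // left unspecialized for unsupported types

template <>
struct DataTypeToEnum<float> {
  static constexpr DataType value = DT_FLOAT;
};

template <>
struct DataTypeToEnum<half> {
  static constexpr DataType value = DT_HALF;
};

// Usage, mirroring the benchmark above:
//   .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))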