Select NCHW handling from the input tensor's data format, not the device type.

InferConv2dShapeOp and ShapeOp previously took the NCHW path whenever
has_data_format was set and the op was instantiated for DeviceType::CPU.
Both now test input->data_format() == DataFormat::NCHW instead.  ShapeOp
keeps its requirement that the input has exactly four dimensions, because
the guarded code reads dims 0..3 to reorder NCHW into NHWC.

The GPU device built in OpTestContext additionally receives
GPUPerfHint::PERF_HIGH as a trailing constructor argument.

NOTE(review): line breaks, indentation and blank context lines in this patch
were reconstructed from a whitespace-collapsed copy, and template argument
lists appear to have been stripped by that copy.  Confirm the context lines
against the target files before applying.

diff --git a/mace/ops/infer_conv2d_shape.cc b/mace/ops/infer_conv2d_shape.cc
index 50b9a9e13022f31d36874c4f1b3e435b84e25616..38f711f57ad824f146a4cd0abf306300b5122735 100644
--- a/mace/ops/infer_conv2d_shape.cc
+++ b/mace/ops/infer_conv2d_shape.cc
@@ -36,7 +36,8 @@ class InferConv2dShapeOp : public Operation {
 
     auto has_data_format =
         Operation::GetOptionalArg("has_data_format", 0);
-    const bool isNCHW = (has_data_format && D == DeviceType::CPU);
+    const bool isNCHW = (has_data_format &&
+        input->data_format() == DataFormat::NCHW);
 
     Padding padding_type =
         static_cast(Operation::GetOptionalArg(
diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc
index 7462548a34ae4fb6fc37c3ac1a6db325021c9274..dec3bd50db64468a5ee566c2b8fa2d63b50f2231 100644
--- a/mace/ops/ops_test_util.cc
+++ b/mace/ops/ops_test_util.cc
@@ -129,7 +129,8 @@ OpTestContext::OpTestContext(int num_threads,
   device_map_[DeviceType::GPU] = make_unique(
       gpu_context_->opencl_tuner(),
       gpu_context_->opencl_cache_storage(),
-      GPUPriorityHint::PRIORITY_NORMAL);
+      GPUPriorityHint::PRIORITY_NORMAL,
+      GPUPerfHint::PERF_HIGH);
 }
 
 std::shared_ptr OpTestContext::gpu_context() const {
diff --git a/mace/ops/shape.cc b/mace/ops/shape.cc
index 1ee3ee02fce701edf62398dfce12c234b8df78c9..dcca202f3229f616a3ce89dddcd008cf998a1a69 100644
--- a/mace/ops/shape.cc
+++ b/mace/ops/shape.cc
@@ -37,8 +37,8 @@ class ShapeOp : public Operation {
 
     auto has_df = Operation::GetOptionalArg(
         "has_data_format", 0);
-    if (D == DeviceType::CPU &&
-        has_df && input->dim_size() == 4) {
+    if (has_df && input->data_format() == DataFormat::NCHW &&
+        input->dim_size() == 4) {
       // transpose NCHW to NHWC for cpu runtime
       output_data[0] = static_cast(input->dim(0));
       output_data[1] = static_cast(input->dim(2));