diff --git a/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc b/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc
old mode 100644
new mode 100755
index 6edd8c3e4de45ae1e1de58eb52b25f4fe5fddcc6..570da9a879fb61c79a37d6665716163c25c264ab
--- a/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc
+++ b/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc
@@ -356,7 +356,7 @@ void GpuCpuMapMatmulV2ToMulPass::ApplyImpl(ir::Graph* graph) const {
     size_t x_rank = x_shape.size();
     size_t y_rank = y_shape.size();
     flag = flag && x_rank >= 2 && y_rank == 2;
-
+    flag = flag && x_shape[x_rank - 1] == y_shape[0];
     if (flag) {
       if (!IsCompat(subgraph, g)) {
         LOG(WARNING) << "GpuCpuMapMatmulV2ToMulPass in op compat failed.";
diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc
index 4722a1fc9cc8a17ea3d18acedebb4c087687213e..610d6e1f48aadbc17edc41e329795457ac1ddabe 100644
--- a/paddle/fluid/operators/transpose_op.cc
+++ b/paddle/fluid/operators/transpose_op.cc
@@ -80,7 +80,7 @@ class TransposeOp : public framework::OperatorWithKernel {
 #ifdef PADDLE_WITH_MKLDNN
     // Here we need to match dims to paddle layout
     // as we are producing non-oneDNN result
-    if ((x_dims.size() >= 3) &&
+    if (ctx->IsRunMKLDNNKernel() && (x_dims.size() >= 3) &&
         (paddle::platform::MKLDNNDeviceContext::tls()
              .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC)) {
       auto dims = phi::vectorize(x_dims);