diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 95d7138381baec17d4969ef8b5287a1e70f2ac81..86926bec643ce2e8ed4d48d67e9f143c24b89cd6 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -44,87 +44,16 @@ namespace ir { GET_IR_NODE_FROM_SUBGRAPH(bn_saved_mean, bn_saved_mean, pattern_name); \ GET_IR_NODE_FROM_SUBGRAPH(bn_saved_variance, bn_saved_variance, pattern_name) -template -LoDTensor tensor_apply(const LoDTensor& vec, UnaryOperation f) { - LoDTensor vec_y; - vec_y.Resize(vec.dims()); - const float* x = vec.data(); - float* y = vec_y.mutable_data(platform::CPUPlace()); - for (int64_t i = 0; i < vec.numel(); i++) { - y[i] = f(x[i]); - } - return vec_y; -} - -void tensor_apply_inplace(LoDTensor* vec, float (*f)(float)) { - float* data = vec->mutable_data(platform::CPUPlace()); - for (int64_t i = 0; i < vec->numel(); i++) { - data[i] = f(data[i]); - } -} - -template -LoDTensor tensor_apply_eltwise(const LoDTensor& vec_a, const LoDTensor& vec_b, - BinaryOperation f) { - PADDLE_ENFORCE_EQ(vec_a.dims(), vec_b.dims()); - LoDTensor vec_y; - vec_y.Resize(vec_a.dims()); - const float* a = vec_a.data(); - const float* b = vec_b.data(); - float* y = vec_y.mutable_data(platform::CPUPlace()); - for (int64_t i = 0; i < vec_a.numel(); i++) { - y[i] = f(a[i], b[i]); - } - return vec_y; -} - -template -LoDTensor tensor_apply_eltwise_broadcast(const LoDTensor& vec_a, - const LoDTensor& vec_b, - BinaryOperation f) { - PADDLE_ENFORCE_EQ(vec_a.dims().size(), 2); - PADDLE_ENFORCE_EQ(vec_b.dims().size(), 2); - PADDLE_ENFORCE_EQ(vec_a.dims()[0], vec_b.dims()[0]); - PADDLE_ENFORCE_EQ(vec_b.dims()[1], 1); - LoDTensor vec_y; - vec_y.Resize(vec_a.dims()); - const float* a = vec_a.data(); - const float* b = vec_b.data(); - float* y = vec_y.mutable_data(platform::CPUPlace()); - size_t a_height = vec_a.dims()[0]; - size_t a_width = vec_a.dims()[1]; - for (size_t h = 0; h < a_height; h++) { - for (size_t w = 0; w < a_width; ++w) { - *(y++) = f(*(a++), b[h]); - } - } - return vec_y; -} - // reshape to two dimensions {A, B * C * ...} -void make_tensor_2d(LoDTensor* tensor_to_reshape) { - auto dims_count = tensor_to_reshape->dims().size(); +DDim make_dims_2d(DDim dims) { + auto dims_count = dims.size(); PADDLE_ENFORCE_GT(dims_count, 0); int size2 = 1; for (int i = 1; i < dims_count; i++) { - size2 *= tensor_to_reshape->dims()[i]; + size2 *= dims[i]; } - tensor_to_reshape->Resize(make_ddim({tensor_to_reshape->dims()[0], size2})); -} - -void recompute_conv_weights(LoDTensor* weights, LoDTensor* tmp) { - // remember the weights tensor shape {A, B, C, ...} - auto weights_shape = weights->dims(); - // reduce the weights to 2d {A, B * C * ...} - make_tensor_2d(weights); - // make tmp tensor 2d by adding 1 as second dim {A, 1} - make_tensor_2d(tmp); - - *weights = - tensor_apply_eltwise_broadcast(*weights, *tmp, std::multiplies()); - // reshape weights to the original dims {A, B, C, ...} - weights->Resize(weights_shape); + return make_ddim({dims[0], size2}); } void recompute_bias_and_weights(const Scope* scope, @@ -135,6 +64,13 @@ void recompute_bias_and_weights(const Scope* scope, const ir::Node& bn_variance, // LoDTensor* eltwise_y_in_tensor, // float epsilon) { + using EigenVectorArrayMap = + Eigen::Map>; + using ConstEigenVectorArrayMap = + Eigen::Map>; + using EigenMatrixArrayMap = Eigen::Map< + Eigen::Array>; + // Re-compute bias of conv2d from BN PADDLE_ENFORCE_EQ(eltwise_y_in_tensor->dims(), bn_bias_tensor.dims()); @@ -143,31 +79,38 @@ void recompute_bias_and_weights(const Scope* scope, scope->FindVar(bn_variance.Name())->GetMutable(); auto* mean_tensor = scope->FindVar(bn_mean.Name())->GetMutable(); - auto std_tensor = LoDTensor(); - std_tensor.Resize(bn_bias_tensor.dims()); - std_tensor = - tensor_apply(*variance_tensor, [&](float x) { return x + epsilon; }); + ConstEigenVectorArrayMap scale_array(scale_tensor->data(), + scale_tensor->numel(), 1); + EigenVectorArrayMap variance_array( + variance_tensor->mutable_data(platform::CPUPlace()), + variance_tensor->numel(), 1); + ConstEigenVectorArrayMap mean_array(mean_tensor->data(), + mean_tensor->numel(), 1); + ConstEigenVectorArrayMap bn_bias_array(bn_bias_tensor.data(), + bn_bias_tensor.numel(), 1); - using EigenVectorArrayMap = - Eigen::Map>; + // variance will not be used anymore, so make it std_array and then tmp_array + variance_array += epsilon; + variance_array = variance_array.sqrt(); + variance_array = scale_array / variance_array; + + EigenVectorArrayMap eltwise_y_in_array( + eltwise_y_in_tensor->mutable_data(platform::CPUPlace()), + eltwise_y_in_tensor->numel(), 1); - EigenVectorArrayMap std_vec( - std_tensor.mutable_data(platform::CPUPlace()), std_tensor.numel(), - 1); - std_vec = std_vec.sqrt(); - auto tmp_tensor = - tensor_apply_eltwise(*scale_tensor, std_tensor, std::divides()); - auto tensor_minus = tensor_apply_eltwise(*eltwise_y_in_tensor, *mean_tensor, - std::minus()); - auto tensor_mul = - tensor_apply_eltwise(tensor_minus, tmp_tensor, std::multiplies()); - *eltwise_y_in_tensor = - tensor_apply_eltwise(tensor_mul, bn_bias_tensor, std::plus()); + eltwise_y_in_array = + ((eltwise_y_in_array - mean_array) * variance_array) + bn_bias_array; // Re-compute weight of conv2d from BN - auto* current_param = - scope->FindVar(conv_weight->Name())->GetMutable(); - recompute_conv_weights(current_param, &tmp_tensor); + auto* weights = scope->FindVar(conv_weight->Name())->GetMutable(); + auto weights_shape = weights->dims(); + auto weights_shape_2d = make_dims_2d(weights_shape); + + EigenMatrixArrayMap weights_array_2d( + weights->mutable_data(platform::CPUPlace()), weights_shape_2d[0], + weights_shape_2d[1]); + + weights_array_2d.colwise() *= variance_array; } std::unique_ptr ConvBNFusePass::ApplyImpl(