diff --git a/src/framework/scope.cpp b/src/framework/scope.cpp
index 026b1e6b72dcca081554152ea356204ba311b359..e60148f3c627aa96ed5aee4bdfd4a54d4b0cdc92 100644
--- a/src/framework/scope.cpp
+++ b/src/framework/scope.cpp
@@ -27,14 +27,20 @@ Scope &Scope::NewScope() const {
   return *kids_.back();
 }
 
+Variable *Scope::Var() {
+  auto *pvar = new Variable;
+  unnamed_vars_.push_back(pvar);
+  return pvar;
+}
+
 Variable *Scope::Var(const std::string &name) {
   auto *pvar = FindVarLocally(name);
   if (pvar != nullptr) {
     return pvar;
   }
   pvar = new Variable;
-  vars_[name] = pvar;
-  pvar->name_ = vars_.find(name)->first;
+  named_vars_[name] = pvar;
+  pvar->name_ = named_vars_.find(name)->first;
   return pvar;
 }
 
@@ -47,7 +53,7 @@ Variable *Scope::FindVar(const std::string &name) const {
 }
 
 const Scope *Scope::FindScope(const Variable *var) const {
-  for (auto &name_var : vars_) {
+  for (auto &name_var : named_vars_) {
     if (name_var.second == var) {
       return this;
     }
@@ -64,8 +70,8 @@ void Scope::DropKids() {
 
 std::vector<std::string> Scope::LocalVarNames() const {
   std::vector<std::string> known_vars;
-  known_vars.reserve(vars_.size());
-  for (auto &name_var : vars_) {
+  known_vars.reserve(named_vars_.size());
+  for (auto &name_var : named_vars_) {
     known_vars.emplace_back(name_var.first);
   }
   return known_vars;
@@ -79,10 +85,10 @@ void Scope::DeleteScope(Scope *scope) const {
 
 void Scope::EraseVars(const std::vector<std::string> &var_names) {
   std::set<std::string> var_set(var_names.begin(), var_names.end());
-  for (auto it = vars_.begin(); it != vars_.end();) {
+  for (auto it = named_vars_.begin(); it != named_vars_.end();) {
     if (var_set.find(it->first) != var_set.end()) {
       delete it->second;
-      it = vars_.erase(it);
+      it = named_vars_.erase(it);
     } else {
       ++it;
     }
@@ -91,21 +97,21 @@ void Scope::Rename(const std::string &origin_name,
                    const std::string &new_name) const {
-  auto origin_it = vars_.find(origin_name);
-  if (origin_it == vars_.end()) {
+  auto origin_it = named_vars_.find(origin_name);
+  if (origin_it == named_vars_.end()) {
     return;
   }
-  auto new_it = vars_.find(new_name);
-  if (new_it != vars_.end()) {
+  auto new_it = named_vars_.find(new_name);
+  if (new_it != named_vars_.end()) {
     return;
   }
-  vars_[new_name] = origin_it->second;
-  vars_.erase(origin_it);
+  named_vars_[new_name] = origin_it->second;
+  named_vars_.erase(origin_it);
 }
 
 Variable *Scope::FindVarLocally(const std::string &name) const {
-  auto it = vars_.find(name);
-  if (it != vars_.end()) {
+  auto it = named_vars_.find(name);
+  if (it != named_vars_.end()) {
     return it->second;
   }
   return nullptr;
@@ -122,7 +128,7 @@ std::vector<Variable *> Scope::VarContain(const std::string substring,
   int temp = 9999;
   auto len0 = substring.length();
-  for (auto pair : vars_) {
+  for (auto pair : named_vars_) {
     if (pair.first.find(substring) == 0) {
       v.push_back(pair.second);
       auto len1 = pair.first.length();
@@ -138,7 +144,7 @@ std::vector<Variable *> Scope::VarContain(const std::string substring,
 
 void Scope::print_vars() {
   DLOG << "====================start to print variables=================";
-  for (auto pair : vars_) {
+  for (auto pair : named_vars_) {
     DLOG << pair.first;
   }
   DLOG << "==================complete printing variables================";
diff --git a/src/framework/scope.h b/src/framework/scope.h
index f1495761c938d3d73e17d1ee01b6a6512ff8e6a8..47642cc3f1bff018dea3cfeb3936ede5b74f1206 100644
--- a/src/framework/scope.h
+++ b/src/framework/scope.h
@@ -32,15 +32,17 @@ class Scope {
   Scope() = default;
 
   ~Scope() {
-    for (auto &var : vars_) {
-      DropKids();
+    // clear named variables
+    for (auto &var : named_vars_) {
       delete var.second;
     }
-    vars_.clear();
-    for (auto kid : kids_) {
-      delete kid;
+    named_vars_.clear();
+    // clear unnamed variables
+    for (auto &var : unnamed_vars_) {
+      delete var;
     }
-    kids_.clear();
+    unnamed_vars_.clear();
+    DropKids();
 
 #ifdef PADDLE_MOBILE_CL
     delete cl_scope_;
@@ -49,12 +51,12 @@ class Scope {
 
   Scope &NewScope() const;
 
+  /// Create a variable without name if it doesn't exist.
+  Variable *Var();
+
   /// Create a variable with given name if it doesn't exist.
   Variable *Var(const std::string &name);
 
-  /// Create a variable with a scope-unique name.
-  Variable *Var(std::string *name = nullptr);
-
   void EraseVars(const std::vector<std::string> &var_names);
 
   /// Find a variable in the scope or any of its ancestors. Returns
@@ -98,7 +100,8 @@ class Scope {
   // Call Scope::NewScope for a sub-scope.
   explicit Scope(Scope const *parent) : parent_(parent) {}
 
-  mutable std::unordered_map<std::string, Variable *> vars_;
+  mutable std::unordered_map<std::string, Variable *> named_vars_;
+  mutable std::vector<Variable *> unnamed_vars_;
   mutable std::list<Scope *> kids_;
   Scope const *parent_{nullptr};
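
Note on the Scope change above: unnamed variables created through the new Scope::Var() overload are tracked in unnamed_vars_ and released in ~Scope(), so callers no longer own (or leak) the scratch tensors they previously allocated with a bare `new`. A minimal usage sketch under that contract (the variable names here are illustrative, not part of the patch):

    framework::Scope scope;
    // Named variable: registered in named_vars_, findable by FindVar later.
    framework::Variable *named = scope.Var("batch_norm_scale");
    // Unnamed scratch variable: owned by the scope, not findable by name.
    framework::Variable *scratch = scope.Var();
    auto *tensor = scratch->GetMutable<framework::LoDTensor>();
    // No manual delete: ~Scope() frees named_vars_, unnamed_vars_, then kids_.
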
diff --git a/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
index a6bee4ee3f8514144cf17736defd8e1daa80d9e8..e55ebc4ace6c6a465bba7d2ee9e2d06d87dea347 100644
--- a/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
@@ -45,13 +45,13 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
     inv_std_ptr[i] =
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
-  // Tensor *new_scale = new Tensor();
-  // Tensor *new_bias = new Tensor();
-  auto *new_scale = param->CreateNewScale<Tensor>();
-  auto *new_bias = param->CreateNewBiase<Tensor>();
-  auto new_scale_ptr = new_scale->mutable_data<float>({C});
-  auto new_bias_ptr = new_bias->mutable_data<float>({C});
+  Variable *scale_var = param->GetScope()->Var();
+  Variable *bias_var = param->GetScope()->Var();
+  LoDTensor *new_scale = scale_var->GetMutable<LoDTensor>();
+  LoDTensor *new_bias = bias_var->GetMutable<LoDTensor>();
+  float *new_scale_ptr = new_scale->mutable_data<float>({C});
+  float *new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
     new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
     new_bias_ptr[i] = bias_ptr[i] + (bias1_ptr[i] - mean_ptr[i]) *
diff --git a/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp b/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
index 1bfbfccae49b185fdc221f7208f350b16719e353..9a1b9e199a89c4bc9fcd195d2069808e754d16fb 100644
--- a/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
@@ -43,10 +43,12 @@ bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam<CPU> *param) {
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
 
-  auto *new_scale = param->CreateNewScale<Tensor>();
-  auto *new_bias = param->CreateNewBiase<Tensor>();
-  auto new_scale_ptr = new_scale->mutable_data<float>({C});
-  auto new_bias_ptr = new_bias->mutable_data<float>({C});
+  Variable *scale_var = param->GetScope()->Var();
+  Variable *bias_var = param->GetScope()->Var();
+  LoDTensor *new_scale = scale_var->GetMutable<LoDTensor>();
+  LoDTensor *new_bias = bias_var->GetMutable<LoDTensor>();
+  float *new_scale_ptr = new_scale->mutable_data<float>({C});
+  float *new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
     new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
     new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
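
The two kernels above fold batch normalization into the convolution output: y = gamma * (x - mean) / sqrt(var + eps) + beta is rewritten as y = new_scale * x + new_bias, with new_scale = gamma / sqrt(var + eps) and new_bias = beta - mean * new_scale (the conv+add variant additionally folds the elementwise-add bias, bias1, into new_bias). A self-contained sketch of that fold, with plain vectors standing in for the framework tensors:

    #include <cmath>
    #include <vector>

    // Fold BN parameters (gamma, beta, mean, var) into per-channel scale/bias.
    void FoldBatchNorm(const std::vector<float> &gamma,
                       const std::vector<float> &beta,
                       const std::vector<float> &mean,
                       const std::vector<float> &variance, float epsilon,
                       std::vector<float> *new_scale,
                       std::vector<float> *new_bias) {
      const size_t C = gamma.size();
      new_scale->resize(C);
      new_bias->resize(C);
      for (size_t i = 0; i < C; ++i) {
        const float inv_std = 1.f / std::sqrt(variance[i] + epsilon);
        (*new_scale)[i] = gamma[i] * inv_std;  // gamma / std
        (*new_bias)[i] =
            beta[i] - mean[i] * gamma[i] * inv_std;  // beta - mean * gamma / std
      }
    }
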
diff --git a/src/operators/kernel/arm/convolution/conv_common.cpp b/src/operators/kernel/arm/convolution/conv_common.cpp
index 3989dfe74ff2bc7edb7d0874009b365e7c0d6e81..7ae525be7efe1b23325e55c624a7db28506257fa 100644
--- a/src/operators/kernel/arm/convolution/conv_common.cpp
+++ b/src/operators/kernel/arm/convolution/conv_common.cpp
@@ -64,7 +64,9 @@ void InitBaseConvKernel(ConvParam<CPU> *param) {
     ) {
       param->ExecMode() = ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT;
       // transform weight
-      param->transformed_filter_ = new framework::LoDTensor;
+      Variable *transformed_var = param->GetScope()->Var();
+      param->transformed_filter_ =
+          transformed_var->GetMutable<framework::LoDTensor>();
       operators::math::winograd_transform_weight<8, 3>(
           *param->Filter(), param->transformed_filter_);
     } else if (conv3x3 && param->Groups() == 1 &&
diff --git a/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp b/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
index 2035ad4739e7b6cef9f60df6de5d6b5f0f2a2125..fa3a424a5e4ec5a253679e1dd9f6a2eb9797b20d 100644
--- a/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
@@ -42,10 +42,12 @@ bool DWConvBNReluKernel<CPU, float>::Init(FusionDWConvBNReluParam<CPU> *param) {
     inv_std_ptr[i] =
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
-  LoDTensor *new_scale = new LoDTensor();
-  LoDTensor *new_bias = new LoDTensor();
-  auto new_scale_ptr = new_scale->mutable_data<float>({C});
-  auto new_bias_ptr = new_bias->mutable_data<float>({C});
+  Variable *scale_var = param->GetScope()->Var();
+  Variable *bias_var = param->GetScope()->Var();
+  LoDTensor *new_scale = scale_var->GetMutable<LoDTensor>();
+  LoDTensor *new_bias = bias_var->GetMutable<LoDTensor>();
+  float *new_scale_ptr = new_scale->mutable_data<float>({C});
+  float *new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
     new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
     new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
diff --git a/src/operators/math/gemm/gemm_kernel.h b/src/operators/math/gemm/gemm_kernel.h
index a3c1eabf41fa7a325038824f7c518dd41a45b582..3742aa06b480458b67a3f39a38f1d1bbd4e36bdc 100644
--- a/src/operators/math/gemm/gemm_kernel.h
+++ b/src/operators/math/gemm/gemm_kernel.h
@@ -438,14 +438,14 @@ void sgemv_trans_mx1(const int M, const int N, const float alpha,
 #else
     const int tid = 0;
 #endif  // _OPENMP
-    register float *thread_buf_c = buf_c + tid * M;
-    register const float *in0 = A + n * lda;
-    register const float *in1 = in0 + lda;
-    register const float *in2 = in1 + lda;
-    register const float *in3 = in2 + lda;
-    register float32x4_t _b = vld1q_f32(B + n);
-    register float32x4_t _sum0;
-    register int m = 0;
+    float *thread_buf_c = buf_c + tid * M;
+    const float *in0 = A + n * lda;
+    const float *in1 = in0 + lda;
+    const float *in2 = in1 + lda;
+    const float *in3 = in2 + lda;
+    float32x4_t _b = vld1q_f32(B + n);
+    float32x4_t _sum0;
+    int m = 0;
     for (; m < M - 3; m += 4) {
       float32x4_t _r0 = vld1q_f32(in0 + m);
       float32x4_t _r1 = vld1q_f32(in1 + m);
@@ -495,11 +495,11 @@ void sgemv_trans_mx1(const int M, const int N, const float alpha,
 #else
     const int tid = 0;
 #endif  // _OPENMP
-    register float *thread_buf_c = buf_c + tid * M;
-    register const float *in0 = A + n * lda;
-    register float32x4_t _b = vld1q_dup_f32(B + n);
-    register float32x4_t _sum0;
-    register int m = 0;
+    float *thread_buf_c = buf_c + tid * M;
+    const float *in0 = A + n * lda;
+    float32x4_t _b = vld1q_dup_f32(B + n);
+    float32x4_t _sum0;
+    int m = 0;
     for (; m < M - 3; m += 4) {
       float32x4_t _r0 = vld1q_f32(in0 + m);
       float32x4_t _vbuff_c = vld1q_f32(thread_buf_c + m);
@@ -515,47 +515,39 @@ void sgemv_trans_mx1(const int M, const int N, const float alpha,
   // reduction operate for buf_c, sum to C and do left operations
   // y := alpha * A' * X + beta * y
   // reduction operate: sum multi-threadings result for over-all: A' * X
-  register float32x4_t _valpha = vdupq_n_f32(alpha);
+  float32x4_t _valpha = vdupq_n_f32(alpha);
   if (beta == 0.f) {
     #pragma omp parallel for
     for (int m = 0; m < M; m += 4) {
-      register float32x4_t _sum0 = vld1q_f32(buf_c + m);
+      float32x4_t _sum0 = vld1q_f32(buf_c + m);
       for (int tid = 1; tid < threads_num; ++tid) {
         _sum0 += vld1q_f32(buf_c + tid * M + m);
       }
       vst1q_f32(C + m, _sum0 * _valpha);
     }
-    #pragma omp parallel for
+
     for (int m = (M & 0xfffffffc); m < M; ++m) {
-      register float _sum0 = *(buf_c + m);
-      for (register int tid = 1; tid < threads_num; ++tid) {
+      float _sum0 = *(buf_c + m);
+      for (int tid = 1; tid < threads_num; ++tid) {
        _sum0 += *(buf_c + tid * M + m);
      }
      C[m] = _sum0 * alpha;
    }
  } else {  // beta != 0.f
-    register float32x4_t _vbeta = vdupq_n_f32(beta);
+    float32x4_t _vbeta = vdupq_n_f32(beta);
     #pragma omp parallel for
     for (int m = 0; m < M; m += 4) {
-      register float32x4_t _sum0 = vld1q_f32(buf_c + m);
-      for (register int tid = 1; tid < threads_num; ++tid) {
+      float32x4_t _sum0 = vld1q_f32(buf_c + m);
+      for (int tid = 1; tid < threads_num; ++tid) {
        _sum0 += vld1q_f32(buf_c + tid * M + m);
      }
      float32x4_t _vc = vld1q_f32(C + m);
      vst1q_f32(C + m, _sum0 * _valpha + _vbeta * _vc);
    }
-    #pragma omp parallel for
-    for (int m = (m & 0xfffffffc); m < M; ++m) {
-      register float _sum0 = *(buf_c + m);
-      for (register int tid = 1; tid < threads_num; ++tid) {
-        _sum0 += *(buf_c + tid * M + m);
-      }
-      C[m] = _sum0 * alpha + beta * C[m];
-    }
-    #pragma omp parallel for
-    for (int m = (m & 0xfffffffc); m < M; ++m) {
-      register float _sum0 = *(buf_c + m);
-      for (register int tid = 1; tid < threads_num; ++tid) {
+
+    for (int m = (M & 0xfffffffc); m < M; ++m) {
+      float _sum0 = *(buf_c + m);
+      for (int tid = 1; tid < threads_num; ++tid) {
         _sum0 += *(buf_c + tid * M + m);
       }
       C[m] = _sum0 * alpha + beta * C[m];
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 8b1231dd244d3cde8d49e25e2eb0c1aabc68391a..c10f86f9870638ac4f1541fdf1a11c569e4e08a0 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -76,27 +76,11 @@ struct DtypeTensorTrait {
 class OpParam {
  public:
   OpParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-          const AttributeMap &attrs, Scope *scope) {
-    scope_pointer_ = scope;
-    inputs_ = inputs;
-  }
-
-  template <typename T>
-  T *CreateNewScale() {
-    std::string scale_key = Getkey("Scale", inputs_, 0);
-    auto var = scope_pointer_->Var(scale_key + "_new");
-    return var->GetMutable<T>();
-  }
-
-  template <typename T>
-  T *CreateNewBiase() {
-    std::string biase_key = Getkey("Bias", inputs_, 0);
-    auto var = scope_pointer_->Var(biase_key + "_new");
-    return var->GetMutable<T>();
-  }
+          const AttributeMap &attrs, Scope *scope)
+      : scope_(scope) {}
 
-  VariableNameMap inputs_;
-  Scope *scope_pointer_ = nullptr;
+  Scope *GetScope() const { return scope_; }
+  Scope *scope_ = nullptr;
 
 #ifdef PADDLE_MOBILE_FPGA_KD
   zynqmp::Context &context() { return context_; }
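
Besides dropping `register` (deprecated since C++11 and removed in C++17), the sgemv_trans_mx1 hunks fix the scalar tail of the multi-threaded reduction: the old beta != 0 path duplicated the tail loop, started it at `m & 0xfffffffc` (reading the freshly declared loop variable before it was initialized), and wrapped it in its own `#pragma omp parallel for`. The corrected vector-body/scalar-tail pattern, shown in isolation (hypothetical helper; buf_c is assumed to hold threads_num partial results of length M each):

    // Reduce per-thread partial sums into C for the indices the NEON body
    // (which processes four floats at a time) could not cover.
    void ReduceScalarTail(const float *buf_c, int threads_num, int M,
                          float alpha, float beta, float *C) {
      for (int m = (M & 0xfffffffc); m < M; ++m) {  // M & ~3: first leftover
        float sum = buf_c[m];
        for (int tid = 1; tid < threads_num; ++tid) {
          sum += buf_c[tid * M + m];
        }
        C[m] = sum * alpha + beta * C[m];  // beta == 0 reduces to sum * alpha
      }
    }
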