diff --git a/paddle/fluid/operators/math/cpu_lstm_compute.cc b/paddle/fluid/operators/math/cpu_lstm_compute.cc index f7c55c215bacdafc99da5fcd0b750a058dfed21c..58e6512021203664573a0478dade052f92dd70bb 100644 --- a/paddle/fluid/operators/math/cpu_lstm_compute.cc +++ b/paddle/fluid/operators/math/cpu_lstm_compute.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,76 +10,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/cpu_lstm_compute.h" -#include "paddle/fluid/operators/math/cpu_vec.h" -#include "paddle/fluid/platform/cpu_info.h" -#ifdef __AVX__ -#include -#endif namespace paddle { namespace operators { -namespace math { - -// TODO(TJ): ugly workaround, clean me -template -void lstm_compute_ctht(T* gates, const T* ct_1, T* ct, T* ht) { - // gates: W_ch, W_ih, W_fh, W_oh - vec_sigmoid(24, gates + 8, gates + 8); - vec_tanh(8, gates, gates); - const T *i = gates + 8, *f = gates + 16, *o = gates + 24; - const T min = SIGMOID_THRESHOLD_MIN; - const T max = SIGMOID_THRESHOLD_MAX; - for (int d = 0; d < 8; ++d) { - // C_t = C_t-1 * fgated + cand_gated * igated - ct[d] = ct_1[d] * f[d] + gates[d] * i[d]; - // H_t = act_cell(C_t) * ogated - T tmp = ct[d] * 2; - tmp = static_cast(0) - ((tmp < min) ? min : ((tmp > max) ? max : tmp)); - vec_exp(1, &tmp, &tmp); - tmp = static_cast(2) / (static_cast(1) + tmp) - static_cast(1); - ht[d] = tmp * o[d]; - } -} - -#ifdef __AVX__ -namespace detail { -namespace forward { -namespace avx { -__m256 Sigmoid(const __m256 a); -__m256 Tanh(const __m256 a); -} // namespace avx -} // namespace forward -} // namespace detail - -template <> -void lstm_compute_ctht(float* gates, const float* ct_1, float* ct, - float* ht) { - namespace act = detail::forward::avx; - // gates: W_ch, W_ih, W_fh, W_oh - __m256 c, i, f, o; - c = _mm256_loadu_ps(gates); - i = _mm256_loadu_ps(gates + 8); - f = _mm256_loadu_ps(gates + 16); - o = _mm256_loadu_ps(gates + 24); - - /* C_t = C_t-1 * fgated + cand_gated * igated*/ - c = _mm256_mul_ps(act::Tanh(c), act::Sigmoid(i)); - i = _mm256_loadu_ps(ct_1); - f = _mm256_mul_ps(i, act::Sigmoid(f)); - f = _mm256_add_ps(c, f); - _mm256_storeu_ps(ct, f); - - /* H_t = act_cell(C_t) * ogated */ - o = _mm256_mul_ps(act::Tanh(f), act::Sigmoid(o)); - _mm256_storeu_ps(ht, o); -} -#endif - -template void lstm_compute_ctht(float* gates, const float* ct_1, - float* ct, float* ht); -template void lstm_compute_ctht(double* gates, const double* ct_1, - double* ct, double* ht); - -} // namespace math +namespace math {} // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/math/cpu_lstm_compute.h b/paddle/fluid/operators/math/cpu_lstm_compute.h index 244164f08c4bb70833a9bfc884982a4225945bf0..28b6f71729edf1b8cc5d610d76af78dea213313e 100644 --- a/paddle/fluid/operators/math/cpu_lstm_compute.h +++ b/paddle/fluid/operators/math/cpu_lstm_compute.h @@ -1,11 +1,8 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -14,6 +11,11 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/operators/math/cpu_vec.h" +#include "paddle/fluid/platform/cpu_info.h" +#ifdef __AVX__ +#include +#endif namespace paddle { namespace operators { @@ -21,7 +23,58 @@ namespace math { // TODO(TJ): ugly workaround, clean me template -void lstm_compute_ctht(T* gates, const T* ct_1, T* ct, T* ht); +void lstm_compute_ctht(T* gates, const T* ct_1, T* ct, T* ht) { + // gates: W_ch, W_ih, W_fh, W_oh + vec_sigmoid(24, gates + 8, gates + 8); + vec_tanh(8, gates, gates); + const T *i = gates + 8, *f = gates + 16, *o = gates + 24; + const T min = SIGMOID_THRESHOLD_MIN; + const T max = SIGMOID_THRESHOLD_MAX; + for (int d = 0; d < 8; ++d) { + // C_t = C_t-1 * fgated + cand_gated * igated + ct[d] = ct_1[d] * f[d] + gates[d] * i[d]; + // H_t = act_cell(C_t) * ogated + T tmp = ct[d] * 2; + tmp = static_cast(0) - ((tmp < min) ? min : ((tmp > max) ? max : tmp)); + vec_exp(1, &tmp, &tmp); + tmp = static_cast(2) / (static_cast(1) + tmp) - static_cast(1); + ht[d] = tmp * o[d]; + } +} + +#ifdef __AVX__ +namespace detail { +namespace forward { +namespace avx { +__m256 Sigmoid(const __m256 a); +__m256 Tanh(const __m256 a); +} // namespace avx +} // namespace forward +} // namespace detail + +template <> +void lstm_compute_ctht(float* gates, const float* ct_1, float* ct, + float* ht) { + namespace act = detail::forward::avx; + // gates: W_ch, W_ih, W_fh, W_oh + __m256 c, i, f, o; + c = _mm256_loadu_ps(gates); + i = _mm256_loadu_ps(gates + 8); + f = _mm256_loadu_ps(gates + 16); + o = _mm256_loadu_ps(gates + 24); + + /* C_t = C_t-1 * fgated + cand_gated * igated*/ + c = _mm256_mul_ps(act::Tanh(c), act::Sigmoid(i)); + i = _mm256_loadu_ps(ct_1); + f = _mm256_mul_ps(i, act::Sigmoid(f)); + f = _mm256_add_ps(c, f); + _mm256_storeu_ps(ct, f); + + /* H_t = act_cell(C_t) * ogated */ + o = _mm256_mul_ps(act::Tanh(f), act::Sigmoid(o)); + _mm256_storeu_ps(ht, o); +} +#endif } // namespace math } // namespace operators diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index 08579c7dd62ea6aea87b053345211914a6be6237..82e704169e4b828549dab4c47b0fa46d9afd8f7e 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -109,15 +109,20 @@ def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers): return t +from paddle.fluid.transpiler.details import op_to_code + + def operator_equal(a, b): + if op_to_code(a) != op_to_code(b): + raise ValueError("In operator_equal not equal\n") + for k, v in six.iteritems(a.__dict__): if isinstance(v, fluid.framework.Program) or \ isinstance(v, fluid.framework.Block): continue elif isinstance(v, core.OpDesc): - if v.serialize_to_string() != b.__dict__[k].serialize_to_string(): - raise ValueError("In operator_equal not equal:{0}\n".format(k)) + continue elif isinstance(v, collections.OrderedDict): v0 = sorted(list(six.iteritems(v)), key=lambda x: x[0]) diff --git a/python/paddle/fluid/transpiler/details/program_utils.py b/python/paddle/fluid/transpiler/details/program_utils.py index a83aa0f11eed9bfc1674d8d75dcfacc297f056b0..200175cfe87e24a53e1e229e41d1ff2a25fd66ec 100644 --- a/python/paddle/fluid/transpiler/details/program_utils.py +++ b/python/paddle/fluid/transpiler/details/program_utils.py @@ -113,27 +113,32 @@ def op_to_code(op): inputs_str += ", " inputs_str += "}" + attr_names = sorted(op.attr_names) attrs_str = "" - for i in range(0, len(op.attr_names)): - name = op.attr_names[i] + for i in range(0, len(attr_names)): + name = attr_names[i] attr_type = op.desc.attr_type(name) if attr_type == core.AttrType.BLOCK: a = "{name} = block[{value}]".format( name=name, type=attr_type, value=op.block_attr_id(name)) attrs_str += a + if i != len(attr_names) - 1: + attrs_str += ", " continue if attr_type == core.AttrType.BLOCKS: a = "{name} = blocks{value}".format( name=name, type=attr_type, value=op.blocks_attr_ids(name)) attrs_str += a + if i != len(attr_names) - 1: + attrs_str += ", " continue a = "{name} = {value}".format( name=name, type=attr_type, value=op.desc.attr(name)) attrs_str += a - if i != len(op.attr_names) - 1: + if i != len(attr_names) - 1: attrs_str += ", " if outputs_str != "{}":