diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc
index 87fda12ea631b9d9e40962aea2b744983566b733..14985a3f74aa234d22a774dee3c9b46c75d24d8f 100644
--- a/paddle/fluid/operators/attention_lstm_op.cc
+++ b/paddle/fluid/operators/attention_lstm_op.cc
@@ -15,9 +15,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/attention_lstm_op.h"
 #include <string>
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/operators/math/fc_compute.h"
-// #include "paddle/fluid/operators/math/detail/activation_functions.h"
-// #include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/fluid/platform/cpu_info.h"
 
 namespace paddle {
 namespace operators {
@@ -230,13 +230,6 @@ use lstm_x_t as input and compute as standard LSTM.
 )DOC");
 }
 
-template <typename T>
-inline void vec_relu(const int n, const T* x, T* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = x[i] > 0 ? x[i] : 0;
-  }
-}
-
 // y[i] = (x[i] + bias[0]) > 0 ? (x[i] + bias[0]) : 0;
 template <typename T>
 inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
@@ -244,9 +237,9 @@ inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
     for (int i = 0; i < n; ++i) {
       y[i] = x[i] + bias[0];
     }
-    vec_relu(n, y, y);
+    math::vec_relu<T>(n, y, y);
   } else {
-    vec_relu(n, x, y);
+    math::vec_relu<T>(n, x, y);
   }
 }
@@ -277,37 +270,6 @@ inline void vec_softmax(const math::BlasT<DeviceContext, T>& blas, const int n,
   blas.SCAL(n, static_cast<T>(1) / scalar, y);
 }
 
-#define SIGMOID_THRESHOLD_MIN -40.0
-#define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0
-
-template <typename T>
-inline T sigmoid(T x) {
-  return 1. / (1. + exp(-x));
-}
-
-template <typename T>
-inline T tanh(T x) {
-  return 2. * sigmoid(2. * x) - 1.;
-}
-
-template <typename T>
-inline void vec_sigmoid(const int n, const T* x, T* y) {
-  const T min = SIGMOID_THRESHOLD_MIN;
-  const T max = SIGMOID_THRESHOLD_MAX;
-  for (int i = 0; i < n; ++i) {
-    T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = 1.0 / (1.0 + std::exp(-tmp));
-  }
-}
-
-template <typename T>
-inline void vec_tanh(const int n, const T* x, T* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = tanh(x[i]);
-  }
-}
-
 template <typename T>
 class AttentionLSTMKernel : public framework::OpKernel<T> {
  public:
@@ -351,6 +313,10 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
     fc_out->Resize({max_seq_len, 1});
 
     // TODO(TJ): act functor init here
+    // if (platform::jit::MayIUse(platform::jit::avx2)) {
+    // } else if (platform::jit::MayIUse(platform::jit::avx)) {
+    // } else {
+    // }
 
     const T* x_data = x->data<T>();
     const T* h0_data = h0->data<T>();
@@ -418,9 +384,9 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
       blas.VADD(D4, lstm_b_data, lstm_out_data, lstm_out_data);
 
       // gate act: sigmoid
-      vec_sigmoid(D3, lstm_out_data, lstm_out_data);
+      math::vec_sigmoid<T>(D3, lstm_out_data, lstm_out_data);
       // candidate act: tanh
-      vec_tanh(D, lstm_out_data + D3, lstm_out_data + D3);
+      math::vec_tanh<T>(D, lstm_out_data + D3, lstm_out_data + D3);
 
       // a = forget * prev_cell
       blas.VMUL(D, lstm_out_data, prev_cell_data, lstm_out_data);
@@ -432,7 +398,7 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
       blas.VADD(D, lstm_out_data, lstm_out_data + D, cur_cell_out_data);
 
       // state act tanh(cell_out) * output_gate
-      vec_tanh(D, cur_cell_out_data, lstm_out_data);
+      math::vec_tanh<T>(D, cur_cell_out_data, lstm_out_data);
       blas.VMUL(D, lstm_out_data, lstm_out_data + D2, cur_hidden_out_data);
 
       prev_hidden_data = cur_hidden_out_data;
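Note: the commented-out block under "// TODO(TJ): act functor init here" sketches the intended runtime ISA dispatch. A minimal illustration of how that dispatch could look inside the kernel once vectorized implementations exist; this is a hypothetical sketch, not part of the patch, and "act_gate_fn" is an illustrative name rather than an identifier from the diff:

// Hypothetical dispatch sketch: pick a vec_sigmoid instantiation based on
// what the CPU supports at runtime (act_gate_fn is an invented name).
void (*act_gate_fn)(const int, const float*, float*) = nullptr;
if (platform::jit::MayIUse(platform::jit::avx2)) {
  act_gate_fn = math::vec_sigmoid<float, platform::jit::avx2>;
} else if (platform::jit::MayIUse(platform::jit::avx)) {
  act_gate_fn = math::vec_sigmoid<float, platform::jit::avx>;
} else {
  act_gate_fn = math::vec_sigmoid<float>;  // isa template param defaults to isa_any
}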
diff --git a/paddle/fluid/operators/math/cpu_vec.h b/paddle/fluid/operators/math/cpu_vec.h
new file mode 100644
index 0000000000000000000000000000000000000000..29476fce7095a4db9f37b562b4c4bd36538a4077
--- /dev/null
+++ b/paddle/fluid/operators/math/cpu_vec.h
@@ -0,0 +1,81 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/platform/cpu_info.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+#define SIGMOID_THRESHOLD_MIN -40.0
+#define SIGMOID_THRESHOLD_MAX 13.0
+#define EXP_MAX_INPUT 40.0
+
+template <typename T>
+inline T sigmoid(T x) {
+  return 1. / (1. + exp(-x));
+}
+
+template <typename T>
+inline T tanh(T x) {
+  return 2. * sigmoid(2. * x) - 1.;
+}
+
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_sigmoid(const int n, const T* x, T* y) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  for (int i = 0; i < n; ++i) {
+    T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
+    y[i] = 1.0 / (1.0 + std::exp(-tmp));
+  }
+}
+
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_tanh(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = tanh<T>(x[i]);
+  }
+}
+
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_relu(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+template <>
+inline void vec_relu<float, platform::jit::avx>(const int n, const float* x,
+                                                float* y) {
+  // TODO(TJ): complete me
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+template <>
+inline void vec_relu<float, platform::jit::avx2>(const int n, const float* x,
+                                                 float* y) {
+  // TODO(TJ): complete me
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index 7d53a684d6068c79659719159696ef5aebfeaa2b..79a924434ba68bd4607e50f9e3272ad17dec5e74 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -112,6 +112,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
   switch (cpu_isa) {
     case sse42:
       return cpu.has(Cpu::tSSE42);
+    case avx:
+      return cpu.has(Cpu::tAVX);
     case avx2:
       return cpu.has(Cpu::tAVX2);
     case avx512_common:
diff --git a/paddle/fluid/platform/cpu_info.h b/paddle/fluid/platform/cpu_info.h
index f5f67667594f1ab80058533e4c5d5b04c2592b60..2baa21c1bd142c77b3fb84786eec30abe1a4a8df 100644
--- a/paddle/fluid/platform/cpu_info.h
+++ b/paddle/fluid/platform/cpu_info.h
@@ -43,6 +43,7 @@ namespace jit {
 typedef enum {
   isa_any,
   sse42,
+  avx,
   avx2,
   avx512_common,
   avx512_core,
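Usage sketch (illustrative, not part of the patch): the new helpers can be called with just an element type, falling back to the generic isa_any path, or pinned to a specific ISA after a MayIUse check. The main function and buffers below are assumptions made for the example.

#include <vector>
#include "paddle/fluid/operators/math/cpu_vec.h"

int main() {
  namespace jit = paddle::platform::jit;
  namespace math = paddle::operators::math;

  std::vector<float> x = {-1.0f, 0.5f, 2.0f};
  std::vector<float> y(x.size());
  const int n = static_cast<int>(x.size());

  // Generic path: the isa template parameter defaults to isa_any.
  math::vec_relu<float>(n, x.data(), y.data());

  // Explicit AVX specialization, guarded by a runtime CPU-feature check.
  if (jit::MayIUse(jit::avx)) {
    math::vec_relu<float, jit::avx>(n, x.data(), y.data());
  }
  return 0;
}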