From 8b15ac82fa831f95493c2bd218b93655db0d739e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 17:50:01 +0800 Subject: [PATCH] Move the definition of hl_cpu_gru_forward and hl_cpu_gru_backward to function/GruFunctor.h. --- /* NOTE(review) -- reviewer summary; the text in this comment is not part of the commit.
 * What the patch does, as far as this copy shows:
 *  * hl_cpu_gru.cuh: deletes the MathFunctions.h include, the commented-out
 *    CBLAS_GEMM macro block, and the hl_cpu_gru_forward / hl_cpu_gru_backward
 *    templates (every CBLAS_GEMM call inside them was already commented out).
 *  * GruFunctor.h (new file): adds GruFunctor::compute and
 *    GruGradFunctor::compute with the same step order, issuing the matrix
 *    products through BlasGemm::compute. Both bodies sit inside
 *    #ifndef __NVCC__, so they are empty when __NVCC__ is defined.
 *  * GruCompute.cpp: GruCompute::forward<0> and GruCompute::backward<0> now
 *    call the two functors instead of the deleted templates.
 *  * build_android.sh: removes the COMPILER / USE_EIGEN / SUFFIX variables;
 *    USE_EIGEN_FOR_BLAS becomes ON for armeabi-v7a and OFF for arm64-v8a
 *    (both previously received USE_EIGEN, which was set to ON -- confirm the
 *    arm64-v8a change is intended).
 * Review observations:
 *  * GruFunctor.h includes "GruFunctor.h", i.e. itself; with #pragma once
 *    this looks redundant -- candidate for removal in a follow-up commit.
 *  * In this copy the line breaks are collapsed and template parameter and
 *    argument lists appear to be missing (e.g. a bare "template" before
 *    "struct GruFunctor"); presumably extraction damage -- verify against
 *    the original commit before applying.
 */ paddle/cuda/include/hl_cpu_gru.cuh | 134 --------------------- paddle/function/GruFunctor.h | 160 +++++++++++++++++++++++++ paddle/gserver/layers/GruCompute.cpp | 32 ++--- paddle/scripts/docker/build_android.sh | 25 +--- 4 files changed, 181 insertions(+), 170 deletions(-) create mode 100644 paddle/function/GruFunctor.h diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index 732799a28b2..347b0385988 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -18,14 +18,6 @@ limitations under the License. */ #ifndef __NVCC__ -#include "paddle/math/MathFunctions.h" - -// #ifndef PADDLE_TYPE_DOUBLE -// #define CBLAS_GEMM paddle::gemm -// #else -// #define CBLAS_GEMM paddle::gemm -// #endif - template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, real *gateValue, @@ -210,51 +202,6 @@ inline void forward_final_output(OpFinalOutput opFinalOutput, } } -template -void hl_cpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// 2 * frameSize, -// frameSize, -// 1, -// value.prevOutValue, -// frameSize, -// value.gateWeight, -// frameSize * 2, -// 1, -// value.gateValue, -// frameSize * 3); - } - - forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); - - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// value.resetOutputValue, -// frameSize, -// value.stateWeight, -// frameSize, -// 1, -// value.gateValue + 
frameSize * 2, -// frameSize * 3); - } - - forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); -} - template void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, real *gateValue, @@ -524,87 +471,6 @@ inline void backward_reset_grad(OpResetGrad opResetGrad, } } } - -template -void hl_cpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - backward_state_grad(opStateGrad, value, grad, - frameSize, batchSize, active_node); - - if (value.prevOutValue && grad.prevOutGrad) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// value.stateWeight, -// frameSize, -// 0, -// grad.resetOutputGrad, -// frameSize); - - if (grad.stateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize, -// batchSize, -// 1, -// value.resetOutputValue, -// frameSize, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// 1, -// grad.stateWeightGrad, -// frameSize); - } - } - - backward_reset_grad(opResetGrad, value, grad, - frameSize, batchSize, active_gate); - - if (grad.prevOutGrad && value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize * 2, -// 1, -// grad.gateGrad, -// frameSize * 3, -// value.gateWeight, -// frameSize * 2, -// 1, -// grad.prevOutGrad, -// frameSize); - - if (grad.gateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize * 2, -// batchSize, -// 1, -// value.prevOutValue, -// frameSize, -// grad.gateGrad, -// frameSize * 3, -// 1, -// grad.gateWeightGrad, -// frameSize * 2); - } - } -} - #endif #endif // HL_CPU_GRU_CUH_ diff --git a/paddle/function/GruFunctor.h b/paddle/function/GruFunctor.h new file mode 100644 index 00000000000..11f6174dbd4 --- 
/dev/null +++ b/paddle/function/GruFunctor.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "GemmFunctor.h" +#include "GruFunctor.h" +#include "hl_cpu_gru.cuh" + +namespace paddle { + +template +struct GruFunctor { + template + static void compute(OpResetOutput opResetOutput, + OpFinalOutput opFinalOutput, + hl_gru_value value, + int frameSize, + int batchSize, + hl_activation_mode_t active_node, + hl_activation_mode_t active_gate) { +#ifndef __NVCC__ + if (value.prevOutValue) { + BlasGemm::compute(false, + false, + batchSize, + 2 * frameSize, + frameSize, + 1, + value.prevOutValue, + frameSize, + value.gateWeight, + frameSize * 2, + 1, + value.gateValue, + frameSize * 3); + } + + forward_reset_output( + opResetOutput, value, frameSize, batchSize, active_gate); + + if (value.prevOutValue) { + BlasGemm::compute(false, + false, + batchSize, + frameSize, + frameSize, + 1, + value.resetOutputValue, + frameSize, + value.stateWeight, + frameSize, + 1, + value.gateValue + frameSize * 2, + frameSize * 3); + } + + forward_final_output( + opFinalOutput, value, frameSize, batchSize, active_node); +#endif + } +}; + +template +struct GruGradFunctor { + template + static void compute(OpStateGrad opStateGrad, + OpResetGrad opResetGrad, + hl_gru_value value, + hl_gru_grad grad, + int frameSize, + int batchSize, + hl_activation_mode_t active_node, + hl_activation_mode_t active_gate) { 
+#ifndef __NVCC__ + backward_state_grad( + opStateGrad, value, grad, frameSize, batchSize, active_node); + + if (value.prevOutValue && grad.prevOutGrad) { + BlasGemm::compute(false, + true, + batchSize, + frameSize, + frameSize, + 1, + grad.gateGrad + frameSize * 2, + frameSize * 3, + value.stateWeight, + frameSize, + 0, + grad.resetOutputGrad, + frameSize); + + if (grad.stateWeightGrad) { + BlasGemm::compute(true, + false, + frameSize, + frameSize, + batchSize, + 1, + value.resetOutputValue, + frameSize, + grad.gateGrad + frameSize * 2, + frameSize * 3, + 1, + grad.stateWeightGrad, + frameSize); + } + } + + backward_reset_grad( + opResetGrad, value, grad, frameSize, batchSize, active_gate); + + if (grad.prevOutGrad && value.prevOutValue) { + BlasGemm::compute(false, + true, + batchSize, + frameSize, + frameSize * 2, + 1, + grad.gateGrad, + frameSize * 3, + value.gateWeight, + frameSize * 2, + 1, + grad.prevOutGrad, + frameSize); + + if (grad.gateWeightGrad) { + BlasGemm::compute(true, + false, + frameSize, + frameSize * 2, + batchSize, + 1, + value.prevOutValue, + frameSize, + grad.gateGrad, + frameSize * 3, + 1, + grad.gateWeightGrad, + frameSize * 2); + } + } +#endif + } +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/GruCompute.cpp b/paddle/gserver/layers/GruCompute.cpp index 06907768e98..148516391c6 100644 --- a/paddle/gserver/layers/GruCompute.cpp +++ b/paddle/gserver/layers/GruCompute.cpp @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "GruCompute.h" #include "hl_recurrent_apply.cuh" +#include "paddle/function/GruFunctor.h" #include "paddle/utils/Util.h" namespace paddle { @@ -25,13 +26,13 @@ void GruCompute::init(LayerConfig &config) { template <> void GruCompute::forward<0>(hl_gru_value value, int frameSize, int batchSize) { - hl_cpu_gru_forward(hppl::forward::gru_resetOutput(), - hppl::forward::gru_finalOutput(), - value, - frameSize, - batchSize, - activeNode_, - activeGate_); + GruFunctor::compute(hppl::forward::gru_resetOutput(), + hppl::forward::gru_finalOutput(), + value, + frameSize, + batchSize, + activeNode_, + activeGate_); } template <> @@ -39,14 +40,15 @@ void GruCompute::backward<0>(hl_gru_value value, hl_gru_grad grad, int frameSize, int batchSize) { - hl_cpu_gru_backward(hppl::backward::gru_stateGrad(), - hppl::backward::gru_resetGrad(), - value, - grad, - frameSize, - batchSize, - activeNode_, - activeGate_); + GruGradFunctor::compute( + hppl::backward::gru_stateGrad(), + hppl::backward::gru_resetGrad(), + value, + grad, + frameSize, + batchSize, + activeNode_, + activeGate_); } } // namespace paddle diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index a61c7c40e9b..34e31f13947 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,25 +2,8 @@ set -xe -COMPILER=gcc -USE_EIGEN=ON -if [ $COMPILER == clang ]; then - SUFFIX=_clang - C_COMPILER=clang - CXX_COMPILER=clang++ -else - SUFFIX=_gcc - C_COMPILER=gcc - CXX_COMPILER=g++ -fi -if [ $USE_EIGEN == ON ]; then - SUFFIX=${SUFFIX}_eigen -else - SUFFIX=${SUFFIX}_openblas -fi - -BUILD_ROOT=/paddle/build_android$SUFFIX -DEST_ROOT=/paddle/install$SUFFIX +BUILD_ROOT=/paddle/build_android +DEST_ROOT=/paddle/install rm -rf $BUILD_ROOT 2>/dev/null || true mkdir -p $BUILD_ROOT @@ -41,7 +24,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ - 
-DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ + -DUSE_EIGEN_FOR_BLAS=ON \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ -DWITH_STYLE_CHECK=OFF \ @@ -58,7 +41,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ - -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ + -DUSE_EIGEN_FOR_BLAS=OFF \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ .. -- GitLab