未验证 提交 f7386917 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #9740 from dzhwinter/memory/activation

"polish activation"
......@@ -163,8 +163,13 @@ function(op_library TARGET)
# pybind USE_OP
# A USE_OP(...) line is appended to ${pybind_file} only when pybind_flag is 0.
if (${pybind_flag} EQUAL 0)
# NOTE(*): activation uses a macro to register its kernels, so the generic
# USE_OP(${TARGET}) line is not emitted for it; USE_OP(relu) is written
# manually instead.
if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP(relu);\n")
else()
# Every other target gets a USE_OP entry named after the target itself.
file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
endif()
endif()
endfunction()
add_subdirectory(math)
......
......@@ -9,7 +9,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/float16.h"
......
......@@ -10,6 +10,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
......@@ -25,6 +28,16 @@ limitations under the License. */
namespace paddle {
namespace operators {
/* Names of the activation ops that may run in place, i.e. whose output may
   reuse the memory of the input X.
   This is a global variable because the Python layer side queries it;
   refer to layer_helper.py for the details.
*/
static std::unordered_set<std::string> InplaceOpSet = {
    "sigmoid", "exp",        "relu",  "tanh",      "sqrt",         "ceil",
    "floor",   "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
};

// Returns true iff `op` is listed in InplaceOpSet.
// The name is taken by const reference so a lookup never copies the string,
// and the count is compared explicitly instead of narrowing size_t to bool.
static bool IsInplace(const std::string& op) {
  return InplaceOpSet.count(op) != 0;
}
template <typename DeviceContext, typename Functor>
class ActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
......@@ -60,7 +73,6 @@ class ActivationGradKernel
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Out = context.Input<framework::Tensor>("Out");
auto* dOut =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
......@@ -68,7 +80,6 @@ class ActivationGradKernel
dX->mutable_data<T>(context.GetPlace());
auto dout = framework::EigenVector<T>::Flatten(*dOut);
auto x = framework::EigenVector<T>::Flatten(*X);
auto out = framework::EigenVector<T>::Flatten(*Out);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto* place =
......@@ -78,7 +89,16 @@ class ActivationGradKernel
for (auto& attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
bool inplace = functor.Inplace();
if (!inplace) {
auto* X = context.Input<framework::Tensor>("X");
auto x = framework::EigenVector<T>::Flatten(*X);
functor(*place, x, out, dout, dx);
} else {
VLOG(10) << " Inplace activation ";
auto x = framework::EigenVector<T>::Flatten(*dX);
functor(*place, x, out, dout, dx);
}
}
};
......@@ -89,6 +109,14 @@ struct BaseActivationFunctor {
// Each entry pairs an attribute name with a pointer to the float that
// receives that attribute's value.
using AttrPair = std::vector<std::pair<const char*, float*>>;

// The base functor declares no attributes, so it reports an empty list.
AttrPair GetAttrs() {
  AttrPair no_attrs;
  return no_attrs;
}
/* NOTE(*): The output may reuse X's memory if X is not needed by the
gradient. For example, the sigmoid op's gradient does not involve x, so its
output can reuse the input memory. The abs op's gradient uses x, so abs
cannot be computed in place.
The default is "not in place"; gradient functors that support it declare
their own Inplace().
*/
bool Inplace() const { return false; }
};
// sigmoid(x) = 1 / (1 + exp(-x))
......@@ -102,6 +130,7 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
// True when "sigmoid" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("sigmoid");
  return registered;
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -156,6 +185,7 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
// True when "exp" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("exp");
  return registered;
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -174,10 +204,11 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
// Gradient of relu: dx = dout where the forward output is positive, 0
// elsewhere.
template <typename T>
struct ReluGradFunctor : public BaseActivationFunctor<T> {
  // True when "relu" is registered in InplaceOpSet.
  bool Inplace() const { return IsInplace("relu"); }

  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // Only `out` is read. On the in-place path the kernel passes dX's buffer
    // as `x`, so the former `x > 0` assignment (which was overwritten by this
    // one anyway) has been removed.
    dx.device(d) = dout * (out > static_cast<T>(0)).template cast<T>();
  }
};
......@@ -192,6 +223,7 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct TanhGradFunctor : public BaseActivationFunctor<T> {
// True when "tanh" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("tanh");
  return registered;
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -297,6 +329,7 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
// True when "sqrt" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("sqrt");
  return registered;
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -316,10 +349,11 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
// Gradient functor that always produces zero.
template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
  // True when "ceil" is registered in InplaceOpSet.
  // NOTE(review): the name is hard-coded to "ceil"; if this functor is shared
  // by other ops, confirm they are meant to follow ceil's setting.
  bool Inplace() const { return IsInplace("ceil"); }

  template <typename Device, typename X, typename Out, typename dOut,
            typename dX>
  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
    // Multiply rather than divide: 0 / out is NaN wherever out == 0, whereas
    // 0 * out is 0 for every finite out. Only `out` is read, because on the
    // in-place path the kernel passes dX's buffer as `x`; the former
    // assignment based on `x` was overwritten by this one anyway.
    dx.device(d) = static_cast<T>(0) * out;
  }
};
......@@ -432,6 +466,7 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
// True when "reciprocal" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("reciprocal");
  return registered;
}
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -531,11 +566,13 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
// Lists the single attribute of this functor: "threshold", stored into the
// `threshold` member.
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
  typename BaseActivationFunctor<T>::AttrPair attrs;
  attrs.emplace_back("threshold", &threshold);
  return attrs;
}
// True when "relu6" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("relu6");
  return registered;
}
// Gradient of relu6: dx = dout where 0 < out < threshold, 0 elsewhere.
// Only `out` is read. A leftover, unterminated assignment based on `x` used
// to precede this statement and has been removed; on the in-place path the
// kernel passes dX's buffer as `x`, so `x` must not be consulted here.
template <typename Device, typename X, typename Out, typename dOut,
          typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
  dx.device(d) =
      dout *
      ((out > static_cast<T>(0)) * (out < static_cast<T>(threshold)))
          .template cast<T>();
}
};
......@@ -611,11 +648,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
// Lists the single attribute of this functor: "threshold", stored into the
// `threshold` member.
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
  typename BaseActivationFunctor<T>::AttrPair attrs;
  attrs.emplace_back("threshold", &threshold);
  return attrs;
}
// True when "soft_relu" is registered in InplaceOpSet.
bool Inplace() const {
  const bool registered = IsInplace("soft_relu");
  return registered;
}
// Gradient of soft_relu: dx = dout * (1 - exp(-out)) where
// -threshold < out < threshold, 0 elsewhere.
// `temp` was previously declared twice (once from `x`, once from `out`),
// which is a redeclaration error. Only the `out`-based mask is kept, because
// on the in-place path the kernel passes dX's buffer as `x`.
template <typename Device, typename X, typename Out, typename dOut,
          typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
  auto tmp = static_cast<T>(threshold);
  // 1 inside the open interval (-threshold, threshold), 0 outside.
  auto temp = ((out > -tmp) * (out < tmp)).template cast<T>().eval();
  dx.device(d) = dout * (static_cast<T>(1) - (-out).exp()) * temp;
}
};
......@@ -791,7 +829,7 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
// Lists this functor's attributes in order: "slope" then "offset", each
// stored into the member of the same name.
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
  typename BaseActivationFunctor<T>::AttrPair attrs;
  attrs.emplace_back("slope", &slope);
  attrs.emplace_back("offset", &offset);
  return attrs;
}
bool Inplace() { return IsInplace("hard_sigmoid"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......
......@@ -33,6 +33,7 @@ limitations under the License. */
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -461,6 +462,9 @@ All parameter, weight, gradient are variables in Paddle.
self.back().set_lod(t.lod());
});
// Expose operators::IsInplace to Python as "IsInplace": takes an op type
// name and returns whether that op is registered as in-place capable.
m.def("IsInplace",
[](std::string op) -> bool { return operators::IsInplace(op); });
m.def("op_support_gpu", OpSupportGPU);
#ifdef PADDLE_WITH_CUDA
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
......
......@@ -19,6 +19,7 @@ from framework import Variable, Parameter, default_main_program, default_startup
import unique_name
from paddle.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr, WeightNormParamAttr
import core
class LayerHelper(object):
......@@ -398,13 +399,16 @@ class LayerHelper(object):
return input_var
if isinstance(act, basestring):
act = {'type': act}
tmp = self.create_tmp_variable(dtype=input_var.dtype)
if 'use_mkldnn' in self.kwargs:
act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
act_type = act.pop('type')
if 'use_mkldnn' in self.kwargs:
act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
tmp = input_var
# NOTE(dzhwinter): some activations support in-place computation.
if not core.IsInplace(act_type):
tmp = self.create_tmp_variable(dtype=input_var.dtype)
self.append_op(
type=act_type,
inputs={"X": [input_var]},
......
......@@ -361,10 +361,7 @@ class TestCeil(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)
# The same reason as in TestFloor.
def init_dtype(self):
pass
......@@ -396,10 +393,8 @@ class TestFloor(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)
# The gradient of floor, ceil and round is undefined.
# We return zero as the gradient, but numpy returns nan.
def init_dtype(self):
pass
......@@ -501,11 +496,6 @@ class TestRound(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
self.check_grad(['X'], 'Out', max_relative_error=0.007)
def init_dtype(self):
pass
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册