未验证 提交 5b9c62fa 编写于 作者: T Tao Luo 提交者: GitHub

Revert "Softmax op optimization for inference "

上级 893c1b01
...@@ -19,10 +19,8 @@ namespace paddle { ...@@ -19,10 +19,8 @@ namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
template class SoftmaxFunctor<platform::CPUDeviceContext, float, true>; template class SoftmaxFunctor<platform::CPUDeviceContext, float>;
template class SoftmaxFunctor<platform::CPUDeviceContext, float, false>; template class SoftmaxFunctor<platform::CPUDeviceContext, double>;
template class SoftmaxFunctor<platform::CPUDeviceContext, double, true>;
template class SoftmaxFunctor<platform::CPUDeviceContext, double, false>;
template class SoftmaxGradFunctor<platform::CPUDeviceContext, float>; template class SoftmaxGradFunctor<platform::CPUDeviceContext, float>;
template class SoftmaxGradFunctor<platform::CPUDeviceContext, double>; template class SoftmaxGradFunctor<platform::CPUDeviceContext, double>;
......
...@@ -98,14 +98,9 @@ template class SoftmaxGradCUDNNFunctor<float>; ...@@ -98,14 +98,9 @@ template class SoftmaxGradCUDNNFunctor<float>;
template class SoftmaxGradCUDNNFunctor<double>; template class SoftmaxGradCUDNNFunctor<double>;
template class SoftmaxGradCUDNNFunctor<platform::float16>; template class SoftmaxGradCUDNNFunctor<platform::float16>;
template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16, template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16>;
false>; template class SoftmaxFunctor<platform::CUDADeviceContext, float>;
template class SoftmaxFunctor<platform::CUDADeviceContext, platform::float16, template class SoftmaxFunctor<platform::CUDADeviceContext, double>;
true>;
template class SoftmaxFunctor<platform::CUDADeviceContext, float, false>;
template class SoftmaxFunctor<platform::CUDADeviceContext, double, false>;
template class SoftmaxFunctor<platform::CUDADeviceContext, float, true>;
template class SoftmaxFunctor<platform::CUDADeviceContext, double, true>;
template class SoftmaxGradFunctor<platform::CUDADeviceContext, float>; template class SoftmaxGradFunctor<platform::CUDADeviceContext, float>;
template class SoftmaxGradFunctor<platform::CUDADeviceContext, double>; template class SoftmaxGradFunctor<platform::CUDADeviceContext, double>;
template class SoftmaxGradFunctor<platform::CUDADeviceContext, template class SoftmaxGradFunctor<platform::CUDADeviceContext,
......
...@@ -19,7 +19,7 @@ namespace paddle { ...@@ -19,7 +19,7 @@ namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
template <typename DeviceContext, typename T, bool is_test> template <typename DeviceContext, typename T>
class SoftmaxFunctor { class SoftmaxFunctor {
public: public:
void operator()(const DeviceContext& context, const framework::Tensor* X, void operator()(const DeviceContext& context, const framework::Tensor* X,
......
...@@ -32,9 +32,9 @@ struct ValueClip { ...@@ -32,9 +32,9 @@ struct ValueClip {
} }
}; };
template <typename DeviceContext, typename T, bool is_test> template <typename DeviceContext, typename T>
void SoftmaxFunctor<DeviceContext, T, is_test>::operator()( void SoftmaxFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
const DeviceContext& context, const framework::Tensor* X, const framework::Tensor* X,
framework::Tensor* Y) { framework::Tensor* Y) {
auto logits = EigenMatrix<T>::From(*X); auto logits = EigenMatrix<T>::From(*X);
auto softmax = EigenMatrix<T>::From(*Y); auto softmax = EigenMatrix<T>::From(*Y);
...@@ -65,39 +65,6 @@ void SoftmaxFunctor<DeviceContext, T, is_test>::operator()( ...@@ -65,39 +65,6 @@ void SoftmaxFunctor<DeviceContext, T, is_test>::operator()(
.broadcast(one_by_class)); .broadcast(one_by_class));
} }
template <typename DeviceContext, typename T>
class SoftmaxFunctor<DeviceContext, T, true> {
void operator()(const DeviceContext& context, const framework::Tensor* X,
framework::Tensor* Y) {
auto logits = EigenMatrix<T>::From(*X);
auto softmax = EigenMatrix<T>::From(*Y);
const int kBatchDim = 0;
const int kClassDim = 1;
const int batch_size = logits.dimension(kBatchDim);
const int num_classes = logits.dimension(kClassDim);
Eigen::DSizes<int, 1> along_class(kClassDim);
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
Eigen::DSizes<int, 2> one_by_class(1, num_classes);
auto shifted_logits = (logits -
logits.maximum(along_class)
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class));
softmax.device(*context.eigen_device()) = shifted_logits.exp();
softmax.device(*context.eigen_device()) = (softmax *
softmax.sum(along_class)
.inverse()
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class));
}
};
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
void SoftmaxGradFunctor<DeviceContext, T>::operator()( void SoftmaxGradFunctor<DeviceContext, T>::operator()(
const DeviceContext& context, const framework::Tensor* y, const DeviceContext& context, const framework::Tensor* y,
......
...@@ -35,14 +35,8 @@ class SoftmaxKernel : public framework::OpKernel<T> { ...@@ -35,14 +35,8 @@ class SoftmaxKernel : public framework::OpKernel<T> {
Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1); Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1);
Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1); Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
const bool is_test = context.Attr<bool>("is_test"); math::SoftmaxFunctor<DeviceContext, T>()(
if (is_test == true) {
math::SoftmaxFunctor<DeviceContext, T, true>()(
context.template device_context<DeviceContext>(), &X_2d, &Out_2d); context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
} else {
math::SoftmaxFunctor<DeviceContext, T, false>()(
context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
}
} }
}; };
......
...@@ -42,8 +42,8 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> { ...@@ -42,8 +42,8 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
auto& dev_ctx = auto& dev_ctx =
context.template device_context<platform::CPUDeviceContext>(); context.template device_context<platform::CPUDeviceContext>();
math::SoftmaxFunctor<platform::CPUDeviceContext, T, false>()( math::SoftmaxFunctor<platform::CPUDeviceContext, T>()(dev_ctx, logits,
dev_ctx, logits, softmax); softmax);
math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()( math::CrossEntropyFunctor<platform::CPUDeviceContext, T>()(
dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"), dev_ctx, loss, softmax, labels, context.Attr<bool>("soft_label"),
context.Attr<int>("ignore_index")); context.Attr<int>("ignore_index"));
......
...@@ -35,7 +35,6 @@ class TestSoftmaxOp(OpTest): ...@@ -35,7 +35,6 @@ class TestSoftmaxOp(OpTest):
self.op_type = "softmax" self.op_type = "softmax"
self.use_cudnn = False self.use_cudnn = False
self.use_mkldnn = False self.use_mkldnn = False
self.is_test = False
self.dtype = np.float32 self.dtype = np.float32
self.init_kernel_type() self.init_kernel_type()
self.shape = self.get_x_shape() self.shape = self.get_x_shape()
...@@ -49,8 +48,7 @@ class TestSoftmaxOp(OpTest): ...@@ -49,8 +48,7 @@ class TestSoftmaxOp(OpTest):
self.outputs = {'Out': out} self.outputs = {'Out': out}
self.attrs = { self.attrs = {
'use_cudnn': self.use_cudnn, 'use_cudnn': self.use_cudnn,
'use_mkldnn': self.use_mkldnn, 'use_mkldnn': self.use_mkldnn
'is_test': self.is_test
} }
def init_kernel_type(self): def init_kernel_type(self):
...@@ -146,11 +144,6 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp): ...@@ -146,11 +144,6 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp):
return [2, 3, 4, 5] return [2, 3, 4, 5]
class TestSoftmaxInference(TestSoftmaxOp):
def init_kernel_type(self):
self.is_test = True
class TestSoftmaxMKLDNNOp(TestSoftmaxOp): class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self): def init_kernel_type(self):
self.use_mkldnn = True self.use_mkldnn = True
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册