Unverified commit 7181afd7, authored by Leo Chen, committed by GitHub

Fix elementwise_pow bug on CUDA place with integer (#21675)

* fix elementwise_pow bug on integer, test=develop

* use llrint to support elementwise_pow_grad, test=develop

* add some tests, test=develop

* revert grad functor, test=develop
Parent 68999b6c
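For context, the user-visible behavior can be reproduced in dygraph mode with integer inputs. A minimal sketch (not part of the commit; it reuses the inputs from the test added below and requires a CUDA build — before this fix, a result such as 3 ** 1 could come back as 2 because the float pow result is truncated when cast back to int):

import numpy as np
import paddle.fluid as fluid

# Integer inputs, the same values used by TestElementwisePowOpInt below.
x_np = np.asarray([1, 3, 6])
y_np = np.asarray([1, 1, 1])

with fluid.dygraph.guard(fluid.CUDAPlace(0)):
    x = fluid.dygraph.to_variable(x_np)
    y = fluid.dygraph.to_variable(y_np)
    out = x ** y  # dispatches to elementwise_pow
    # Expected [1 3 6]; before the fix the CUDA kernel could truncate the
    # float pow result and return a smaller integer (e.g. 2 instead of 3).
    print(out.numpy())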
@@ -12,6 +12,7 @@ limitations under the License. */
#pragma once
#include <cmath>
#include <type_traits>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
@@ -20,7 +21,18 @@ namespace operators {
template <typename T>
struct PowFunctor {
-  inline HOSTDEVICE T operator()(T a, T b) const { return std::pow(a, b); }
+  inline HOSTDEVICE T operator()(T a, T b) const {
+#ifdef __CUDA_ARCH__
+    // On CUDAPlace, std::pow(3, 1) calls pow(float, float), which may return
+    // a float slightly below 3 (e.g. 2.99...). The default cast to int
+    // truncates that to 2, which is wrong.
+    // Use llrint to round it to the nearest integer, which gives 3.
+    if (std::is_integral<T>::value) {
+      return std::llrint(std::pow(a, b));
+    }
+#endif
+    return std::pow(a, b);
+  }
};
template <typename DeviceContext, typename T>
...
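The key change is rounding instead of truncating when T is an integral type. A small, self-contained sketch (plain Python, not part of the patch) of why that matters; round() stands in here for the llrint call used in the functor:

# Stand-in for the slightly-low value CUDA's pow(float, float) can return
# for pow(3, 1), as described in the comment above (e.g. 2.99...).
approx = 2.999999

print(int(approx))    # 2 -- a plain cast to int truncates toward zero
print(round(approx))  # 3 -- round-to-nearest, the role std::llrint plays in the fix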
@@ -16,6 +16,7 @@ from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
class TestElementwisePowOp(OpTest):
@@ -114,5 +115,48 @@ class TestElementwisePowOp_broadcast_4(TestElementwisePowOp):
        self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])}
class TestElementwisePowOpInt(OpTest):
    def setUp(self):
        self.op_type = "elementwise_pow"
        self.inputs = {'X': np.asarray([1, 3, 6]), 'Y': np.asarray([1, 1, 1])}
        self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])}

    def test_check_output(self):
        self.check_output()
class TestElementwisePowGradOpInt(unittest.TestCase):
    def setUp(self):
        self.x = np.asarray([1, 3, 6])
        self.y = np.asarray([1, 1, 1])
        self.res = self.x**self.y
        # dout = 1
        self.grad_res = np.asarray([1, 1, 1])
        # dx = dout * y * pow(x, y-1)
        self.grad_x = self.grad_res * self.y * (self.x**(self.y - 1)).astype("int")
        # dy = dout * log(x) * pow(x, y)
        self.grad_y = (self.grad_res * np.log(self.x) * (self.x**self.y)).astype("int")
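        # With x = [1, 3, 6] and y = [1, 1, 1], the hand-computed expectations are:
        #   grad_x = 1 * 1 * x**0      = [1, 1, 1]
        #   grad_y = 1 * log(x) * x**1 = [0., 3.29..., 10.75...] -> astype("int") -> [0, 3, 10]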
        print(self.grad_res, self.grad_x, self.grad_y)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if fluid.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            with fluid.dygraph.guard(place):
                x = fluid.dygraph.to_variable(self.x, zero_copy=False)
                y = fluid.dygraph.to_variable(self.y, zero_copy=False)
                print(x, y)
                x.stop_gradient = False
                y.stop_gradient = False
                res = x**y
                res.backward()
                self.assertTrue(np.array_equal(res.gradient(), self.grad_res))
                self.assertTrue(np.array_equal(x.gradient(), self.grad_x))
                self.assertTrue(np.array_equal(y.gradient(), self.grad_y))


if __name__ == '__main__':
    unittest.main()