未验证 提交 fb3dbccc 编写于 作者: Yuang Liu 提交者: GitHub

no value clip for parallel cross entropy (#53547) (#53709)

上级 16f69e7a
...@@ -188,8 +188,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> { ...@@ -188,8 +188,7 @@ struct CSoftmaxWithCrossEntropyFunctor<phi::GPUContext, T> {
eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.device(*dev_ctx.eigen_device()) =
(eigen_logits - (eigen_logits -
eigen_logits_max.reshape(batch_by_one).broadcast(one_by_class)) eigen_logits_max.reshape(batch_by_one).broadcast(one_by_class));
.unaryExpr(phi::funcs::ValueClip<T>());
// step 3, obtain predict target // step 3, obtain predict target
phi::DenseTensor predicted_logits; phi::DenseTensor predicted_logits;
...@@ -346,8 +345,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> { ...@@ -346,8 +345,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor<phi::GPUContext, T> {
eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.device(*dev_ctx.eigen_device()) =
(eigen_logits - (eigen_logits -
eigen_logits_max.reshape(batch_by_one).broadcast(one_by_class)) eigen_logits_max.reshape(batch_by_one).broadcast(one_by_class));
.unaryExpr(phi::funcs::ValueClip<T>());
// step 3, obtain predict target // step 3, obtain predict target
phi::DenseTensor predicted_logits; phi::DenseTensor predicted_logits;
......
...@@ -26,7 +26,7 @@ def stable_softmax(x): ...@@ -26,7 +26,7 @@ def stable_softmax(x):
"""Compute the softmax of vector x in a numerically stable way.""" """Compute the softmax of vector x in a numerically stable way."""
# clip to shiftx, otherwise, when calc loss with # clip to shiftx, otherwise, when calc loss with
# log(exp(shiftx)), may get log(0)=INF # log(exp(shiftx)), may get log(0)=INF
shiftx = (x - np.max(x)).clip(-64.0) shiftx = x - np.max(x)
exps = np.exp(shiftx) exps = np.exp(shiftx)
return exps / np.sum(exps) return exps / np.sum(exps)
...@@ -88,13 +88,13 @@ class TestCSoftmaxWithCrossEntropy(unittest.TestCase): ...@@ -88,13 +88,13 @@ class TestCSoftmaxWithCrossEntropy(unittest.TestCase):
# get input data for rank 0 # get input data for rank 0
np.random.seed(0) np.random.seed(0)
input0 = np.random.uniform( input0 = np.random.uniform(
low=-10.0, high=10.0, size=(self.batch_size, local_elements) low=-40.0, high=40.0, size=(self.batch_size, local_elements)
).astype(data_type) ).astype(data_type)
# get input data for rank 1 # get input data for rank 1
np.random.seed(1) np.random.seed(1)
input1 = np.random.uniform( input1 = np.random.uniform(
low=-10.0, high=10.0, size=(self.batch_size, local_elements) low=-40.0, high=40.0, size=(self.batch_size, local_elements)
).astype(data_type) ).astype(data_type)
# get combined input data # get combined input data
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册