提交 004df46f 编写于 作者: X xuwei06

Make print_op able to show the value of bool tensor

And some minor fixes on comments.
上级 432d2b5d
...@@ -314,7 +314,6 @@ EIGEN_FUNCTOR(Div, EIGEN_DIV); ...@@ -314,7 +314,6 @@ EIGEN_FUNCTOR(Div, EIGEN_DIV);
template <typename DeviceContext, typename T, typename functor, template <typename DeviceContext, typename T, typename functor,
typename broadcastfunctor, typename broadcast2functor> typename broadcastfunctor, typename broadcast2functor>
void ElementwiseGradCompute(const framework::ExecutionContext& ctx, void ElementwiseGradCompute(const framework::ExecutionContext& ctx,
const framework::Tensor* x, const framework::Tensor* x,
const framework::Tensor* y, const framework::Tensor* y,
const framework::Tensor* out, const framework::Tensor* out,
......
...@@ -46,7 +46,7 @@ struct Formater { ...@@ -46,7 +46,7 @@ struct Formater {
} }
private: private:
void PrintMessage() { CLOG << std::time(nullptr) << "\t" << message; } void PrintMessage() { CLOG << std::time(nullptr) << "\t" << message << "\t"; }
void PrintName() { void PrintName() {
if (!name.empty()) { if (!name.empty()) {
CLOG << "Tensor[" << name << "]" << std::endl; CLOG << "Tensor[" << name << "]" << std::endl;
...@@ -85,15 +85,16 @@ struct Formater { ...@@ -85,15 +85,16 @@ struct Formater {
// print float // print float
if (dtype.hash_code() == typeid(float).hash_code()) { if (dtype.hash_code() == typeid(float).hash_code()) {
Display<float>(size); Display<float>(size);
} } else if (dtype.hash_code() == typeid(double).hash_code()) {
if (dtype.hash_code() == typeid(double).hash_code()) {
Display<double>(size); Display<double>(size);
} } else if (dtype.hash_code() == typeid(int).hash_code()) {
if (dtype.hash_code() == typeid(int).hash_code()) {
Display<int>(size); Display<int>(size);
} } else if (dtype.hash_code() == typeid(int64_t).hash_code()) {
if (dtype.hash_code() == typeid(int64_t).hash_code()) {
Display<int64_t>(size); Display<int64_t>(size);
} else if (dtype.hash_code() == typeid(bool).hash_code()) {
Display<bool>(size);
} else {
CLOG << "\tdata: unprintable type: " << dtype.name() << std::endl;
} }
} }
...@@ -182,6 +183,7 @@ class TensorPrintOp : public framework::OperatorBase { ...@@ -182,6 +183,7 @@ class TensorPrintOp : public framework::OperatorBase {
} }
Formater formater; Formater formater;
formater.message = Attr<std::string>("message");
if (Attr<bool>("print_tensor_name")) { if (Attr<bool>("print_tensor_name")) {
formater.name = printed_var_name; formater.name = printed_var_name;
} }
......
...@@ -174,7 +174,7 @@ def Print(input, ...@@ -174,7 +174,7 @@ def Print(input,
print_tensor_type (bool): Print the tensor type. print_tensor_type (bool): Print the tensor type.
print_tensor_shape (bool): Print the tensor shape. print_tensor_shape (bool): Print the tensor shape.
print_tensor_lod (bool): Print the tensor lod. print_tensor_lod (bool): Print the tensor lod.
print_phase (bool): Which phase to display, including 'forward', print_phase (str): Which phase to display, including 'forward',
'backward' and 'both'. If set to 'backward' or 'both', will 'backward' and 'both'. If set to 'backward' or 'both', will
print the gradients of input tensor. print the gradients of input tensor.
......
...@@ -1579,7 +1579,7 @@ def layer_norm(input, ...@@ -1579,7 +1579,7 @@ def layer_norm(input,
""" """
**Layer Normalization** **Layer Normalization**
Assume feature vectors exist on dimensions Assume feature vectors exist on dimensions
:attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics
along these dimensions for each feature vector :math:`a` with size along these dimensions for each feature vector :math:`a` with size
:math:`H`, then normalize each feature vector using the corresponding :math:`H`, then normalize each feature vector using the corresponding
...@@ -1600,13 +1600,13 @@ def layer_norm(input, ...@@ -1600,13 +1600,13 @@ def layer_norm(input,
Args: Args:
input(Variable): The input tensor variable. input(Variable): The input tensor variable.
scale(bool): Whether to learn the adaptive gain :math:`g` after scale(bool): Whether to learn the adaptive gain :math:`g` after
normalization. normalization.
shift(bool): Whether to learn the adaptive bias :math:`b` after shift(bool): Whether to learn the adaptive bias :math:`b` after
normalization. normalization.
begin_norm_axis(int): The normalization will be performed along begin_norm_axis(int): The normalization will be performed along
dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`. dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
epsilon(float): The small value added to the variance to prevent epsilon(float): The small value added to the variance to prevent
division by zero. division by zero.
param_attr(ParamAttr|None): The parameter attribute for the learnable param_attr(ParamAttr|None): The parameter attribute for the learnable
gain :math:`g`. gain :math:`g`.
...@@ -2070,7 +2070,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): ...@@ -2070,7 +2070,7 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
Tensor variable with a single element, otherwise must be in the Tensor variable with a single element, otherwise must be in the
range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
the dimension to reduce is :math:`rank + dim`. the dimension to reduce is :math:`rank + dim`.
keep_dim (bool): Whether to reserve the reduced dimension in the keep_dim (bool|False): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true. than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
...@@ -3098,33 +3098,33 @@ def multiplex(inputs, index): ...@@ -3098,33 +3098,33 @@ def multiplex(inputs, index):
def softmax_with_cross_entropy(logits, label, soft_label=False): def softmax_with_cross_entropy(logits, label, soft_label=False):
""" """
**Softmax With Cross Entropy Operator.** **Softmax With Cross Entropy Operator.**
Cross entropy loss with softmax is used as the output layer extensively. This Cross entropy loss with softmax is used as the output layer extensively. This
operator computes the softmax normalized values for each row of the input operator computes the softmax normalized values for each row of the input
tensor, after which cross-entropy loss is computed. This provides a more tensor, after which cross-entropy loss is computed. This provides a more
numerically stable gradient. numerically stable gradient.
Because this operator performs a softmax on logits internally, it expects Because this operator performs a softmax on logits internally, it expects
unscaled logits. This operator should not be used with the output of unscaled logits. This operator should not be used with the output of
softmax operator since that would produce incorrect results. softmax operator since that would produce incorrect results.
When the attribute soft_label is set false, this operator expects mutually When the attribute soft_label is set false, this operator expects mutually
exclusive hard labels, each sample in a batch is in exactly one class with a exclusive hard labels, each sample in a batch is in exactly one class with a
probability of 1.0. Each sample in the batch will have a single label. probability of 1.0. Each sample in the batch will have a single label.
The equation is as follows: The equation is as follows:
1) Hard label (one-hot label, so every sample has exactly one class) 1) Hard label (one-hot label, so every sample has exactly one class)
.. math:: .. math::
loss_j = -\\text{logit}_{label_j} + loss_j = -\\text{logit}_{label_j} +
\\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logit}_i)\\right), j = 1,..., K \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logit}_i)\\right), j = 1,..., K
2) Soft label (each sample can have a distribution over all classes) 2) Soft label (each sample can have a distribution over all classes)
.. math:: .. math::
loss_j = -\\sum_{i=0}^{K}\\text{label}_i loss_j = -\\sum_{i=0}^{K}\\text{label}_i
\\left(\\text{logit}_i - \\log\\left(\\sum_{i=0}^{K} \\left(\\text{logit}_i - \\log\\left(\\sum_{i=0}^{K}
\\exp(\\text{logit}_i)\\right)\\right), j = 1,...,K \\exp(\\text{logit}_i)\\right)\\right), j = 1,...,K
...@@ -3169,7 +3169,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -3169,7 +3169,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
The operator takes the first dimension of X and Y as batch size. The operator takes the first dimension of X and Y as batch size.
For each instance, it computes the smooth l1 loss element by element first For each instance, it computes the smooth l1 loss element by element first
and then sums all the losses. So the shape of Out is [batch_size, 1]. and then sums all the losses. So the shape of Out is [batch_size, 1].
Args: Args:
x (Variable): A tensor with rank at least 2. The input value of smooth x (Variable): A tensor with rank at least 2. The input value of smooth
l1 loss op with shape [batch_size, dim1, ..., dimN]. l1 loss op with shape [batch_size, dim1, ..., dimN].
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册