未验证 提交 64045c29 编写于 作者: T tensor-tang 提交者: GitHub

Merge pull request #11102 from mozga-intel/mozga-intel/Sum_mkldnn_layout

MKLDNN layout: Support for sum operator
...@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{}); framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]); VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]); sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]); WaitOnPlace(places[0]);
......
...@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}}, "sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); {{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, place); sum_op->Run(cur_scope, place);
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::CPUDeviceContext;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
using mkldnn::reorder;
using platform::to_void_cast;
/**
 * MKL-DNN kernel for the `sum` operator.
 *
 * Compute() dispatches on the type of the output variable "Out":
 *  - LoDTensor:      the inputs are added with an mkldnn::sum primitive.
 *  - SelectedRows:   no MKL-DNN path yet; rows are merged with
 *                    math::SelectedRowsAddTo on the CPU device context.
 *  - LoDTensorArray: no MKL-DNN path yet; tensors are accumulated
 *                    element-wise through Eigen.
 * Any other output type raises through PADDLE_THROW.
 *
 * The MKL-DNN memory descriptors are hard-coded to f32; this file registers
 * the kernel for T = float only.
 */
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  /**
   * Sums every variable of input "X" into output "Out".
   *
   * @param ctx execution context supplying the "X" inputs, the "Out" output,
   *            the place (must be a CPU place) and the device contexts.
   */
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");
    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();
    auto in_vars = ctx.MultiInputVar("X");

    const int N = in_vars.size();
    auto out_var = ctx.OutputVar("Out");
    // "In place" means the output variable is the very same object as X[0].
    bool in_place = out_var == in_vars[0];

    if (out_var->IsType<framework::LoDTensor>()) {
      LoDTensor* output = ctx.Output<LoDTensor>("Out");
      T* output_data = output->mutable_data<T>(ctx.GetPlace());

      std::vector<int> dst_tz = framework::vectorize2int(output->dims());
      auto src_tz = dst_tz;
      memory::format output_format{memory::format::format_undef};
      std::vector<float> scales;
      std::vector<memory::primitive_desc> srcs_mpd;
      std::vector<mkldnn::memory> srcs_mem;

      PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
                     "Input[0] must be LoDTensors");
      auto& input0 = in_vars[0]->Get<LoDTensor>();
      PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
                         input0.format() != memory::format::format_undef,
                     "Wrong layout/format for inputs[0]");

      // The format of X[0] is used for every source descriptor. A 4-D format
      // tag on a 1-D or 2-D tensor is replaced by the matching low-rank tag.
      memory::format input_format = input0.format();

      if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
                                 input_format == memory::format::nhwc)) {
        input_format = memory::format::x;
      }
      if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
                                 input_format == memory::format::nhwc)) {
        input_format = memory::format::nc;
      }

      // Every input, including X[0] when running in place, must be a source
      // of the sum primitive. The primitive writes into a scratch destination
      // that is reordered over the output afterwards, so it does NOT
      // accumulate onto the existing output contents; skipping X[0] here
      // would silently drop it from the result.
      for (int i = 0; i < N; i++) {
        PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
                       "all inputs must be all LoDTensors");
        auto& input = in_vars[i]->Get<LoDTensor>();
        PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
                           input.format() != memory::format::format_undef,
                       "Wrong layout/format for inputs");

        // Empty tensors contribute nothing to the sum.
        if (input.numel() == 0) {
          continue;
        }

        const T* input_data = input.data<T>();

        auto src_md =
            memory::desc(src_tz, memory::data_type::f32, input_format);
        auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
        auto src_mem = memory(src_mpd, to_void_cast(input_data));
        srcs_mpd.push_back(src_mpd);
        srcs_mem.push_back(src_mem);
        scales.push_back(1.0f);
      }
      // NOTE(review): if every input is empty, srcs_mpd is empty here and the
      // primitive descriptor below is created from zero sources - confirm how
      // MKL-DNN reports that case.

      // format::any lets MKL-DNN choose the destination layout.
      auto dst_md =
          memory::desc(dst_tz, memory::data_type::f32, memory::format::any);

      auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);

      std::shared_ptr<memory> dst_mem;
      if (in_place) {
        // The output buffer is also a source, so sum into memory owned by the
        // primitive and copy back with a reorder afterwards.
        dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
      } else {
        dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
      }

      std::vector<mkldnn::primitive::at> inputs;
      for (size_t i = 0; i < srcs_mem.size(); ++i) {
        inputs.push_back(srcs_mem[i]);
      }

      auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
      output_format = platform::GetMKLDNNFormat(sum_pd);

      primitive reorder_prim;
      std::shared_ptr<memory> target_mem;
      if (in_place) {
        // The in-place result keeps the input format.
        output_format = input_format;
        target_mem.reset(new memory(
            {{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
            output_data));
        reorder_prim = reorder(*dst_mem, *target_mem);
      }

      std::vector<primitive> pipeline;
      pipeline.push_back(sum_prim);
      if (in_place) pipeline.push_back(reorder_prim);
      stream(stream::kind::eager).submit(pipeline).wait();

      output->set_layout(DataLayout::kMKLDNN);
      output->set_format(output_format);
    } else if (out_var->IsType<framework::SelectedRows>()) {
      // TODO(@mozga-intel) Add MKLDNN SelectedRows support
      std::unique_ptr<framework::SelectedRows> in0;
      if (in_place) {
        // If is in_place, we store the input[0] to in0, because the output
        // rows are cleared below and the output aliases input[0].
        auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
        auto& rows = in_sel0.rows();
        in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
        in0->mutable_value()->ShareDataWith(in_sel0.value());
      }

      // Returns the i-th input, substituting the saved copy of input[0].
      auto get_selected_row = [&](size_t i) -> const SelectedRows& {
        if (i == 0 && in0) {
          return *in0.get();
        } else {
          return in_vars[i]->Get<SelectedRows>();
        }
      };
      auto* out = ctx.Output<SelectedRows>("Out");
      out->mutable_rows()->clear();
      auto* out_value = out->mutable_value();

      // Runtime InferShape
      size_t first_dim = 0;
      for (int i = 0; i < N; i++) {
        auto& sel_row = get_selected_row(i);
        first_dim += sel_row.rows().size();
      }
      auto in_dim =
          framework::vectorize(get_selected_row(N - 1).value().dims());
      in_dim[0] = static_cast<int64_t>(first_dim);

      out_value->Resize(framework::make_ddim(in_dim));

      // if all the input sparse vars are empty, no need to
      // merge these vars.
      if (first_dim == 0UL) {
        return;
      }
      out_value->mutable_data<T>(ctx.GetPlace());
      math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
      int64_t offset = 0;
      for (int i = 0; i < N; i++) {
        auto& sel_row = get_selected_row(i);
        if (sel_row.rows().size() == 0) {
          continue;
        }
        PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
        functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
                offset, out);
        offset += sel_row.value().numel();
      }
    } else if (out_var->IsType<framework::LoDTensorArray>()) {
      // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
      auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
      // This branch accumulates onto out_array, so when running in place
      // input[0] is already part of the result and is skipped.
      for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
        PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
                       "Only support all inputs are TensorArray");
        auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();

        for (size_t j = 0; j < in_array.size(); ++j) {
          if (in_array[j].numel() != 0) {
            if (j >= out_array.size()) {
              out_array.resize(j + 1);
            }
            if (out_array[j].numel() == 0) {
              // First contribution to this slot: copy tensor and LoD.
              framework::TensorCopy(in_array[j], in_array[j].place(),
                                    ctx.device_context(), &out_array[j]);
              out_array[j].set_lod(in_array[j].lod());
            } else {
              PADDLE_ENFORCE(out_array[j].lod() == in_array[j].lod());
              auto in = EigenVector<T>::Flatten(in_array[j]);
              auto result = EigenVector<T>::Flatten(out_array[j]);
              result.device(*ctx.template device_context<MKLDNNDeviceContext>()
                                 .eigen_device()) = result + in;
            }
          }
        }
      }
    } else {
      PADDLE_THROW("Unexpected branch, output variable type is %s",
                   out_var->Type().name());
    }
  }
};
} // namespace operators
} // namespace paddle
// Registers SumMKLDNNOpKernel<float> as the MKLDNN kernel of the `sum`
// operator for CPUPlace. float is the only element type registered here.
REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::SumMKLDNNOpKernel<float>);
...@@ -18,6 +18,10 @@ limitations under the License. */ ...@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h" #include "paddle/fluid/operators/detail/safe_ref.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using framework::Tensor; using framework::Tensor;
...@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
auto x_vars = ctx.MultiInputVar("X"); auto x_vars = ctx.MultiInputVar("X");
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
if (x_vars[0]->IsType<framework::LoDTensor>()) { if (x_vars[0]->IsType<framework::LoDTensor>()) {
int dtype = -1; int dtype = -1;
for (auto& x_var : x_vars) { for (auto& x_var : x_vars) {
...@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
"Sum operator should have at least one tensor"); "Sum operator should have at least one tensor");
return framework::OpKernelType( return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(dtype), static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
ctx.device_context()); layout, library);
} else if (x_vars[0]->IsType<framework::SelectedRows>()) { } else if (x_vars[0]->IsType<framework::SelectedRows>()) {
for (auto& var : x_vars) { for (auto& var : x_vars) {
auto& value = var->Get<framework::SelectedRows>().value(); auto& value = var->Get<framework::SelectedRows>().value();
if (value.IsInitialized()) { if (value.IsInitialized()) {
return framework::OpKernelType(framework::ToDataType(value.type()), return framework::OpKernelType(framework::ToDataType(value.type()),
ctx.device_context()); ctx.device_context(), layout, library);
} }
} }
// if input sparse vars are not initialized, use an default kernel type. // if input sparse vars are not initialized, use an default kernel type.
return framework::OpKernelType(framework::proto::VarType::FP32, return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.device_context()); ctx.device_context(), layout, library);
} else if (x_vars[0]->IsType<framework::LoDTensorArray>()) { } else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
for (auto& x_var : x_vars) { for (auto& x_var : x_vars) {
auto& array = x_var->Get<framework::LoDTensorArray>(); auto& array = x_var->Get<framework::LoDTensorArray>();
for (auto& each : array) { for (auto& each : array) {
if (each.numel() != 0) { if (each.numel() != 0) {
return framework::OpKernelType(framework::ToDataType(each.type()), return framework::OpKernelType(framework::ToDataType(each.type()),
ctx.device_context()); ctx.device_context(), layout,
library);
} }
} }
} }
...@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(vector<Tensor>) The input tensors of sum operator.") AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
.AsDuplicable(); .AsDuplicable();
AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X"); AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Sum operator. Sum operator.
...@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
framework::BlockDesc* block) const override { framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X"); auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS; auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) { for (auto& name : op_desc.Input("X")) {
VLOG(10) << name << " " VLOG(10) << name << " "
<< block->FindRecursiveOrCreateVar(name).GetType(); << block->FindRecursiveOrCreateVar(name).GetType();
...@@ -206,6 +225,7 @@ namespace ops = paddle::operators; ...@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker,
ops::SumOpVarTypeInference); ops::SumOpVarTypeInference);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>, sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>,
ops::SumKernel<paddle::platform::CPUDeviceContext, double>, ops::SumKernel<paddle::platform::CPUDeviceContext, double>,
......
...@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->set_lod(inside_tensor.lod()); ->set_lod(inside_tensor.lod());
} }
} }
auto new_inside_name = cur_scope.Rename(inside_grad_name); auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}}, "sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); {{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place); sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
} }
......
...@@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { ...@@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
memory.get_primitive_desc().desc().data.format); memory.get_primitive_desc().desc().data.format);
} }
// Returns the memory format recorded in the destination primitive descriptor
// of the given sum primitive descriptor.
inline mkldnn::memory::format GetMKLDNNFormat(
    const mkldnn::sum::primitive_desc& memory) {
  const auto dst_desc = memory.dst_primitive_desc().desc();
  return static_cast<mkldnn::memory::format>(dst_desc.data.format);
}
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
for idx, op_desc in enumerate(op_descs): for idx, op_desc in enumerate(op_descs):
for var_name in op_desc.input_arg_names(): for var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) > 1: if len(renamed_vars[var_name]) > 1:
pending_sum_ops.append( pending_sum_ops.append((_create_op_desc_(
(_create_op_desc_("sum", {"X": renamed_vars[var_name]}, "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"Out": [var_name]}, {}), idx)) {"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name] renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names(): for var_name in op_desc.output_arg_names():
if var_name == core.empty_var_name( if var_name == core.empty_var_name(
...@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs): ...@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
renamed_vars[var_name].append(new_name) renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems(): for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1: if len(inputs) > 1:
pending_sum_ops.append((_create_op_desc_( pending_sum_ops.append(
"sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs))) (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
{"use_mkldnn": False}), len(op_descs)))
# sum_op descs are sorted according to their insert position # sum_op descs are sorted according to their insert position
for p in reversed(pending_sum_ops): for p in reversed(pending_sum_ops):
op_descs.insert(p[1], p[0]) op_descs.insert(p[1], p[0])
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
All layers just related to the neural network. All layers just related to the neural network.
""" """
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
...@@ -109,14 +109,14 @@ def fc(input, ...@@ -109,14 +109,14 @@ def fc(input,
""" """
**Fully Connected Layer** **Fully Connected Layer**
This function creates a fully connected layer in the network. It can take This function creates a fully connected layer in the network. It can take
multiple tensors as its inputs. It creates a variable called weights for multiple tensors as its inputs. It creates a variable called weights for
each input tensor, which represents a fully connected weight matrix from each input tensor, which represents a fully connected weight matrix from
each input unit to each output unit. The fully connected layer multiplies each input unit to each output unit. The fully connected layer multiplies
each input tensor with its coresponding weight to produce an output Tensor. each input tensor with its coresponding weight to produce an output Tensor.
If multiple input tensors are given, the results of multiple multiplications If multiple input tensors are given, the results of multiple multiplications
will be sumed up. If bias_attr is not None, a bias variable will be created will be sumed up. If bias_attr is not None, a bias variable will be created
and added to the output. Finally, if activation is not None, it will be applied and added to the output. Finally, if activation is not None, it will be applied
to the output as well. to the output as well.
This process can be formulated as follows: This process can be formulated as follows:
...@@ -198,7 +198,10 @@ def fc(input, ...@@ -198,7 +198,10 @@ def fc(input,
else: else:
pre_bias = helper.create_tmp_variable(dtype) pre_bias = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias},
attrs={"use_mkldnn": use_mkldnn})
# add bias # add bias
pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims) pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims)
# add activation # add activation
...@@ -847,7 +850,7 @@ def crf_decoding(input, param_attr, label=None): ...@@ -847,7 +850,7 @@ def crf_decoding(input, param_attr, label=None):
Returns: Returns:
Variable: ${viterbi_path_comment} Variable: ${viterbi_path_comment}
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1085,7 +1088,7 @@ def chunk_eval(input, ...@@ -1085,7 +1088,7 @@ def chunk_eval(input,
Here is a NER example of labeling for these tagging schemes: Here is a NER example of labeling for these tagging schemes:
.. code-block:: python .. code-block:: python
====== ====== ====== ===== == ============ ===== ===== ===== == ========= ====== ====== ====== ===== == ============ ===== ===== ===== == =========
Li Ming works at Agricultural Bank of China in Beijing. Li Ming works at Agricultural Bank of China in Beijing.
====== ====== ====== ===== == ============ ===== ===== ===== == ========= ====== ====== ====== ===== == ============ ===== ===== ===== == =========
...@@ -1111,7 +1114,7 @@ def chunk_eval(input, ...@@ -1111,7 +1114,7 @@ def chunk_eval(input,
is the num of chunk types, and `tag_type` get its value from the following table. is the num of chunk types, and `tag_type` get its value from the following table.
.. code-block:: python .. code-block:: python
Scheme Begin Inside End Single Scheme Begin Inside End Single
plain 0 - - - plain 0 - - -
IOB 0 1 - - IOB 0 1 - -
...@@ -1147,7 +1150,7 @@ def chunk_eval(input, ...@@ -1147,7 +1150,7 @@ def chunk_eval(input,
tuple: tuple containing: precision, recall, f1_score, tuple: tuple containing: precision, recall, f1_score,
num_infer_chunks, num_label_chunks, num_infer_chunks, num_label_chunks,
num_correct_chunks num_correct_chunks
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -1247,7 +1250,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): ...@@ -1247,7 +1250,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
""" """
This function computes the softmax activation among all time-steps for each This function computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N` input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N`
is the sum of the length of all sequences. is the sum of the length of all sequences.
For i-th sequence in a mini-batch: For i-th sequence in a mini-batch:
...@@ -1267,7 +1270,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): ...@@ -1267,7 +1270,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
param_attr (ParamAttr|None): attributes for parameter param_attr (ParamAttr|None): attributes for parameter
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
library is installed. Default: True library is installed. Default: True
Returns: Returns:
Variable: output of sequence_softmax Variable: output of sequence_softmax
...@@ -1828,11 +1831,11 @@ def pool2d(input, ...@@ -1828,11 +1831,11 @@ def pool2d(input,
${comment} ${comment}
Args: Args:
input (Variable): The input tensor of pooling operator. The format of input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the the number of channels, H is the height of the
feature, and W is the width of the feature. feature, and W is the width of the feature.
pool_size (int): The side length of pooling windows. All pooling pool_size (int): The side length of pooling windows. All pooling
windows are squares with pool_size on a side. windows are squares with pool_size on a side.
pool_type: ${pooling_type_comment} pool_type: ${pooling_type_comment}
pool_stride (int): stride of the pooling layer. pool_stride (int): stride of the pooling layer.
...@@ -1841,7 +1844,7 @@ def pool2d(input, ...@@ -1841,7 +1844,7 @@ def pool2d(input,
use_cudnn: ${use_cudnn_comment} use_cudnn: ${use_cudnn_comment}
ceil_mode: ${ceil_mode_comment} ceil_mode: ${ceil_mode_comment}
use_mkldnn: ${use_mkldnn_comment} use_mkldnn: ${use_mkldnn_comment}
name (str|None): A name for this layer(optional). If set None, the name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically. layer will be named automatically.
Returns: Returns:
...@@ -1859,10 +1862,10 @@ def pool2d(input, ...@@ -1859,10 +1862,10 @@ def pool2d(input,
data = fluid.layers.data( data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32') name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.pool2d( conv2d = fluid.layers.pool2d(
input=data, input=data,
pool_size=2, pool_size=2,
pool_type='max', pool_type='max',
pool_stride=1, pool_stride=1,
global_pooling=False) global_pooling=False)
""" """
if pool_type not in ["max", "avg"]: if pool_type not in ["max", "avg"]:
...@@ -2227,14 +2230,14 @@ def beam_search_decode(ids, scores, name=None): ...@@ -2227,14 +2230,14 @@ def beam_search_decode(ids, scores, name=None):
This layers is to pack the output of beam search layer into sentences and This layers is to pack the output of beam search layer into sentences and
associated scores. It is usually called after the beam search layer. associated scores. It is usually called after the beam search layer.
Typically, the output of beam search layer is a tensor of selected ids, with Typically, the output of beam search layer is a tensor of selected ids, with
a tensor of the score of each id. Beam search layer's output ids, however, a tensor of the score of each id. Beam search layer's output ids, however,
are generated directly during the tree search, and they are stacked by each are generated directly during the tree search, and they are stacked by each
level of the search tree. Thus we need to reorganize them into sentences, level of the search tree. Thus we need to reorganize them into sentences,
based on the score of each id. This layer takes the output of beam search based on the score of each id. This layer takes the output of beam search
layer as input and repack them into sentences. layer as input and repack them into sentences.
Args: Args:
ids (Variable): The selected ids, output of beam search layer. ids (Variable): The selected ids, output of beam search layer.
scores (Variable): The associated scores of the ids, out put of beam scores (Variable): The associated scores of the ids, out put of beam
search layer. search layer.
name (str): The name of this layer. It is optional. name (str): The name of this layer. It is optional.
...@@ -2242,7 +2245,7 @@ def beam_search_decode(ids, scores, name=None): ...@@ -2242,7 +2245,7 @@ def beam_search_decode(ids, scores, name=None):
Returns: Returns:
tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores. tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores.
sentence_ids is a tensor with shape [size, length], where size is the sentence_ids is a tensor with shape [size, length], where size is the
beam size of beam search, and length is the length of each sentence. beam size of beam search, and length is the length of each sentence.
Note that the length of sentences may vary. Note that the length of sentences may vary.
sentence_scores is a tensor with the same shape as sentence_ids. sentence_scores is a tensor with the same shape as sentence_ids.
...@@ -2902,7 +2905,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None): ...@@ -2902,7 +2905,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
`None`, compute the mean over all elements of :attr:`input` `None`, compute the mean over all elements of :attr:`input`
and return a variable with a single element, otherwise it and return a variable with a single element, otherwise it
must be in the range :math:`[-rank(input), rank(input))`. If must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim[i] < 0`, the dimension to reduce is :math:`dim[i] < 0`, the dimension to reduce is
:math:`rank(input) + dim[i]`. :math:`rank(input) + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension output Tensor. The result tensor will have one fewer dimension
...@@ -3373,16 +3376,16 @@ def topk(input, k, name=None): ...@@ -3373,16 +3376,16 @@ def topk(input, k, name=None):
Args: Args:
input(Variable): The input variable which can be a vector or Tensor with input(Variable): The input variable which can be a vector or Tensor with
higher rank. higher rank.
k(int): The number of top elements to look for along the last dimension k(int): The number of top elements to look for along the last dimension
of input. of input.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
Default: None Default: None
Returns: Returns:
Tuple[Variable]: A tuple with two elements. Each element is a Variable. Tuple[Variable]: A tuple with two elements. Each element is a Variable.
The first one is k largest elements along each last The first one is k largest elements along each last
dimensional slice. The second one is indices of values dimensional slice. The second one is indices of values
within the last dimension of input. within the last dimension of input.
Raises: Raises:
...@@ -3577,15 +3580,15 @@ def warpctc(input, label, blank=0, norm_by_times=False): ...@@ -3577,15 +3580,15 @@ def warpctc(input, label, blank=0, norm_by_times=False):
It's shape is [Lp, num_classes + 1], where Lp is the sum of all input It's shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes. sequences' length and num_classes is the true number of classes.
(not including the blank label). (not including the blank label).
label (Variable): The ground truth of variable-length sequence, label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1], which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
where Lg is th sum of all labels' length. where Lg is th sum of all labels' length.
blank (int, default 0): The blank label index of Connectionist blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1). half-opened interval [0, num_classes + 1).
norm_by_times(bool, default false): Whether to normalize the gradients norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length. by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was There is no need to normalize the gradients if warpctc layer was
follewed by a mean_op. follewed by a mean_op.
Returns: Returns:
...@@ -3691,8 +3694,8 @@ def nce(input, ...@@ -3691,8 +3694,8 @@ def nce(input,
input (Variable): input variable. input (Variable): input variable.
label (Variable): label. label (Variable): label.
num_total_classes (int):${num_total_classes_comment} num_total_classes (int):${num_total_classes_comment}
sample_weight (Variable|None): A Variable of shape [batch_size, 1] sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each storing a weight for each sample. The default weight for each
sample is 1.0. sample is 1.0.
param_attr (ParamAttr|None): attributes for parameter param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias bias_attr (ParamAttr|None): attributes for bias
...@@ -4082,7 +4085,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -4082,7 +4085,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`. This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`.
It takes the first dimension of :attr:`x` and :attr:`y` as batch size. It takes the first dimension of :attr:`x` and :attr:`y` as batch size.
For each instance, it computes the smooth L1 loss element by element first For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of ouput Variable is and then sums all the losses. So the shape of ouput Variable is
[batch_size, 1]. [batch_size, 1].
Args: Args:
...@@ -4091,14 +4094,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -4091,14 +4094,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
y (Variable): A tensor with rank at least 2. The target value of smooth y (Variable): A tensor with rank at least 2. The target value of smooth
L1 loss op with same shape as :attr:`x`. L1 loss op with same shape as :attr:`x`.
inside_weight (Variable|None): A tensor with rank at least 2. This inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with :attr:`x`. If input is optional and should have same shape with :attr:`x`. If
provided, the result of (:attr:`x` - :attr:`y`) will be multiplied provided, the result of (:attr:`x` - :attr:`y`) will be multiplied
by this tensor element by element. by this tensor element by element.
outside_weight (Variable|None): A tensor with rank at least 2. This outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with :attr:`x`. If input is optional and should have same shape with :attr:`x`. If
provided, the out smooth L1 loss will be multiplied by this tensor provided, the out smooth L1 loss will be multiplied by this tensor
element by element. element by element.
sigma (float|None): Hyper parameter of smooth L1 loss layer. A float sigma (float|None): Hyper parameter of smooth L1 loss layer. A float
scalar with default value 1.0. scalar with default value 1.0.
Returns: Returns:
...@@ -4144,7 +4147,7 @@ def one_hot(input, depth): ...@@ -4144,7 +4147,7 @@ def one_hot(input, depth):
Examples: Examples:
.. code-block:: python .. code-block:: python
label = layers.data(name="label", shape=[1], dtype="float32") label = layers.data(name="label", shape=[1], dtype="float32")
one_hot_label = layers.one_hot(input=label, depth=10) one_hot_label = layers.one_hot(input=label, depth=10)
""" """
...@@ -4298,10 +4301,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -4298,10 +4301,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
def lod_reset(x, y=None, target_lod=None): def lod_reset(x, y=None, target_lod=None):
""" """
Set LoD of :attr:`x` to a new one specified by :attr:`y` or Set LoD of :attr:`x` to a new one specified by :attr:`y` or
:attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be :attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be
considered as target LoD first, otherwise :attr:`y.data` would be considered as target LoD first, otherwise :attr:`y.data` would be
considered as target LoD. If :attr:`y` is not provided, target LoD should considered as target LoD. If :attr:`y` is not provided, target LoD should
be specified by :attr:`target_lod`. If target LoD is specified by be specified by :attr:`target_lod`. If target LoD is specified by
:attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported. :attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported.
.. code-block:: text .. code-block:: text
...@@ -4355,7 +4358,7 @@ def lod_reset(x, y=None, target_lod=None): ...@@ -4355,7 +4358,7 @@ def lod_reset(x, y=None, target_lod=None):
Args: Args:
x (Variable): Input variable which could be a Tensor or LodTensor. x (Variable): Input variable which could be a Tensor or LodTensor.
y (Variable|None): If provided, output's LoD would be derived y (Variable|None): If provided, output's LoD would be derived
from :attr:`y`. from :attr:`y`.
target_lod (list|tuple|None): One level LoD which should be considered target_lod (list|tuple|None): One level LoD which should be considered
as target LoD when :attr:`y` not provided. as target LoD when :attr:`y` not provided.
...@@ -4671,7 +4674,7 @@ def image_resize(input, ...@@ -4671,7 +4674,7 @@ def image_resize(input,
""" """
**Resize a Batch of Images** **Resize a Batch of Images**
The input must be a tensor of the shape (num_batches, channels, in_h, in_w), The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
and the resizing only applies on the last two dimensions (height and width). and the resizing only applies on the last two dimensions (height and width).
Supporting resample methods: Supporting resample methods:
...@@ -4767,9 +4770,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): ...@@ -4767,9 +4770,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
def image_resize_short(input, out_short_len, resample='BILINEAR'): def image_resize_short(input, out_short_len, resample='BILINEAR'):
""" """
Resize a batch of images. The short edge of input images will be Resize a batch of images. The short edge of input images will be
resized to the given 'out_short_len'. The long edge of input images resized to the given 'out_short_len'. The long edge of input images
will be resized proportionately to make images' length-width ratio will be resized proportionately to make images' length-width ratio
constant. constant.
Args: Args:
...@@ -4802,7 +4805,7 @@ def gather(input, index): ...@@ -4802,7 +4805,7 @@ def gather(input, index):
""" """
**Gather Layer** **Gather Layer**
Output is obtained by gathering entries of the outer-most dimension Output is obtained by gathering entries of the outer-most dimension
of X indexed by `index` and concatenate them together. of X indexed by `index` and concatenate them together.
.. math:: .. math::
...@@ -4827,7 +4830,7 @@ def gather(input, index): ...@@ -4827,7 +4830,7 @@ def gather(input, index):
[5, 6]] [5, 6]]
Args: Args:
input (Variable): The source input with rank>=1. input (Variable): The source input with rank>=1.
index (Variable): The index input with rank=1. index (Variable): The index input with rank=1.
Returns: Returns:
...@@ -4863,7 +4866,7 @@ def random_crop(x, shape, seed=None): ...@@ -4863,7 +4866,7 @@ def random_crop(x, shape, seed=None):
Returns: Returns:
${out_comment} ${out_comment}
Examples: Examples:
>>> img = fluid.layers.data("img", [3, 256, 256]) >>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
...@@ -4909,7 +4912,7 @@ def log(x): ...@@ -4909,7 +4912,7 @@ def log(x):
Out = \\ln(x) Out = \\ln(x)
Args: Args:
x (Variable): Input tensor. x (Variable): Input tensor.
Returns: Returns:
Variable: The natural log of the input tensor computed element-wise. Variable: The natural log of the input tensor computed element-wise.
...@@ -4938,7 +4941,7 @@ def relu(x): ...@@ -4938,7 +4941,7 @@ def relu(x):
Out = \\max(0, x) Out = \\max(0, x)
Args: Args:
x (Variable): The input tensor. x (Variable): The input tensor.
Returns: Returns:
Variable: The output tensor with the same shape as input. Variable: The output tensor with the same shape as input.
...@@ -4959,15 +4962,15 @@ def relu(x): ...@@ -4959,15 +4962,15 @@ def relu(x):
def mean_iou(input, label, num_classes): def mean_iou(input, label, num_classes):
""" """
Mean Intersection-Over-Union is a common evaluation metric for Mean Intersection-Over-Union is a common evaluation metric for
semantic image segmentation, which first computes the IOU for each semantic image segmentation, which first computes the IOU for each
semantic class and then computes the average over classes. semantic class and then computes the average over classes.
IOU is defined as follows: IOU is defined as follows:
.. math:: .. math::
IOU = \\frac{true\_positive}{(true\_positive + false\_positive + false\_negative)}. IOU = \\frac{true\_positive}{(true\_positive + false\_positive + false\_negative)}.
The predictions are accumulated in a confusion matrix and mean-IOU The predictions are accumulated in a confusion matrix and mean-IOU
is then calculated from it. is then calculated from it.
...@@ -4980,12 +4983,12 @@ def mean_iou(input, label, num_classes): ...@@ -4980,12 +4983,12 @@ def mean_iou(input, label, num_classes):
Returns: Returns:
mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1]. mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1].
out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class. out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class.
out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class.
Examples: Examples:
.. code-block:: python .. code-block:: python
iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes) iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
""" """
helper = LayerHelper('mean_iou', **locals()) helper = LayerHelper('mean_iou', **locals())
......
...@@ -230,7 +230,11 @@ def sums(input, out=None): ...@@ -230,7 +230,11 @@ def sums(input, out=None):
helper = LayerHelper('sum', **locals()) helper = LayerHelper('sum', **locals())
if out is None: if out is None:
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) helper.append_op(
type='sum',
inputs={'X': input},
outputs={'Out': out},
attrs={'use_mkldnn': False})
return out return out
...@@ -380,7 +384,7 @@ def argmin(x, axis=0): ...@@ -380,7 +384,7 @@ def argmin(x, axis=0):
""" """
**argmin** **argmin**
This function computes the indices of the min elements This function computes the indices of the min elements
of the input tensor's element along the provided axis. of the input tensor's element along the provided axis.
Args: Args:
...@@ -395,7 +399,7 @@ def argmin(x, axis=0): ...@@ -395,7 +399,7 @@ def argmin(x, axis=0):
.. code-block:: python .. code-block:: python
out = fluid.layers.argmin(x=in, axis=0) out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=-1) out = fluid.layers.argmin(x=in, axis=-1)
""" """
helper = LayerHelper("arg_min", **locals()) helper = LayerHelper("arg_min", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64) out = helper.create_tmp_variable(VarDesc.VarType.INT64)
...@@ -411,7 +415,7 @@ def argmax(x, axis=0): ...@@ -411,7 +415,7 @@ def argmax(x, axis=0):
""" """
**argmax** **argmax**
This function computes the indices of the max elements This function computes the indices of the max elements
of the input tensor's element along the provided axis. of the input tensor's element along the provided axis.
Args: Args:
...@@ -426,7 +430,7 @@ def argmax(x, axis=0): ...@@ -426,7 +430,7 @@ def argmax(x, axis=0):
.. code-block:: python .. code-block:: python
out = fluid.layers.argmax(x=in, axis=0) out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=-1) out = fluid.layers.argmax(x=in, axis=-1)
""" """
helper = LayerHelper("arg_max", **locals()) helper = LayerHelper("arg_max", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64) out = helper.create_tmp_variable(VarDesc.VarType.INT64)
...@@ -495,9 +499,9 @@ def reverse(x, axis): ...@@ -495,9 +499,9 @@ def reverse(x, axis):
Args: Args:
x(Variable): the input to be reversed. x(Variable): the input to be reversed.
axis(int|tuple|list): Axis along which the order of elements axis(int|tuple|list): Axis along which the order of elements
is reversed. If it is a tuple or a list, reversing is reversed. If it is a tuple or a list, reversing
will be applied on each axis in the tuple or list. will be applied on each axis in the tuple or list.
Returns: Returns:
Variable: The reversed tensor. Variable: The reversed tensor.
...@@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True): ...@@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True):
Args: Args:
x(variable): The Tensor/LoDTensor to be saved. x(variable): The Tensor/LoDTensor to be saved.
file_path(str): The file path where the variable will be saved. file_path(str): The file path where the variable will be saved.
overwrite(bool): Whether or not to overwrite the given file when it already overwrite(bool): Whether or not to overwrite the given file when it already
exists. If it's set 'False' and the file exists, a runtime exists. If it's set 'False' and the file exists, a runtime
error will be thrown. error will be thrown.
""" """
helper = LayerHelper("save", **locals()) helper = LayerHelper("save", **locals())
helper.append_op( helper.append_op(
...@@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True): ...@@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True):
a single file. a single file.
file_path(str): The file path where variables will be saved. file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not to overwrite the given file when it already overwrite(bool): Whether or not to overwrite the given file when it already
exists. If it's set 'False' and the file exists, a runtime exists. If it's set 'False' and the file exists, a runtime
error will be thrown. error will be thrown.
Returns: Returns:
There is no return value. There is no return value.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_sum_op import TestSumOp
class TestMKLDNN(TestSumOp):
    """Re-runs the inherited sum-operator test cases with MKLDNN turned on.

    ``TestSumOp.setUp`` calls ``init_kernel_type`` and then copies
    ``self.use_mkldnn`` into the operator's ``use_mkldnn`` attribute, so
    overriding that hook is all this subclass needs to do.
    """

    def init_kernel_type(self):
        # The base class sets this flag to False before calling the hook.
        self.use_mkldnn = True
# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -20,12 +20,15 @@ from op_test import OpTest ...@@ -20,12 +20,15 @@ from op_test import OpTest
class TestSumOp(OpTest): class TestSumOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sum" self.op_type = "sum"
self.use_mkldnn = False
self.init_kernel_type()
x0 = np.random.random((3, 4)).astype('float32') x0 = np.random.random((3, 4)).astype('float32')
x1 = np.random.random((3, 4)).astype('float32') x1 = np.random.random((3, 4)).astype('float32')
x2 = np.random.random((3, 4)).astype('float32') x2 = np.random.random((3, 4)).astype('float32')
self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]} self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
y = x0 + x1 + x2 y = x0 + x1 + x2
self.outputs = {'Out': y} self.outputs = {'Out': y}
self.attrs = {'use_mkldnn': self.use_mkldnn}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -33,6 +36,9 @@ class TestSumOp(OpTest): ...@@ -33,6 +36,9 @@ class TestSumOp(OpTest):
def test_check_grad(self): def test_check_grad(self):
self.check_grad(['x0'], 'Out') self.check_grad(['x0'], 'Out')
def init_kernel_type(self):
pass
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -824,7 +824,8 @@ class DistributeTranspiler: ...@@ -824,7 +824,8 @@ class DistributeTranspiler:
table_opt_block.append_op( table_opt_block.append_op(
type="sum", type="sum",
inputs={"X": pserver_side_table_grad_list}, inputs={"X": pserver_side_table_grad_list},
outputs={"Out": [grad_var]}) outputs={"Out": [grad_var]},
attrs={"use_mkldnn": False})
else: else:
# in async_mode, for table gradient, it also needs to be split to each parameter server # in async_mode, for table gradient, it also needs to be split to each parameter server
origin_grad_name = grad_var.name origin_grad_name = grad_var.name
...@@ -1056,7 +1057,8 @@ class DistributeTranspiler: ...@@ -1056,7 +1057,8 @@ class DistributeTranspiler:
optimize_block.append_op( optimize_block.append_op(
type="sum", type="sum",
inputs={"X": vars2merge}, inputs={"X": vars2merge},
outputs={"Out": merged_var}) outputs={"Out": merged_var},
attrs={"use_mkldnn": False})
# TODO(panyx0718): What if it's SELECTED_ROWS. # TODO(panyx0718): What if it's SELECTED_ROWS.
if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS: if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS:
optimize_block.append_op( optimize_block.append_op(
......
...@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"): ...@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"):
class PipeReader: class PipeReader:
""" """
PipeReader reads data by stream from a command, takes its PipeReader reads data by stream from a command, takes its
stdout into a pipe buffer and redirects it to the parser to stdout into a pipe buffer and redirects it to the parser to
parse, then yields data in your desired format. parse, then yields data in your desired format.
...@@ -352,7 +352,7 @@ class PipeReader: ...@@ -352,7 +352,7 @@ class PipeReader:
An example: An example:
.. code-block:: python .. code-block:: python
def example_reader(): def example_reader():
for f in myfiles: for f in myfiles:
pr = PipeReader("cat %s"%f) pr = PipeReader("cat %s"%f)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册