提交 8567d042 编写于 作者: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_refine_bilinear_interp

......@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
return train_program, fluid.default_startup_program()
else:
raise ValueError(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
......
......@@ -108,10 +108,10 @@ def gen_job():
tn_container["ports"][0]["containerPort"] = spreadport
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
envs.append({"name": "TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster.
......@@ -167,16 +167,22 @@ def gen_job():
tn_container["volumeMounts"] = volumeMounts
ps_container["env"] = envs
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
ps_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "PSERVER"
})
tn_container["env"] = envs
if args.disttype == "pserver":
tn_container["env"].append({
"name": "TRAINING_ROLE",
"name": "PADDLE_TRAINING_ROLE",
"value": "TRAINER"
})
elif args.disttype == "nccl2" or args.disttype == "local":
# NCCL2 have no training role, set to plain WORKER
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
tn_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "WORKER"
})
os.mkdir(args.jobname)
if args.disttype == "pserver":
......
......@@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE()
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
ENDIF()
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result")
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
ExternalProject_Add(
......
......@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
第二步,启动Parameter Server:
```bash
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py
```
执行命令后请等待出现提示: ```Server listening on 192.168.1.2:6174 ```, 表示Paramter Server已经正常启动。
第三步,启动Trainer:
```bash
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IPP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py
```
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
......
......@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
ret_list.append(f)
return ret_list
trainers = int(os.getenv("TRAINERS"))
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
data_file = fluid.layers.io.open_files(
filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0),
thread_num=1,
......
......@@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC";
......@@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
STanh Activation Operator.
$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
)DOC");
}
......
......@@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Detection mAP evaluate operator.
The general steps are as follows. First, calculate the true positive and
false positive according to the input of detection and labels, then
calculate the mAP evaluate value.
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
false positive according to the input of detection and labels, then
calculate the mAP evaluate value.
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
)DOC");
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/operators/mean_op.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
template <typename T>
class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
float mean = context.Attr<float>("mean");
float std = context.Attr<float>("std");
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
}
engine.seed(seed);
std::normal_distribution<T> dist(mean, std);
int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
// The format of output is set as the mkldnn's format
// TODO(@mozga-intel) The format of matrix sets inside the another layers.
tensor->set_layout(DataLayout::kMKLDNN);
tensor->set_format(mkldnn::memory::format::oihw);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(gaussian_random, MKLDNN, ::paddle::platform::CPUPlace,
ops::GaussianMKLDNNKernel<float>);
......@@ -15,6 +15,10 @@ limitations under the License. */
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
......@@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
ctx.device_context());
ctx.device_context(), layout, library);
}
};
......@@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
"(int, default 5(FP32)) "
"Output data type.")
.SetDefault(framework::proto::VarType::FP32);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
GaussianRandom Operator.
......
......@@ -22,43 +22,24 @@ namespace paddle {
namespace operators {
namespace math {
template <typename T>
__device__ T upper_bound(const T* first, T count, T val) {
const T* orig = first;
const T* it = nullptr;
T step = 0;
while (count > 0) {
it = first;
step = count / 2;
it += step;
if (!(val < *it)) {
first = ++it;
count -= step + 1;
} else {
count = step;
}
}
return first - orig;
}
template <typename T>
__global__ void KernelConcat(T** inputs, const int* input_cols, int col_size,
const int output_rows, const int output_cols,
T* output) {
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
int segment = upper_bound<int>(input_cols, col_size, tid_x) - 1;
int curr_offset = input_cols[segment];
int curr_segment = segment;
int curr_segment = 0;
int curr_offset = input_cols[0];
for (; tid_x < output_cols; tid_x += blockDim.x * gridDim.x) {
T curr_col_offset;
while ((curr_col_offset = input_cols[curr_segment + 1]) <= tid_x) {
int curr_col_offset = input_cols[curr_segment + 1];
while (curr_col_offset <= tid_x) {
curr_offset = curr_col_offset;
++curr_segment;
curr_col_offset = input_cols[curr_segment + 1];
}
int local_col = tid_x - curr_offset;
int segment_width = curr_col_offset - curr_offset;
T* input_ptr = inputs[curr_segment];
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < output_rows; tid_y += blockDim.y * gridDim.y)
......@@ -89,14 +70,14 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
const int in_col, const int* out_cols,
int out_cols_size, T** outputs_data) {
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
int segment = upper_bound<int>(out_cols, out_cols_size, tid_x) - 1;
int curr_offset = out_cols[segment];
int curr_segment = segment;
int curr_segment = 0;
int curr_offset = out_cols[0];
for (; tid_x < in_col; tid_x += blockDim.x * gridDim.x) {
T curr_col_offset;
while ((curr_col_offset = out_cols[curr_segment + 1]) <= tid_x) {
int curr_col_offset = out_cols[curr_segment + 1];
while (curr_col_offset <= tid_x) {
curr_offset = curr_col_offset;
++curr_segment;
curr_col_offset = out_cols[curr_segment + 1];
}
int local_col = tid_x - curr_offset;
......@@ -228,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
outputs_cols[0] = 0;
for (int i = 0; i < o_num; ++i) {
int t_col = outputs->at(i)->numel() / out_row;
int t_col = ref_inputs.at(i)->numel() / out_row;
if (sameShape) {
if (t_col != out0_col) sameShape = false;
}
......
......@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{});
framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]);
......
......@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
{{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, place);
cur_scope.Rename(new_inside_name, inside_grad_name);
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::CPUDeviceContext;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
using mkldnn::reorder;
using platform::to_void_cast;
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto in_vars = ctx.MultiInputVar("X");
const int N = in_vars.size();
auto out_var = ctx.OutputVar("Out");
bool in_place = out_var == in_vars[0];
if (out_var->IsType<framework::LoDTensor>()) {
LoDTensor* output = ctx.Output<LoDTensor>("Out");
T* output_data = output->mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = framework::vectorize2int(output->dims());
auto src_tz = dst_tz;
memory::format output_format{memory::format::format_undef};
std::vector<float> scales;
std::vector<memory::primitive_desc> srcs_mpd;
std::vector<mkldnn::memory> srcs_mem;
PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
"Input[0] must be LoDTensors");
auto& input0 = in_vars[0]->Get<LoDTensor>();
PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
input0.format() != memory::format::format_undef,
"Wrong layout/format for inputs[0]");
memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = in_place ? 1 : 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
auto& input = in_vars[i]->Get<LoDTensor>();
PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
input.format() != memory::format::format_undef,
"Wrong layout/format for inputs");
if (input.numel() == 0) {
continue;
}
const T* input_data = input.data<T>();
auto src_md =
memory::desc(src_tz, memory::data_type::f32, input_format);
auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
auto src_mem = memory(src_mpd, to_void_cast(input_data));
srcs_mpd.push_back(src_mpd);
srcs_mem.push_back(src_mem);
scales.push_back(1.0);
}
auto dst_md =
memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
std::shared_ptr<memory> dst_mem;
if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
} else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
}
std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
inputs.push_back(srcs_mem[i]);
}
auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);
primitive reorder_prim;
std::shared_ptr<memory> target_mem;
if (in_place) {
output_format = input_format;
target_mem.reset(new memory(
{{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
output_data));
reorder_prim = reorder(*dst_mem, *target_mem);
}
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
} else if (out_var->IsType<framework::SelectedRows>()) {
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
auto& rows = in_sel0.rows();
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows& {
if (i == 0 && in0) {
return *in0.get();
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto* out = ctx.Output<SelectedRows>("Out");
out->mutable_rows()->clear();
auto* out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim =
framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<T>(ctx.GetPlace());
math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
offset, out);
offset += sel_row.value().numel();
}
} else if (out_var->IsType<framework::LoDTensorArray>()) {
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
for (size_t i = 0; i < in_array.size(); ++i) {
if (in_array[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy(in_array[i], in_array[i].place(),
ctx.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
auto in = EigenVector<T>::Flatten(in_array[i]);
auto result = EigenVector<T>::Flatten(out_array[i]);
result.device(*ctx.template device_context<MKLDNNDeviceContext>()
.eigen_device()) = result + in;
}
}
}
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::SumMKLDNNOpKernel<float>);
......@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
using framework::Tensor;
......@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto x_vars = ctx.MultiInputVar("X");
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
if (x_vars[0]->IsType<framework::LoDTensor>()) {
int dtype = -1;
for (auto& x_var : x_vars) {
......@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
"Sum operator should have at least one tensor");
return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(dtype),
ctx.device_context());
static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
layout, library);
} else if (x_vars[0]->IsType<framework::SelectedRows>()) {
for (auto& var : x_vars) {
auto& value = var->Get<framework::SelectedRows>().value();
if (value.IsInitialized()) {
return framework::OpKernelType(framework::ToDataType(value.type()),
ctx.device_context());
ctx.device_context(), layout, library);
}
}
// if input sparse vars are not initialized, use an default kernel type.
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.device_context());
ctx.device_context(), layout, library);
} else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
for (auto& x_var : x_vars) {
auto& array = x_var->Get<framework::LoDTensorArray>();
for (auto& each : array) {
if (each.numel() != 0) {
return framework::OpKernelType(framework::ToDataType(each.type()),
ctx.device_context());
ctx.device_context(), layout,
library);
}
}
}
......@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
.AsDuplicable();
AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
Sum operator.
......@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) {
VLOG(10) << name << " "
<< block->FindRecursiveOrCreateVar(name).GetType();
......@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker,
ops::SumOpVarTypeInference);
REGISTER_OP_CPU_KERNEL(
sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>,
ops::SumKernel<paddle::platform::CPUDeviceContext, double>,
......
......@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->set_lod(inside_tensor.lod());
}
}
auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
{{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name);
}
......
......@@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
memory.get_primitive_desc().desc().data.format);
}
inline mkldnn::memory::format GetMKLDNNFormat(
const mkldnn::sum::primitive_desc& memory) {
return static_cast<mkldnn::memory::format>(
memory.dst_primitive_desc().desc().data.format);
}
} // namespace platform
} // namespace paddle
......@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
for idx, op_desc in enumerate(op_descs):
for var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) > 1:
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": renamed_vars[var_name]},
{"Out": [var_name]}, {}), idx))
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names():
if var_name == core.empty_var_name(
......@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1:
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs)))
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
{"use_mkldnn": False}), len(op_descs)))
# sum_op descs are sorted according to their insert position
for p in reversed(pending_sum_ops):
op_descs.insert(p[1], p[0])
......
......@@ -24,8 +24,6 @@ __all__ = [
'GradientClipByValue',
'GradientClipByNorm',
'GradientClipByGlobalNorm',
'append_gradient_clip_ops',
'error_clip_callback',
]
......@@ -38,6 +36,25 @@ class BaseErrorClipAttr(object):
class ErrorClipByValue(BaseErrorClipAttr):
"""
Clips tensor values to the range [min, max].
Given a tensor t, this operation clips its value to min and max inplace.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. if not set by user, \
will be set to -max by framework.
Examples:
.. code-block:: python
var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
"""
def __init__(self, max, min=None):
max = float(max)
if min is None:
......@@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr):
class GradientClipByValue(BaseGradientClipAttr):
"""
Clips gradient values to the range [min, max].
Given a tensor t, this operation clips its value to min and max inplace.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. if not set by user, \
will be set to -max by framework.
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByValue(-1.0, 1.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def __init__(self, max, min=None):
max = float(max)
if min is None:
......@@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr):
class GradientClipByNorm(BaseGradientClipAttr):
"""
Clips tensor values to a maximum L2-norm.
This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
:math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
:math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
.. math::
Out = \\frac{max\_norm * X}{norm(X)},
where :math:`norm(X)` represents the L2 norm of :math:`X`.
Args:
clip_norm (float): The maximum norm value
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByNorm(clip_norm=2.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def __init__(self, clip_norm):
self.clip_norm = clip_norm
......@@ -135,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr):
class GradientClipByGlobalNorm(BaseGradientClipAttr):
"""
Clips values of multiple tensors by the ratio of the sum of their norms.
Given a list of tensors t_list, and a clipping ratio clip_norm, this
operation returns a list of clipped tensors list_clipped and the global
norm (global_norm) of all tensors in t_list.
To perform the clipping, the values :math:`t\_list[i]` are set to:
.. math::
t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)}
where:
.. math::
global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
otherwise they're all shrunk by the global ratio.
Args:
clip_norm (float): The maximum norm value
group_name (str, optional): The group name for this clip.
Examples:
.. code-block:: python
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
with fluid.program_guard(main_program=prog_clip):
fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
"""
def __init__(self, clip_norm, group_name="default_group"):
if not isinstance(group_name, basestring):
raise TypeError("'group_name' must be a basestring.")
......@@ -183,15 +294,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
def set_gradient_clip(clip, param_list=None, program=None):
"""
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
which describes the type and detailed attributes of required gradient clip.
param_list(list, None by default): Parameters that require gradient clip.
It can be a list of parameter or a list of parameter's name.
When it's None, all parameters in the program will be included.
program(Program, None by default): The program where parameters are.
Will be the default main program when assigned with None.
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
which describes the type and detailed attributes of required gradient clip.
param_list(list(Variable)): Parameters that require gradient clip.
It can be a list of parameter or a list of parameter's name.
When it's None, all parameters in the program will be included.
program(Program): The program where parameters are.
Will be the default main program when assigned with None.
"""
if not isinstance(clip, BaseGradientClipAttr):
raise TypeError(
......
......@@ -27,13 +27,30 @@ __all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
"""
:param infer_func: a function that will return predict Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference
:param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
"""
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
......@@ -60,9 +77,20 @@ class Inferencer(object):
def infer(self, inputs, return_numpy=True):
"""
:param inputs: a map of {"input_name": input_var} that will be feed into the inference program
to get the predict value
:return: the predict value of the inference model
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be feed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
......
......@@ -19,26 +19,39 @@ from framework import convert_np_dtype_to_dtype_
from core import VarDesc
__all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
'NormalInitializer', 'XavierInitializer', 'BilinearInitializer'
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
'UniformInitializer', 'NormalInitializer', 'XavierInitializer',
'BilinearInitializer', 'MSRAInitializer'
]
_force_init_on_cpu_ = False
def force_init_on_cpu():
"""
The flag of whether force to init variables on CPU.
Examples:
.. code-block:: python
if force_init_on_cpu():
pass
"""
return _force_init_on_cpu_
@contextlib.contextmanager
def init_on_cpu():
"""
Switch program with `with` statement
Force the variable to be inited on CPU.
Examples:
>>> with init_on_cpu():
>>> step = layers.create_global_var()
.. code-block:: python
with init_on_cpu():
step = layers.create_global_var()
"""
global _force_init_on_cpu_
......@@ -104,14 +117,18 @@ class Initializer(object):
class ConstantInitializer(Initializer):
"""Implements the constant initializer
Args:
value (float): constant value to initialize the variable
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Constant(value=2.0))
"""
def __init__(self, value=0.0, force_cpu=False):
"""Constructor for ConstantInitializer
Args:
value: constant value to initialize the variable
"""
assert value is not None
super(ConstantInitializer, self).__init__()
self._value = value
......@@ -146,16 +163,20 @@ class ConstantInitializer(Initializer):
class UniformInitializer(Initializer):
"""Implements the random uniform distribution initializer
Args:
low (float): lower boundary of the uniform distribution
high (float): upper boundary of the uniform distribution
seed (int): random seed
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
"""
def __init__(self, low=-1.0, high=1.0, seed=0):
"""Constructor for UniformInitializer
Args:
low: lower boundary of the uniform distribution
high: upper boundary of the uniform distribution
seed: random seed
"""
assert low is not None
assert high is not None
assert high >= low
......@@ -196,17 +217,21 @@ class UniformInitializer(Initializer):
class NormalInitializer(Initializer):
"""Implements the random Normal(Gaussian) distribution initializer
"""Implements the Random Normal(Gaussian) distribution initializer
Args:
loc (float): mean of the normal distribution
scale (float): standard deviation of the normal distribution
seed (int): random seed
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
"""
def __init__(self, loc=0.0, scale=1.0, seed=0):
"""Constructor for NormalInitializer
Args:
loc: mean of the normal distribution
scale: standard deviation of the normal distribution
seed: random seed
"""
assert loc is not None
assert scale is not None
assert seed is not None
......@@ -246,39 +271,49 @@ class NormalInitializer(Initializer):
class XavierInitializer(Initializer):
"""Implements the Xavier initializer
"""
This class implements the Xavier weight initializer from the paper
Understanding the difficulty of training deep feedforward neural
networks[1] by Xavier Glorot and Yoshua Bengio.
`Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
by Xavier Glorot and Yoshua Bengio.
This initializer is designed to keep the scale of the gradients
approximately same in all the layers. In case of Uniform distribution,
the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
the range is [-x, x], where
.. math::
x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
In case of Normal distribution, the mean is 0 and the standard deviation
is sqrt(2/ (fan_in + fan_out)).
is
.. math::
\sqrt{\\frac{2.0}{fan\_in + fan\_out}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float): fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out (float): fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in and fan_out to None for most cases.
Examples:
.. code-block:: python
fc = fluid.layers.fc(
input=queries, size=10,
param_attr=fluid.initializer.Xavier(uniform=False))
References:
[1] Understanding the difficulty of training deep feedforward neural
networks. International conference on artificial intelligence and
statistics.
(http://proceedings.mlr.press/v9/glorot10a.html)
"""
def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
"""Constructor for XavierInitializer
Args:
uniform: whether to use uniform or normal distribution
fan_in: fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out: fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed: random seed
Note: It is recommended to set fan_in and fan_out to None for
most cases.
"""
assert uniform is not None
assert seed is not None
super(XavierInitializer, self).__init__()
......@@ -342,30 +377,42 @@ class MSRAInitializer(Initializer):
"""Implements the MSRA initializer a.k.a. Kaiming Initializer
This class implements the weight initialization from the paper
Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
and Jian Sun. This is a robust initialization method that particularly
considers the rectifier nonlinearities. In case of Uniform distribution,
the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
distribution, the mean is 0 and the standard deviation
is sqrt(2/ fan_in).
References:
[1] Delving Deep into Rectifiers: Surpassing Human-Level Performance
on ImageNet Classification
(https://arxiv.org/abs/1502.01852)
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
robust initialization method that particularly considers the rectifier
nonlinearities. In case of Uniform distribution, the range is [-x, x], where
.. math::
x = \sqrt{\\frac{6.0}{fan\_in}}
In case of Normal distribution, the mean is 0 and the standard deviation
is
.. math::
\sqrt{\\frac{2.0}{fan\_in}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float): fan_in for MSRAInitializer. If None, it is\
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in to None for most cases.
Examples:
.. code-block:: python
fc = fluid.layers.fc(
input=queries, size=10,
param_attr=fluid.initializer.MSRA(uniform=False))
"""
def __init__(self, uniform=True, fan_in=None, seed=0):
"""Constructor for MSRAInitializer
Args:
uniform: whether to use uniform or normal distribution
fan_in: fan_in for MSRAInitializer. If None, it is
inferred from the variable.
seed: random seed
Note: It is recommended to set fan_in to None for most cases.
"""
assert uniform is not None
assert seed is not None
......@@ -425,34 +472,37 @@ class MSRAInitializer(Initializer):
class BilinearInitializer(Initializer):
"""Implements the bilinear initializer.
"""
This initializer can be used in transposed convolution operator to
act as upsampling. Users can upsample a feature map with shape of
(B, C, H, W) by any integer factor. The usage is:
>>> factor = 2
>>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> initializer=Bilinear())
>>> conv_up = fluid.layers.conv2d_transpose(
>>> input,
>>> num_filters=C,
>>> output_size=None,
>>> filter_size=2 * factor - factor % 2,
>>> padding=ceil((factor - 1) / 2.),
>>> stride=factor,
>>> groups=C,
>>> param_attr=w_attr,
>>> bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed
Examples:
.. code-block:: python
factor = 2
w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
initializer=Bilinear())
conv_up = fluid.layers.conv2d_transpose(
input,
num_filters=C,
output_size=None,
filter_size=2 * factor - factor % 2,
padding=ceil((factor - 1) / 2.),
stride=factor,
groups=C,
param_attr=w_attr,
bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise transposed
convolution. The filter shape will be (C, 1, K, K) where K is `filer_size`,
This initializer will set a (K, K) interpolation kernel for every channel
of the filter identically. The resulting shape of the output feature map
will be (B, C, factor * H, factor * W). Note that the learning rate and the
weight decay are set to 0 in order to keep coefficient values of bilinear
interpolation unchanged during training.
interpolation unchanged during training.
"""
def __init__(self):
......@@ -469,7 +519,7 @@ class BilinearInitializer(Initializer):
be added.
Returns:
the initialization op
Operator: the initialization op
Raises:
ValueError: If type of `var` and `block` is not right.
......
......@@ -185,12 +185,14 @@ def Print(input,
Returns:
Variable: Output tensor, same data with input tensor.
Examples:
.. code-block:: python
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
'''
helper = LayerHelper('print', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
......@@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard):
class ConditionalBlock(object):
'''
**ConditionalBlock**
ConditionalBlock is an operator that bind a block to a specific condition,
if the condition matches, the corresponding block will be executed.
Args:
inputs (Variable): bool conditions.
is_scalar_condition (bool): whether the branch is controled by a scalar.
name(str): name of this ConditionalBlock.
Examples:
.. code-block:: python
cond = layers.less_than(x=label, y=limit)
true_image, false_image = layers.split_lod_tensor(
input=image, mask=cond)
true_cond = layers.ConditionalBlock([true_image])
with true_cond.block():
...
with false_cond.block():
...
'''
def __init__(self, inputs, is_scalar_condition=False, name=None):
for each_input in inputs:
if not isinstance(each_input, Variable):
......
......@@ -16,7 +16,7 @@ All layers just related to the detection neural network.
"""
from layer_function_generator import generate_layer_fn
from layer_function_generator import autodoc
from layer_function_generator import autodoc, templatedoc
from ..layer_helper import LayerHelper
import tensor
import nn
......@@ -155,7 +155,7 @@ def detection_output(loc,
return nmsed_outs
@autodoc()
@templatedoc()
def detection_map(detect_res,
label,
class_num,
......@@ -166,6 +166,47 @@ def detection_map(detect_res,
input_states=None,
out_states=None,
ap_version='integral'):
"""
${comment}
Args:
detect_res: ${detect_res_comment}
label: ${label_comment}
class_num: ${class_num_comment}
background_label: ${background_label_comment}
overlap_threshold: ${overlap_threshold_comment}
evaluate_difficult: ${evaluate_difficult_comment}
has_state: ${has_state_comment}
input_states: If not None, It contains 3 elements:
1. pos_count ${pos_count_comment}.
2. true_pos ${true_pos_comment}.
3. false_pos ${false_pos_comment}.
out_states: If not None, it contains 3 elements.
1. accum_pos_count ${accum_pos_count_comment}.
2. accum_true_pos ${accum_true_pos_comment}.
3. accum_false_pos ${accum_false_pos_comment}.
ap_version: ${ap_type_comment}
Returns:
${map_comment}
Examples:
.. code-block:: python
detect_res = fluid.layers.data(
name='detect_res',
shape=[10, 6],
append_batch_size=False,
dtype='float32')
label = fluid.layers.data(
name='label',
shape=[10, 6],
append_batch_size=False,
dtype='float32')
map_out = fluid.layers.detection_map(detect_res, label, 21)
"""
helper = LayerHelper("detection_map", **locals())
def __create_var(type):
......
此差异已折叠。
......@@ -230,7 +230,11 @@ def sums(input, out=None):
helper = LayerHelper('sum', **locals())
if out is None:
out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out})
helper.append_op(
type='sum',
inputs={'X': input},
outputs={'Out': out},
attrs={'use_mkldnn': False})
return out
......@@ -380,7 +384,7 @@ def argmin(x, axis=0):
"""
**argmin**
This function computes the indices of the min elements
This function computes the indices of the min elements
of the input tensor's element along the provided axis.
Args:
......@@ -395,7 +399,7 @@ def argmin(x, axis=0):
.. code-block:: python
out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=-1)
out = fluid.layers.argmin(x=in, axis=-1)
"""
helper = LayerHelper("arg_min", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64)
......@@ -411,7 +415,7 @@ def argmax(x, axis=0):
"""
**argmax**
This function computes the indices of the max elements
This function computes the indices of the max elements
of the input tensor's element along the provided axis.
Args:
......@@ -426,7 +430,7 @@ def argmax(x, axis=0):
.. code-block:: python
out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=-1)
out = fluid.layers.argmax(x=in, axis=-1)
"""
helper = LayerHelper("arg_max", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64)
......@@ -495,9 +499,9 @@ def reverse(x, axis):
Args:
x(Vairbale): the input to be reversed.
axis(int|tuple|list): Axis that along which order of elements
is reversed. If it is a tuple or a list, reversing
will be apply on each axis in the tuple or list.
axis(int|tuple|list): Axis that along which order of elements
is reversed. If it is a tuple or a list, reversing
will be apply on each axis in the tuple or list.
Returns:
Variable: The reversed tensor.
......@@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True):
Args:
x(variable): The Tensor/LoDTensor to be saved.
file_path(str): The file path where the variable will be saved.
overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime
error will be thrown.
overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime
error will be thrown.
"""
helper = LayerHelper("save", **locals())
helper.append_op(
......@@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True):
a single file.
file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime
error will be thrown.
existed. If it's set 'False' and the file is existed, a runtime
error will be thrown.
Returns:
There is no return value.
......
......@@ -26,10 +26,10 @@ from clip import append_gradient_clip_ops, error_clip_callback
from contextlib import contextmanager
__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad',
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer',
'Adadelta', 'ModelAverage', 'Optimizer'
'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer', 'RMSPropOptimizer'
]
......@@ -192,15 +192,15 @@ class Optimizer(object):
"""Add optimization operators to update gradients to variables.
Args:
loss: the target that this optimization is for.
parameters_and_grads: a list of (variable, gradient) pair to update.
loss(Variable): the target that this optimization is for.
parameters_and_grads(list(tuple(Variable, Variable))):
a list of (variable, gradient) pair to update.
Returns:
return_op_list: a list of operators that will complete one step of
optimization. This will include parameter update ops, global step
update ops and any other custom ops required by subclasses to manage
their internal state.
:param startup_program:
"""
# This is a default implementation of create_optimization_pass that
# can be shared by most optimizers. This implementation assumes that
......@@ -268,7 +268,22 @@ class Optimizer(object):
class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
Optimizer of the stochastic gradient descent algorithm.
.. math::
param\_out = param - learning\_rate * grad
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
Examples:
.. code-block:: python
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.2)
sgd_optimizer.minimize(cost)
"""
def __init__(self, learning_rate, **kwargs):
......@@ -294,7 +309,37 @@ class SGDOptimizer(Optimizer):
class MomentumOptimizer(Optimizer):
"""Simple Momentum optimizer with velocity state
"""
Simple Momentum optimizer with velocity state
This optimizer has a flag for Nestrov Momentum.
The update equations are as follows:
.. math::
& velocity = mu * velocity + gradient
& if (use\_nesterov):
&\quad param = param - gradient * learning\_rate + mu * velocity * learning\_rate
& else:
&\quad param = param - learning\_rate * velocity
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
momentum (float): momentum factor
use_nesterov (bool): enables Nesterov momentum
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
optimizer.minimize(cost)
"""
_velocity_acc_str = "velocity"
......@@ -338,7 +383,32 @@ class MomentumOptimizer(Optimizer):
class AdagradOptimizer(Optimizer):
"""Simple Adagrad optimizer with moment state
"""
**Adaptive Gradient Algorithm (Adagrad)**
The update is done as follows:
.. math::
moment\_out &= moment + grad * grad
param\_out &= param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon}
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have the epsilon attribute. It is added here in our implementation
as also proposed here: http://cs231n.github.io/neural-networks-3/#ada
for numerical stability to avoid the division by zero error.
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adagrad(learning_rate=0.2)
optimizer.minimize(cost)
"""
_moment_acc_str = "moment"
......@@ -379,7 +449,40 @@ class AdagradOptimizer(Optimizer):
class AdamOptimizer(Optimizer):
"""Implements the Adam Optimizer
"""
This implements the Adam optimizer from Section 2 of the Adam
paper : https://arxiv.org/abs/1412.6980.
Adam is a first-order gradient-based optimization method based on
adaptive estimates of lower-order moments.
Adam updates:
.. math::
t & = t + 1
moment\_1\_out & = {\\beta}_1 * moment\_1 + (1 - {\\beta}_1) * grad
moment\_2\_out & = {\\beta}_2 * moment\_2 + (1 - {\\beta}_2) * grad * grad
learning\_rate & = learning\_rate * \\
\\frac{\sqrt{1 - {\\beta}_2^t}}{1 - {\\beta}_1^t}
param\_out & = param - learning\_rate * \\frac{moment\_1}{\sqrt{moment\_2} + \epsilon}
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adam(learning_rate=0.2)
optimizer.minimize(cost)
"""
_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
......@@ -484,7 +587,42 @@ class AdamOptimizer(Optimizer):
class AdamaxOptimizer(Optimizer):
"""Implements the Adamax Optimizer
"""
We implement the Adamax optimizer from Section 7 of the Adam
paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the
Adam algorithm based on the infinity norm.
Adamax updates:
.. math::
t & = t + 1
moment\_out & = {\\beta}_1 * moment + (1 - {\\beta}_1) * grad
inf\_norm\_out & = max({\\beta}_2 * inf\_norm + \epsilon, |grad|)
learning\_rate & = \\frac{learning\_rate}{1 - {\\beta}_1^t}
param\_out & = param - learning\_rate * \\frac{moment\_out}{inf\_norm\_out}
The original paper does not have an epsilon attribute.
However, it is added here for numerical stability to prevent the
division by 0 error.
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adamax(learning_rate=0.2)
optimizer.minimize(cost)
"""
_moment_acc_str = "moment"
_inf_norm_acc_str = "inf_norm"
......@@ -568,7 +706,34 @@ class AdamaxOptimizer(Optimizer):
class DecayedAdagradOptimizer(Optimizer):
"""Simple Decayed Adagrad optimizer with moment state
"""
**Decayed Adagrad Optimizer**
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
The update is done as follows:
.. math::
moment\_out & = decay * moment + (1 - decay) * grad * grad
param\_out & = param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon}
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have an epsilon attribute. It is added here for numerical
stability to avoid the division by zero error.
Args:
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
decay (float): decay rate.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.DecayedAdagrad(learning_rate=0.2)
optimizer.minimize(cost)
"""
_moment_acc_str = "moment"
......@@ -614,6 +779,7 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer):
"""
**Adadelta Optimizer**
Simple Adadelta optimizer with average squared grad state and
average squared update state.
The details of adadelta please refer to this
......@@ -628,7 +794,7 @@ class AdadeltaOptimizer(Optimizer):
E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
Args:
learning_rate(float): global leraning rate
learning_rate(float): global learning rate
rho(float): rho in equation
epsilon(float): epsilon in equation
......@@ -703,37 +869,37 @@ class RMSPropOptimizer(Optimizer):
.. math::
r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2
w & = w - \\frac{\\eta} {\\sqrt{r(w,t) + \\epsilon}} \\nabla Q_{i}(w)
The first equation calculates moving average of the squared gradient for
each weight. Then dividing the gradient by :math: `sqrt{v(w,t)}`.
each weight. Then dividing the gradient by :math:`sqrt{v(w,t)}`.
In some cases, adding a momentum term :math: `\\beta` is beneficial.
In our implementation, Nesterov momentum is used:
.. math::
r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2
v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{v(w,t) +
\\epsilon}} \\nabla Q_{i}(w)
w & = w - v(w, t)
where, :math: `\\rho` is a hyperparameter and typical values are 0.9, 0.95
where, :math:`\\rho` is a hyperparameter and typical values are 0.9, 0.95
and so on. :math: `beta` is the momentum term. :math: `\\epsilon` is a
smoothing term to avoid division by zero, usually set somewhere in range
from 1e-4 to 1e-8.
Args:
learning_rate(float): global leraning rate.
learning_rate(float): global learning rate.
rho(float): rho is :math: `\\rho` in equation, set 0.95 by default.
epsilon(float): :math: `\\epsilon` in equation is smoothing term to
avoid division by zero, set 1e-6 by default.
momentum(float): :math: `\\beta` in equation is the momentum term,
momentum(float): :math:`\\beta` in equation is the momentum term,
set 0.0 by default.
Raises:
......@@ -810,6 +976,113 @@ class RMSPropOptimizer(Optimizer):
return rmsprop_op
class FtrlOptimizer(Optimizer):
"""
FTRL (Follow The Regularized Leader) Optimizer.
The paper that proposed Follow The Regularized Leader (FTRL):
(https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf)
.. math::
&new\_accum = squared\_accum + grad^2
&if (lr\_power == -0.5):
&\quad linear\_accum += grad - \\frac{\\sqrt{new\_accum} - \\sqrt{squared\_accum}}{learning\_rate * param}
&else:
&\quad linear\_accum += grad - \\frac{new\_accum^{-lr\_power} - accum^{-lr\_power}}{learning\_rate * param}
&x = l1 * sign(linear\_accum) - linear\_accum
&if (lr\_power == -0.5):
&\quad y = \\frac{\\sqrt{new\_accum}}{learning\_rate} + (2 * l2)
&\quad pre\_shrink = \\frac{x}{y}
&\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0)
&else:
&\quad y = \\frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2)
&\quad pre\_shrink = \\frac{x}{y}
&\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0)
&squared\_accum += grad^2
Args:
learning_rate (float|Variable): global learning rate.
l1 (float):
l2 (float):
lr_power (float):
Raises:
ValueError: If learning_rate, rho, epsilon, momentum are None.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Ftrl(0.0001)
_, params_grads = optimizer.minimize(cost)
"""
_squared_acc_str = "squared"
_linear_acc_str = "linear"
def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs):
super(FtrlOptimizer, self).__init__(
learning_rate=learning_rate, **kwargs)
if learning_rate is None:
raise ValueError("learning_rate is not set.")
self.type = "ftrl"
self._l1 = l1
self._l2 = l2
self._lr_power = lr_power
def _create_accumulators(self, block, parameters):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
for p in parameters:
self._add_accumulator(self._squared_acc_str, p)
self._add_accumulator(self._linear_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
squared_acc = self._get_accumulator(self._squared_acc_str,
param_and_grad[0])
linear_acc = self._get_accumulator(self._linear_acc_str,
param_and_grad[0])
ftrl_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"SquaredAccumulator": squared_acc,
"LinearAccumulator": linear_acc,
"LearningRate": self._create_param_lr(param_and_grad),
},
outputs={
"ParamOut": param_and_grad[0],
"SquaredAccumOut": squared_acc,
"LinearAccumOut": linear_acc
},
attrs={"l1": self._l1,
"l2": self._l1,
"lr_power": self._lr_power})
return ftrl_op
# We short the class name, since users will use the optimizer with the package
# name. The sample code:
#
......@@ -826,6 +1099,7 @@ Adamax = AdamaxOptimizer
DecayedAdagrad = DecayedAdagradOptimizer
Adadelta = AdadeltaOptimizer
RMSProp = RMSPropOptimizer
Ftrl = FtrlOptimizer
class ModelAverage(Optimizer):
......@@ -844,7 +1118,9 @@ class ModelAverage(Optimizer):
max_average_window: The maximum size of average window.
Examples:
...
.. code-block:: python
optimizer = fluid.optimizer.Momentum()
_, params_grads = optimizer.minimize(cost)
model_average = fluid.optimizer.ModelAverage(params_grads, 0.15,
......
......@@ -42,6 +42,9 @@ def cuda_profiler(output_file, output_mode=None, config=None):
counters/options for profiling by `config` argument. The default config
is ['gpustarttimestamp', 'gpustarttimestamp', 'gridsize3d',
'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace'].
Then users can use NVIDIA Visual Profiler
(https://developer.nvidia.com/nvidia-visual-profiler) tools to load this
this output file to visualize results.
Args:
output_file (string) : The output file name, the result will be
......@@ -50,6 +53,33 @@ def cuda_profiler(output_file, output_mode=None, config=None):
Comma separated values format. It should be 'kvp' or 'csv'.
config (list of string) : The profiler options and counters can refer
to "Compute Command Line Profiler User Guide".
Raises:
ValueError: If `output_mode` is not in ['kvp', 'csv'].
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
epoc = 8
dshape = [4, 3, 28, 28]
data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32')
conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output_file = 'cuda_profiler.txt'
with profiler.cuda_profiler(output_file, 'csv') as nvprof:
for i in range(epoc):
input = np.random.random(dshape).astype('float32')
exe.run(fluid.default_main_program(), feed={'data': input})
# then use NVIDIA Visual Profiler (nvvp) to load this output file
# to visualize results.
"""
if output_mode is None:
output_mode = 'csv'
......@@ -69,19 +99,52 @@ def cuda_profiler(output_file, output_mode=None, config=None):
def reset_profiler():
"""The profiler clear interface.
reset_profiler will clear the previous time record.
"""
Clear the previous time record. This interface does not work for
`fluid.profiler.cuda_profiler`, it only works for
`fluid.profiler.start_profiler`, `fluid.profiler.stop_profiler`,
and `fluid.profiler.profiler`.
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
with profiler.profiler(state, 'total', '/tmp/profile'):
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# ...
"""
core.reset_profiler()
def start_profiler(state):
"""Enable the profiler.
"""
Enable the profiler. Uers can use `fluid.profiler.start_profiler` and
`fluid.profiler.stop_profiler` to insert the code, except the usage of
`fluid.profiler.profiler` interface.
Args:
state (string) : The profiling state, which should be 'CPU', 'GPU'
or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling
GPU as well. 'All' also generates timeline.
Raises:
ValueError: If `state` is not in ['CPU', 'GPU', 'All'].
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
profiler.start_profiler('GPU')
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# except each iteration
profiler.stop_profiler('total', '/tmp/profile')
"""
if core.is_profiler_enabled():
return
......@@ -97,7 +160,10 @@ def start_profiler(state):
def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
"""Stop the profiler.
"""
Stop the profiler. Uers can use `fluid.profiler.start_profiler` and
`fluid.profiler.stop_profiler` to insert the code, except the usage of
`fluid.profiler.profiler` interface.
Args:
sorted_key (string) : If None, the profiling results will be printed
......@@ -111,6 +177,23 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
The `ave` means sorting by the average execution time.
profile_path (string) : If state == 'All', it will write a profile
proto output file.
Raises:
ValueError: If `sorted_key` is not in
['calls', 'total', 'max', 'min', 'ave'].
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
profiler.start_profiler('GPU')
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# except each iteration
profiler.stop_profiler('total', '/tmp/profile')
"""
if not core.is_profiler_enabled():
return
......@@ -137,7 +220,12 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
Different from cuda_profiler, this profiler can be used to profile both CPU
and GPU program. By defalut, it records the CPU and GPU operator kernels,
if you want to profile other program, you can refer the profiling tutorial
to add more records.
to add more records in C++ code.
If the state == 'All', a profile proto file will be written to
`profile_path`. This file records timeline information during the execution.
Then users can visualize this file to see the timeline, please refer
https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md
Args:
state (string) : The profiling state, which should be 'CPU' or 'GPU',
......@@ -156,6 +244,25 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
The `ave` means sorting by the average execution time.
profile_path (string) : If state == 'All', it will write a profile
proto output file.
Raises:
ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
not in ['calls', 'total', 'max', 'min', 'ave'].
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
with profiler.profiler('All', 'total', '/tmp/profile') as prof:
for pass_id in range(pass_num):
for batch_id, data in enumerate(train_reader()):
exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[],
use_program_cache=True)
# ...
"""
start_profiler(state)
yield
......
......@@ -16,8 +16,8 @@ import framework
from . import core
__all__ = [
'append_regularization_ops', 'WeightDecayRegularizer', 'L1Decay', 'L2Decay',
'L1DecayRegularizer', 'L2DecayRegularizer'
'append_regularization_ops', 'L1Decay', 'L2Decay', 'L1DecayRegularizer',
'L2DecayRegularizer'
]
......@@ -36,7 +36,8 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
set. It will be applied with regularizer.
Returns:
list of (parameters, gradients) pair with the regularized gradient
list[(Variable, Variable)]: list of (parameters, gradients) \
pair with the regularized gradient
Raises:
Exception: Unknown regularization type
......@@ -100,6 +101,24 @@ class WeightDecayRegularizer(object):
class L2DecayRegularizer(WeightDecayRegularizer):
"""Implements the L2 Weight Decay Regularization
Small values of L2 can help prevent over fitting the training data.
.. math::
L2WeightDecay = reg\_coeff * parameter
Args:
regularization_coeff(float): regularization coeff
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adagrad(
learning_rate=1e-4,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.1))
optimizer.minimize(avg_cost)
"""
def __init__(self, regularization_coeff=0.0):
......@@ -154,6 +173,27 @@ class L2DecayRegularizer(WeightDecayRegularizer):
class L1DecayRegularizer(WeightDecayRegularizer):
"""Implements the L1 Weight Decay Regularization
L1 regularization encourages sparsity.
.. math::
L1WeightDecay = reg\_coeff * sign(parameter)
Args:
regularization_coeff(float): regularization coeff
Examples:
.. code-block:: python
program = fluid.framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32",
shape=[5, 10],
lod_level=0,
name="mul.x",
regularizer=fluid.regularizer.L1DecayRegularizer(0.5))
"""
def __init__(self, regularization_coeff=0.0):
......
......@@ -194,16 +194,16 @@ def train(word_dict,
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local):
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local):
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True):
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -200,16 +200,16 @@ def train_main(use_cuda, is_sparse, is_local=True):
if is_local:
train_loop(framework.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -151,16 +151,16 @@ def train(nn_type,
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True):
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -43,7 +43,7 @@ class TestConcatOp(OpTest):
self.axis = 1
class TestConcatOp2(OpTest):
class TestConcatOp2(TestConcatOp):
def init_test_data(self):
self.x0 = np.random.random((2, 3, 4, 5)).astype('float32')
self.x1 = np.random.random((2, 3, 4, 5)).astype('float32')
......@@ -51,5 +51,16 @@ class TestConcatOp2(OpTest):
self.axis = 1
class TestConcatOp3(TestConcatOp):
def init_test_data(self):
self.x0 = np.random.random((1, 256, 170, 256)).astype('float32')
self.x1 = np.random.random((1, 128, 170, 256)).astype('float32')
self.x2 = np.random.random((1, 128, 170, 256)).astype('float32')
self.axis = 1
def test_check_grad(self):
pass
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_gaussian_random_op import TestGaussianRandomOp
class TestMKLDNN(TestGaussianRandomOp):
def init_kernel_type(self):
self.use_mkldnn = True
if __name__ == '__main__':
unittest.main()
......@@ -25,7 +25,15 @@ class TestGaussianRandomOp(unittest.TestCase):
def setUp(self):
self.op_type = "gaussian_random"
self.inputs = {}
self.attrs = {"shape": [1000, 784], "mean": .0, "std": 1., "seed": 10}
self.use_mkldnn = False
self.init_kernel_type()
self.attrs = {
"shape": [1000, 784],
"mean": .0,
"std": 1.,
"seed": 10,
"use_mkldnn": self.use_mkldnn
}
self.outputs = ["Out"]
......@@ -58,6 +66,9 @@ class TestGaussianRandomOp(unittest.TestCase):
self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)
def init_kernel_type(self):
pass
if __name__ == "__main__":
unittest.main()
......@@ -401,6 +401,15 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(output)
print(str(program))
def test_maxout(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[3, 5], dtype="float32")
y = layers.data(name='y', shape=[2, 3], dtype="float32")
output = layers.crop(x, shape=y)
self.assertIsNotNone(output)
print(str(program))
if __name__ == '__main__':
unittest.main()
......@@ -434,5 +434,71 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
class TestFtrlOptimizer(unittest.TestCase):
class MockFtrl(optimizer.FtrlOptimizer):
def get_accumulators(self):
return self._accumulators
def get_squared_str(self):
return self._squared_acc_str
def get_linear_str(self):
return self._linear_acc_str
def test_ftrl_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32",
shape=[5, 10],
lod_level=0,
name="mul.x",
optimize_attr={'learning_rate': 1.1})
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01
ftrl_optimizer = self.MockFtrl(
learning_rate=learning_rate, l1=0.0, l2=0.0, lr_power=-0.5)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0)
opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out,
init_program)
self.assertEqual(len(opts), 3)
self.assertEqual([op.type for op in opts],
["fill_constant", "elementwise_mul", "ftrl"])
# Check accumulators
accumulators = ftrl_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 2)
self.assertTrue(ftrl_optimizer.get_squared_str() in accumulators)
self.assertTrue(ftrl_optimizer.get_linear_str() in accumulators)
squared_acc = accumulators[ftrl_optimizer.get_squared_str()]
linear_acc = accumulators[ftrl_optimizer.get_linear_str()]
self.assertEqual(len(squared_acc), 1)
self.assertEqual(len(linear_acc), 1)
self.assertTrue(mul_x.name in squared_acc)
self.assertTrue(mul_x.name in linear_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 3)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from test_sum_op import TestSumOp
class TestMKLDNN(TestSumOp):
def init_kernel_type(self):
self.use_mkldnn = True
if __name__ == '__main__':
unittest.main()
......@@ -20,12 +20,15 @@ from op_test import OpTest
class TestSumOp(OpTest):
def setUp(self):
self.op_type = "sum"
self.use_mkldnn = False
self.init_kernel_type()
x0 = np.random.random((3, 4)).astype('float32')
x1 = np.random.random((3, 4)).astype('float32')
x2 = np.random.random((3, 4)).astype('float32')
self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
y = x0 + x1 + x2
self.outputs = {'Out': y}
self.attrs = {'use_mkldnn': self.use_mkldnn}
def test_check_output(self):
self.check_output()
......@@ -33,6 +36,9 @@ class TestSumOp(OpTest):
def test_check_grad(self):
self.check_grad(['x0'], 'Out')
def init_kernel_type(self):
pass
if __name__ == "__main__":
unittest.main()
......@@ -824,7 +824,8 @@ class DistributeTranspiler:
table_opt_block.append_op(
type="sum",
inputs={"X": pserver_side_table_grad_list},
outputs={"Out": [grad_var]})
outputs={"Out": [grad_var]},
attrs={"use_mkldnn": False})
else:
# in async_mode, for table gradient, it also need to be splited to each parameter server
origin_grad_name = grad_var.name
......@@ -1056,7 +1057,8 @@ class DistributeTranspiler:
optimize_block.append_op(
type="sum",
inputs={"X": vars2merge},
outputs={"Out": merged_var})
outputs={"Out": merged_var},
attrs={"use_mkldnn": False})
# TODO(panyx0718): What if it's SELECTED_ROWS.
if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS:
optimize_block.append_op(
......
......@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"):
class PipeReader:
"""
PipeReader read data by stream from a command, take it's
PipeReader read data by stream from a command, take it's
stdout into a pipe buffer and redirect it to the parser to
parse, then yield data as your desired format.
......@@ -352,7 +352,7 @@ class PipeReader:
An example:
.. code-block:: python
def example_reader():
for f in myfiles:
pr = PipeReader("cat %s"%f)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册