Unverified commit 68a75344, authored by Yi Wang and committed by GitHub

Merge branch 'develop' into fix_cpplint_errors_operators_detail

......@@ -370,4 +370,48 @@ extern void hl_maxout_backward(real* inGrad,
size_t featLen,
size_t groups);
/**
* @brief Upsample forward.
* @param[in] inputData input data.
* @param[in] maskData the mask data from MaxPoolWithMaskLayer.
* @param[in] batchSize the batch size of the input.
* @param[in] imgSizeH image height.
* @param[in] imgSizeW image width.
* @param[in] channels the input channels.
* @param[in] outputH the output height.
* @param[in] outputW the output width.
* @param[out] outputData output data.
*/
extern void hl_upsample_forward(real* inputData,
real* maskData,
size_t batchSize,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW,
real* outputData);
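To make the contract of this declaration concrete, here is a minimal host-side sketch of the scatter that hl_upsample_forward performs on the GPU. This is an editorial example, not part of the header: the helper name is hypothetical and `real` is assumed to be float.

// --- editorial sketch (not part of this commit) ---
#include <cstddef>
typedef float real;  // assumption: real is float in this build

// The mask holds, for every pooled element, the flat index inside the
// corresponding unpooled (outputH x outputW) map that the value came from,
// so the forward pass scatters each input value to that position.
// Callers are expected to have zero-initialized `output` beforehand.
void upsample_forward_reference(const real* input, const real* mask,
                                size_t batchSize, size_t imgSizeH,
                                size_t imgSizeW, size_t channels,
                                size_t outputH, size_t outputW, real* output) {
  const size_t inLen = imgSizeH * imgSizeW;   // elements per pooled channel
  const size_t outLen = outputH * outputW;    // elements per unpooled channel
  for (size_t n = 0; n < batchSize * channels; ++n) {
    for (size_t i = 0; i < inLen; ++i) {
      size_t idx = static_cast<size_t>(mask[n * inLen + i]);
      output[n * outLen + idx] = input[n * inLen + i];
    }
  }
}
// --- end of sketch ---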
/**
* @brief Upsample backward.
* @param[in] outputGradData the output grad data.
* @param[in] maskData the mask data from MaxPoolWithMaskLayer.
* @param[in] batchSize the batch size of the input.
* @param[in] imgSizeH image height.
* @param[in] imgSizeW image width.
* @param[in] channels the input channels.
* @param[in] outputH the output height.
* @param[in] outputW the output width.
* @param[out] inputGradData the input grad data.
*/
extern void hl_upsample_backward(real* outputGradData,
real* maskData,
size_t batchSize,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW,
real* inputGradData);
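The backward declaration mirrors the forward one; conceptually it gathers the gradient back from the positions the forward pass scattered to. Continuing the editorial sketch above, under the same assumptions:

// --- editorial sketch (not part of this commit) ---
void upsample_backward_reference(const real* outputGrad, const real* mask,
                                 size_t batchSize, size_t imgSizeH,
                                 size_t imgSizeW, size_t channels,
                                 size_t outputH, size_t outputW,
                                 real* inputGrad) {
  const size_t inLen = imgSizeH * imgSizeW;
  const size_t outLen = outputH * outputW;
  for (size_t n = 0; n < batchSize * channels; ++n) {
    for (size_t i = 0; i < inLen; ++i) {
      // gather: each pooled position reads its gradient from where it wrote
      size_t idx = static_cast<size_t>(mask[n * inLen + i]);
      inputGrad[n * inLen + i] = outputGrad[n * outLen + idx];
    }
  }
}
// --- end of sketch ---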
#endif // HL_CNN_H_
......@@ -224,4 +224,24 @@ inline void hl_maxout_backward(real* inGrad,
size_t featLen,
size_t group) {}
inline void hl_upsample_forward(real* inputData,
real* maskData,
size_t batchSize,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW,
real* outputData) {}
inline void hl_upsample_backward(real* outputGradData,
real* maskData,
size_t batchSize,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW,
real* inputGradData) {}
#endif // HL_CNN_STUB_H_
......@@ -1028,3 +1028,79 @@ void hl_maxout_backward(real* inGrad,
num_kernels, inGrad, outGrad, idData, size, featLen, groups);
CHECK_SYNC("hl_maxout_backward failed");
}
__global__ void upsampleForwardCompute(real* input_data,
real* mask_data,
size_t nthreads,
size_t in_h,
size_t in_w,
size_t out_h,
size_t out_w,
real* output_data) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int offset = index / (in_w * in_h) * out_h * out_w;
int upsample_idx = static_cast<int>(mask_data[index]);
output_data[offset + upsample_idx] = input_data[index];
}
}
__global__ void upsampleBackwardCompute(real* out_grad,
real* mask_data,
size_t nthreads,
size_t in_h,
size_t in_w,
size_t out_h,
size_t out_w,
real* input_grad) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int offset = index / (in_w * in_h) * out_h * out_w;
int upsample_idx = static_cast<int>(mask_data[index]);
input_grad[index] = out_grad[offset + upsample_idx];
}
}
void hl_upsample_forward(real* inputData,
real* maskData,
size_t batchSize,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW,
real* outputData) {
int num_kernels = batchSize * imgSizeH * imgSizeW * channels;
int blocks = (num_kernels + 1024 - 1) / 1024;
upsampleForwardCompute<<<blocks, 1024, 0, STREAM_DEFAULT>>>(inputData,
maskData,
num_kernels,
imgSizeH,
imgSizeW,
outputH,
outputW,
outputData);
CHECK_SYNC("hl_upsample_forward failed");
}
void hl_upsample_backward(real* outputGradData,
real* maskData,
size_t batchSize,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW,
real* inputGradData) {
int num_kernels = batchSize * imgSizeH * imgSizeW * channels;
int blocks = (num_kernels + 1024 - 1) / 1024;
upsampleBackwardCompute<<<blocks, 1024, 0, STREAM_DEFAULT>>>(outputGradData,
maskData,
num_kernels,
imgSizeH,
imgSizeW,
outputH,
outputW,
inputGradData);
CHECK_SYNC("hl_upsample_backward failed");
}
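Both wrappers launch one thread per pooled input element and round the grid size up with ceiling division. A quick check of that arithmetic with hypothetical sizes (editorial example):

// --- editorial sketch (not part of this commit) ---
#include <cstdio>
int main() {
  // hypothetical sizes: batch = 2, channels = 2, 4x4 pooled maps
  int num_kernels = 2 * 2 * 4 * 4;               // one thread per element = 64
  int blocks = (num_kernels + 1024 - 1) / 1024;  // ceiling division -> 1 block
  std::printf("threads needed = %d, blocks = %d\n", num_kernels, blocks);
  // the `index < nthreads` guard in the kernels masks the surplus threads
  return 0;
}
// --- end of sketch ---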
......@@ -279,6 +279,21 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
return std::unique_ptr<ExecutorPrepareContext>(ctx);
}
std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
const ProgramDesc& program, const std::vector<int>& block_ids) {
std::vector<std::shared_ptr<ExecutorPrepareContext>> result;
for (auto& bid : block_ids) {
auto* ctx = new ExecutorPrepareContext(program, bid);
PADDLE_ENFORCE_LT(static_cast<size_t>(bid), program.Size());
auto& block = program.Block(bid);
for (auto& op_desc : block.AllOps()) {
ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
}
result.push_back(std::shared_ptr<ExecutorPrepareContext>(ctx));
}
return result;
}
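A sketch of how this batched Prepare is meant to be consumed, based only on the signatures shown in this diff (the helper name, include paths, and caller-provided program/place/scope are assumptions): prepare all blocks once, then replay each context without recreating variables.

// --- editorial sketch (not part of this commit) ---
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"

void RunBlocksOnce(const paddle::framework::ProgramDesc& program,
                   const std::vector<int>& block_ids,
                   const paddle::platform::Place& place,
                   paddle::framework::Scope* scope) {
  paddle::framework::Executor executor(place);
  // one ExecutorPrepareContext per requested block id
  auto prepared = paddle::framework::Executor::Prepare(program, block_ids);
  for (auto& ctx : prepared) {
    // the caller owns the scope and variables, so do not recreate them here
    executor.RunPreparedContext(ctx.get(), scope,
                                /*create_local_scope=*/false,
                                /*create_vars=*/false);
  }
}
// --- end of sketch ---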
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars) {
auto& block = ctx->prog_.Block(ctx->block_id_);
......
......@@ -61,6 +61,9 @@ class Executor {
static std::unique_ptr<ExecutorPrepareContext> Prepare(
const ProgramDesc& program, int block_id);
static std::vector<std::shared_ptr<ExecutorPrepareContext>> Prepare(
const ProgramDesc& program, const std::vector<int>& block_ids);
void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope = true,
bool create_vars = true);
......
......@@ -14,8 +14,6 @@
#include "paddle/fluid/operators/detail/variable_response.h"
#include <string.h>
#include <string>
#include <utility>
#include <vector>
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/fc_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
template <typename T>
class MKLDNNMD {
public:
explicit MKLDNNMD(const T* in, const T* w, bool bias)
: in{paddle::framework::vectorize2int(in->dims())},
w{paddle::framework::vectorize2int(w->dims())} {
with_bias_ = bias;
}
mkldnn::memory::desc dst() const {
return platform::MKLDNNMemDesc({in[0], w[1]},
mkldnn::memory::data_type::f32,
mkldnn::memory::format::nc);
}
mkldnn::memory::desc src() const {
return is_spatial()
? platform::MKLDNNMemDesc({in[0], in[1], in[2], in[3]},
mkldnn::memory::data_type::f32,
mkldnn::memory::format::nchw)
: platform::MKLDNNMemDesc({in[0], in[1]},
mkldnn::memory::data_type::f32,
mkldnn::memory::format::nc);
}
mkldnn::memory::desc weights() const {
return is_spatial()
? platform::MKLDNNMemDesc({w[1], in[1], in[2], in[3]},
mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw)
: platform::MKLDNNMemDesc({w[1], in[1]},
mkldnn::memory::data_type::f32,
mkldnn::memory::format::oi);
}
mkldnn::memory::desc bias() const {
return with_bias_
? platform::MKLDNNMemDesc({w[1]}, mkldnn::memory::data_type::f32,
mkldnn::memory::format::format_undef)
: platform::MKLDNNMemDesc({}, mkldnn::memory::data_type::f32,
mkldnn::memory::format::format_undef);
}
private:
bool is_spatial() const { return in.size() > 1 && w.size() > 1; }
std::vector<int> in;
std::vector<int> w;
bool with_bias_;
bool is_spatial_;
};
class MKLDNNMemory {
public:
MKLDNNMemory(MKLDNNMD<Tensor>* t, const mkldnn::engine& e)
: md_{t}, engine_{e} {}
virtual ~MKLDNNMemory() = default;
template <typename Output>
mkldnn::memory dst(const Output* out) {
return mkldnn::memory({md_->dst(), engine_},
static_cast<void*>(const_cast<float*>(out)));
}
template <typename Output>
mkldnn::memory dst(Output* out) {
return mkldnn::memory({md_->dst(), engine_}, out);
}
template <typename Input>
mkldnn::memory src(const Input* in) {
return mkldnn::memory({md_->src(), engine_},
static_cast<void*>(const_cast<float*>(in)));
}
template <typename Weight>
mkldnn::memory weights(const Weight* w) {
return mkldnn::memory({md_->weights(), engine_},
static_cast<void*>(const_cast<float*>(w)));
}
mkldnn::memory bias() {
return mkldnn::memory(mkldnn::memory::primitive_desc(md_->bias(), engine_));
}
private:
MKLDNNMD<Tensor>* md_;
const mkldnn::engine& engine_;
};
template <typename T>
class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto input = ctx.Input<Tensor>("Input");
auto w = ctx.Input<Tensor>("W");
PADDLE_ENFORCE(input->dims().size() == 2 || input->dims().size() == 4,
"Input must be with 2 or 4 dimensions, i.e. NCHW");
PADDLE_ENFORCE(w->dims().size() == 2 || w->dims().size() == 4,
"Weights must be with 2 or 4 dimensions, i.e. OI or OIHW");
bool with_bias = ctx.Attr<bool>("bias_attr");
MKLDNNMD<Tensor> md(input, w, with_bias);
std::shared_ptr<mkldnn::inner_product_forward::primitive_desc> pd =
FcFwdPrimitiveDesc(md.src(), md.weights(), md.dst(), md.bias(),
with_bias, mkldnn_engine);
const std::string key = ctx.op().Output("Out");
const std::string key_fc_pd = key + "@fc_pd";
dev_ctx.SetBlob(key_fc_pd, pd);
MKLDNNMemory mem(&md, mkldnn_engine);
const T* input_data = input->data<T>();
const T* w_data = w->data<T>();
auto output = ctx.Output<Tensor>("Out");
T* output_data = output->mutable_data<T>(ctx.GetPlace());
auto dst_memory = mem.dst(output_data);
auto src_memory = mem.src(input_data);
auto weights_memory = mem.weights(w_data);
auto bias_memory = mem.bias();
auto forward = with_bias ? mkldnn::inner_product_forward(
*pd, src_memory, weights_memory, bias_memory,
dst_memory)
: mkldnn::inner_product_forward(
*pd, src_memory, weights_memory, dst_memory);
std::vector<mkldnn::primitive> pipeline = {forward};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}
private:
std::unique_ptr<mkldnn::inner_product_forward::primitive_desc>
FcFwdPrimitiveDesc(const mkldnn::memory::desc& src,
const mkldnn::memory::desc& weights,
const mkldnn::memory::desc& dst,
const mkldnn::memory::desc& bias, const bool with_bias,
const mkldnn::engine& engine) const {
auto desc = with_bias
? mkldnn::inner_product_forward::desc(
mkldnn::prop_kind::forward, src, weights, bias, dst)
: mkldnn::inner_product_forward::desc(
mkldnn::prop_kind::forward, src, weights, dst);
auto pd = new mkldnn::inner_product_forward::primitive_desc(desc, engine);
return std::unique_ptr<mkldnn::inner_product_forward::primitive_desc>(pd);
}
};
template <typename T>
class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
T* input_grad_data = nullptr;
T* w_grad_data = nullptr;
Tensor* input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
Tensor* w_grad = ctx.Output<Tensor>(framework::GradVarName("W"));
if (input_grad) {
input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
}
if (w_grad) {
w_grad_data = w_grad->mutable_data<T>(ctx.GetPlace());
}
const Tensor* input = ctx.Input<Tensor>("Input");
const T* input_data = input->data<T>();
const Tensor* w = ctx.Input<Tensor>("W");
const T* w_data = w->data<T>();
const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
const T* out_grad_data = out_grad->data<T>();
bool with_bias = ctx.Attr<bool>("bias_attr");
MKLDNNMD<Tensor> md(input, w, with_bias);
MKLDNNMemory mem(&md, mkldnn_engine);
auto dst_memory = mem.dst(out_grad_data);
auto src_memory = mem.src(input_data);
auto weights_memory = mem.weights(w_data);
auto bias_memory = mem.bias();
const std::string key = ctx.op().Input("Out");
const std::string key_fc_pd = key + "@fc_pd";
auto pd =
std::static_pointer_cast<mkldnn::inner_product_forward::primitive_desc>(
dev_ctx.GetBlob(key_fc_pd));
PADDLE_ENFORCE(pd != nullptr, "Fail to find key_fc_pd in device context");
if (w_grad) {
auto weights_grad_memory = mem.weights(w_grad_data);
mkldnn::inner_product_backward_weights::primitive_desc bwd_weight_pd =
FcBwdWeightsPrimitiveDesc(md.src(), md.weights(), md.dst(), md.bias(),
with_bias, *pd, mkldnn_engine);
auto bwd_weights_prim = mkldnn::inner_product_backward_weights(
bwd_weight_pd, src_memory, dst_memory, weights_grad_memory,
bias_memory);
std::vector<mkldnn::primitive> pipeline{bwd_weights_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}
if (input_grad) {
auto src_grad_memory = mem.src(input_grad_data);
mkldnn::inner_product_backward_data::primitive_desc bwd_data_pd =
FcBwdDataPrimitiveDesc(md.src(), md.weights(), md.dst(), *pd,
mkldnn_engine);
auto bwd_data_prim = mkldnn::inner_product_backward_data(
bwd_data_pd, dst_memory, weights_memory, src_grad_memory);
std::vector<mkldnn::primitive> pipeline{bwd_data_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}
}
private:
mkldnn::inner_product_backward_weights::primitive_desc
FcBwdWeightsPrimitiveDesc(
const mkldnn::memory::desc& src, const mkldnn::memory::desc& diff_weights,
const mkldnn::memory::desc& diff_dst, const mkldnn::memory::desc& bias,
const bool with_bias,
const mkldnn::inner_product_forward::primitive_desc& pd,
const mkldnn::engine& engine) const {
auto bwd_weight_desc = with_bias
? mkldnn::inner_product_backward_weights::desc(
src, diff_weights, bias, diff_dst)
: mkldnn::inner_product_backward_weights::desc(
src, diff_weights, diff_dst);
return mkldnn::inner_product_backward_weights::primitive_desc(
bwd_weight_desc, engine, pd);
}
mkldnn::inner_product_backward_data::primitive_desc FcBwdDataPrimitiveDesc(
const mkldnn::memory::desc& diff_src, const mkldnn::memory::desc& weights,
const mkldnn::memory::desc& diff_dst,
const mkldnn::inner_product_forward::primitive_desc& pd,
const mkldnn::engine& engine) const {
auto bwd_data_desc =
mkldnn::inner_product_backward_data::desc(diff_src, weights, diff_dst);
return mkldnn::inner_product_backward_data::primitive_desc(bwd_data_desc,
engine, pd);
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_KERNEL(fc, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::FCMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(fc_grad, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::FCMKLDNNGradOpKernel<float>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fc_op.h"
#include <vector>
namespace paddle {
namespace operators {
void FCOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"X(Input) of Fully Connected should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Out(Output) of Fully Connected should not be null.");
PADDLE_ENFORCE(ctx->HasInput("W"),
"W(Input) of Fully Connected should not be null.");
auto in_dims = ctx->GetInputDim("Input");
auto w_dims = ctx->GetInputDim("W");
std::vector<int64_t> output_shape({in_dims[0], w_dims[1]});
PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4,
"Fully Connected input should be 2-D or 4-D tensor.");
PADDLE_ENFORCE(w_dims.size() == 2 || w_dims.size() == 4,
"Fully Connected input should be 2-D or 4-D tensor.");
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->ShareLoD("Input", "Out");
}
framework::OpKernelType FCOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library{framework::LibraryType::kMKLDNN};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout, library);
}
void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto w_dims = ctx->GetInputDim("W");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("W"))) {
ctx->SetOutputDim(framework::GradVarName("W"), w_dims);
}
}
framework::OpKernelType FCOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library{framework::LibraryType::kMKLDNN};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
layout, library);
}
FCOpMaker::FCOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input", "(Tensor) The input tensor of fully connected operator. ");
AddInput("W", "(Tensor), The second input tensor of fc op.");
AddOutput("Out", "(Tensor) The output tensor of fully connected operator. ");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddAttr<bool>("bias_attr", "(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
Fully Connected Operator.
The fully connected operation calculates the output based on the input, the weights, and the bias attribute.
The size of each dimension of the parameters is checked during infer-shape.
The bias matrix is generated by the MKLDNN framework when bias_attr is True.
Additional parameters are use_mkldnn and bias_attr.
The input (X) size and output (Out) size may be different.
This fully connected operator only supports the MKLDNN kernel.
)DOC");
}
} // namespace operators
} // namespace paddle
REGISTER_OP(fc, paddle::operators::FCOp, paddle::operators::FCOpMaker, fc_grad,
paddle::operators::FCOpGrad);
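A worked example of the shape logic in FCOp::InferShape above, with hypothetical sizes (editorial sketch): InferShape keeps only in_dims[0] and w_dims[1], and the MKLDNN kernel flattens the trailing input dimensions to match the weight rows.

// --- editorial sketch (not part of this commit) ---
#include <cstdio>
int main() {
  int in_dims[4] = {2, 3, 4, 4};    // NCHW input
  int w_dims[2] = {3 * 4 * 4, 10};  // weights: (C*H*W) x output_size
  // Out = [in_dims[0], w_dims[1]] = [2, 10]
  std::printf("Out dims: (%d, %d)\n", in_dims[0], w_dims[1]);
  return 0;
}
// --- end of sketch ---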
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class FCOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
class FCOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override;
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override;
};
class FCOpMaker : public framework::OpProtoAndCheckerMaker {
public:
FCOpMaker(OpProto* proto, OpAttrChecker* op_checker);
};
} // namespace operators
} // namespace paddle
......@@ -45,20 +45,23 @@ static void CreateTensorFromMessageType(framework::Variable *var,
}
}
static void ParallelExecuteBlocks(const std::vector<size_t> &parallel_blkids,
framework::Executor *executor,
framework::ProgramDesc *program,
framework::Scope *scope) {
static void ParallelExecuteBlocks(
const std::vector<size_t> &parallel_blkids, framework::Executor *executor,
const std::vector<std::shared_ptr<framework::ExecutorPrepareContext>>
&prepared,
framework::ProgramDesc *program, framework::Scope *scope) {
std::vector<std::future<void>> fs;
for (size_t idx : parallel_blkids) {
fs.push_back(framework::Async([&executor, &program, &scope, idx]() {
int run_block = idx; // thread local
try {
executor->Run(*program, scope, run_block, false, false);
} catch (std::exception &e) {
LOG(ERROR) << "run sub program error " << e.what();
}
}));
fs.push_back(
framework::Async([&executor, &prepared, &program, &scope, idx]() {
int run_block = idx; // thread local
try {
executor->RunPreparedContext(prepared[run_block].get(), scope,
false, false);
} catch (std::exception &e) {
LOG(ERROR) << "run sub program error " << e.what();
}
}));
}
for (size_t i = 0; i < fs.size(); ++i) fs[i].wait();
}
......@@ -96,11 +99,18 @@ class ListenAndServOp : public framework::OperatorBase {
auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock);
auto *program = block->Program();
int num_blocks = program->Size();
size_t num_blocks = program->Size();
PADDLE_ENFORCE_GE(num_blocks, 2,
"server program should have at least 2 blocks");
framework::Executor executor(dev_place);
std::vector<int> block_list;
for (size_t blkid = 1; blkid < num_blocks; ++blkid)
block_list.push_back(blkid);
auto prepared = executor.Prepare(*program, block_list);
prepared.insert(
prepared.begin(),
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
// TODO(qiao) set proper fields for table lookup and update
rpc_service_->SetExecutor(&executor);
......@@ -153,21 +163,22 @@ class ListenAndServOp : public framework::OperatorBase {
// The optimize blocks which have the same parent ID would run parallel
// TODO(Yancey1989): need to use ParallelExecutor for future
size_t last_parent_blkid = program->Block(1).Parent();
int32_t last_parent_blkid = program->Block(1).Parent();
std::vector<size_t> parallel_blkids;
parallel_blkids.push_back(1);
double ts = detail::GetTimestamp();
for (size_t blkid = 2; blkid < num_blocks; ++blkid) {
if (program->Block(blkid).Parent() != last_parent_blkid) {
for (size_t idx : parallel_blkids) VLOG(3) << idx;
ParallelExecuteBlocks(parallel_blkids, &executor, program,
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
&recv_scope);
parallel_blkids.clear();
last_parent_blkid = program->Block(blkid).Parent();
}
parallel_blkids.push_back(blkid);
}
ParallelExecuteBlocks(parallel_blkids, &executor, program, &recv_scope);
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
&recv_scope);
VLOG(3) << "run all blocks spent " << detail::GetTimestamp() - ts
<< "(ms)";
......@@ -181,7 +192,8 @@ class ListenAndServOp : public framework::OperatorBase {
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
}
rpc_service_->SetCond(1);
// FIXME(typhoonzero): use another condition to sync wait clients get.
// NOTE: does not consider barrier request retry in here, we may use
// global barrier id to resolve this.
rpc_service_->WaitClientGet(fan_in);
sparse_vars.clear();
} // while(true)
......
......@@ -30,19 +30,16 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
PADDLE_THROW("SplitIds do not support GPU kernel");
}
const auto* ids_t = ctx.Input<framework::LoDTensor>("Ids");
auto& ids_dims = ids_t->dims();
auto& ids_dims = ctx.Input<framework::LoDTensor>("Ids")->dims();
const T* ids = ctx.Input<framework::LoDTensor>("Ids")->data<T>();
auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
const T* ids = ids_t->data<T>();
const size_t shard_num = outs.size();
std::vector<std::vector<T>> out_ids;
out_ids.resize(outs.size());
// split id by their shard_num.
for (size_t i = 0; i < ids_dims[0]; ++i) {
for (int i = 0; i < ids_dims[0]; ++i) {
T id = ids[i];
size_t shard_id = static_cast<size_t>(id) % shard_num;
out_ids[shard_id].push_back(id);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "UpsampleLayer.h"
#include "iostream"
namespace paddle {
REGISTER_LAYER(upsample, UpsampleLayer);
size_t UpsampleLayer::getOutputSize() {
if (upsampleSize_ == 0) {
upsampleSize_ = imgSize_ * scale_ - static_cast<int>(padOutX_);
upsampleSizeY_ = imgSizeY_ * scaleY_ - static_cast<int>(padOutY_);
}
return upsampleSize_ * upsampleSizeY_ * channels_;
}
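A worked example of the size computation above, with hypothetical sizes (editorial sketch): padding compensation trims one row or column when the original pre-pooling image had an odd edge, which is only meaningful for scale 2.

// --- editorial sketch (not part of this commit) ---
#include <cstdio>
int main() {
  unsigned imgSize = 4, scale = 2, channels = 2;
  bool padOutX = true;  // the original (pre-pooling) width was odd, e.g. 7
  unsigned upsampleSize = imgSize * scale - (padOutX ? 1 : 0);  // 7
  unsigned upsampleSizeY = imgSize * scale;                     // 8
  std::printf("output size = %u\n", upsampleSize * upsampleSizeY * channels);
  return 0;
}
// --- end of sketch ---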
bool UpsampleLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
CHECK_EQ(inputLayers_.size(), 2U);
CHECK_EQ(config_.inputs_size(), 2);
const auto& conf = config_.inputs(0).upsample_conf();
const auto& img_conf = conf.image_conf();
imgSizeY_ =
img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size();
imgSize_ = img_conf.img_size();
channels_ = img_conf.channels();
CHECK((conf.has_upsample_size()) || (conf.has_scale()))
<< "scale or upsample_size is required.";
if (conf.has_upsample_size()) {
upsampleSize_ = conf.upsample_size();
upsampleSizeY_ = upsampleSize_;
if (conf.has_upsample_size_y()) {
upsampleSizeY_ = conf.upsample_size_y();
}
} else {
if (!conf.has_scale_y()) {
scale_ = scaleY_ = conf.scale();
CHECK_GT(static_cast<int>(scale_), 1);
} else {
scale_ = conf.scale();
scaleY_ = conf.scale_y();
}
padOutX_ = conf.pad_out_x();
padOutY_ = conf.pad_out_y();
CHECK(!padOutX_ || scale_ == 2)
<< "Output height padding compensation requires scale_ == 2";
CHECK(!padOutY_ || scaleY_ == 2)
<< "Output width padding compensation requires scaleY_ == 2";
upsampleSize_ = upsampleSizeY_ = 0;
}
return true;
}
void UpsampleLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr input = getInputValue(0);
MatrixPtr mask = inputLayers_[1]->getOutput("mask").value;
size_t batchSize = input->getHeight();
size_t outSize = getOutputSize();
CHECK_EQ(input->getWidth(), mask->getWidth());
CHECK_EQ(mask->getHeight(), batchSize);
resetOutput(batchSize, outSize);
MatrixPtr output = getOutputValue();
output->upsampleForward(*input,
*mask,
imgSize_,
imgSizeY_,
channels_,
upsampleSize_,
upsampleSizeY_);
}
void UpsampleLayer::backward(const UpdateCallback& callback) {
MatrixPtr mask = inputLayers_[1]->getOutput("mask").value;
MatrixPtr inputGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
inputGrad->upsampleBackward(*outputGrad,
*mask,
imgSize_,
imgSizeY_,
channels_,
upsampleSize_,
upsampleSizeY_);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
/**
* This layer transposes the pooling process (i.e., it performs unpooling).
* It takes two inputs: the first is the input data, and
* the second is the mask data from the max-pool-with-mask layer.
*
*/
class UpsampleLayer : public Layer {
public:
explicit UpsampleLayer(const LayerConfig& config) : Layer(config) {}
~UpsampleLayer() {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
size_t getOutputSize();
protected:
size_t scale_, scaleY_;
size_t upsampleSize_, upsampleSizeY_;
size_t padOutX_, padOutY_;
size_t imgSize_, imgSizeY_;
size_t channels_;
};
} // namespace paddle
......@@ -27,6 +27,7 @@ gserver_test(test_BatchNorm)
gserver_test(test_KmaxSeqScore)
gserver_test(test_Expand)
gserver_test(test_MaxPoolingWithMaskOutput)
gserver_test(test_Upsample)
set(PYTHON_PATH
${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle;
void setPoolConfig(TestConfig* config,
PoolConfig* pool,
const string& poolType) {
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(1);
int kw = 2, kh = 2;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(2);
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow);
pool->set_output_y(oh);
}
LayerPtr doOneUpsampleTest(MatrixPtr& inputMat,
const string& poolType,
bool use_gpu,
real* tempGradData) {
/* prepare maxPoolWithMaskLayer */
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 128, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
pool->set_img_size(8);
pool->set_img_size_y(8);
setPoolConfig(&config, pool, "max-pool-with-mask");
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
config.layerConfig.set_name("MaxPoolWithMask");
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(config,
&dataLayers,
&datas,
&layerMap,
"MaxPoolWithMask",
1,
false,
use_gpu);
dataLayers[0]->getOutputValue()->copyFrom(*inputMat);
FLAGS_use_gpu = use_gpu;
std::vector<ParameterPtr> parameters;
LayerPtr maxPoolingWithMaskOutputLayer;
initTestLayer(config, &layerMap, &parameters, &maxPoolingWithMaskOutputLayer);
maxPoolingWithMaskOutputLayer->forward(PASS_GC);
/* prepare the upsample layer */
LayerConfig upsampleLayerConfig;
upsampleLayerConfig.set_type("upsample");
LayerInputConfig* input1 = upsampleLayerConfig.add_inputs();
upsampleLayerConfig.add_inputs();
UpsampleConfig* upsampleConfig = input1->mutable_upsample_conf();
upsampleConfig->set_scale(2);
ImageConfig* imageConfig = upsampleConfig->mutable_image_conf();
imageConfig->set_channels(2);
imageConfig->set_img_size(4);
imageConfig->set_img_size_y(4);
upsampleLayerConfig.set_size(2 * 8 * 8);
upsampleLayerConfig.set_name("upsample");
for (size_t i = 0; i < 2; i++) {
LayerInputConfig& inputTemp = *(upsampleLayerConfig.mutable_inputs(i));
inputTemp.set_input_layer_name("MaxPoolWithMask");
}
LayerPtr upsampleLayer;
ParameterMap parameterMap;
upsampleLayer = Layer::create(upsampleLayerConfig);
layerMap[upsampleLayerConfig.name()] = upsampleLayer;
upsampleLayer->init(layerMap, parameterMap);
upsampleLayer->setNeedGradient(true);
upsampleLayer->forward(PASS_GC);
upsampleLayer->getOutputGrad()->copyFrom(tempGradData, 128);
upsampleLayer->backward();
return upsampleLayer;
}
TEST(Layer, maxPoolingWithMaskOutputLayerFwd) {
bool useGpu = false;
MatrixPtr inputMat;
MatrixPtr inputGPUMat;
MatrixPtr tempGradMat;
inputMat = Matrix::create(1, 128, false, useGpu);
inputMat->randomizeUniform();
tempGradMat = Matrix::create(1, 128, false, useGpu);
tempGradMat->randomizeUniform();
real* data = inputMat->getData();
real* tempGradData = tempGradMat->getData();
LayerPtr upsampleLayerCPU =
doOneUpsampleTest(inputMat, "max-pool-with-mask", useGpu, tempGradData);
#ifdef PADDLE_WITH_CUDA
useGpu = true;
inputGPUMat = Matrix::create(1, 128, false, useGpu);
inputGPUMat->copyFrom(data, 128);
LayerPtr upsampleLayerGPU = doOneUpsampleTest(
inputGPUMat, "max-pool-with-mask", useGpu, tempGradData);
checkMatrixEqual(upsampleLayerCPU->getOutput("").value,
upsampleLayerGPU->getOutput("").value);
checkMatrixEqual(upsampleLayerCPU->getPrev(0)->getOutputGrad(),
upsampleLayerGPU->getPrev(0)->getOutputGrad());
#endif
}
......@@ -1024,6 +1024,66 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) {
LOG(INFO) << "the diffCnt is " << diffCnt;
}
void GpuMatrix::upsampleForward(Matrix& input,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW) {
CHECK(input.useGpu_ == true) << "Matrix type are not equal";
CHECK(mask.useGpu_ == true) << "Matrix type are not equal";
real* inputData = input.getData();
real* maskData = mask.getData();
real* outData = data_;
size_t batch = input.getHeight();
CHECK(imgSizeH * imgSizeW * channels == input.getWidth());
CHECK(imgSizeH * imgSizeW * channels == mask.getWidth());
CHECK_EQ(batch, this->getHeight());
CHECK(width_ == outputH * outputW * channels);
hl_upsample_forward(inputData,
maskData,
batch,
imgSizeH,
imgSizeW,
channels,
outputH,
outputW,
outData);
}
void GpuMatrix::upsampleBackward(Matrix& outputGrad,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW) {
CHECK(outputGrad.useGpu_ == true) << "Matrix type are not equal";
CHECK(mask.useGpu_ == true) << "Matrix type are not equal";
real* outputGradData = outputGrad.getData();
real* maskData = mask.getData();
real* inputGradData = data_;
size_t batch = outputGrad.getHeight();
CHECK(imgSizeH * imgSizeW == this->getWidth() / channels);
CHECK_EQ(batch, this->getHeight());
CHECK_EQ(channels * outputH * outputW, outputGrad.getWidth());
hl_upsample_backward(outputGradData,
maskData,
batch,
imgSizeH,
imgSizeW,
channels,
outputH,
outputW,
inputGradData);
}
void GpuMatrix::maxPoolForward(Matrix& inputMat,
size_t imgSizeH,
size_t imgSizeW,
......@@ -1986,6 +2046,72 @@ void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) {
CHECK_EQ(info, 0);
}
void CpuMatrix::upsampleForward(Matrix& input,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW) {
real* inputData = input.getData();
real* maskData = mask.getData();
real* outData = data_;
size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW;
size_t batch = input.getHeight();
CHECK(inLength == input.getWidth() / channels);
CHECK_EQ(batch, this->getHeight());
CHECK_EQ(channels * outLength, this->getWidth());
for (size_t k = 0; k < batch; k++) {
for (size_t c = 0; c < channels; c++) {
for (size_t i = 0; i < inLength; i++) {
size_t out_index = static_cast<int>(maskData[i]);
if (out_index >= outLength) {
LOG(FATAL) << "upsample index " << out_index << " out of range.";
}
outData[out_index] = inputData[i];
}
inputData += inLength;
maskData += inLength;
outData += outLength;
}
}
}
void CpuMatrix::upsampleBackward(Matrix& outputGrad,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW) {
real* outputGradData = outputGrad.getData();
real* maskData = mask.getData();
real* inputGradData = data_;
size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW;
size_t batch = outputGrad.getHeight();
CHECK(inLength == this->getWidth() / channels);
CHECK_EQ(batch, this->getHeight());
CHECK_EQ(channels * outLength, outputGrad.getWidth());
for (size_t k = 0; k < batch; k++) {
for (size_t c = 0; c < channels; c++) {
for (size_t i = 0; i < inLength; i++) {
size_t out_index = static_cast<int>(maskData[i]);
if (out_index >= outLength) {
LOG(FATAL) << "upsample index " << out_index << " out of range.";
}
inputGradData[i] = outputGradData[out_index];
}
inputGradData += inLength;
maskData += inLength;
outputGradData += outLength;
}
}
}
void CpuMatrix::maxPoolForward(Matrix& inputMat,
size_t imgSizeH,
size_t imgSizeW,
......
......@@ -859,6 +859,26 @@ public:
LOG(FATAL) << "Not implemented";
}
virtual void upsampleForward(Matrix& input,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW) {
LOG(FATAL) << "Not implemeted";
}
virtual void upsampleBackward(Matrix& outputGrad,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW) {
LOG(FATAL) << "Not implemeted";
}
/**
* Pooling forward operation, pick out the largest element
* in the sizeX of value, if the maskMatP is not NULL, it will
......@@ -1420,6 +1440,22 @@ public:
void classificationError(Matrix& output, IVector& label, size_t topkSize = 1);
void upsampleForward(Matrix& input,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW);
void upsampleBackward(Matrix& outputGrad,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW);
void maxPoolForward(Matrix& inputMat,
size_t imgSizeH,
size_t imgSizeW,
......@@ -1694,6 +1730,22 @@ public:
MatrixPtr clone(size_t height, size_t width, bool useGpu = false);
void upsampleForward(Matrix& input,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW);
void upsampleBackward(Matrix& outputGrad,
Matrix& mask,
size_t imgSizeH,
size_t imgSizeW,
size_t channels,
size_t outputH,
size_t outputW);
void maxPoolForward(Matrix& inputMat,
size_t imgSizeH,
size_t imgSizeW,
......
......@@ -323,6 +323,16 @@ message ClipConfig {
required double max = 2;
}
message UpsampleConfig {
required ImageConfig image_conf = 1;
optional uint32 scale = 2 [ default = 2 ];
optional uint32 scale_y = 3 [ default = 2 ];
optional bool pad_out_x = 4 [ default = false ];
optional bool pad_out_y = 5 [ default = false ];
optional uint32 upsample_size = 6;
optional uint32 upsample_size_y = 7;
}
message ROIPoolConfig {
required uint32 pooled_width = 1;
required uint32 pooled_height = 2;
......@@ -359,6 +369,7 @@ message LayerInputConfig {
optional ClipConfig clip_conf = 18;
optional ScaleSubRegionConfig scale_sub_region_conf = 19;
optional ROIPoolConfig roi_pool_conf = 20;
optional UpsampleConfig upsample_conf = 21;
}
message LayerConfig {
......
......@@ -133,6 +133,8 @@ def fc(input,
bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to None, no bias will be added to the output units.
act (str, default None): Activation to be applied to the output of this layer.
use_mkldnn(bool): Use the mkldnn kernel or not. It is valid only when the mkldnn
library is installed. Default: False
name (str, default None): The name of this layer.
Returns:
......@@ -153,38 +155,64 @@ def fc(input,
dtype = helper.input_dtype()
mul_results = []
for input_var, param_attr in helper.iter_inputs_and_params():
input_shape = input_var.shape
if use_mkldnn:
tmp = helper.create_tmp_variable(dtype)
input_shape = input.shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
] + [size]
w = helper.create_parameter(
attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False)
tmp = helper.create_tmp_variable(dtype)
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
is_bias=False)
if bias_attr is None or bias_attr is False:
bias_attr = False
else:
bias_attr = True
helper.append_op(
type="mul",
inputs={"X": input_var,
"Y": w},
type="fc",
inputs={"Input": input,
"W": w},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": num_flatten_dims,
"y_num_col_dims": 1,
'use_mkldnn': use_mkldnn
})
mul_results.append(tmp)
# sum
if len(mul_results) == 1:
pre_bias = mul_results[0]
attrs={"use_mkldnn": use_mkldnn,
"bias_attr": bias_attr})
return helper.append_activation(tmp)
else:
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
# add bias
pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims)
# add activation
return helper.append_activation(pre_activation)
for input_var, param_attr in helper.iter_inputs_and_params():
input_shape = input_var.shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
] + [size]
w = helper.create_parameter(
attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False)
tmp = helper.create_tmp_variable(dtype)
helper.append_op(
type="mul",
inputs={"X": input_var,
"Y": w},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": num_flatten_dims,
"y_num_col_dims": 1,
})
mul_results.append(tmp)
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = helper.create_tmp_variable(dtype)
helper.append_op(
type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias})
# add bias
pre_activation = helper.append_bias_op(
pre_bias, dim_start=num_flatten_dims)
# add activation
return helper.append_activation(pre_activation)
def embedding(input,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
def fully_connected_naive(input, weights, bias_data=None):
in_n, in_c, in_h, in_w = input.shape
w_h, w_c = weights.shape
x_data = np.reshape(input, [in_n, in_c * in_h * in_w])
w_data = np.transpose(np.reshape(weights, (w_c, in_c * in_h * in_w)))
result = None
if not bias_data:
result = np.dot(x_data, w_data)
else:
result = np.dot(x_data, w_data) + bias_data
return result
class MatrixGenerate:
def __init__(self, mb, ic, oc, h, w):
self.input = np.random.random((mb, ic, h, w)).astype("float32")
self.weights = np.random.random((ic * h * w, oc)).astype("float32")
class TestFCMKLDNNOp(OpTest):
def setUp(self):
self.op_type = "fc"
self.use_mkldnn = True
self.with_bias = True
self.matrix = MatrixGenerate(1, 10, 15, 3, 3)
self.inputs = {'Input': self.matrix.input, 'W': self.matrix.weights}
self.attrs = {
'use_mkldnn': self.use_mkldnn,
'with_bias': self.with_bias
}
self.outputs = {
'Out': fully_connected_naive(self.matrix.input, self.matrix.weights)
}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(set(['Input', 'W']), 'Out', max_relative_error=0.9)
def test_check_grad_no_weight(self):
self.check_grad(
['Input'], 'Out', max_relative_error=0.5, no_grad_set=set('W'))
class TestFCMKLDNNOp1(TestFCMKLDNNOp):
def init_op_type(self):
self.matrix = MatrixGenerate(2, 15, 48, 2, 2)
class TestFCMKLDNNOp2(TestFCMKLDNNOp):
def init_op_type(self):
self.matrix = MatrixGenerate(2, 32, 40, 1, 1)
class TestFCMKLDNNOp3(TestFCMKLDNNOp):
def init_op_type(self):
self.matrix = MatrixGenerate(2, 2, 4, 1, 1)
class TestFCMKLDNNOp4(TestFCMKLDNNOp):
def init_op_type(self):
self.with_bias = False
self.matrix = MatrixGenerate(2, 32, 48, 2, 2)
class TestFCMKLDNNOp5(TestFCMKLDNNOp):
def init_op_type(self):
self.with_bias = False
self.matrix = MatrixGenerate(2, 32, 1000, 6, 6)
if __name__ == "__main__":
unittest.main()
......@@ -471,6 +471,7 @@ class Input(Cfg):
maxout=None,
spp=None,
pad=None,
upsample=None,
format=None,
nnz=None,
is_static=None,
......@@ -983,6 +984,13 @@ class Pad(Cfg):
self.add_keys(locals())
@config_class
class Upsample(Cfg):
def __init__(self, scale, scale_y, pad_out_x, pad_out_y, upsample_size,
upsample_size_y):
self.add_keys(locals())
@config_class
class Norm(Cfg):
def __init__(self,
......@@ -2380,6 +2388,46 @@ class SpatialPyramidPoolLayer(LayerBase):
self.set_cnn_layer(name, 1, output_x, spp_conf.image_conf.channels)
@config_layer('upsample')
class UpsampleLayer(LayerBase):
def __init__(self, name, inputs, **xargs):
super(UpsampleLayer, self).__init__(
name, 'upsample', 0, inputs=inputs, **xargs)
input_layer = self.get_input_layer(0)
image_conf = self.config.inputs[0].upsample_conf.image_conf
image_conf.img_size = input_layer.width
image_conf.img_size_y = input_layer.height
image_conf.channels = input_layer.size / (input_layer.width *
input_layer.height)
upsample = self.inputs[0].upsample
output_x = 0
output_y = 0
output_size = 0
if upsample.scale:
self.config.inputs[0].upsample_conf.scale = upsample.scale
self.config.inputs[0].upsample_conf.scale_y = upsample.scale_y
output_x = input_layer.width * upsample.scale
output_y = input_layer.height * upsample.scale_y
self.config.inputs[0].upsample_conf.pad_out_x = upsample.pad_out_x
self.config.inputs[0].upsample_conf.pad_out_y = upsample.pad_out_y
if upsample.upsample_size:
self.config.inputs[
0].upsample_conf.upsample_size = upsample.upsample_size
self.config.inputs[
0].upsample_conf.upsample_size_y = upsample.upsample_size_y
output_x = upsample.upsample_size
output_y = upsample.upsample_size_y
output_size = image_conf.channels * output_x * output_y
self.set_layer_height_width(output_y, output_x)
self.set_layer_depth(input_layer.depth)
self.set_layer_size(output_size)
@config_layer('pad')
class PadLayer(LayerBase):
def __init__(self, name, inputs, **xargs):
......
......@@ -148,6 +148,7 @@ __all__ = [
'resize_layer',
'sub_seq_layer',
'scale_sub_region_layer',
'upsample_layer',
'factorization_machine',
]
......@@ -166,6 +167,7 @@ class LayerType(object):
SEQUENCE_RESHAPE = 'seqreshape'
POOLING_MAX = 'max'
POOLING_AVG = 'average'
UPSAMPLE_LAYER = 'upsample'
FC_LAYER = 'fc'
COST = 'cost'
COSINE_SIM_VEC = 'cos_vm'
......@@ -3014,6 +3016,83 @@ def img_pool3d_layer(input,
size=l.config.size)
@wrap_name_default("upsample")
@layer_support()
def upsample_layer(input,
name=None,
scale=None,
scale_y=None,
upsample_size=None,
upsample_size_y=None,
pad_out_x=False,
pad_out_y=False,
layer_attr=None):
"""
The DePooling (unpooling) process.
The input should be a list of length 2: the first element is a layer, and
the second should be the MaxPoolWithMaskLayer.
The example usage is:
.. code-block:: python
pool1 = paddle.v2.layer.img_pool(input=input, pool_size=2, stride=2,
pool_type=paddle.pooling.MaxWithMask())
upsample = paddle.v2.layer.upsample(input=[layer1, pool1])
:param name: The name of this layer. It is optional.
:type name: basestring
:param input: contains an input layer and a MaxWithMaskPoolingLayer
:type input: list | tuple | collections.Sequence
:param scale: outputSize = scale * inputSize
:type scale: int | list | tuple.
:param scale_y: scale_y will be set equal to scale if its value is None.
:type scale_y: int | None.
:param upsample_size: specify the outputSize.
:type upsample_size: int | list | tuple.
:param upsample_size_y: specify the y dimension outputSize.
:type upsample_size_y: int.
:param pad_out_x: specify exact x dimension size. This parameter only works when scale is 2
:type pad_out_x: bool.
:param pad_out_y: specify exact y dimension size. This parameter only works when scale is 2
:type pad_out_y: bool.
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert (scale is not None) or (upsample_size is not None), \
'either scale or upsample_size must be specified'
assert len(input) == 2, 'layer input size must be 2'
assert input[1].layer_type == LayerType.POOL_LAYER, \
'the second input should be the MaxPoolWithMaskLayer'
scale_y = scale \
if scale is not None else scale_y
upsample_size_y = upsample_size \
if upsample_size is not None else upsample_size_y
layer_type = LayerType.UPSAMPLE_LAYER
layer = Layer(
name=name,
type=layer_type,
inputs=[
Input(
input[0].name,
upsample=Upsample(scale, scale_y, pad_out_x, pad_out_y,
upsample_size, upsample_size_y)),
Input(input[1].name)
],
**ExtraLayerAttribute.to_kwargs(layer_attr))
sz = layer.config.size
return LayerOutput(name, layer_type=layer_type, parents=input, size=sz)
@wrap_name_default("spp")
@layer_support()
def spp_layer(input,
......