提交 b545b5b8 编写于 作者: S superjom

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into feature/recurrent_op_backward_fix

......@@ -106,22 +106,22 @@ function(merge_static_libs TARGET_NAME)
endforeach()
list(REMOVE_DUPLICATES libs_deps)
if(APPLE) # Use OSX's libtool to merge archives
# To produce a library we need at least one source file.
# It is created by add_custom_command below and will helps
# also help to track dependencies.
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
if(APPLE) # Use OSX's libtool to merge archives
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${dummyfile}
COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile}
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
add_library(${TARGET_NAME} STATIC ${dummyfile})
file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";")
add_library(${TARGET_NAME} STATIC ${target_SRCS})
target_link_libraries(${TARGET_NAME} ${libs_deps})
foreach(lib ${libs})
......@@ -130,11 +130,14 @@ function(merge_static_libs TARGET_NAME)
endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
)
else() # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
foreach(lib ${libs})
set(objlistfile ${lib}.objlist) # list of objects in the input library
set(objdir ${lib}.objdir)
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)
add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
......@@ -142,31 +145,32 @@ function(merge_static_libs TARGET_NAME)
add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ../${objlistfile}
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
# Empty dummy source file that goes into merged library
set(mergebase ${lib}.mergebase.c)
add_custom_command(OUTPUT ${mergebase}
COMMAND ${CMAKE_COMMAND} -E touch ${mergebase}
DEPENDS ${objlistfile})
list(APPEND mergebases "${mergebase}")
list(APPEND target_OBJS "${objlistfile}")
endforeach()
add_library(${TARGET_NAME} STATIC ${mergebases})
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})
# Generate dummy staic lib
file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";")
add_library(${TARGET_NAME} STATIC ${target_SRCS})
target_link_libraries(${TARGET_NAME} ${libs_deps})
# Get the file name of the generated library
set(outlibfile "$<TARGET_FILE:${TARGET_NAME}>")
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
foreach(lib ${libs})
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} cr ${outlibfile} *.o
COMMAND ${CMAKE_RANLIB} ${outlibfile}
WORKING_DIRECTORY ${lib}.objdir)
endforeach()
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR})
endif()
endfunction(merge_static_libs)
......@@ -196,7 +200,7 @@ function(cc_library TARGET_NAME)
add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS})
else(cc_library_SRCS)
if (cc_library_DEPS)
if(cc_library_DEPS)
merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
else()
message(FATAL "Please specify source file or library in cc_library.")
......
......@@ -25,7 +25,7 @@ function(target_circle_link_libraries TARGET_NAME)
endif()
endforeach()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
if(IOS AND NOT IOS_ENABLE_BITCODE)
if(NOT IOS_ENABLE_BITCODE)
list(APPEND LIBS "-undefined dynamic_lookup")
endif()
endif()
......
......@@ -158,17 +158,23 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字
这里 :code:`hidden_a` 和 :code:`hidden_b` 使用了同样的parameter和bias。并且softmax层的两个输入也使用了同样的参数 :code:`softmax_param`。
7. \*-cp27mu-linux_x86_64.whl is not a supported wheel on this platform.
7. paddlepaddle\*.whl is not a supported wheel on this platform.
------------------------------------------------------------------------
出现这个问题的主要原因是,系统编译wheel包的时候,使用的 :code:`wheel` 包是最新的,
而系统中的 :code:`pip` 包比较老。具体的解决方法是,更新 :code:`pip` 包并重新编译PaddlePaddle。
出现这个问题的主要原因是,没有找到和当前系统匹配的paddlepaddle安装包。最新的paddlepaddle python安装包支持Linux x86_64和MacOS 10.12操作系统,并安装了python 2.7和pip 9.0.1。
更新 :code:`pip` 包的方法是\:
.. code-block:: bash
pip install --upgrade pip
如果还不行,可以执行 :code:`python -c "import pip; print(pip.pep425tags.get_supported())"` 获取当前系统支持的python包的后缀,
并对比是否和正在安装的后缀一致。
如果系统支持的是 :code:`linux_x86_64` 而安装包是 :code:`manylinux1_x86_64` ,需要升级pip版本到最新;
如果系统支持 :code:`manylinux1_x86_64` 而安装包(本地)是 :code:`linux_x86_64` ,可以重命名这个whl包为 :code:`manylinux1_x86_64` 再安装。
8. python相关的单元测试都过不了
--------------------------------
......@@ -310,7 +316,7 @@ Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异
* 模型一直不收敛,发散到了一个数值特别大的地方。
* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。
主要的解决办法是减小学习或者对数据进行归一化处理。
主要的解决办法是减小学习或者对数据进行归一化处理。
15. 编译安装后执行 import paddle.v2 as paddle 报ImportError: No module named v2
------------------------------------------------------------------------
......@@ -373,3 +379,15 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数
parameters = paddle.parameters.create(my_cost)
parameters.set('emb', load_parameter(emb_param_file, 30000, 256))
18. 集群多节点训练,日志中保存均为网络通信类错误
------------------------------
集群多节点训练,日志报错为网络通信类错误,比如 :code:`Connection reset by peer` 等。
此类报错通常是由于某一个节点的错误导致这个节点的训练进程退出,从而引发其他节点无法连接导致,可以参考下面的步骤排查:
* 从 :code:`train.log` , :code:`server.log` 找到最早报错的地方,查看是否是其他错误引发的报错(比如FPE,内存不足,磁盘空间不足等)。
* 如果发现最早的报错就是网络通信的问题,很有可能是非独占方式执行导致的端口冲突,可以联系OP,看当前MPI集群是否支持resource=full参数提交,如果支持增加此参数提交,并更换job 端口。
* 如果当前MPI集群并不支持任务独占模式,可以联系OP是否可以更换集群或升级当前集群。
\ No newline at end of file
......@@ -62,6 +62,7 @@ if(ANDROID)
LIBRARY DESTINATION lib/${ANDROID_ABI})
execute_process(
COMMAND ${GIT_EXECUTABLE} log --pretty=oneline -1
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMITS_LIST
RESULT_VARIABLE GIT_COMMITS_LIST_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
......@@ -81,8 +82,7 @@ if(ANDROID)
)"
)
else(ANDROID)
install(TARGETS paddle_capi_whole
ARCHIVE DESTINATION lib)
install(TARGETS paddle_capi_whole ARCHIVE DESTINATION lib)
if(NOT IOS)
install(TARGETS paddle_capi_shared DESTINATION lib)
endif()
......
......@@ -19,6 +19,19 @@ limitations under the License. */
namespace paddle {
namespace framework {
static ProgramDesc* g_program_desc = nullptr;
ProgramDesc& GetProgramDesc() {
if (g_program_desc == nullptr) {
g_program_desc = new ProgramDesc();
}
return *g_program_desc;
}
template <>
AttrType AttrTypeID<bool>() {
return BOOLEAN;
}
template <>
AttrType AttrTypeID<int>() {
return INT;
......@@ -32,6 +45,10 @@ AttrType AttrTypeID<std::string>() {
return STRING;
}
template <>
AttrType AttrTypeID<std::vector<bool>>() {
return BOOLEANS;
}
template <>
AttrType AttrTypeID<std::vector<int>>() {
return INTS;
}
......@@ -47,40 +64,54 @@ template <>
AttrType AttrTypeID<std::vector<std::pair<int, int>>>() {
return INT_PAIRS;
}
template <>
AttrType AttrTypeID<BlockDesc>() {
return BLOCK;
}
Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
switch (attr_desc.type()) {
case paddle::framework::AttrType::INT: {
case framework::AttrType::BOOLEAN: {
return attr_desc.b();
}
case framework::AttrType::INT: {
return attr_desc.i();
}
case paddle::framework::AttrType::FLOAT: {
case framework::AttrType::FLOAT: {
return attr_desc.f();
}
case paddle::framework::AttrType::STRING: {
case framework::AttrType::STRING: {
return attr_desc.s();
}
case paddle::framework::AttrType::INTS: {
case framework::AttrType::BOOLEANS: {
std::vector<bool> val(attr_desc.bools_size());
for (int i = 0; i < attr_desc.bools_size(); ++i) {
val[i] = attr_desc.bools(i);
}
return val;
}
case framework::AttrType::INTS: {
std::vector<int> val(attr_desc.ints_size());
for (int i = 0; i < attr_desc.ints_size(); ++i) {
val[i] = attr_desc.ints(i);
}
return val;
}
case paddle::framework::AttrType::FLOATS: {
case framework::AttrType::FLOATS: {
std::vector<float> val(attr_desc.floats_size());
for (int i = 0; i < attr_desc.floats_size(); ++i) {
val[i] = attr_desc.floats(i);
}
return val;
}
case paddle::framework::AttrType::STRINGS: {
case framework::AttrType::STRINGS: {
std::vector<std::string> val(attr_desc.strings_size());
for (int i = 0; i < attr_desc.strings_size(); ++i) {
val[i] = attr_desc.strings(i);
}
return val;
}
case paddle::framework::AttrType::INT_PAIRS: {
case framework::AttrType::INT_PAIRS: {
std::vector<std::pair<int, int>> val(attr_desc.int_pairs_size());
for (int i = 0; i < attr_desc.int_pairs_size(); ++i) {
val[i].first = attr_desc.int_pairs(i).first();
......@@ -88,6 +119,9 @@ Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
}
return val;
}
case framework::AttrType::BLOCK: {
return GetProgramDesc().mutable_blocks(attr_desc.block_idx());
}
}
PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !");
return boost::blank();
......
......@@ -27,13 +27,16 @@ limitations under the License. */
namespace paddle {
namespace framework {
typedef boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>,
std::vector<std::pair<int, int>>>
typedef boost::variant<boost::blank, bool, int, float, std::string,
std::vector<bool>, std::vector<int>, std::vector<float>,
std::vector<std::string>,
std::vector<std::pair<int, int>>, BlockDesc*>
Attribute;
typedef std::unordered_map<std::string, Attribute> AttributeMap;
ProgramDesc& GetProgramDesc();
template <typename T>
AttrType AttrTypeID();
......
......@@ -166,9 +166,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// If part of input gradient of that operator is not calculated, fill
// zero variables to that input gradient.
net->AppendOp(OpRegistry::CreateOp("fill_zeros_like",
{{"Src", {prefix}}},
{{"Dst", {grad_input}}}, {}));
net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
{{"Y", {grad_input}}}, {}));
}
return false;
});
......
......@@ -127,8 +127,8 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker {
public:
FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Src", "x");
AddOutput("Dst", "out");
AddInput("X", "x");
AddOutput("Y", "out");
AddComment("");
}
};
......@@ -325,10 +325,10 @@ TEST(Backward, op_part_of_output_are_not_need) {
auto &fill_zero = *net->ops_[0];
ASSERT_EQ("fill_zeros_like", fill_zero.Type());
ASSERT_EQ(1UL, fill_zero.Inputs("Src").size());
ASSERT_EQ("Z", fill_zero.Input("Src"));
ASSERT_EQ(1UL, fill_zero.Outputs("Dst").size());
ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Dst"));
ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
ASSERT_EQ("Z", fill_zero.Input("X"));
ASSERT_EQ(1UL, fill_zero.Outputs("Y").size());
ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Y"));
auto &d_many_out = *net->ops_[1];
ASSERT_EQ("many_output_op_grad", d_many_out.Type());
......
......@@ -292,5 +292,13 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims) {
DDim flatten_to_1d(const DDim& src) { return make_ddim({product(src)}); }
DDim stride(const DDim& ddim) {
std::vector<int64_t> strides(ddim.size());
strides[ddim.size() - 1] = 1;
for (int i = ddim.size() - 2; i >= 0; --i) {
strides[i] = strides[i + 1] * ddim[i + 1];
}
return framework::make_ddim(strides);
}
} // namespace framework
} // namespace paddle
......@@ -121,6 +121,7 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
DDim flatten_to_1d(const DDim& src);
DDim stride(const DDim& ddim);
} // namespace framework
} // namespace paddle
......
......@@ -23,6 +23,9 @@ enum AttrType {
FLOATS = 4;
STRINGS = 5;
INT_PAIRS = 6;
BOOLEAN = 7;
BOOLEANS = 8;
BLOCK = 9;
}
message IntPair {
......@@ -44,6 +47,9 @@ message OpDesc {
repeated float floats = 7;
repeated string strings = 8;
repeated IntPair int_pairs = 9;
optional bool b = 10;
repeated bool bools = 11;
optional int32 block_idx = 12;
};
message Var {
......@@ -108,3 +114,12 @@ message VarDesc {
required string name = 1;
optional LoDTensorDesc lod_tensor = 2;
}
message BlockDesc {
required int32 idx = 1;
required int32 parent_idx = 2;
repeated VarDesc vars = 3;
repeated OpDesc ops = 4;
}
message ProgramDesc { repeated BlockDesc blocks = 1; }
......@@ -207,23 +207,22 @@ const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
}
template <>
Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
auto* var = OutputVar(name);
return var == nullptr ? nullptr : const_cast<Tensor*>(GetTensorFromVar(var));
Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const {
auto var = OutputVar(name);
return var == nullptr ? nullptr : var->GetMutable<LoDTensor>();
}
template <>
std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
const std::string& name) const {
auto names = op().Outputs(name);
std::vector<Tensor*> res;
res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope().FindVar(sub_name);
return var == nullptr
? nullptr
: const_cast<Tensor*>(GetTensorFromVar(var));
auto var = scope_.FindVar(sub_name);
return var == nullptr ? nullptr
: var->GetMutable<LoDTensor>();
});
return res;
}
......
......@@ -212,9 +212,9 @@ class InferShapeContext {
return res;
}
std::vector<const Variable*> MultiOutputVar(const std::string& name) const {
std::vector<Variable*> MultiOutputVar(const std::string& name) const {
auto names = op_.Outputs(name);
std::vector<const Variable*> res;
std::vector<Variable*> res;
res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) {
......@@ -271,6 +271,20 @@ class InferShapeContext {
return &var->Get<Tensor>();
}
void ShareLoD(const std::string& in, const std::string& out, size_t i = 0,
size_t j = 0) const {
PADDLE_ENFORCE_LT(i, InputSize(in));
PADDLE_ENFORCE_LT(j, OutputSize(out));
auto* in_var = MultiInputVar(in)[i];
auto* out_var = MultiOutputVar(out)[j];
if (!in_var->IsType<LoDTensor>()) return;
PADDLE_ENFORCE(out_var->IsType<LoDTensor>(),
"The %d-th output of Output(%s) must be LoDTensor.", j, out);
auto in_tensor = in_var->Get<LoDTensor>();
auto* out_tensor = out_var->GetMutable<LoDTensor>();
out_tensor->set_lod(in_tensor.lod());
}
private:
const OperatorBase& op_;
const Scope& scope_;
......@@ -283,6 +297,13 @@ template <>
const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
const std::string& name) const;
template <>
Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const;
template <>
std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
const std::string& name) const;
template <typename T>
struct EigenDeviceConverter;
......@@ -315,38 +336,10 @@ class ExecutionContext : public InferShapeContext {
return device_context_;
}
// redefine Output function,
// use Variable::Get instead of Variable::GetMutable
template <typename T>
T* Output(const std::string& name) const {
auto var = OutputVar(name);
return var == nullptr ? nullptr : const_cast<T*>(&var->Get<T>());
}
// redefine MultiOutput function.
// use Variable::Get instead of Variable::GetMutable
template <typename T>
std::vector<T*> MultiOutput(const std::string& name) const {
auto names = op().Outputs(name);
std::vector<T*> res;
res.reserve(names.size());
std::transform(
names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) { return Output<T>(sub_name); });
return res;
}
private:
const platform::DeviceContext& device_context_;
};
template <>
Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
template <>
std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
const std::string& name) const;
class OpKernel {
public:
/**
......
......@@ -55,6 +55,13 @@ function(op_library TARGET)
set(pybind_flag 1)
endif()
# activation_op contains several operators
if ("${TARGET}" STREQUAL "activation_op")
set(pybind_flag 1)
# It's enough to just adding one operator to pybind
file(APPEND ${pybind_file} "USE_OP(sigmoid);\n")
endif()
# pybind USE_NO_KERNEL_OP
file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}")
......
......@@ -39,7 +39,8 @@ class AccuracyOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(inference->dims()[0], label->dims()[0],
"inference size must be the same as label size");
ctx.Output<framework::LoDTensor>("Accuracy")->Resize({1});
ctx.Output<framework::Tensor>("Accuracy")->Resize({1});
ctx.ShareLoD("Inference", /*->*/ "Accuracy");
}
};
......@@ -54,11 +55,15 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
// TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch");
AddComment(
R"DOC(Accuracy. It will print accuracy rate for classification.
AddComment(R"DOC(
Accuracy. It will print accuracy rate for classification.
The accuracy is:
.. math::
accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})DOC");
accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
Both the input `Inference` and `Label` can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD with input `Inference`.
)DOC");
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/activation_op.h"
namespace paddle {
namespace operators {
class ActivationOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<framework::Tensor>("Y")->Resize(
ctx.Input<framework::Tensor>("X")->dims());
ctx.ShareLoD("X", /*->*/ "Y");
}
};
class ActivationOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<framework::Tensor>(framework::GradVarName("X"))
->Resize(ctx.Input<framework::Tensor>("Y")->dims());
}
};
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Sigmoid operator");
AddOutput("Y", "Output of Sigmoid operator");
AddComment("Sigmoid activation operator, sigmoid = 1 / (1 + exp(-x))");
}
};
class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ExpOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Exp operator");
AddOutput("Y", "Output of Exp operator");
AddComment("Exp activation operator, exp(x) = e^x");
}
};
class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Relu operator");
AddOutput("Y", "Output of Relu operator");
AddComment("Relu activation operator, relu(x) = max(x, 0)");
}
};
class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
public:
TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Tanh operator");
AddOutput("Y", "Output of Tanh operator");
AddComment(
"Tanh activation operator, tanh = (exp(x) - exp(-x)) / (exp(x) + "
"exp(-x))");
}
};
class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Sqrt operator");
AddOutput("Y", "Output of Sqrt operator");
AddComment("Sqrt activation operator, sqrt(x) = x^(1/2)");
}
};
class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AbsOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Abs operator");
AddOutput("Y", "Output of Abs operator");
AddComment("Abs activation operator, abs(x) = |x|");
}
};
class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReciprocalOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Reciprocal operator");
AddOutput("Y", "Output of Reciprocal operator");
AddComment("Reciprocal activation operator, reciprocal(x) = 1 / x");
}
};
class LogOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LogOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Log operator");
AddOutput("Y", "Output of Log operator");
AddComment("Log activation operator, log(x) = natural logarithm of x");
}
};
class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SquareOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Square operator");
AddOutput("Y", "Output of Square operator");
AddComment("Square activation operator, square(x) = x^2");
}
};
template <typename AttrType>
class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
BReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of BRelu operator");
AddOutput("Y", "Output of BRelu operator");
AddComment("BRelu activation operator, brelu = max(min(x, t_min), t_max)");
AddAttr<AttrType>("t_min", "The min marginal value of BRelu")
.SetDefault(static_cast<AttrType>(0));
AddAttr<AttrType>("t_max", "The max marginal value of BRelu")
.SetDefault(static_cast<AttrType>(24));
}
};
template <typename AttrType>
class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftReluOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of SoftRelu operator");
AddOutput("Y", "Output of SoftRelu operator");
AddComment(
"SoftRelu activation operator, soft_relu = log(1 + exp(max(min(x, "
"threshold), threshold)))");
AddAttr<AttrType>("threshold", "The threshold value of SoftRelu")
.SetDefault(static_cast<AttrType>(40));
}
};
template <typename AttrType>
class PowOpMaker : public framework::OpProtoAndCheckerMaker {
public:
PowOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Pow operator");
AddOutput("Y", "Output of Pow operator");
AddComment("Pow activation operator, pow(x, factor) = x^factor");
AddAttr<AttrType>("factor", "The exponential factor of Pow")
.SetDefault(static_cast<AttrType>(1));
}
};
template <typename AttrType>
class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
public:
STanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of STanh operator");
AddOutput("Y", "Output of STanh operator");
AddComment("STanh activation operator, stanh = b * tanh(a * x)");
AddAttr<AttrType>("scale_a", "The scale parameter of a for the input")
.SetDefault(static_cast<AttrType>(2 / 3));
AddAttr<AttrType>("scale_b", "The scale parameter of b for the input")
.SetDefault(static_cast<AttrType>(1.7159));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(sigmoid,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::SigmoidFunctor<float>>);
REGISTER_OP_CPU_KERNEL(
sigmoid_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::SigmoidGradFunctor<float>>);
REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
exp,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::ExpFunctor>);
REGISTER_OP_CPU_KERNEL(exp_grad,
ops::ActivationGradKernel<paddle::platform::CPUPlace,
float, ops::ExpGradFunctor>);
REGISTER_OP(relu, ops::ActivationOp, ops::ReluOpMaker, relu_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(relu,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::ReluFunctor<float>>);
REGISTER_OP_CPU_KERNEL(
relu_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::ReluGradFunctor<float>>);
REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
tanh,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::TanhFunctor>);
REGISTER_OP_CPU_KERNEL(
tanh_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::TanhGradFunctor<float>>);
REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
sqrt,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::SqrtFunctor>);
REGISTER_OP_CPU_KERNEL(
sqrt_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::SqrtGradFunctor<float>>);
REGISTER_OP(abs, ops::ActivationOp, ops::AbsOpMaker, abs_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
abs,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::AbsFunctor>);
REGISTER_OP_CPU_KERNEL(abs_grad,
ops::ActivationGradKernel<paddle::platform::CPUPlace,
float, ops::AbsGradFunctor>);
REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker,
reciprocal_grad, ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(reciprocal,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::ReciprocalFunctor<float>>);
REGISTER_OP_CPU_KERNEL(
reciprocal_grad,
ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::ReciprocalGradFunctor<float>>);
REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(
log,
ops::ActivationKernel<paddle::platform::CPUPlace, float, ops::LogFunctor>);
REGISTER_OP_CPU_KERNEL(
log_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::LogGradFunctor<float>>);
REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(square,
ops::ActivationKernel<paddle::platform::CPUPlace, float,
ops::SquareFunctor>);
REGISTER_OP_CPU_KERNEL(
square_grad, ops::ActivationGradKernel<paddle::platform::CPUPlace, float,
ops::SquareGradFunctor<float>>);
REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker<float>, brelu_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(brelu,
ops::BReluKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(brelu_grad,
ops::BReluGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>,
soft_relu_grad, ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(soft_relu,
ops::SoftReluKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(pow, ops::PowKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(pow_grad,
ops::PowGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
ops::ActivationOpGrad);
REGISTER_OP_CPU_KERNEL(stanh,
ops::STanhKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(stanh_grad,
ops::STanhGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/activation_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(sigmoid,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::SigmoidFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
sigmoid_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::SigmoidGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
exp,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::ExpFunctor>);
REGISTER_OP_GPU_KERNEL(exp_grad,
ops::ActivationGradKernel<paddle::platform::GPUPlace,
float, ops::ExpGradFunctor>);
REGISTER_OP_GPU_KERNEL(relu,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::ReluFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
relu_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::ReluGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
tanh,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::TanhFunctor>);
REGISTER_OP_GPU_KERNEL(
tanh_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::TanhGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
sqrt,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::SqrtFunctor>);
REGISTER_OP_GPU_KERNEL(
sqrt_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::SqrtGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
abs,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::AbsFunctor>);
REGISTER_OP_GPU_KERNEL(abs_grad,
ops::ActivationGradKernel<paddle::platform::GPUPlace,
float, ops::AbsGradFunctor>);
REGISTER_OP_GPU_KERNEL(reciprocal,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::ReciprocalFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
reciprocal_grad,
ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::ReciprocalGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(
log,
ops::ActivationKernel<paddle::platform::GPUPlace, float, ops::LogFunctor>);
REGISTER_OP_GPU_KERNEL(
log_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::LogGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(square,
ops::ActivationKernel<paddle::platform::GPUPlace, float,
ops::SquareFunctor>);
REGISTER_OP_GPU_KERNEL(
square_grad, ops::ActivationGradKernel<paddle::platform::GPUPlace, float,
ops::SquareGradFunctor<float>>);
REGISTER_OP_GPU_KERNEL(brelu,
ops::BReluKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(brelu_grad,
ops::BReluGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(soft_relu,
ops::SoftReluKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
soft_relu_grad, ops::SoftReluGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(pow, ops::PowKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(pow_grad,
ops::PowGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(stanh,
ops::STanhKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(stanh_grad,
ops::STanhGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T, typename Functor>
class ActivationKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Output<framework::Tensor>("Y");
Y->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto place = context.GetEigenDevice<Place>();
Functor functor;
functor(place, x, y);
}
};
template <typename Place, typename T, typename Functor>
class ActivationGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Input<framework::Tensor>("Y");
auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
dX->mutable_data<T>(context.GetPlace());
auto dy = framework::EigenVector<T>::Flatten(*dY);
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto place = context.GetEigenDevice<Place>();
Functor functor;
functor(place, x, y, dy, dx);
}
};
// sigmoid(x) = 1 / (1 + exp(-x))
template <typename T>
struct SigmoidFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
}
};
template <typename T>
struct SigmoidGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * y * (static_cast<T>(1) - y);
}
};
// exp(x) = e^x
struct ExpFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.exp();
}
};
struct ExpGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * y;
}
};
// relu(x) = max(x, 0)
template <typename T>
struct ReluFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.cwiseMax(static_cast<T>(0));
}
};
template <typename T>
struct ReluGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * (x > static_cast<T>(0)).template cast<T>();
}
};
// tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
struct TanhFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.tanh();
}
};
template <typename T>
struct TanhGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * (static_cast<T>(1) - y * y);
}
};
// sqrt(x) = x^(1/2)
struct SqrtFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.sqrt();
}
};
template <typename T>
struct SqrtGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
const Y y_conj = Eigen::numext::conj(y);
dx.device(d) = static_cast<T>(0.5) * dy / y_conj;
}
};
// abs(x) = |x|
struct AbsFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.abs();
}
};
struct AbsGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * x.sign();
}
};
// reciprocal(x) = 1 / x
template <typename T>
struct ReciprocalFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = static_cast<T>(1) / x;
}
};
template <typename T>
struct ReciprocalGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * static_cast<T>(-1) * y * y;
}
};
// log(x) = natural logarithm of x
struct LogFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.log();
}
};
template <typename T>
struct LogGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * (static_cast<T>(1) / x);
}
};
// square(x) = x^2
struct SquareFunctor {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) {
y.device(d) = x.square();
}
};
template <typename T>
struct SquareGradFunctor {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) {
dx.device(d) = dy * static_cast<T>(2) * x;
}
};
template <typename Place, typename T, typename AttrType = T>
class BReluKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Output<framework::Tensor>("Y");
auto t_min = static_cast<T>(context.Attr<AttrType>("t_min"));
auto t_max = static_cast<T>(context.Attr<AttrType>("t_max"));
Y->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto place = context.GetEigenDevice<Place>();
y.device(place) = x.cwiseMax(t_min).cwiseMin(t_max);
}
};
template <typename Place, typename T, typename AttrType = T>
class BReluGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto t_min = static_cast<T>(context.Attr<AttrType>("t_min"));
auto t_max = static_cast<T>(context.Attr<AttrType>("t_max"));
dX->mutable_data<T>(context.GetPlace());
auto dy = framework::EigenVector<T>::Flatten(*dY);
auto x = framework::EigenVector<T>::Flatten(*X);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto place = context.GetEigenDevice<Place>();
dx.device(place) = dy * ((x > t_min) * (x < t_max)).template cast<T>();
}
};
template <typename Place, typename T, typename AttrType = T>
class SoftReluKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Output<framework::Tensor>("Y");
auto threshold = static_cast<T>(context.Attr<AttrType>("threshold"));
Y->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto place = context.GetEigenDevice<Place>();
auto temp = x.cwiseMax(-threshold).cwiseMin(threshold).eval();
y.device(place) = (static_cast<T>(1) + temp.exp()).log();
}
};
template <typename Place, typename T, typename AttrType = T>
class SoftReluGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Input<framework::Tensor>("Y");
auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto threshold = static_cast<T>(context.Attr<AttrType>("threshold"));
dX->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto dy = framework::EigenVector<T>::Flatten(*dY);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto place = context.GetEigenDevice<Place>();
auto temp = ((x > -threshold) * (x < threshold)).template cast<T>().eval();
dx.device(place) = dy * (static_cast<T>(1) - (-y).exp()) * temp;
}
};
template <typename Place, typename T, typename AttrType = T>
class PowKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Output<framework::Tensor>("Y");
auto factor = static_cast<T>(context.Attr<AttrType>("factor"));
Y->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto place = context.GetEigenDevice<Place>();
y.device(place) = x.pow(factor);
}
};
template <typename Place, typename T, typename AttrType = T>
class PowGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto factor = static_cast<T>(context.Attr<AttrType>("factor"));
dX->mutable_data<T>(context.GetPlace());
auto dy = framework::EigenVector<T>::Flatten(*dY);
auto x = framework::EigenVector<T>::Flatten(*X);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto place = context.GetEigenDevice<Place>();
dx.device(place) = dy * factor * x.pow(factor - static_cast<T>(1));
}
};
template <typename Place, typename T, typename AttrType = T>
class STanhKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* Y = context.Output<framework::Tensor>("Y");
auto scale_a = static_cast<T>(context.Attr<AttrType>("scale_a"));
auto scale_b = static_cast<T>(context.Attr<AttrType>("scale_b"));
Y->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto y = framework::EigenVector<T>::Flatten(*Y);
auto place = context.GetEigenDevice<Place>();
y.device(place) = scale_b * (scale_a * x).tanh();
}
};
template <typename Place, typename T, typename AttrType = T>
class STanhGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<framework::Tensor>("X");
auto* dY = context.Input<framework::Tensor>(framework::GradVarName("Y"));
auto* dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto scale_a = static_cast<T>(context.Attr<AttrType>("scale_a"));
auto scale_b = static_cast<T>(context.Attr<AttrType>("scale_b"));
dX->mutable_data<T>(context.GetPlace());
auto dy = framework::EigenVector<T>::Flatten(*dY);
auto x = framework::EigenVector<T>::Flatten(*X);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto place = context.GetEigenDevice<Place>();
auto temp = (scale_a * x).tanh() * (scale_a * x).tanh();
dx.device(place) = dy * scale_a * scale_b * (static_cast<T>(1) - temp);
}
};
} // namespace operators
} // namespace paddle
......@@ -33,7 +33,7 @@ class AddOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
ctx.Input<Tensor>("Y")->dims(),
"Two input of Add Op's dimension must be same.");
ctx.Output<framework::LoDTensor>("Out")->Resize(
ctx.Output<framework::Tensor>("Out")->Resize(
ctx.Input<Tensor>("X")->dims());
}
};
......
......@@ -17,8 +17,6 @@
namespace paddle {
namespace operators {
using framework::LoDTensor;
class ClipOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -29,11 +27,12 @@ class ClipOp : public framework::OperatorWithKernel {
"Input(X) of ClipOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
"Output(Out) of ClipOp should not be null.");
auto x_dims = ctx.Input<LoDTensor>("X")->dims();
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto max = Attr<float>("max");
auto min = Attr<float>("min");
PADDLE_ENFORCE_LT(min, max, "max should be greater than min.");
ctx.Output<LoDTensor>("Out")->Resize(x_dims);
ctx.Output<Tensor>("Out")->Resize(x_dims);
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -66,8 +65,8 @@ class ClipOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<LoDTensor>("X")->dims();
auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
if (x_grad != nullptr) {
x_grad->Resize(x_dims);
}
......
......@@ -29,7 +29,7 @@ class ConcatOp : public framework::OperatorWithKernel {
"Output(Out) of ConcatOp should not be null.");
auto ins = ctx.MultiInput<framework::Tensor>("X");
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *out = ctx.Output<framework::Tensor>("Out");
size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
size_t n = ins.size();
......
......@@ -37,7 +37,7 @@ class Conv2DOp : public framework::OperatorWithKernel {
auto in = ctx.Input<Tensor>("Input");
auto filter = ctx.Input<Tensor>("Filter");
auto out = ctx.Output<framework::LoDTensor>("Output");
auto out = ctx.Output<framework::Tensor>("Output");
std::vector<int> strides = Attr<std::vector<int>>("strides");
std::vector<int> paddings = Attr<std::vector<int>>("paddings");
int groups = Attr<int>("groups");
......@@ -111,10 +111,9 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
void InferShape(const framework::InferShapeContext &ctx) const override {
auto in = ctx.Input<Tensor>("Input");
auto filter = ctx.Input<Tensor>("Filter");
auto d_in =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Input"));
auto d_in = ctx.Output<framework::Tensor>(framework::GradVarName("Input"));
auto d_filter =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Filter"));
ctx.Output<framework::Tensor>(framework::GradVarName("Filter"));
if (d_in) d_in->Resize(in->dims());
if (d_filter) d_filter->Resize(filter->dims());
}
......
......@@ -54,9 +54,10 @@ class CosSimOp : public framework::OperatorWithKernel {
" just 1 (which will be broadcasted to match Input(X)).");
// resize tensor
ctx.Output<framework::LoDTensor>("Out")->Resize({x_dims[0], 1});
ctx.Output<framework::LoDTensor>("XNorm")->Resize({x_dims[0], 1});
ctx.Output<framework::LoDTensor>("YNorm")->Resize({y_dims[0], 1});
ctx.Output<framework::Tensor>("Out")->Resize({x_dims[0], 1});
ctx.Output<framework::Tensor>("XNorm")->Resize({x_dims[0], 1});
ctx.Output<framework::Tensor>("YNorm")->Resize({y_dims[0], 1});
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -81,10 +82,13 @@ Cosine Similarity Operator.
The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
Input(X) and Input(Y) must have the same shape, except that the 1st dimension
of Input(Y) could be just 1 (different from Input(X)), which will be
broadcasted to match the shape of Input(X) before computing their cosine
The input `X` and `Y` must have the same shape, except that the 1st dimension
of input `Y` could be just 1 (different from input `X`), which will be
broadcasted to match the shape of input `X` before computing their cosine
similarity.
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
)DOC");
}
};
......@@ -139,10 +143,8 @@ class CosSimOpGrad : public framework::OperatorWithKernel {
"Shape of Input(Out@Grad) must be [X.Dim(0), 1].");
// resize tensor
auto *x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *y_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
auto *x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto *y_grad = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
if (x_grad) x_grad->Resize(x_dims);
if (y_grad) y_grad->Resize(y_dims);
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/crop_op.h"
#include <boost/lexical_cast.hpp>
namespace paddle {
namespace operators {
using framework::Tensor;
class CropOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
"Input(X) of CropOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
"Output(Out) of CropOp should not be null.");
auto x_dim = ctx.Input<Tensor>("X")->dims();
auto *y = ctx.Input<Tensor>("Y");
auto *out = ctx.Output<Tensor>("Out");
if (y == nullptr) {
auto shape = Attr<std::vector<int>>("shape");
PADDLE_ENFORCE_EQ(
int64_t(shape.size()), x_dim.size(),
"Shape size should be equal to dimention size of input tensor.");
std::vector<int64_t> tensor_shape(shape.size());
for (size_t i = 0; i < shape.size(); ++i) {
tensor_shape[i] = static_cast<int64_t>(shape[i]);
}
out->Resize(framework::make_ddim(tensor_shape));
} else {
PADDLE_ENFORCE_EQ(framework::arity(x_dim), framework::arity(y->dims()),
"Tensor rank of both CropOp's "
"inputs must be same.");
out->Resize(y->dims());
}
}
};
class CropOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CropOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input of pad op. "
"The input should be a k-D tensor(k > 0 and k < 7)");
AddInput("Y",
"The input used as reference for cropping"
" with the same dimension as X. ");
AddOutput("Out",
"The output of crop op "
"with the same dimension as X.");
AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped."
"The size of offsets list should be as same as "
"dimension size of input X.");
AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output."
"The size of shape list should be as same as "
"dimension size of input X.")
.SetDefault(std::vector<int>());
AddComment(R"DOC(
Crop Operator.
Crop input into output, as specified by offsets and shape.
There are two ways to set shape:
1. referenc input: crop input X as shape as reference input.
The dimension of reference input should
be as same as input X.
2. shape list: crop input X by shape described by a list<int>.
The size of shape list should be as same as
dimension size of input X.
The input should be a k-D tensor(k > 0 and k < 7). As an example:
Given:
X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]]
and
offsets = [0, 1]
and
shape = [2, 2]
then we get
Out = [[1, 2],
[3, 4]]
)DOC");
}
};
class CropOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
if (x_grad != nullptr) {
x_grad->Resize(x_dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(crop, ops::CropOp, ops::CropOpMaker, crop_grad, ops::CropOpGrad);
REGISTER_OP_CPU_KERNEL(crop, ops::CropKernel<float>);
REGISTER_OP_CPU_KERNEL(crop_grad,
ops::CropGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/crop_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(crop, ops::CropKernel<float>);
REGISTER_OP_GPU_KERNEL(crop_grad,
ops::CropGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 CropdleCropdle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle {
namespace operators { // Internal
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
using framework::Tensor;
template <typename T>
class CropKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<Tensor>("X");
auto* out = context.Output<Tensor>("Out");
const T* x_data = x->data<T>();
T* out_data = out->mutable_data<T>(context.GetPlace());
auto x_stride = framework::stride(x->dims());
auto out_stride = framework::stride(out->dims());
auto offsets = context.Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ(
x->dims().size(), offsets.size(),
"Offsets size should be equal to dimension size of input tensor.");
int64_t offset = 0;
for (int i = 0; i < offsets.size(); ++i) {
offset += (x_stride[i] * offsets[i]);
}
StridedMemcpy<T>(context.device_context(), x_data + offset, x_stride,
out->dims(), out_stride, out_data);
}
};
template <typename Place, typename T, size_t D>
void CropGradFunction(const framework::ExecutionContext& context) {
auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
if (d_x != nullptr) {
auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
d_x->mutable_data<T>(context.GetPlace());
auto offsets = context.Attr<std::vector<int>>("offsets");
Eigen::array<std::pair<int, int>, D> paddings;
for (int i = 0; i < D; ++i) {
paddings[i].first = offsets[i];
paddings[i].second = d_x->dims()[i] - d_out->dims()[i] - offsets[i];
}
auto d_x_tensor = EigenTensor<T, D>::From(*d_x);
auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
d_x_tensor.device(context.GetEigenDevice<Place>()) =
d_out_tensor.pad(paddings, 0);
}
}
template <typename Place, typename T>
class CropGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
size_t rank =
context.Input<Tensor>(framework::GradVarName("Out"))->dims().size();
switch (rank) {
case 1:
CropGradFunction<Place, T, 1>(context);
break;
case 2:
CropGradFunction<Place, T, 2>(context);
break;
case 3:
CropGradFunction<Place, T, 3>(context);
break;
case 4:
CropGradFunction<Place, T, 4>(context);
break;
case 5:
CropGradFunction<Place, T, 5>(context);
break;
case 6:
CropGradFunction<Place, T, 6>(context);
break;
default:
PADDLE_THROW(
"CropOp only support tensors with no more than 6 dimensions.");
}
}
};
} // namespace operators
} // namespace paddle
......@@ -17,8 +17,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
using framework::LoDTensor;
class CrossEntropyOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -35,23 +33,21 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(x->dims().size(), 2, "Input(X)'s rank must be 2.");
PADDLE_ENFORCE_EQ(label->dims().size(), 2,
"Input(Label)'s rank must be 2.");
// TODO(xinghai-sun): remove this check after swtiching to bool
PADDLE_ENFORCE(ctx.Attr<int>("soft_label") == 0 ||
ctx.Attr<int>("soft_label") == 1);
PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
"The 1st dimension of Input(X) and Input(Label) must "
"be equal.");
if (ctx.Attr<int>("soft_label") == 1) {
if (ctx.Attr<bool>("soft_label")) {
PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
"If Attr(soft_label) == 1, The 2nd dimension of "
"If Attr(soft_label) == true, The 2nd dimension of "
"Input(X) and Input(Label) must be equal.");
} else {
PADDLE_ENFORCE_EQ(label->dims()[1], 1,
"If Attr(soft_label) == 0, The 2nd dimension of "
"If Attr(soft_label) == false, The 2nd dimension of "
"Input(Label) must be 1.");
}
ctx.Output<LoDTensor>("Y")->Resize({x->dims()[0], 1});
ctx.Output<Tensor>("Y")->Resize({x->dims()[0], 1});
ctx.ShareLoD("X", /*->*/ "Y");
}
};
......@@ -74,9 +70,6 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(dy->dims().size(), 2, "Input(Y@Grad)'s rank must be 2.");
PADDLE_ENFORCE_EQ(label->dims().size(), 2,
"Input(Label)'s rank must be 2.");
// TODO(xinghai-sun): remove this check after swtiching to bool
PADDLE_ENFORCE(ctx.Attr<int>("soft_label") == 0 ||
ctx.Attr<int>("soft_label") == 1);
PADDLE_ENFORCE_EQ(x->dims()[0], label->dims()[0],
"The 1st dimension of Input(X) and Input(Label) must "
"be equal.");
......@@ -85,17 +78,17 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
"be equal.");
PADDLE_ENFORCE_EQ(dy->dims()[1], 1,
"The 2nd dimension of Input(Y@Grad) must be 1.");
if (ctx.Attr<int>("soft_label") == 1) {
if (ctx.Attr<bool>("soft_label")) {
PADDLE_ENFORCE_EQ(x->dims()[1], label->dims()[1],
"If Attr(soft_label) == 1, The 2nd dimension of "
"If Attr(soft_label) == true, The 2nd dimension of "
"Input(X) and Input(Label) must be equal.");
} else {
PADDLE_ENFORCE_EQ(label->dims()[1], 1,
"If Attr(soft_label) == 0, The 2nd dimension of "
"If Attr(soft_label) == false, The 2nd dimension of "
"Input(Label) must be 1.");
}
auto dx = ctx.Output<LoDTensor>(framework::GradVarName("X"));
auto dx = ctx.Output<Tensor>(framework::GradVarName("X"));
dx->Resize(x->dims());
}
};
......@@ -108,7 +101,8 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "The first input of CrossEntropyOp");
AddInput("Label", "The second input of CrossEntropyOp");
AddOutput("Y", "The output of CrossEntropyOp");
AddAttr<int>("soft_label", "Is soft label. Default zero.").SetDefault(0);
AddAttr<bool>("soft_label", "Is soft label. Default zero.")
.SetDefault(false);
AddComment(R"DOC(
CrossEntropy Operator.
......@@ -116,12 +110,12 @@ CrossEntropy Operator.
It supports both standard cross-entropy and soft-label cross-entropy loss
computation.
1) One-hot cross-entropy:
soft_label = 0, Label[i, 0] indicates the class index for sample i:
soft_label = False, Label[i, 0] indicates the class index for sample i:
Y[i] = -log(X[i, Label[i]])
2) Soft-label cross-entropy:
soft_label = 1, Label[i, j] indicates the soft label of class j
soft_label = True, Label[i, j] indicates the soft label of class j
for sample i:
Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
......@@ -133,6 +127,9 @@ computation.
As a special case of 2), when each row of Input(Label) has only one
non-zero element (equals 1), soft-label cross-entropy degenerates to a
one-hot cross-entropy with one-hot label representation.
Both the input `X` and `Label` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
)DOC");
}
};
......
......@@ -102,7 +102,7 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel {
int grid = (n + block - 1) / block;
// TODO(qingqing) launch kernel on specified stream
// base on ExecutionContext.
if (ctx.Attr<int>("soft_label") == 1) {
if (ctx.Attr<bool>("soft_label")) {
auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
SoftCrossEntropyKernel<T><<<grid, block>>>(y_data, x_data, label_data, n,
d);
......@@ -137,7 +137,7 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
grid = (n + block - 1) / block;
// TODO(qingqing): launch kernel on specified stream
// base on ExecutionContext.
if (ctx.Attr<int>("soft_label") == 1) {
if (ctx.Attr<bool>("soft_label")) {
auto* label_data = label->data<T>();
SoftCrossEntropyGradientKernel<T><<<grid, block>>>(
dx_data, dy_data, x_data, label_data, n, d);
......
......@@ -51,7 +51,7 @@ class CrossEntropyOpKernel : public framework::OpKernel {
int batch_size = x->dims()[0];
int class_num = x->dims()[1];
if (ctx.Attr<int>("soft_label") == 1) {
if (ctx.Attr<bool>("soft_label")) {
auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
int index = 0;
for (int i = 0; i < batch_size; ++i) {
......@@ -92,7 +92,7 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel {
int class_num = x->dims()[1];
// TODO(qingqing): make zero setting an common function.
if (ctx.Attr<int>("soft_label") == 1) {
if (ctx.Attr<bool>("soft_label")) {
auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
int index = 0;
for (int i = 0; i < batch_size; ++i) {
......
......@@ -18,7 +18,6 @@ namespace paddle {
namespace operators {
using framework::Tensor;
using framework::LoDTensor;
class DropoutOp : public framework::OperatorWithKernel {
public:
......@@ -29,15 +28,13 @@ class DropoutOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
PADDLE_ENFORCE_GE(ctx.Attr<float>("dropout_prob"), 0);
PADDLE_ENFORCE_LE(ctx.Attr<float>("dropout_prob"), 1);
// TODO(xinghai-sun): remove this check after swtiching to bool
PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
ctx.Attr<int>("is_training") == 1);
auto dims = ctx.Input<Tensor>("X")->dims();
ctx.Output<LoDTensor>("Out")->Resize(dims);
if (ctx.Attr<int>("is_training") == 1) {
ctx.Output<LoDTensor>("Mask")->Resize(dims);
ctx.Output<Tensor>("Out")->Resize(dims);
if (ctx.Attr<bool>("is_training")) {
ctx.Output<Tensor>("Mask")->Resize(dims);
}
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -49,8 +46,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<AttrType>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
// TODO(xinghai-sun): use bool for is_training after bool is supported.
AddAttr<int>("is_training", "Whether in training phase.").SetDefault(1);
AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddInput("X", "The input of dropout op.");
AddOutput("Out", "The output of dropout op.");
......@@ -59,7 +55,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Dropout Operator.
"Dropout" refers to randomly dropping out units in a nerual network. It is a
'Dropout' refers to randomly dropping out units in a nerual network. It is a
regularization technique for reducing overfitting by preventing neuron
co-adaption during training. The dropout operator randomly set (according to
the given dropout probability) the outputs of some units to zero, while others
......@@ -75,7 +71,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_EQ(ctx.Attr<int>("is_training"), 1,
PADDLE_ENFORCE(ctx.Attr<bool>("is_training"),
"GradOp is only callable when is_training is true");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
......@@ -85,9 +81,6 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GE(ctx.Attr<AttrType>("dropout_prob"), 0);
PADDLE_ENFORCE_LE(ctx.Attr<AttrType>("dropout_prob"), 1);
// TODO(xinghai-sun): remove this check after swtiching to bool
PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
ctx.Attr<int>("is_training") == 1);
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
PADDLE_ENFORCE_EQ(x_dims, out_dims,
......@@ -96,7 +89,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(x_dims, mask_dims,
"Dimensions of Input(X) and Mask must be the same.");
auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
x_grad->Resize(x_dims);
}
};
......
......@@ -59,7 +59,7 @@ class GPUDropoutKernel : public framework::OpKernel {
auto Y = EigenMatrix<T>::Reshape(*y, 1);
auto place = context.GetEigenDevice<Place>();
if (context.Attr<int>("is_training") == 1) {
if (context.Attr<bool>("is_training")) {
auto* mask = context.Output<Tensor>("Mask");
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
int size = framework::product(mask->dims());
......
......@@ -35,7 +35,7 @@ class CPUDropoutKernel : public framework::OpKernel {
auto* y_data = y->mutable_data<T>(context.GetPlace());
AttrType dropout_prob = context.Attr<AttrType>("dropout_prob");
if (context.Attr<int>("is_training") == 1) {
if (context.Attr<bool>("is_training")) {
auto* mask = context.Output<Tensor>("Mask");
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
int seed = context.Attr<int>("seed");
......@@ -65,7 +65,7 @@ template <typename Place, typename T>
class DropoutGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
PADDLE_ENFORCE_EQ(context.Attr<int>("is_training"), 1,
PADDLE_ENFORCE(context.Attr<bool>("is_training"),
"GradOp is only callable when is_training is true");
auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
......
......@@ -12,51 +12,28 @@
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/elementwise_add_op.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T>
class SigmoidKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto input = context.Input<Tensor>("X");
auto output = context.Output<Tensor>("Y");
output->mutable_data<T>(context.GetPlace());
// The clipping is used in Paddle's raw implenmention
auto X = EigenVector<T>::Flatten(*input);
auto Y = EigenVector<T>::Flatten(*output);
auto place = context.GetEigenDevice<Place>();
Y.device(place) = 1. / (1. + (-X).exp());
}
};
template <typename Place, typename T>
class SigmoidGradKernel : public framework::OpKernel {
class ElementwiseAddOpMaker : public ElementwiseOpMaker {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto Y_t = context.Input<Tensor>("Y");
auto dY_t = context.Input<Tensor>(framework::GradVarName("Y"));
auto dX_t = context.Output<Tensor>(framework::GradVarName("X"));
dX_t->mutable_data<T>(context.GetPlace());
auto dX = EigenVector<T>::Flatten(*dX_t);
auto Y = EigenVector<T>::Flatten(*Y_t);
auto dY = EigenVector<T>::Flatten(*dY_t);
dX.device(context.GetEigenDevice<Place>()) = dY * Y * (1. - Y);
ElementwiseAddOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("add", "Out = X + Y");
AddComment(comment_);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker,
elementwise_add_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>);
......@@ -13,11 +13,13 @@
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/sigmoid_op.h"
#include "paddle/operators/elementwise_add_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(sigmoid,
ops::SigmoidKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
sigmoid_grad, ops::SigmoidGradKernel<paddle::platform::GPUPlace, float>);
elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/elementwise_op.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class ElementwiseAddKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseCompute<EigenAddFunctor, Place, T>(ctx);
}
};
template <typename T>
struct ElementwiseAddGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = dz_e;
}
}
};
template <typename T>
struct ElementwiseAddOneGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = dz_e.sum();
}
}
};
template <typename T>
struct ElementwiseAddBroadCastGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = dz_e.reshape(Eigen::DSizes<int, 2>(pre, n))
.sum(Eigen::array<int, 1>{{0}});
}
}
};
template <typename T>
struct ElementwiseAddBroadCast2GradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N, typename Post>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
Post post) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = dz_e.reshape(Eigen::DSizes<int, 3>(pre, n, post))
.sum(Eigen::array<int, 2>{{0, 2}});
}
}
};
template <typename Place, typename T>
class ElementwiseAddGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseGradCompute<Place, T, ElementwiseAddGradFunctor<T>,
ElementwiseAddOneGradFunctor<T>,
ElementwiseAddBroadCastGradFunctor<T>,
ElementwiseAddBroadCast2GradFunctor<T>>(ctx);
}
};
} // namespace operators
} // namespace paddle
......@@ -12,46 +12,17 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sigmoid_op.h"
#include "paddle/operators/elementwise_div_op.h"
namespace paddle {
namespace operators {
class SigmoidOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
"Input(X) of SigmoidOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"),
"Output(Y) of SigmoidOp should not be null.");
ctx.Output<framework::LoDTensor>("Y")->Resize(
ctx.Input<Tensor>("X")->dims());
}
};
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
class ElementwiseDivOpMaker : public ElementwiseOpMaker {
public:
SigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input");
AddOutput("Y", "sigmoid output");
AddComment("Sigmoid function");
}
};
class SigmoidOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"))
->Resize(ctx.Input<Tensor>("Y")->dims());
ElementwiseDivOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("Div", "Out = X / Y");
AddComment(comment_);
}
};
......@@ -59,9 +30,11 @@ class SigmoidOpGrad : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker, sigmoid_grad,
ops::SigmoidOpGrad);
REGISTER_OP_CPU_KERNEL(sigmoid,
ops::SigmoidKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker,
elementwise_div_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
sigmoid_grad, ops::SigmoidGradKernel<paddle::platform::CPUPlace, float>);
elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/elementwise_div_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/elementwise_op.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class ElementwiseDivKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseCompute<EigenDivFunctor, Place, T>(ctx);
}
};
template <typename T>
struct ElementwiseDivGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto z_e = framework::EigenVector<T>::Flatten(*z);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e / y_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = -1.0 * dz_e * z_e / y_e;
}
}
};
template <typename T>
struct ElementwiseDivBroadCastGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n))
.broadcast(Eigen::DSizes<int, 2>(pre, 1))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e / y_e_bcast;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (-1.0 * (x_e * dz_e) / (y_e_bcast * y_e_bcast))
.reshape(Eigen::DSizes<int, 2>(pre, n))
.sum(Eigen::array<int, 1>{{0}});
}
}
};
template <typename T>
struct ElementwiseDivBroadCast2GradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N, typename Post>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
Post post) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1))
.broadcast(Eigen::DSizes<int, 3>(pre, 1, post))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e / y_e_bcast;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (-1.0 * (x_e * dz_e) / (y_e_bcast * y_e_bcast))
.reshape(Eigen::DSizes<int, 3>(pre, n, post))
.sum(Eigen::array<int, 2>{{0, 2}});
}
}
};
template <typename Place, typename T>
class ElementwiseDivGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseGradCompute<Place, T, ElementwiseDivGradFunctor<T>,
ElementwiseDivGradFunctor<T>,
ElementwiseDivBroadCastGradFunctor<T>,
ElementwiseDivBroadCast2GradFunctor<T>>(ctx);
}
};
} // namespace operators
} // namespace paddle
......@@ -17,101 +17,25 @@
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class ElementWiseMulOp : public framework::OperatorWithKernel {
class ElementwiseMulOpMaker : public ElementwiseOpMaker {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
"Input(X) of ElementWiseMulOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"),
"Input(Y) of ElementWiseMulOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(
ctx.OutputVar("Out"),
"Output(Out) of ElementWiseMulOp should not be null.");
auto x_dim = ctx.Input<Tensor>("X")->dims();
auto y_dim = ctx.Input<Tensor>("Y")->dims();
PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(),
"Rank of first input must >= rank of second input.")
ctx.Output<framework::LoDTensor>("Out")->Resize(x_dim);
ElementwiseMulOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("Mul", "Out = X ⊙ Y");
AddComment(comment_);
}
};
class ElementWiseMulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ElementWiseMulOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of elementwise mul op");
AddInput("Y", "The second input of elementwise mul op");
AddAttr<int>("axis",
R"DOC(
When shape(Y) does not equal shape(X),Y will be broadcasted
to match the shape of X and axis should be dimension index Y in X
)DOC")
.SetDefault(-1)
.EqualGreaterThan(-1);
AddOutput("Out", "The output of elementwise mul op");
AddComment(R"DOC(
Limited elementwise multiple operator.The equation is: Out = X ⊙ Y.
1. The shape of Y should be same with X or
2. Y's shape is a subset of X.
Y will be broadcasted to match the shape of X and axis should be dimension index Y in X.
example:
shape(X) = (2, 3, 4, 5), shape(Y) = (,)
shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
)DOC");
}
};
class ElementWiseMulOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto y_dims = ctx.Input<Tensor>("Y")->dims();
auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
auto *x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *y_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
"Rank of first input must >= rank of second input.")
if (x_grad) {
x_grad->Resize(x_dims);
}
if (y_grad) {
y_grad->Resize(y_dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(elementwise_mul, ops::ElementWiseMulOp, ops::ElementWiseMulOpMaker,
elementwise_mul_grad, ops::ElementWiseMulOpGrad);
REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker,
elementwise_mul_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_mul,
ops::ElementWiseMulKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
elementwise_mul_grad,
ops::ElementWiseMulGradKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, float>);
......@@ -19,7 +19,7 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_mul,
ops::ElementWiseMulKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
elementwise_mul_grad,
ops::ElementWiseMulGradKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, float>);
......@@ -13,169 +13,102 @@
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/elementwise_op.h"
namespace paddle {
namespace operators {
/*
* Out = X ⊙ Y
* 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
* pre=2, n=3*4, post=5
* 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
* pre=2*3, n=4*5, post=1
*/
inline void get_mid_dims(const framework::DDim& x_dims,
const framework::DDim& y_dims, const int axis,
int& pre, int& n, int& post) {
pre = 1;
n = 1;
post = 1;
for (int i = 0; i < axis; ++i) {
pre *= x_dims[i];
}
for (int i = 0; i < y_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(x_dims[i + axis], y_dims[i],
"Broadcast dimension mismatch.");
n *= y_dims[i];
}
for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
post *= x_dims[i];
}
}
template <typename Place, typename T>
class ElementWiseMulKernel : public framework::OpKernel {
class ElementwiseMulKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto z_e = framework::EigenVector<T>::Flatten(*z);
auto x_dims = x->dims();
auto y_dims = y->dims();
PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
"Rank of first input must >= rank of second input.")
if (x_dims == y_dims || product(y_dims) == 1) {
z_e.device(ctx.GetEigenDevice<Place>()) = x_e * y_e;
return;
}
int axis = ctx.Attr<int>("axis");
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, pre, n, post);
if (post == 1) {
auto y_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n))
.broadcast(Eigen::DSizes<int, 2>(pre, 1))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
z_e.device(ctx.GetEigenDevice<Place>()) = x_e * y_bcast;
return;
} else {
auto y_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1))
.broadcast(Eigen::DSizes<int, 3>(pre, 1, post))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
z_e.device(ctx.GetEigenDevice<Place>()) = x_e * y_bcast;
return;
}
ElementwiseCompute<EigenMulFunctor, Place, T>(ctx);
}
};
template <typename Place, typename T>
class ElementWiseMulGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
template <typename T>
struct ElementwiseMulGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dout_e = framework::EigenVector<T>::Flatten(*dout);
auto x_dims = x->dims();
auto y_dims = y->dims();
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
if (dx) {
dx->mutable_data<T>(ctx.GetPlace());
}
if (dy) {
dy->mutable_data<T>(ctx.GetPlace());
}
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (x_dims == y_dims || product(y_dims) == 1) {
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(ctx.GetEigenDevice<Place>()) = dout_e * y_e;
dx_e.device(d) = dz_e * y_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(ctx.GetEigenDevice<Place>()) = x_e * dout_e;
dy_e.device(d) = x_e * dz_e;
}
return;
}
};
int axis = ctx.Attr<int>("axis");
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, pre, n, post);
template <typename T>
struct ElementwiseMulBroadCastGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
// TODO(gongweibao): wrap reshape to a function.
if (post == 1) {
auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n))
.broadcast(Eigen::DSizes<int, 2>(pre, 1))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(ctx.GetEigenDevice<Place>()) = dout_e * y_e_bcast;
dx_e.device(d) = dz_e * y_e_bcast;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(ctx.GetEigenDevice<Place>()) =
(x_e * dout_e)
dy_e.device(d) = (x_e * dz_e)
.reshape(Eigen::DSizes<int, 2>(pre, n))
.sum(Eigen::array<int, 1>{{0}});
}
return;
} else {
}
};
template <typename T>
struct ElementwiseMulBroadCast2GradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N, typename Post>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
Post post) {
auto x_e = framework::EigenVector<T>::Flatten(*x);
auto y_e = framework::EigenVector<T>::Flatten(*y);
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
auto y_e_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1))
.broadcast(Eigen::DSizes<int, 3>(pre, 1, post))
.reshape(Eigen::DSizes<int, 1>(x_e.size()));
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(ctx.GetEigenDevice<Place>()) = dout_e * y_e_bcast;
dx_e.device(d) = dz_e * y_e_bcast;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(ctx.GetEigenDevice<Place>()) =
(x_e * dout_e)
dy_e.device(d) = (x_e * dz_e)
.reshape(Eigen::DSizes<int, 3>(pre, n, post))
.sum(Eigen::array<int, 2>{{0, 2}});
}
return;
}
};
template <typename Place, typename T>
class ElementwiseMulGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseGradCompute<Place, T, ElementwiseMulGradFunctor<T>,
ElementwiseMulGradFunctor<T>,
ElementwiseMulBroadCastGradFunctor<T>,
ElementwiseMulBroadCast2GradFunctor<T>>(ctx);
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
/*
* Out = X ⊙ Y
* If Y's shape does not match X' shape, they will be reshaped.
* For example:
* 1. shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
* pre=2, n=3*4, post=5
* x.shape(2, 12, 5) * y.shape(1,12,1).broadcast(2,12,5)
* 2. shape(X) = (2, 3, 4, 5), shape(Y) = (4,5)
* pre=2*3, n=4*5, post=1
* x.shape(2, 3, 20) * y.shape(1,1,20).broadcast(2,3,20)
*/
inline void get_mid_dims(const framework::DDim& x_dims,
const framework::DDim& y_dims, const int axis,
int& pre, int& n, int& post) {
pre = 1;
n = 1;
post = 1;
for (int i = 0; i < axis; ++i) {
pre *= x_dims[i];
}
for (int i = 0; i < y_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(x_dims[i + axis], y_dims[i],
"Broadcast dimension mismatch.");
n *= y_dims[i];
}
for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
post *= x_dims[i];
}
}
#define EIGEN_FUNCTOR(name, eigen_op) \
struct Eigen##name##Functor { \
template <typename Place, typename T> \
inline void Run(const framework::Tensor* x, const framework::Tensor* y, \
framework::Tensor* z, \
const framework::ExecutionContext& ctx) { \
auto x_e = framework::EigenVector<T>::Flatten(*x); \
auto y_e = framework::EigenVector<T>::Flatten(*y); \
auto z_e = framework::EigenVector<T>::Flatten(*z); \
z_e.device(ctx.GetEigenDevice<Place>()) = eigen_op(x_e, y_e); \
} \
template <typename Place, typename T> \
inline void RunBroadCast(const framework::Tensor* x, \
const framework::Tensor* y, framework::Tensor* z, \
const framework::ExecutionContext& ctx, int pre, \
int n) { \
auto x_e = framework::EigenVector<T>::Flatten(*x); \
auto y_e = framework::EigenVector<T>::Flatten(*y); \
auto z_e = framework::EigenVector<T>::Flatten(*z); \
auto y_bcast = y_e.reshape(Eigen::DSizes<int, 2>(1, n)) \
.broadcast(Eigen::DSizes<int, 2>(pre, 1)) \
.reshape(Eigen::DSizes<int, 1>(x_e.size())); \
z_e.device(ctx.GetEigenDevice<Place>()) = eigen_op(x_e, y_bcast); \
} \
template <typename Place, typename T> \
inline void RunBroadCast2(const framework::Tensor* x, \
const framework::Tensor* y, \
framework::Tensor* z, \
const framework::ExecutionContext& ctx, int pre, \
int n, int post) { \
auto x_e = framework::EigenVector<T>::Flatten(*x); \
auto y_e = framework::EigenVector<T>::Flatten(*y); \
auto z_e = framework::EigenVector<T>::Flatten(*z); \
auto y_bcast = y_e.reshape(Eigen::DSizes<int, 3>(1, n, 1)) \
.broadcast(Eigen::DSizes<int, 3>(pre, 1, post)) \
.reshape(Eigen::DSizes<int, 1>(x_e.size())); \
z_e.device(ctx.GetEigenDevice<Place>()) = eigen_op(x_e, y_bcast); \
} \
}
template <class functor, typename Place, typename T>
void ElementwiseCompute(const framework::ExecutionContext& ctx) {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
auto x_dims = x->dims();
auto y_dims = y->dims();
PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
"Rank of first input must >= rank of second input.")
if (x_dims == y_dims || product(y_dims) == 1) {
functor f;
f.template Run<Place, T>(x, y, z, ctx);
return;
}
int axis = ctx.Attr<int>("axis");
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, pre, n, post);
if (post == 1) {
functor f;
f.template RunBroadCast<Place, T>(x, y, z, ctx, pre, n);
return;
} else {
functor f;
f.template RunBroadCast2<Place, T>(x, y, z, ctx, pre, n, post);
return;
}
}
#define EIGEN_ADD(x, y) ((x) + (y))
EIGEN_FUNCTOR(Add, EIGEN_ADD);
#define EIGEN_SUB(x, y) ((x) - (y))
EIGEN_FUNCTOR(Sub, EIGEN_SUB);
#define EIGEN_MUL(x, y) ((x) * (y))
EIGEN_FUNCTOR(Mul, EIGEN_MUL);
#define EIGEN_DIV(x, y) ((x) / (y))
EIGEN_FUNCTOR(Div, EIGEN_DIV);
template <typename Place, typename T, typename functor, typename functor1,
typename broadcastfunctor, typename broadcast2functor>
void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* out = ctx.Input<Tensor>("Out");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto place = ctx.GetEigenDevice<Place>();
auto x_dims = x->dims();
auto y_dims = y->dims();
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
if (dx) {
dx->mutable_data<T>(ctx.GetPlace());
}
if (dy) {
dy->mutable_data<T>(ctx.GetPlace());
}
if (x_dims == y_dims) {
functor f;
f(place, x, y, out, dx, dy, dout);
return;
}
if (product(y_dims) == 1) {
functor1 f;
f(place, x, y, out, dx, dy, dout);
return;
}
int axis = ctx.Attr<int>("axis");
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, pre, n, post);
if (post == 1) {
broadcastfunctor f;
f(place, x, y, out, dx, dy, dout, pre, n);
return;
} else {
broadcast2functor f;
f(place, x, y, out, dx, dy, dout, pre, n, post);
return;
}
}
class ElementwiseOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
using Tensor = framework::Tensor;
void InferShape(const framework::InferShapeContext& ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
"Input(X) of elementwise op should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"),
"Input(Y) of elementwise op should not be null");
PADDLE_ENFORCE_NOT_NULL(
ctx.OutputVar("Out"),
"Output(Out) of elementwise op should not be null.");
auto x_dim = ctx.Input<Tensor>("X")->dims();
auto y_dim = ctx.Input<Tensor>("Y")->dims();
PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(),
"Rank of first input must >= rank of second input.")
ctx.Output<framework::Tensor>("Out")->Resize(x_dim);
ctx.ShareLoD("X", /*->*/ "Out");
}
};
class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ElementwiseOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", R"DOC(
The first input of elementwise op, it's a tensor of any dimensions.
)DOC");
AddInput("Y", R"DOC(
The sencond input of elementwise op, it's a tensor and it's dimensions
must be small or equal to X's dimensions.
)DOC");
AddAttr<int>("axis",
R"DOC(
When the shape(Y) does not equal the shape(X),Y will be broadcasted
to match the shape of X and axis should be dimension index Y in X
)DOC")
.SetDefault(-1)
.EqualGreaterThan(-1);
AddOutput("Out", "The output of elementwise op");
comment_ = R"DOC(
Limited elementwise {name} operator.The equation is: Out = {equation}.
1. The shape of Y should be same with X or
2. Y's shape is a subset of X.
Y will be broadcasted to match the shape of X and axis should be dimension index Y in X.
example:
shape(X) = (2, 3, 4, 5), shape(Y) = (,)
shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input X.
)DOC";
AddComment(comment_);
}
protected:
std::string comment_;
void Replace(std::string& src, std::string from, std::string to) {
std::size_t len_from = std::strlen(from.c_str());
std::size_t len_to = std::strlen(to.c_str());
for (std::size_t pos = src.find(from); pos != std::string::npos;
pos = src.find(from, pos + len_to)) {
src.replace(pos, len_from, to);
}
}
void SetComment(std::string name, std::string equation) {
Replace(comment_, "{name}", name);
Replace(comment_, "{equation}", equation);
}
};
class ElementwiseOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
using Tensor = framework::Tensor;
protected:
void InferShape(const framework::InferShapeContext& ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto y_dims = ctx.Input<Tensor>("Y")->dims();
auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
auto* x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto* y_grad = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(),
"Rank of first input must >= rank of second input.")
if (x_grad) {
x_grad->Resize(x_dims);
}
if (y_grad) {
y_grad->Resize(y_dims);
}
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/elementwise_sub_op.h"
namespace paddle {
namespace operators {
class ElementwiseSubOpMaker : public ElementwiseOpMaker {
public:
ElementwiseSubOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("Sub", "Out = X - Y");
AddComment(comment_);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker,
elementwise_sub_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/elementwise_sub_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/elementwise_op.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class ElementwiseSubKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseCompute<EigenSubFunctor, Place, T>(ctx);
}
};
template <typename T>
struct ElementwiseSubGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (-1.0) * dz_e;
}
}
};
template <typename T>
struct ElementwiseSubOneGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (-1.0) * dz_e.sum();
}
}
};
template <typename T>
struct ElementwiseSubBroadCastGradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (-1.0) *
dz_e.reshape(Eigen::DSizes<int, 2>(pre, n))
.sum(Eigen::array<int, 1>{{0}});
}
}
};
template <typename T>
struct ElementwiseSubBroadCast2GradFunctor {
template <typename Device, typename X, typename Y, typename Z, typename dX,
typename dY, typename dZ, typename Pre, typename N, typename Post>
void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz, Pre pre, N n,
Post post) {
auto dz_e = framework::EigenVector<T>::Flatten(*dz);
if (dx) {
auto dx_e = framework::EigenVector<T>::Flatten(*dx);
dx_e.device(d) = dz_e;
}
if (dy) {
auto dy_e = framework::EigenVector<T>::Flatten(*dy);
dy_e.device(d) = (-1.0) *
dz_e.reshape(Eigen::DSizes<int, 3>(pre, n, post))
.sum(Eigen::array<int, 2>{{0, 2}});
}
}
};
template <typename Place, typename T>
class ElementwiseSubGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
ElementwiseGradCompute<Place, T, ElementwiseSubGradFunctor<T>,
ElementwiseSubOneGradFunctor<T>,
ElementwiseSubBroadCastGradFunctor<T>,
ElementwiseSubBroadCast2GradFunctor<T>>(ctx);
}
};
} // namespace operators
} // namespace paddle
......@@ -186,6 +186,9 @@ W_i is a 2-D matrix of size (K x N), where N means the number of neurons
in the fully connected layer. B is a 1-D vector of size N.
Thus, the output Out is a 2-D matrix of size (M x N).
Activation type can be set to `identity` (default), `sigmoid` or `softmax`.
All the inputs can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with first input (`X[0]`).
)DOC");
}
};
......
......@@ -23,15 +23,14 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(
ctx.InputVar("Src"),
"Input(Src) of FillZerosLikeOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(
ctx.OutputVar("Dst"),
"Output(Dst) of FillZerosLikeOp should not be null.");
ctx.Output<framework::LoDTensor>("Dst")->Resize(
ctx.Input<framework::Tensor>("Src")->dims());
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
"Input(X) of FillZerosLikeOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"),
"Output(Y) of FillZerosLikeOp should not be null.");
ctx.Output<framework::Tensor>("Y")->Resize(
ctx.Input<framework::Tensor>("X")->dims());
ctx.ShareLoD("X", /*->*/ "Y");
}
};
......@@ -40,8 +39,8 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
FillZerosLikeOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Src", "The input of fill-zeros-like op.");
AddOutput("Dst", "The varibale will be filled up with zeros.");
AddInput("X", "The input of fill-zeros-like op.");
AddOutput("Y", "The varibale will be filled up with zeros.");
AddComment(R"DOC(
Fill up a vriable with zeros.
......
......@@ -23,7 +23,7 @@ template <typename Place, typename T>
class FillZerosLikeKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* output = context.Output<framework::Tensor>("Dst");
auto* output = context.Output<framework::Tensor>("Y");
output->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*output);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
......
......@@ -35,7 +35,7 @@ class GatherOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0");
framework::DDim output_dims(ctx.Input<Tensor>("X")->dims());
output_dims[0] = batch_size;
ctx.Output<framework::LoDTensor>("Out")->Resize(output_dims);
ctx.Output<framework::Tensor>("Out")->Resize(output_dims);
}
};
......@@ -45,7 +45,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto X_grad = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto X_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto X = ctx.Input<Tensor>("X");
X_grad->Resize(X->dims());
......
......@@ -48,7 +48,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
ctx.OutputVar("Out"),
"Output(Out) of GaussianRandomOp should not be null.");
auto* tensor = ctx.Output<framework::LoDTensor>("Out");
auto* tensor = ctx.Output<framework::Tensor>("Out");
auto dims = Attr<std::vector<int>>("dims");
std::vector<int64_t> temp;
temp.reserve(dims.size());
......
......@@ -75,9 +75,6 @@ class GemmConv2DKernel : public framework::OpKernel {
framework::DDim output_matrix_shape = {output_channels,
output_height * output_width};
auto* device_context =
const_cast<platform::DeviceContext*>(context.device_context_);
// convolution operator: im2col + gemm
int in_step = input_channels / groups;
int out_step = output_channels / groups;
......@@ -87,14 +84,14 @@ class GemmConv2DKernel : public framework::OpKernel {
for (int g = 0; g < groups; g++) {
// im2col
Tensor in_slice = in_batch.Slice<T>(g * in_step, (g + 1) * in_step);
im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1],
device_context);
im2col(context.device_context(), in_slice, col, strides[0], strides[1],
paddings[0], paddings[1]);
// gemm
Tensor out_slice = out_batch.Slice<T>(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice<T>(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(filter_slice, false, col_matrix, false, T(1.0),
&out_slice, T(0.0), device_context);
math::matmul<Place, T>(context.device_context(), filter_slice, false,
col_matrix, false, T(1.0), &out_slice, T(0.0));
}
}
}
......@@ -160,9 +157,6 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
auto* device_context =
const_cast<platform::DeviceContext*>(context.device_context_);
// convolution backward input operator: gemm + col2im
// convolution backward weight operator: im2col + gemm
int in_step = input_channels / groups;
......@@ -184,14 +178,15 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
out_grad_batch.Slice<T>(g * out_step, (g + 1) * out_step);
Tensor filter_slice =
filter.Slice<T>(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(filter_slice, true, out_grad_slice, false,
T(1.0), &col_matrix, T(0.0), device_context);
math::matmul<Place, T>(context.device_context(), filter_slice, true,
out_grad_slice, false, T(1.0), &col_matrix,
T(0.0));
// col2im
Tensor in_grad_slice =
in_grad_batch.Slice<T>(g * in_step, (g + 1) * in_step);
col2im(in_grad_slice, col, strides[0], strides[1], paddings[0],
paddings[1], device_context);
col2im(context.device_context(), in_grad_slice, col, strides[0],
strides[1], paddings[0], paddings[1]);
}
}
}
......@@ -212,15 +207,15 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
Tensor out_grad_slice =
out_grad_batch.Slice<T>(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice<T>(g * in_step, (g + 1) * in_step);
im2col(in_slice, col, strides[0], strides[1], paddings[0],
paddings[1], device_context);
im2col(context.device_context(), in_slice, col, strides[0],
strides[1], paddings[0], paddings[1]);
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice<T>(g * out_step, (g + 1) * out_step);
math::matmul<Place, T>(out_grad_slice, false, col_matrix, true,
T(1.0), &filter_grad_slice, T(1.0),
device_context);
math::matmul<Place, T>(context.device_context(), out_grad_slice,
false, col_matrix, true, T(1.0),
&filter_grad_slice, T(1.0));
}
}
}
......
......@@ -32,9 +32,10 @@ class LookupTableOp : public framework::OperatorWithKernel {
auto table_t = ctx.Input<Tensor>("W");
auto ids_t = ctx.Input<Tensor>("Ids");
auto output_t = ctx.Output<framework::LoDTensor>("Out");
auto output_t = ctx.Output<framework::Tensor>("Out");
output_t->Resize({ids_t->dims()[0], table_t->dims()[1]});
ctx.ShareLoD("Ids", /*->*/ "Out");
}
};
......@@ -50,9 +51,13 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
"An input with type int32 or int64"
"contains the ids to be looked up in W.");
AddOutput("Out", "The lookup results, which have the same type with W.");
AddComment(
"This operator is used to perform lookups on the parameter W,"
"then concatenated into a dense tensor.");
AddComment(R"DOC(
This operator is used to perform lookups on the parameter W,
then concatenated into a dense tensor.
The input `Ids` can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD with input `Ids`.
)DOC");
}
};
......@@ -64,7 +69,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
void InferShape(const framework::InferShapeContext &context) const override {
auto table = context.Input<Tensor>("W");
auto d_table =
context.Output<framework::LoDTensor>(framework::GradVarName("W"));
context.Output<framework::Tensor>(framework::GradVarName("W"));
d_table->Resize(table->dims());
}
};
......
......@@ -27,9 +27,10 @@ template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUPlace, T> {
public:
void operator()(const framework::Tensor& im, framework::Tensor& col,
void operator()(const platform::DeviceContext& context,
const framework::Tensor& im, framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
......@@ -79,9 +80,9 @@ template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform::CPUPlace, T> {
public:
void operator()(framework::Tensor& im, const framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
const framework::Tensor& col, int stride_height,
int stride_width, int padding_height, int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
int input_channels = im.dims()[0];
......@@ -137,9 +138,10 @@ template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUPlace, T> {
public:
void operator()(const framework::Tensor& im, framework::Tensor& col,
void operator()(const platform::DeviceContext& context,
const framework::Tensor& im, framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
int input_channels = im.dims()[0];
......@@ -197,9 +199,9 @@ template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
platform::CPUPlace, T> {
public:
void operator()(framework::Tensor& im, const framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
const framework::Tensor& col, int stride_height,
int stride_width, int padding_height, int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
int input_channels = im.dims()[0];
......
......@@ -64,9 +64,10 @@ template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
platform::GPUPlace, T> {
public:
void operator()(const framework::Tensor& im, framework::Tensor& col,
void operator()(const platform::DeviceContext& context,
const framework::Tensor& im, framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
......@@ -84,9 +85,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int block_y = (blocks + 512 - 1) / 512;
dim3 threads(1024, 1);
dim3 grid(block_x, block_y);
im2col<T><<<
grid, threads, 0,
reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
im2col<T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
im.data<T>(), num_outputs, input_height, input_width, filter_height,
filter_width, stride_height, stride_width, padding_height,
padding_width, output_height, output_width, col.data<T>());
......@@ -149,9 +150,9 @@ template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
platform::GPUPlace, T> {
public:
void operator()(framework::Tensor& im, const framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
const framework::Tensor& col, int stride_height,
int stride_width, int padding_height, int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
......@@ -174,9 +175,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
col2im<T><<<
grid, threads, 0,
reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
col2im<T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
num_kernels, col.data<T>(), input_height + 2 * padding_height,
input_width + 2 * padding_width, input_channels, filter_height,
filter_width, stride_height, stride_width, padding_height,
......@@ -235,9 +236,10 @@ template <class T>
class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
platform::GPUPlace, T> {
public:
void operator()(const framework::Tensor& im, framework::Tensor& col,
void operator()(const platform::DeviceContext& context,
const framework::Tensor& im, framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
int input_channels = im.dims()[0];
......@@ -268,9 +270,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
dim3 threads(block_dim_x, block_dim_y,
std::min(block_dim_z, input_channels));
dim3 grid(output_width, output_height);
im2colOCF<T><<<
grid, threads, 0,
reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
im2colOCF<T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
filter_height, filter_width, stride_height, stride_width,
padding_height, padding_width, output_height, output_width);
......@@ -318,9 +320,9 @@ template <class T>
class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
platform::GPUPlace, T> {
public:
void operator()(framework::Tensor& im, const framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context) {
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
const framework::Tensor& col, int stride_height,
int stride_width, int padding_height, int padding_width) {
PADDLE_ENFORCE(im.dims().size() == 3);
PADDLE_ENFORCE(col.dims().size() == 5);
int input_channels = im.dims()[0];
......@@ -351,9 +353,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
dim3 threads(block_dim_x, block_dim_y,
std::min(block_dim_z, input_channels));
dim3 grid(output_width, output_height);
col2imOCF<T><<<
grid, threads, 0,
reinterpret_cast<platform::CUDADeviceContext*>(context)->stream()>>>(
col2imOCF<T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
filter_height, filter_width, stride_height, stride_width,
padding_height, padding_width, output_height, output_width);
......
......@@ -72,17 +72,18 @@ enum class ColFormat { kCFO = 0, kOCF = 1 };
template <ColFormat Format, typename Place, typename T>
class Im2ColFunctor {
public:
void operator()(const framework::Tensor& im, framework::Tensor& col,
void operator()(const platform::DeviceContext& context,
const framework::Tensor& im, framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context);
int padding_width);
};
template <ColFormat Format, typename Place, typename T>
class Col2ImFunctor {
public:
void operator()(framework::Tensor& im, const framework::Tensor& col,
int stride_height, int stride_width, int padding_height,
int padding_width, platform::DeviceContext* context);
void operator()(const platform::DeviceContext& context, framework::Tensor& im,
const framework::Tensor& col, int stride_height,
int stride_width, int padding_height, int padding_width);
};
} // namespace math
......
......@@ -78,8 +78,8 @@ void testIm2col() {
PADDLE_THROW("no GPU support");
#endif // PADDLE_ONLY_CPU
}
im2col(input, output_cfo, stride, stride, padding, padding, context);
im2col_ocf(input, output_ocf, stride, stride, padding, padding, context);
im2col(*context, input, output_cfo, stride, stride, padding, padding);
im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding);
float* out_cfo_ptr;
if (paddle::platform::is_cpu_place(*place)) {
......
......@@ -27,7 +27,7 @@ class MeanOp : public framework::OperatorWithKernel {
"Input(X) of MeanOp should not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
"Output(Out) of MeanOp should not be null.");
ctx.Output<framework::LoDTensor>("Out")->Resize({1});
ctx.Output<framework::Tensor>("Out")->Resize({1});
}
};
......@@ -37,7 +37,8 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op").NotInGradient();
AddComment("Mean Operator");
AddComment(R"DOC( Mean Operator
)DOC");
}
};
......@@ -47,7 +48,7 @@ class MeanGradOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"))
ctx.Output<framework::Tensor>(framework::GradVarName("X"))
->Resize(ctx.Input<Tensor>("X")->dims());
}
};
......
......@@ -40,7 +40,8 @@ class MinusOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
left_tensor->numel(), right_tensor->numel(),
"Minus operator must take two tensor with same num of elements");
ctx.Output<framework::LoDTensor>("Out")->Resize(left_tensor->dims());
ctx.Output<framework::Tensor>("Out")->Resize(left_tensor->dims());
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -54,7 +55,12 @@ class MinusOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(Minus Operator
Equation: Out = X - Y
Equation:
Out = X - Y
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
)DOC");
}
};
......
......@@ -34,8 +34,8 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(x->dims().size(), 2, "The tensor rank of X must be 2.");
PADDLE_ENFORCE_EQ(x->dims()[1], 1, "The 2nd dimension of X must be 1.");
context.Output<framework::LoDTensor>("IntermediateVal")->Resize(x->dims());
context.Output<framework::LoDTensor>("Out")->Resize({x->dims()[0], 1});
context.Output<framework::Tensor>("IntermediateVal")->Resize(x->dims());
context.Output<framework::Tensor>("Out")->Resize({x->dims()[0], 1});
}
};
......@@ -81,7 +81,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
auto* intermediate_val = context.Input<Tensor>("IntermediateVal");
auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
auto* x_grad =
context.Output<framework::LoDTensor>(framework::GradVarName("X"));
context.Output<framework::Tensor>(framework::GradVarName("X"));
PADDLE_ENFORCE_NOT_NULL(x, "X must be initialized.");
PADDLE_ENFORCE_NOT_NULL(y, "Y must be initialized.");
......
......@@ -52,8 +52,8 @@ class ModifiedHuberLossKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
auto* in0 = context.Input<Tensor>("X");
auto* in1 = context.Input<Tensor>("Y");
auto* out0 = context.Output<framework::LoDTensor>("IntermediateVal");
auto* out1 = context.Output<framework::LoDTensor>("Out");
auto* out0 = context.Output<framework::Tensor>("IntermediateVal");
auto* out1 = context.Output<framework::Tensor>("Out");
out0->mutable_data<T>(context.GetPlace());
out1->mutable_data<T>(context.GetPlace());
......@@ -77,11 +77,9 @@ class ModifiedHuberLossGradCPUKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in0 = context.Input<Tensor>("Y");
auto* in1 = context.Input<framework::LoDTensor>("IntermediateVal");
auto* in2 =
context.Input<framework::LoDTensor>(framework::GradVarName("Out"));
auto* out0 =
context.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto* in1 = context.Input<framework::Tensor>("IntermediateVal");
auto* in2 = context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* out0 = context.Output<framework::Tensor>(framework::GradVarName("X"));
if (out0) {
const T* y_ptr = in0->data<T>();
......
......@@ -18,7 +18,6 @@ namespace paddle {
namespace operators {
using framework::Tensor;
using framework::LoDTensor;
class MulOp : public framework::OperatorWithKernel {
public:
......@@ -53,8 +52,9 @@ class MulOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
x_mat_dims[1], y_mat_dims[0],
"First matrix's width must be equal with second matrix's height.");
ctx.Output<framework::LoDTensor>("Out")->Resize(
ctx.Output<framework::Tensor>("Out")->Resize(
{x_mat_dims[0], y_mat_dims[1]});
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -83,9 +83,14 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(1)
.EqualGreaterThan(1);
AddComment(R"DOC(
Two Element Mul Operator.
Mul operator is used to perform matrix multiplication for input X and Y.
The equation is: Out = X * Y
The equation is:
Out = X * Y
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
)DOC");
}
};
......@@ -103,10 +108,8 @@ class MulOpGrad : public framework::OperatorWithKernel {
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto y_dims = ctx.Input<Tensor>("Y")->dims();
auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
auto *x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *y_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
auto *x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto *y_grad = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
auto x_mat_dims =
framework::flatten_to_2d(x_dims, Attr<int>("x_num_col_dims"));
......
......@@ -39,8 +39,13 @@ class PadOp : public framework::OperatorWithKernel {
for (int i = 0; i < x_dim.size(); ++i) {
out_dims[i] = x_dim[i] + paddings[i * 2] + paddings[i * 2 + 1];
}
ctx.Output<framework::LoDTensor>("Out")->Resize(
ctx.Output<framework::Tensor>("Out")->Resize(
framework::make_ddim(out_dims));
if (out_dims[0] == x_dim[0]) {
// Only pass LoD when the first dimension is equal between
// output and input.
ctx.ShareLoD("X", /*->*/ "Out");
}
}
};
......@@ -101,7 +106,7 @@ class PadOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto *x_g = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *x_g = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
if (x_g != nullptr) {
x_g->Resize(x_dims);
}
......
......@@ -36,8 +36,9 @@ class PReluOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
"Output(Out) should not be null");
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *out = ctx.Output<framework::Tensor>("Out");
out->Resize(in->dims());
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -55,6 +56,8 @@ The equation is:
f(x) = alpha * x , for x < 0
f(x) = x , for x >= 0
The input `X` can carry the LoD (Level of Details) information,
or not. And the output shares the LoD with input `X`.
)DOC");
}
};
......@@ -69,11 +72,11 @@ class PReluGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto *dx = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto *x = ctx.Input<framework::Tensor>("X");
auto *dalpha =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Alpha"));
ctx.Output<framework::Tensor>(framework::GradVarName("Alpha"));
auto *alpha = ctx.Input<framework::Tensor>("Alpha");
dx->Resize(x->dims());
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/rank_loss_op.h"
namespace paddle {
namespace operators {
class RankLossOp : public framework::OperatorWithKernel {
public:
RankLossOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
// input check
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
"Input(Label) shouldn't be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Left"),
"Input(Left) shouldn't be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Right"),
"Input(Right) shouldn't be null");
auto label_dims = ctx.Input<framework::Tensor>("Label")->dims();
auto left_dims = ctx.Input<framework::Tensor>("Left")->dims();
auto right_dims = ctx.Input<framework::Tensor>("Right")->dims();
PADDLE_ENFORCE((label_dims == left_dims) && (left_dims == right_dims),
"All inputs must have the same size");
PADDLE_ENFORCE((label_dims.size() == 2) && (label_dims[1] == 1),
"All inputs must be row vector with size batch_size x 1.");
ctx.Output<framework::Tensor>("Out")->Resize(label_dims);
}
};
class RankLossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RankLossOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Label",
"The label indicating A ranked higher than B or not, row vector.");
AddInput("Left", "The output of RankNet for doc A, vector.");
AddInput("Right", "The output of RankNet for doc B, vetor");
AddOutput("Out", "The output loss of RankLoss operator, vector.");
AddComment(R"DOC(RankLoss operator
Rank loss operator for RankNet[1]. RankNet is a pairwise ranking model with
one training sample consisting of a pair of doc A and B, and the label P
indicating that A is ranked higher than B or not:
P = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of
the input pair.
The RankLoss operator contains three inputs: Left (o_i), Right (o_j) and Label
(P_{i,j}), which represent the output of RankNet for two docs and the label
respectively, and yields the rank loss C_{i,j} by following the expression
\f[
C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
o_{i,j} = o_i - o_j \\
\tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
\f]
The operator can take inputs of one sample or in batch.
[1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
Rank using Gradient Descent.
http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf
)DOC");
}
};
class RankLossGradOp : public framework::OperatorWithKernel {
public:
RankLossGradOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"),
"Input(Label) shouldn't be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Left"),
"Input(Left) shouldn't be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Right"),
"Input(Right) shouldn't be null.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) shouldn't be null.");
auto dims = ctx.Input<framework::Tensor>("Left")->dims();
auto *left_grad =
ctx.Output<framework::Tensor>(framework::GradVarName("Left"));
auto *right_grad =
ctx.Output<framework::Tensor>(framework::GradVarName("Right"));
if (left_grad) {
left_grad->Resize(dims);
}
if (right_grad) {
right_grad->Resize(dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(rank_loss, ops::RankLossOp, ops::RankLossOpMaker, rank_loss_grad,
ops::RankLossGradOp);
REGISTER_OP_CPU_KERNEL(rank_loss,
ops::RankLossKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
rank_loss_grad, ops::RankLossGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/rank_loss_op.h"
REGISTER_OP_GPU_KERNEL(
rank_loss,
paddle::operators::RankLossKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
rank_loss_grad,
paddle::operators::RankLossGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class RankLossKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto* out_t = ctx.Output<framework::Tensor>("Out");
auto* label_t = ctx.Input<framework::Tensor>("Label");
auto* left_t = ctx.Input<framework::Tensor>("Left");
auto* right_t = ctx.Input<framework::Tensor>("Right");
out_t->mutable_data<T>(ctx.GetPlace());
auto out = framework::EigenVector<T>::Flatten(*out_t);
auto label = framework::EigenVector<T>::Flatten(*label_t);
auto left = framework::EigenVector<T>::Flatten(*left_t);
auto right = framework::EigenVector<T>::Flatten(*right_t);
auto& dev = ctx.GetEigenDevice<Place>();
out.device(dev) =
(1. + (left - right).exp()).log() - label * (left - right);
}
};
template <typename Place, typename T>
class RankLossGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto* d_left_t =
ctx.Output<framework::Tensor>(framework::GradVarName("Left"));
auto* d_right_t =
ctx.Output<framework::Tensor>(framework::GradVarName("Right"));
auto* d_out_t = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* label_t = ctx.Input<framework::Tensor>("Label");
auto* left_t = ctx.Input<framework::Tensor>("Left");
auto* right_t = ctx.Input<framework::Tensor>("Right");
auto& dev = ctx.GetEigenDevice<Place>();
auto d_out = framework::EigenVector<T>::Flatten(*d_out_t);
auto label = framework::EigenVector<T>::Flatten(*label_t);
auto left = framework::EigenVector<T>::Flatten(*left_t);
auto right = framework::EigenVector<T>::Flatten(*right_t);
// compute d_left
if (d_left_t) {
d_left_t->mutable_data<T>(ctx.GetPlace());
auto d_left = framework::EigenVector<T>::Flatten(*d_left_t);
d_left.device(dev) = d_out * (1. / (1. + (right - left).exp()) - label);
}
// compute d_right
if (d_right_t) {
d_right_t->mutable_data<T>(ctx.GetPlace());
auto d_right = framework::EigenVector<T>::Flatten(*d_right_t);
d_right.device(dev) =
-d_out * (1.0 / (1. + (right - left).exp()) - label);
}
}
};
} // namespace operators
} // namespace paddle
......@@ -50,7 +50,12 @@ class ReshapeOp : public framework::OperatorWithKernel {
std::transform(shape.begin(), shape.end(), shape_int64.begin(),
[](int a) { return static_cast<int64_t>(a); });
auto out_dims = framework::make_ddim(shape_int64);
ctx.Output<framework::LoDTensor>("Out")->Resize(out_dims);
ctx.Output<framework::Tensor>("Out")->Resize(out_dims);
if (shape[0] == in->dims()[0]) {
// Only pass LoD when the first dimension is equal between
// output and input.
ctx.ShareLoD("X", /*->*/ "Out");
}
}
};
......@@ -94,7 +99,7 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) shouldn't be null.");
auto dims = ctx.Input<framework::Tensor>("X")->dims();
auto *d_in = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *d_in = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_in->Resize(dims);
}
};
......
......@@ -44,7 +44,8 @@ class RowwiseAddOp : public framework::OperatorWithKernel {
framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims,
"The width of two operands must be same");
PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1");
ctx.Output<framework::LoDTensor>("Out")->Resize(x_dims);
ctx.Output<framework::Tensor>("Out")->Resize(x_dims);
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -83,8 +84,8 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims,
"The width of two operands must be same");
auto *dx = ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *db = ctx.Output<framework::LoDTensor>(framework::GradVarName("b"));
auto *dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto *db = ctx.Output<framework::Tensor>(framework::GradVarName("b"));
if (dx) dx->Resize(x_dims);
if (db) db->Resize(b_dims);
}
......
......@@ -33,8 +33,9 @@ class ScaleOp : public framework::OperatorWithKernel {
"Output(Out) of ScaleOp should not be null.");
auto *in = ctx.Input<framework::Tensor>("X");
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *out = ctx.Output<framework::Tensor>("Out");
out->Resize(in->dims());
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......
......@@ -44,7 +44,7 @@ class ScatterOp : public framework::OperatorWithKernel {
framework::DDim data_dim(ctx.Input<Tensor>("Updates")->dims());
for (int i = 1; i < data_dim.size(); ++i)
PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input<Tensor>("Updates")->dims()[i]);
ctx.Output<framework::LoDTensor>("Out")->Resize(
ctx.Output<framework::Tensor>("Out")->Resize(
ctx.Input<Tensor>("Ref")->dims());
}
};
......@@ -56,10 +56,9 @@ class ScatterGradOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto *dUpdates =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Updates"));
ctx.Output<framework::Tensor>(framework::GradVarName("Updates"));
auto *Updates = ctx.Input<Tensor>("Updates");
auto *dRef =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Ref"));
auto *dRef = ctx.Output<framework::Tensor>(framework::GradVarName("Ref"));
auto *Ref = ctx.Input<Tensor>("Ref");
dRef->Resize(Ref->dims());
......
......@@ -33,7 +33,7 @@ class SGDOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("param")->dims(),
ctx.Input<Tensor>("grad")->dims(),
"Two input of SGD Op's dimension must be same.");
ctx.Output<framework::LoDTensor>("param_out")
ctx.Output<framework::Tensor>("param_out")
->Resize(ctx.Input<Tensor>("param")->dims());
}
};
......
......@@ -44,8 +44,8 @@ class SmoothL1LossOp : public framework::OperatorWithKernel {
"The shape of OutsideWeight must be same as X.");
}
auto* diff = ctx.Output<framework::LoDTensor>("Diff");
auto* out = ctx.Output<framework::LoDTensor>("Out");
auto* diff = ctx.Output<framework::Tensor>("Diff");
auto* out = ctx.Output<framework::Tensor>("Out");
diff->Resize(x->dims());
// loss is a two-rank tensor
out->Resize({x->dims()[0], 1});
......@@ -103,10 +103,8 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
auto in_dims = ctx.Input<framework::Tensor>("X")->dims();
auto out_dims =
ctx.Input<framework::Tensor>(framework::GradVarName("Out"))->dims();
auto* x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto* y_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
auto* x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto* y_grad = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
PADDLE_ENFORCE_GE(out_dims.size(), 2,
"The tensor rank of Input(Out@Grad) should be 2.");
......
......@@ -30,8 +30,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
"The input of softmax op must be a matrix.");
ctx.Output<framework::LoDTensor>("Y")->Resize(
ctx.Input<Tensor>("X")->dims());
ctx.Output<framework::Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
}
};
......@@ -77,7 +76,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
"Input(Y) and its gradients should have a same shape.");
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"))
ctx.Output<framework::Tensor>(framework::GradVarName("X"))
->Resize(ctx.Input<Tensor>("X")->dims());
}
};
......
......@@ -27,7 +27,7 @@ class SplitOp : public framework::OperatorWithKernel {
void InferShape(const framework::InferShapeContext &ctx) const override {
// infershape
auto *in = ctx.Input<framework::Tensor>("X");
auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
auto outs = ctx.MultiOutput<framework::Tensor>("Out");
size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
size_t num = static_cast<size_t>(ctx.Attr<int>("num"));
std::vector<int> sections =
......
......@@ -54,9 +54,10 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
"First dimension of target must be equal to input "
"or to 1.");
ctx.Output<framework::LoDTensor>("sub_result")
ctx.Output<framework::Tensor>("sub_result")
->Resize({x_dims[0], x->numel() / x_dims[0]});
ctx.Output<framework::LoDTensor>("Out")->Resize({x_dims[0], 1});
ctx.Output<framework::Tensor>("Out")->Resize({x_dims[0], 1});
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -79,6 +80,9 @@ class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
input or to 1. If the first dimension of target is 1, SquaredL2DistanceOp
will broadcast target's first dimension to input's first dimension.
You can decide whether calculate the gradient of input and target.
Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input X.
)DOC");
}
};
......@@ -100,10 +104,8 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(out_dims[1], 1,
"Second dimension of output gradient "
"must be 1.");
auto* x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto* y_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("Y"));
auto* x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
auto* y_grad = ctx.Output<framework::Tensor>(framework::GradVarName("Y"));
if (x_grad) x_grad->Resize(x_dims);
if (y_grad) y_grad->Resize(y_dims);
}
......
......@@ -28,7 +28,7 @@ class SumOp : public framework::OperatorWithKernel {
"Output(Out) of SumOp should not be null.");
auto ins = ctx.MultiInput<framework::Tensor>("X");
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *out = ctx.Output<framework::Tensor>("Out");
int N = ins.size();
auto in_dim = ins[0]->dims();
......@@ -39,6 +39,7 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape");
}
out->Resize(in_dim);
ctx.ShareLoD("X", /*->*/ "Out");
}
};
......@@ -49,8 +50,11 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "the input tensors of sum operator.").AsDuplicable();
AddOutput("Out", "the output tensor of sum operator.");
AddComment(R"DOC(
Sum the input tensors.
)DOC");
Sum the input tensors.
All the inputs can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with the first input.
)DOC");
}
};
......@@ -61,7 +65,7 @@ class SumGradOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto outputs =
ctx.MultiOutput<framework::LoDTensor>(framework::GradVarName("X"));
ctx.MultiOutput<framework::Tensor>(framework::GradVarName("X"));
auto dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
for (auto output : outputs) {
output->Resize(dims);
......
......@@ -40,8 +40,8 @@ class TopkOp : public framework::OperatorWithKernel {
framework::DDim dims = input->dims();
dims[dims.size() - 1] = k;
ctx.Output<framework::LoDTensor>("Out")->Resize(dims);
ctx.Output<framework::LoDTensor>("Indices")->Resize(dims);
ctx.Output<framework::Tensor>("Out")->Resize(dims);
ctx.Output<framework::Tensor>("Indices")->Resize(dims);
}
};
......
......@@ -51,7 +51,7 @@ class TransposeOp : public framework::OperatorWithKernel {
for (size_t i = 0; i < axis_size; i++) {
out_dims[i] = x_dims[axis[i]];
}
ctx.Output<framework::LoDTensor>("Out")->Resize(out_dims);
ctx.Output<framework::Tensor>("Out")->Resize(out_dims);
}
};
......@@ -99,8 +99,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto *x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto *x_grad = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
if (x_grad) x_grad->Resize(x_dims);
}
......
......@@ -54,7 +54,7 @@ class UniformRandomOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(Attr<float>("min") < Attr<float>("max"),
"uniform_random's min must less then max");
auto* tensor = ctx.Output<framework::LoDTensor>("Out");
auto* tensor = ctx.Output<framework::Tensor>("Out");
auto dims = Attr<std::vector<int>>("dims");
std::vector<int64_t> temp;
temp.reserve(dims.size());
......
......@@ -89,12 +89,16 @@ class OpDescCreationMethod(object):
new_attr.f = user_defined_attr
elif attr.type == framework_pb2.STRING:
new_attr.s = user_defined_attr
elif attr.type == framework_pb2.BOOLEAN:
new_attr.b = user_defined_attr
elif attr.type == framework_pb2.INTS:
new_attr.ints.extend(user_defined_attr)
elif attr.type == framework_pb2.FLOATS:
new_attr.floats.extend(user_defined_attr)
elif attr.type == framework_pb2.STRINGS:
new_attr.strings.extend(user_defined_attr)
elif attr.type == framework_pb2.BOOLEANS:
new_attr.bools.extend(user_defined_attr)
elif attr.type == framework_pb2.INT_PAIRS:
for p in user_defined_attr:
pair = new_attr.int_pairs.add()
......
import unittest
import numpy as np
from op_test import OpTest
class TestExp(OpTest):
def setUp(self):
self.op_type = "exp"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': np.exp(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSigmoid(OpTest):
def setUp(self):
self.op_type = "sigmoid"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.008)
class TestTanh(OpTest):
def setUp(self):
self.op_type = "tanh"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': np.tanh(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSqrt(OpTest):
def setUp(self):
self.op_type = "sqrt"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': np.sqrt(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestAbs(OpTest):
def setUp(self):
self.op_type = "abs"
x = np.random.uniform(-1, 1, [4, 4]).astype("float32")
# Because we set delta = 0.005 in caculating numeric gradient,
# if x is too small, such as 0.002, x_neg will be -0.003
# x_pos will be 0.007, so the numeric gradient is unaccurate.
# we should avoid this
x[np.abs(x) < 0.005] = 0.02
self.inputs = {'X': x}
self.outputs = {'Y': np.abs(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestRelu(OpTest):
def setUp(self):
self.op_type = "relu"
x = np.random.uniform(-1, 1, [11, 17]).astype("float32")
# The same reason with TestAbs
x[np.abs(x) < 0.005] = 0.02
self.inputs = {'X': x}
self.outputs = {'Y': np.maximum(self.inputs['X'], 0)}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestBRelu(OpTest):
def setUp(self):
self.op_type = "brelu"
x = np.random.uniform(-1, 1, [4, 4]).astype("float32")
t_min = 1
t_max = 4
# The same with TestAbs
x[np.abs(x - t_min) < 0.005] = t_min + 0.02
x[np.abs(x - t_max) < 0.005] = t_max + 0.02
self.inputs = {'X': x}
self.attrs = {'t_min': t_min, 't_max': t_max}
t = np.copy(x)
t[t < t_min] = t_min
t[t > t_max] = t_max
self.outputs = {'Y': t}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestSoftRelu(OpTest):
def setUp(self):
self.op_type = "soft_relu"
x = np.random.uniform(-3, 3, [4, 4]).astype("float32")
threshold = 2
# The same reason with TestAbs
x[np.abs(x - threshold) < 0.005] = threshold + 0.02
x[np.abs(x + threshold) < 0.005] = -threshold + 0.02
self.inputs = {'X': x}
self.attrs = {'threshold': threshold}
t = np.copy(x)
t[t < -threshold] = -threshold
t[t > threshold] = threshold
self.outputs = {'Y': np.log((np.exp(t) + 1))}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestReciprocal(OpTest):
def setUp(self):
self.op_type = "reciprocal"
self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")}
self.outputs = {'Y': np.reciprocal(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.01)
class TestLog(OpTest):
def setUp(self):
self.op_type = "log"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': np.log(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSquare(OpTest):
def setUp(self):
self.op_type = "square"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
self.outputs = {'Y': np.square(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestPow(OpTest):
def setUp(self):
self.op_type = "pow"
self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")}
self.attrs = {'factor': 3}
self.outputs = {'Y': np.power(self.inputs['X'], 3)}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestSTanh(OpTest):
def setUp(self):
self.op_type = "stanh"
self.inputs = {
'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
}
scale_a = 2.0 / 3.0
scale_b = 1.7159
self.attrs = {'scale_a': scale_a, 'scale_b': scale_b}
self.outputs = {'Y': scale_b * np.tanh(self.inputs['X'] * scale_a)}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
if __name__ == "__main__":
unittest.main()
......@@ -73,13 +73,22 @@ class TestConv2dOp(OpTest):
self.check_output()
def test_check_grad(self):
self.check_grad(set(['Input', 'Filter']), 'Output')
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.05)
def test_check_grad_no_filter(self):
self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
self.check_grad(
['Input'],
'Output',
max_relative_error=0.05,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.05,
no_grad_set=set(['Input']))
def init_groups(self):
self.groups = 1
......
......@@ -24,15 +24,15 @@ class TestCosSimOp(OpTest):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05)
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.06)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.05, no_grad_set=set("X"))
['Y'], 'Out', max_relative_error=0.06, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y'))
['X'], 'Out', max_relative_error=0.06, no_grad_set=set('Y'))
class TestCosSimOp2(TestCosSimOp):
......
import unittest
import numpy as np
from op_test import OpTest
def crop(data, offsets, crop_shape):
def indexOf(shape, index):
result = []
for dim in reversed(shape):
result.append(index % dim)
index = index / dim
return result[::-1]
result = []
for i, value in enumerate(data.flatten()):
index = indexOf(data.shape, i)
selected = True
if len(index) == len(offsets):
for j, offset in enumerate(offsets):
selected = selected and index[j] >= offset and index[
j] < crop_shape[j] + offset
if selected:
result.append(value)
return np.array(result).reshape(crop_shape)
class TestCropOp(OpTest):
def setUp(self):
self.op_type = "crop"
self.crop_by_input = False
self.attrs = {}
self.initTestCase()
self.attrs['offsets'] = self.offsets
if self.crop_by_input:
self.inputs = {
'X': np.random.random(self.x_shape).astype("float32"),
'Y': np.random.random(self.crop_shape).astype("float32")
}
else:
self.attrs['shape'] = self.crop_shape
self.inputs = {
'X': np.random.random(self.x_shape).astype("float32"),
}
self.outputs = {
'Out': crop(self.inputs['X'], self.offsets, self.crop_shape)
}
def initTestCase(self):
self.x_shape = (8, 8)
self.crop_shape = (2, 2)
self.offsets = [1, 2]
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X'], 'Out', max_relative_error=0.006)
class TestCase1(TestCropOp):
def initTestCase(self):
self.x_shape = (16, 8, 32)
self.crop_shape = [2, 2, 3]
self.offsets = [1, 5, 3]
class TestCase2(TestCropOp):
def initTestCase(self):
self.x_shape = (4, 8)
self.crop_shape = [4, 8]
self.offsets = [0, 0]
class TestCase3(TestCropOp):
def initTestCase(self):
self.x_shape = (4, 8, 16)
self.crop_shape = [2, 2, 3]
self.offsets = [1, 5, 3]
self.crop_by_input = True
class TestCase4(TestCropOp):
def initTestCase(self):
self.x_shape = (4, 4)
self.crop_shape = [4, 4]
self.offsets = [0, 0]
self.crop_by_input = True
if __name__ == '__main__':
unittest.main()
......@@ -19,7 +19,7 @@ class TestCrossEntropyOp1(OpTest):
dtype="float32")
self.inputs = {"X": X, "Label": label}
self.outputs = {"Y": cross_entropy}
self.attrs = {'soft_label': 0}
self.attrs = {'soft_label': False}
def test_check_output(self):
self.check_output()
......@@ -45,7 +45,7 @@ class TestCrossEntropyOp2(OpTest):
axis=1, keepdims=True).astype("float32")
self.inputs = {'X': X, 'Label': label}
self.outputs = {'Y': cross_entropy}
self.attrs = {'soft_label': 1}
self.attrs = {'soft_label': True}
def test_check_output(self):
self.check_output()
......@@ -76,7 +76,7 @@ class TestCrossEntropyOp3(OpTest):
axis=1, keepdims=True).astype("float32")
self.inputs = {'X': X, 'Label': label}
self.outputs = {'Y': cross_entropy}
self.attrs = {'soft_label': 1}
self.attrs = {'soft_label': True}
def test_check_output(self):
self.check_output()
......
......@@ -7,7 +7,7 @@ class TestDropoutOp(OpTest):
def setUp(self):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
self.attrs = {'dropout_prob': 0.0, 'is_training': 1}
self.attrs = {'dropout_prob': 0.0, 'is_training': True}
self.outputs = {'Out': self.inputs['X'], 'Mask': np.ones((32, 64))}
def test_check_output(self):
......@@ -21,7 +21,7 @@ class TestDropoutOp2(TestDropoutOp):
def setUp(self):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
self.attrs = {'dropout_prob': 1.0, 'is_training': 1}
self.attrs = {'dropout_prob': 1.0, 'is_training': True}
self.outputs = {'Out': np.zeros((32, 64)), 'Mask': np.zeros((32, 64))}
......@@ -29,7 +29,7 @@ class TestDropoutOp3(TestDropoutOp):
def setUp(self):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")}
self.attrs = {'dropout_prob': 0.0, 'is_training': 1}
self.attrs = {'dropout_prob': 0.0, 'is_training': True}
self.outputs = {'Out': self.inputs['X'], 'Mask': np.ones((32, 64, 2))}
......@@ -37,7 +37,7 @@ class TestDropoutOp4(OpTest):
def setUp(self):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
self.attrs = {'dropout_prob': 0.35, 'is_training': 0}
self.attrs = {'dropout_prob': 0.35, 'is_training': False}
self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
def test_check_output(self):
......@@ -48,7 +48,7 @@ class TestDropoutOp5(OpTest):
def setUp(self):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")}
self.attrs = {'dropout_prob': 0.75, 'is_training': 0}
self.attrs = {'dropout_prob': 0.75, 'is_training': False}
self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
def test_check_output(self):
......
import unittest
import numpy as np
from op_test import OpTest
class TestElementwiseOp(OpTest):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
}
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
class TestElementwiseAddOp_Vector(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.random((32, )).astype("float32"),
'Y': np.random.random((32, )).astype("float32")
}
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['Y'])}
class TestElementwiseAddOp_broadcast_0(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(2).astype(np.float32)
}
self.attrs = {'axis': 0}
self.outputs = {
'Out': self.inputs['X'] + self.inputs['Y'].reshape(2, 1, 1)
}
class TestElementwiseAddOp_broadcast_1(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(3).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 3, 1)
}
class TestElementwiseAddOp_broadcast_2(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(4).astype(np.float32)
}
self.outputs = {
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 1, 4)
}
class TestElementwiseAddOp_broadcast_3(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_add"
self.inputs = {
'X': np.random.rand(2, 3, 4, 5).astype(np.float32),
'Y': np.random.rand(3, 4).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] + self.inputs['Y'].reshape(1, 3, 4, 1)
}
if __name__ == '__main__':
unittest.main()
import unittest
import numpy as np
from op_test import OpTest
class ElementwiseDivOp(OpTest):
def setUp(self):
self.op_type = "elementwise_div"
""" Warning
CPU gradient check error!
'X': np.random.random((32,84)).astype("float32"),
'Y': np.random.random((32,84)).astype("float32")
"""
self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.05, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y'))
class TestElementwiseDivOp_Vector(ElementwiseDivOp):
def setUp(self):
self.op_type = "elementwise_div"
self.inputs = {
'X': np.random.uniform(0.1, 1, [32]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [32]).astype("float32")
}
self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])}
class TestElementwiseDivOp_broadcast_0(ElementwiseDivOp):
def setUp(self):
self.op_type = "elementwise_div"
self.inputs = {
'X': np.random.uniform(0.1, 1, [2, 3, 4]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [2]).astype("float32")
}
self.attrs = {'axis': 0}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(2, 1, 1))
}
class TestElementwiseDivOp_broadcast_1(ElementwiseDivOp):
def setUp(self):
self.op_type = "elementwise_div"
self.inputs = {
'X': np.random.uniform(0.1, 1, [2, 3, 4]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [3]).astype("float32")
}
self.attrs = {'axis': 1}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 3, 1))
}
class TestElementwiseDivOp_broadcast_2(ElementwiseDivOp):
def setUp(self):
self.op_type = "elementwise_div"
self.inputs = {
'X': np.random.uniform(0.1, 1, [2, 3, 4]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [4]).astype("float32")
}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 1, 4))
}
class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
def setUp(self):
self.op_type = "elementwise_div"
self.inputs = {
'X': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [3, 4]).astype("float32")
}
self.attrs = {'axis': 1}
self.outputs = {
'Out':
np.divide(self.inputs['X'], self.inputs['Y'].reshape(1, 3, 4, 1))
}
if __name__ == '__main__':
unittest.main()
......@@ -3,14 +3,9 @@ import numpy as np
from op_test import OpTest
class TestElementwiseMulOp_Matrix(OpTest):
class ElementwiseMulOp(OpTest):
def setUp(self):
self.op_type = "elementwise_mul"
""" Warning
CPU gradient check error!
'X': np.random.random((32,84)).astype("float32"),
'Y': np.random.random((32,84)).astype("float32")
"""
self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
......@@ -32,7 +27,7 @@ class TestElementwiseMulOp_Matrix(OpTest):
['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
class TestElementwiseMulOp_Vector(OpTest):
class TestElementwiseMulOp_Vector(ElementwiseMulOp):
def setUp(self):
self.op_type = "elementwise_mul"
self.inputs = {
......@@ -41,22 +36,8 @@ class TestElementwiseMulOp_Vector(OpTest):
}
self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.1)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.1, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
class TestElementwiseMulOp_broadcast_0(OpTest):
class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp):
def setUp(self):
self.op_type = "elementwise_mul"
self.inputs = {
......@@ -69,22 +50,8 @@ class TestElementwiseMulOp_broadcast_0(OpTest):
'Out': self.inputs['X'] * self.inputs['Y'].reshape(2, 1, 1)
}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.1)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.1, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
class TestElementwiseMulOp_broadcast_1(OpTest):
class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp):
def setUp(self):
self.op_type = "elementwise_mul"
self.inputs = {
......@@ -97,22 +64,8 @@ class TestElementwiseMulOp_broadcast_1(OpTest):
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 3, 1)
}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.1)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.1, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
class TestElementwiseMulOp_broadcast_2(OpTest):
class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
def setUp(self):
self.op_type = "elementwise_mul"
self.inputs = {
......@@ -124,22 +77,8 @@ class TestElementwiseMulOp_broadcast_2(OpTest):
'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 1, 4)
}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.1)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.1, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
class TestElementwiseMulOp_broadcast_3(OpTest):
class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
def setUp(self):
self.op_type = "elementwise_mul"
self.inputs = {
......
import unittest
import numpy as np
from op_test import OpTest
class TestElementwiseOp(OpTest):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
def test_check_output(self):
self.check_output()
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.005)
def test_check_grad_ingore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
class TestElementwiseSubOp_Vector(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.random((32, )).astype("float32"),
'Y': np.random.random((32, )).astype("float32")
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}
class TestElementwiseSubOp_broadcast_0(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(2).astype(np.float32)
}
self.attrs = {'axis': 0}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(2, 1, 1)
}
class TestElementwiseSubOp_broadcast_1(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(3).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 3, 1)
}
class TestElementwiseSubOp_broadcast_2(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.rand(2, 3, 4).astype(np.float32),
'Y': np.random.rand(4).astype(np.float32)
}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 1, 4)
}
class TestElementwiseSubOp_broadcast_3(TestElementwiseOp):
def setUp(self):
self.op_type = "elementwise_sub"
self.inputs = {
'X': np.random.rand(2, 3, 4, 5).astype(np.float32),
'Y': np.random.rand(3, 4).astype(np.float32)
}
self.attrs = {'axis': 1}
self.outputs = {
'Out': self.inputs['X'] - self.inputs['Y'].reshape(1, 3, 4, 1)
}
if __name__ == '__main__':
unittest.main()
......@@ -6,8 +6,8 @@ from op_test import OpTest
class TestFillZerosLikeOp(OpTest):
def setUp(self):
self.op_type = "fill_zeros_like"
self.inputs = {'Src': np.random.random((219, 232)).astype("float32")}
self.outputs = {'Dst': np.zeros_like(self.inputs["Src"])}
self.inputs = {'X': np.random.random((219, 232)).astype("float32")}
self.outputs = {'Y': np.zeros_like(self.inputs["X"])}
def test_check_output(self):
self.check_output()
......
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册