提交 e655d291 编写于 作者: G gangliao

merge develop

......@@ -86,6 +86,14 @@ if(ANDROID OR IOS)
"Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
set(WITH_MKLML OFF CACHE STRING
"Disable MKLML package when cross-compiling for Android and iOS" FORCE)
# Compile PaddlePaddle mobile inference library
if (NOT WITH_C_API)
set(WITH_C_API ON CACHE STRING
"Always compile the C_API when cross-compiling for Android and iOS" FORCE)
endif()
set(MOBILE_INFERENCE ON)
add_definitions(-DPADDLE_MOBILE_INFERENCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
......@@ -160,9 +168,11 @@ endif(USE_NNPACK)
add_subdirectory(proto)
# "add_subdirectory(go)" should be placed after the following loine,
# because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
if(NOT MOBILE_INFERENCE)
# "add_subdirectory(go)" should be placed after the following loine,
# because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
endif()
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# placed after this block, because they depends on it.
......
......@@ -73,25 +73,43 @@ function(link_paddle_exe TARGET_NAME)
generate_rdma_links()
endif()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
paddle_optimizer
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
if(MOBILE_INFERENCE)
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
ARCHIVE_END
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
else()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
paddle_network
paddle_math
paddle_utils
paddle_parameter
paddle_proto
paddle_cuda
paddle_optimizer
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(ANDROID)
target_link_libraries(${TARGET_NAME} log)
......
add_subdirectory(cuda)
add_subdirectory(function)
add_subdirectory(utils)
add_subdirectory(testing)
add_subdirectory(math)
add_subdirectory(parameter)
add_subdirectory(gserver)
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
add_subdirectory(string)
if(Boost_FOUND)
add_subdirectory(memory)
add_subdirectory(platform)
add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
endif()
add_subdirectory(parameter)
add_subdirectory(testing)
if(WITH_C_API)
if(MOBILE_INFERENCE)
add_subdirectory(capi)
endif()
else()
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(string)
add_subdirectory(scripts)
if(WITH_C_API)
add_subdirectory(capi)
endif()
if(Boost_FOUND)
add_subdirectory(memory)
add_subdirectory(platform)
add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
endif()
if(WITH_SWIG_PY)
add_subdirectory(api)
if(WITH_SWIG_PY)
add_subdirectory(api)
endif()
endif()
......@@ -37,9 +37,7 @@ set(PADDLE_CAPI_INFER_LIBS
paddle_cuda
paddle_function
paddle_gserver
paddle_proto
paddle_pserver
paddle_network)
paddle_proto)
cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS})
......
......@@ -4,11 +4,12 @@ add_unittest(capi_test_mats test_Vector.cpp
target_include_directories(capi_test_mats PUBLIC ${PADDLE_CAPI_INC_PATH})
target_link_libraries(capi_test_mats paddle_capi)
add_unittest_without_exec(capi_test_gradientMachine test_GradientMachine.cpp)
target_include_directories(capi_test_gradientMachine PUBLIC
${PADDLE_CAPI_INC_PATH})
target_link_libraries(capi_test_gradientMachine paddle_capi)
add_test(NAME capi_test_gradientMachine
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests)
if(NOT MOBILE_INFERENCE)
add_unittest_without_exec(capi_test_gradientMachine test_GradientMachine.cpp)
target_include_directories(capi_test_gradientMachine PUBLIC
${PADDLE_CAPI_INC_PATH})
target_link_libraries(capi_test_gradientMachine paddle_capi)
add_test(NAME capi_test_gradientMachine
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests)
endif()
......@@ -302,7 +302,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
return grad_op_descs; // empty vector
}
grad_op_descs = OpRegistry::CreateGradOpDescs(*op_desc);
grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get());
std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
for (auto& desc : grad_op_descs) {
......
......@@ -58,6 +58,8 @@ class MulOpMaker : public OpProtoAndCheckerMaker {
AddInput("X", "A");
AddInput("Y", "B");
AddOutput("Out", "Out");
AddAttr<int>("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
AddAttr<int>("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
AddComment("Mul");
}
};
......@@ -440,6 +442,28 @@ TEST(Backward, simple_single_op) {
std::vector<std::string>({f::GradVarName("b")}));
}
TEST(Backward, default_attribute) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op = block->AppendOp();
op->SetType("mul");
op->SetInput("X", {"x"});
op->SetInput("Y", {"y"});
op->SetOutput("Out", {"out"});
AppendBackward(program, {});
ASSERT_EQ(block->AllOps().size(), 2UL);
EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
f::OpDescBind *grad_op = block->AllOps()[1];
ASSERT_EQ(grad_op->Type(), "mul_grad");
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
}
TEST(Backward, simple_mult_op) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <deque>
#include <memory>
#include <unordered_map>
#include <vector>
#include "paddle/framework/op_desc.h"
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package paddle.framework;
enum AttrType {
......
......@@ -52,8 +52,6 @@ class OpDescBind {
void SetOutput(const std::string &param_name,
const std::vector<std::string> &args);
std::string DebugString() { return this->Proto()->DebugString(); }
bool HasAttr(const std::string &name) const {
return attrs_.find(name) != attrs_.end();
}
......@@ -97,6 +95,11 @@ class OpDescBind {
const VariableNameMap &Outputs() const { return outputs_; }
AttributeMap *MutableAttrMap() {
this->need_update_ = true;
return &this->attrs_;
}
private:
template <typename MapType>
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
......
......@@ -60,9 +60,14 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
}
std::vector<std::unique_ptr<OpDescBind>> OpRegistry::CreateGradOpDescs(
const OpDescBind& op_desc) {
auto& info = OpInfoMap::Instance().Get(op_desc.Type());
return info.grad_op_maker_(op_desc);
OpDescBind* op_desc) {
auto& info = OpInfoMap::Instance().Get(op_desc->Type());
if (info.Checker() != nullptr) {
info.Checker()->Check(*op_desc->MutableAttrMap());
}
return info.grad_op_maker_(*op_desc);
}
} // namespace framework
......
......@@ -80,7 +80,7 @@ class OpRegistry {
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static std::vector<std::unique_ptr<OpDescBind>> CreateGradOpDescs(
const OpDescBind& op_desc);
OpDescBind* op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
};
......
......@@ -205,13 +205,13 @@ void OperatorBase::GenerateTemporaryNames() {
}
template <>
const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const {
const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
auto* var = InputVar(name);
return var == nullptr ? nullptr : GetTensorFromVar(var);
}
template <>
const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
const std::string& name) const {
auto names = op().Inputs(name);
std::vector<const Tensor*> res;
......@@ -225,13 +225,13 @@ const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
}
template <>
Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const {
Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
auto var = OutputVar(name);
return var == nullptr ? nullptr : var->GetMutable<LoDTensor>();
}
template <>
std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
const std::string& name) const {
auto names = op().Outputs(name);
std::vector<Tensor*> res;
......
......@@ -57,7 +57,6 @@ inline std::string GradVarName(const std::string& var_name) {
}
class OperatorBase;
class InferShapeContext;
class ExecutionContext;
extern const Tensor* GetTensorFromVar(const Variable* var);
......@@ -169,10 +168,11 @@ class NOP : public OperatorBase {
}
};
class InferShapeContext {
class ExecutionContext {
public:
InferShapeContext(const OperatorBase& op, const Scope& scope)
: op_(op), scope_(scope) {}
ExecutionContext(const OperatorBase& op, const Scope& scope,
const platform::DeviceContext& device_context)
: op_(op), scope_(scope), device_context_(device_context) {}
const OperatorBase& op() const { return op_; }
......@@ -278,31 +278,6 @@ class InferShapeContext {
out_tensor->set_lod(in_tensor.lod());
}
private:
const OperatorBase& op_;
const Scope& scope_;
};
template <>
const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const;
template <>
const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
const std::string& name) const;
template <>
Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const;
template <>
std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
const std::string& name) const;
class ExecutionContext : public InferShapeContext {
public:
ExecutionContext(const OperatorBase& op, const Scope& scope,
const platform::DeviceContext& device_context)
: InferShapeContext(op, scope), device_context_(device_context) {}
template <typename PlaceType,
typename DeviceType = typename platform::EigenDeviceConverter<
PlaceType>::EigenDeviceType>
......@@ -315,10 +290,26 @@ class ExecutionContext : public InferShapeContext {
}
private:
const OperatorBase& op_;
const Scope& scope_;
const platform::DeviceContext& device_context_;
};
class CompileTimeInferShapeContext : public InferShapeContextBase {
template <>
const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const;
template <>
const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
const std::string& name) const;
template <>
Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
template <>
std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
const std::string& name) const;
class CompileTimeInferShapeContext : public InferShapeContext {
public:
CompileTimeInferShapeContext(const OpDescBind& op, const BlockDescBind& block)
: op_(op), block_(block) {}
......@@ -414,7 +405,7 @@ class CompileTimeInferShapeContext : public InferShapeContextBase {
const BlockDescBind& block_;
};
class RuntimeInferShapeContext : public InferShapeContextBase {
class RuntimeInferShapeContext : public InferShapeContext {
public:
RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope)
: op_(op), scope_(scope) {}
......@@ -612,7 +603,7 @@ class OperatorWithKernel : public OperatorBase {
});
}
virtual void InferShape(InferShapeContextBase* ctx) const = 0;
virtual void InferShape(InferShapeContext* ctx) const = 0;
protected:
// indicate kernel DataType by input data. Defaultly all input data must be
......
......@@ -113,7 +113,7 @@ class OpWithKernelTest : public OperatorWithKernel {
using OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {}
void InferShape(framework::InferShapeContext* ctx) const override {}
DataType IndicateDataType(const ExecutionContext& ctx) const override {
return DataType::FP32;
}
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <memory>
#include <vector>
#include "paddle/framework/framework.pb.h"
#include "paddle/platform/macros.h"
......@@ -31,8 +32,6 @@ class ProgramDescBind {
BlockDescBind *Block(size_t idx) { return blocks_[idx].get(); }
std::string DebugString() { return Proto()->DebugString(); }
size_t Size() const { return blocks_.size(); }
ProgramDesc *Proto();
......
......@@ -20,11 +20,11 @@ namespace paddle {
namespace framework {
// TODO(longfei): Once after both CompileTimeInferShapeContext and
// RuntimeInferShapeContext get merged, we can rename InferShapeContextBase into
// RuntimeInferShapeContext get merged, we can rename InferShapeContext into
// InferShapeContext so to replace the current InferShapeContext.
class InferShapeContextBase {
class InferShapeContext {
public:
virtual ~InferShapeContextBase() {}
virtual ~InferShapeContext() {}
virtual bool HasInput(const std::string &name) const = 0;
virtual bool HasOutput(const std::string &name) const = 0;
......
......@@ -95,6 +95,19 @@ class Tensor {
template <typename T>
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place);
/**
* @brief Copy the content of an external vector to a tensor.
*
* @param[in] src The external vector.
* @param[in] ctx The device context contains place where to store.
*
* * @note CopyFromVector assumes that the tensor has been resized
* before invoking.
*/
template <typename T>
inline void CopyFromVector(const std::vector<T>& src,
const platform::Place& dst_place);
/**
* @brief Return the slice of the tensor.
*
......
......@@ -123,6 +123,29 @@ inline void Tensor::CopyFrom(const Tensor& src,
#endif
}
template <typename T>
inline void Tensor::CopyFromVector(const std::vector<T>& src,
const platform::Place& dst_place) {
auto src_ptr = static_cast<const void*>(src.data());
platform::CPUPlace src_place;
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
auto size = src.size() * sizeof(T);
if (platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, src_place,
src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place,
src_ptr, size, 0);
}
PADDLE_ENFORCE(cudaStreamSynchronize(0),
"cudaStreamSynchronize failed in Tensor CopyFromVector");
#endif
}
template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
check_memory_size<T>();
......
......@@ -263,6 +263,93 @@ TEST(Tensor, CopyFrom) {
#endif
}
TEST(Tensor, CopyFromVector) {
using namespace paddle::framework;
using namespace paddle::platform;
{
std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
Tensor cpu_tensor;
// Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace();
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place);
// Compare Tensors
const int* cpu_ptr = cpu_tensor.data<int>();
const int* src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
}
src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
cpu_tensor.Resize(make_ddim({2, 2}));
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place);
cpu_ptr = cpu_tensor.data<int>();
src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr);
for (size_t i = 0; i < 5; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
}
delete cpu_place;
}
#ifdef PADDLE_WITH_CUDA
{
std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
Tensor cpu_tensor;
Tensor gpu_tensor;
Tensor dst_tensor;
// Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace();
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place);
// Copy to GPUTensor
gpu_tensor.Resize(make_ddim({3, 3}));
auto gpu_place = new paddle::platform::GPUPlace();
gpu_tensor.CopyFromVector<int>(src_vec, *gpu_place);
// Copy from GPU to CPU tensor for comparison
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place);
// Compare Tensors
const int* src_ptr = src_vec.data();
const int* cpu_ptr = cpu_tensor.data<int>();
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, cpu_ptr);
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}
src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
cpu_tensor.Resize(make_ddim({2, 2}));
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place);
gpu_tensor.Resize(make_ddim({2, 2}));
gpu_tensor.CopyFromVector<int>(src_vec, *gpu_place);
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place);
src_ptr = src_vec.data();
cpu_ptr = cpu_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, cpu_ptr);
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 5; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}
delete cpu_place;
delete gpu_place;
}
#endif
}
TEST(Tensor, ReshapeToMatrix) {
using namespace paddle::framework;
using namespace paddle::platform;
......
......@@ -60,6 +60,36 @@ if(NOT WITH_PYTHON)
dataproviders/PyDataProvider.h)
endif()
if(MOBILE_INFERENCE)
# Remove evaluators
list(REMOVE_ITEM GSERVER_SOURCES
layers/ValidationLayer.cpp
evaluators/Evaluator.cpp
evaluators/DetectionMAPEvaluator.cpp
evaluators/CTCErrorEvaluator.cpp
evaluators/ChunkEvaluator.cpp)
# Remove dataproviders
list(REMOVE_ITEM GSERVER_SOURCES
dataproviders/DataProvider.cpp
dataproviders/MultiDataProvider.cpp
dataproviders/ProtoDataProvider.cpp
dataproviders/PyDataProvider2.cpp
dataproviders/PyDataProvider.cpp)
# Remove useless gradientmachines
list(REMOVE_ITEM GSERVER_SOURCES
gradientmachines/MultiNetwork.cpp
gradientmachines/RecurrentGradientMachine.cpp
gradientmachines/ParallelNeuralNetwork.cpp
gradientmachines/GradientMachineMode.cpp
gradientmachines/MultiGradientMachine.cpp)
# Remove useless layers
list(REMOVE_ITEM GSERVER_SOURCES
layers/RecurrentLayerGroup.cpp)
endif()
if(WITH_GPU)
cuda_add_library(paddle_gserver ${GSERVER_SOURCES})
else()
......
......@@ -17,12 +17,15 @@ limitations under the License. */
#include <fstream>
#include "paddle/utils/Logging.h"
#include "NeuralNetwork.h"
#include "hl_gpu.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "GradientMachineMode.h"
#include "MultiGradientMachine.h"
#include "MultiNetwork.h"
#include "NeuralNetwork.h"
#include "ParallelNeuralNetwork.h"
#include "hl_gpu.h"
#endif
namespace paddle {
......@@ -30,13 +33,16 @@ GradientMachine* GradientMachine::create(
const ModelConfig& config,
int mode,
const std::vector<ParameterType>& parameterTypes) {
#ifndef PADDLE_MOBILE_INFERENCE
if (auto gm = IGradientMachineMode::tryCreateGradientMachine(mode, config)) {
return gm;
}
if (FLAGS_trainer_count > 1) {
return new MultiGradientMachine(config, FLAGS_use_gpu);
}
#endif
if (FLAGS_trainer_count == 1) { // single
#ifndef PADDLE_MOBILE_INFERENCE
NeuralNetwork* nn;
if (config.type() == "multi_nn") {
/* multi submodel calculate, thread(s) will be initialized inside */
......@@ -48,6 +54,9 @@ GradientMachine* GradientMachine::create(
/* single thread calculate */
nn = NeuralNetwork::create(config);
}
#else
NeuralNetwork* nn = NeuralNetwork::create(config);
#endif
ParamInitCallback testParamInitCb = [](int paramId, Parameter* para) {
para->enableType(PARAMETER_VALUE);
};
......
......@@ -20,13 +20,16 @@ limitations under the License. */
#include "ModelConfig.pb.h"
#include "TrainerConfig.pb.h"
#include "paddle/gserver/dataproviders/DataProvider.h"
#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/gserver/layers/Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/parameter/ParameterUpdaterBase.h"
#include "paddle/utils/Thread.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "paddle/gserver/evaluators/Evaluator.h"
#endif
namespace paddle {
/**
* @brief A gradient machine is capable of calculating some outputs given
......@@ -147,6 +150,7 @@ public:
virtual void onPassEnd() = 0;
#ifndef PADDLE_MOBILE_INFERENCE
/**
* Create an evaluator which can be used for eval()
*/
......@@ -156,6 +160,7 @@ public:
* evaluate using the given evaluator
*/
virtual void eval(Evaluator* evaluator) const = 0;
#endif
std::vector<ParameterPtr>& getParameters() { return parameters_; }
......
......@@ -14,15 +14,17 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "NeuralNetwork.h"
#include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/CustomStackTrace.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "MultiNetwork.h"
#include "NeuralNetwork.h"
#include "RecurrentGradientMachine.h"
#include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/Stat.h"
#endif
namespace paddle {
void parameterInitNN(int paramId,
......@@ -54,6 +56,7 @@ void parameterInitNN(int paramId,
}
NeuralNetwork* NeuralNetwork::create(const ModelConfig& config) {
#ifndef PADDLE_MOBILE_INFERENCE
if (config.type() == "recurrent_nn") {
return newNeuralNetwork("root");
} else if (config.type() == "multi_nn") {
......@@ -61,6 +64,9 @@ NeuralNetwork* NeuralNetwork::create(const ModelConfig& config) {
} else {
return newNeuralNetwork();
}
#else
return new NeuralNetwork();
#endif
}
std::map<std::string, bool> NeuralNetwork::dllInitMap;
......@@ -304,6 +310,8 @@ void NeuralNetwork::onPassEnd() {
}
}
#ifndef PADDLE_MOBILE_INFERENCE
class CombinedEvaluator : public Evaluator {
public:
void addEvaluator(std::unique_ptr<Evaluator>&& evaluator) {
......@@ -466,6 +474,8 @@ Evaluator* NeuralNetwork::makeEvaluator() const {
void NeuralNetwork::eval(Evaluator* evaluator) const { evaluator->eval(*this); }
#endif
void NeuralNetwork::setOutputGrad(const std::vector<Argument>& args) {
CHECK_GE(outputLayers_.size(), args.size());
for (size_t i = 0; i < args.size(); ++i) {
......
......@@ -97,9 +97,12 @@ public:
virtual void onPassEnd();
#ifndef PADDLE_MOBILE_INFERENCE
virtual Evaluator* makeEvaluator() const;
virtual void eval(Evaluator* evaluator) const;
#endif
virtual void resetState();
virtual void setOutputGrad(const std::vector<Argument>& args);
......
......@@ -15,11 +15,14 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "CostLayer.h"
#include "ValidationLayer.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Error.h"
#include "paddle/utils/Logging.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "ValidationLayer.h"
#endif
DEFINE_bool(log_error_clipping, false, "enable log error clipping or not");
namespace paddle {
......@@ -103,10 +106,12 @@ LayerPtr Layer::create(const LayerConfig& config) {
return LayerPtr(new MultiClassCrossEntropy(config));
else if (type == "rank-cost")
return LayerPtr(new RankingCost(config));
#ifndef PADDLE_MOBILE_INFERENCE
else if (type == "auc-validation")
return LayerPtr(new AucValidation(config));
else if (type == "pnpair-validation")
return LayerPtr(new PnpairValidation(config));
#endif
return LayerPtr(registrar_.createByType(config.type(), config));
}
......
# gserver pacakge unittests
if(NOT MOBILE_INFERENCE)
################### test_ProtoDataProvider ############
add_unittest_without_exec(test_ProtoDataProvider
test_ProtoDataProvider.cpp)
# test_ProtoDataProvider will mkdir as same name,
# so if WORKING_DIRECTORY is default directory, then
# mkdir will get error.
add_test(NAME test_ProtoDataProvider
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
add_unittest_without_exec(test_ProtoDataProvider
test_ProtoDataProvider.cpp)
# test_ProtoDataProvider will mkdir as same name,
# so if WORKING_DIRECTORY is default directory, then
# mkdir will get error.
add_test(NAME test_ProtoDataProvider
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif()
################# test_LayerGrad #######################
add_unittest_without_exec(test_LayerGrad
......@@ -98,9 +100,11 @@ add_unittest_without_exec(test_KmaxSeqScore
add_test(NAME test_KmaxSeqScore
COMMAND test_KmaxSeqScore)
if(NOT MOBILE_INFERENCE)
################## test_Evaluator #######################
add_unittest(test_Evaluator
test_Evaluator.cpp)
add_unittest(test_Evaluator
test_Evaluator.cpp)
endif()
################ test_LinearChainCRF ####################
add_simple_unittest(test_LinearChainCRF)
......@@ -131,27 +135,31 @@ if(NOT WITH_DOUBLE)
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif()
if(NOT MOBILE_INFERENCE)
############### test_RecurrentGradientMachine ###############
# TODO(yuyang18): There is some bug in test_RecurrentGradientMachine
# I will fix it.
add_unittest_without_exec(test_RecurrentGradientMachine
test_RecurrentGradientMachine.cpp)
add_test(NAME test_RecurrentGradientMachine
COMMAND .set_python_path.sh -d
${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
add_unittest_without_exec(test_NetworkCompare
test_NetworkCompare.cpp)
if(WITH_GPU)
add_test(NAME test_NetworkCompare
COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
else()
add_test(NAME test_NetworkCompare
COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
# TODO(yuyang18): There is some bug in test_RecurrentGradientMachine
# I will fix it.
add_unittest_without_exec(test_RecurrentGradientMachine
test_RecurrentGradientMachine.cpp)
add_test(NAME test_RecurrentGradientMachine
COMMAND .set_python_path.sh -d
${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif()
if(NOT MOBILE_INFERENCE)
add_unittest_without_exec(test_NetworkCompare
test_NetworkCompare.cpp)
if(WITH_GPU)
add_test(NAME test_NetworkCompare
COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
else()
add_test(NAME test_NetworkCompare
COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
endif()
endif()
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/testing/TestUtil.h"
using namespace std; // NOLINT
......
......@@ -17,7 +17,6 @@ limitations under the License. */
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
......
......@@ -17,7 +17,6 @@ limitations under the License. */
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
......
......@@ -16,7 +16,6 @@ limitations under the License. */
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/LinearChainCRF.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
......
......@@ -21,7 +21,6 @@ limitations under the License. */
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
......
......@@ -24,7 +24,6 @@ limitations under the License. */
#include "paddle/gserver/layers/Layer.h"
#include "paddle/gserver/layers/SelectiveFullyConnectedLayer.h"
#include "paddle/math/CpuSparseMatrix.h"
#include "paddle/trainer/Trainer.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
......
......@@ -22,7 +22,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Inference"),
"Input(Inference) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"),
......
......@@ -22,7 +22,7 @@ class ActivationOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputDim("Y", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Y");
}
......@@ -33,7 +33,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Y"));
}
};
......@@ -201,6 +201,19 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
}
};
template <typename AttrType>
class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
public:
Relu6OpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Relu6 operator");
AddOutput("Y", "Output of Relu6 operator");
AddComment("Relu6 activation operator, relu6 = min(max(0, x), 6)");
AddAttr<AttrType>("threshold", "The threshold value of Relu6")
.SetDefault(static_cast<AttrType>(6));
}
};
template <typename AttrType>
class PowOpMaker : public framework::OpProtoAndCheckerMaker {
public:
......@@ -276,6 +289,9 @@ REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker<float>,
REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>,
soft_relu_grad, ops::ActivationOpGrad);
REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker<float>, relu6_grad,
ops::ActivationOpGrad);
REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
ops::ActivationOpGrad);
......@@ -285,11 +301,9 @@ REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
act_type, \
paddle::operators::ActivationKernel<paddle::platform::CPUPlace, \
paddle::operators::functor<float>>); \
ops::ActivationKernel<paddle::platform::CPUPlace, ops::functor<float>>); \
REGISTER_OP_CPU_KERNEL(act_type##_grad, \
paddle::operators::ActivationGradKernel< \
paddle::platform::CPUPlace, \
paddle::operators::grad_functor<float>>);
ops::ActivationGradKernel<paddle::platform::CPUPlace, \
ops::grad_functor<float>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
......@@ -15,14 +15,14 @@
#define EIGEN_USE_GPU
#include "paddle/operators/activation_op.h"
namespace ops = paddle::operators;
#define REGISTER_ACTIVATION_GPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_GPU_KERNEL( \
act_type, \
paddle::operators::ActivationKernel<paddle::platform::GPUPlace, \
paddle::operators::functor<float>>); \
ops::ActivationKernel<paddle::platform::GPUPlace, ops::functor<float>>); \
REGISTER_OP_GPU_KERNEL(act_type##_grad, \
paddle::operators::ActivationGradKernel< \
paddle::platform::GPUPlace, \
paddle::operators::grad_functor<float>>);
ops::ActivationGradKernel<paddle::platform::GPUPlace, \
ops::grad_functor<float>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_GPU_KERNEL);
......@@ -280,6 +280,36 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> {
}
};
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold;
// NOTE: Explicit hides the `BaseActivationFunctor<T>::GetAttrs`
// not polymorphism for speed.
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) const {
y.device(d) = x.cwiseMax(static_cast<T>(0)).cwiseMin(threshold);
}
};
template <typename T>
struct Relu6GradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
dx.device(d) =
dy * ((x > static_cast<T>(0)) * (x < threshold)).template cast<T>();
}
};
// softsign(x) = x / (1 + |x|)
template <typename T>
struct SoftsignFunctor : public BaseActivationFunctor<T> {
......@@ -425,5 +455,6 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
__macro(pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor)
......@@ -22,7 +22,7 @@ class AdadeltaOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
......
......@@ -22,7 +22,7 @@ class AdagradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adamax_op.h"
namespace paddle {
namespace operators {
class AdamaxOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment"),
"Input(Moment) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("InfNorm"),
"Input(InfNorm) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
"Input(LearningRate) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
"Input(Beta1Pow) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
"Output(MomentOut) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("InfNormOut"),
"Output(InfNormOut) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"),
"Output(Beta1PowOut) of AdamaxOp should not be null.");
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning rate should have 1 dimension");
auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension");
auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of AdamaxOp should have same dimension");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment"),
"Param and Moment input of AdamaxOp should have same dimension");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("InfNorm"),
"Param and InfNorm input of AdamaxOp should have same dimension");
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("MomentOut", param_dims);
ctx->SetOutputDim("InfNormOut", param_dims);
ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
}
};
class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AdamaxOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("LearningRate", "(Tensor) Learning rate");
AddInput("Moment", "(Tensor) First moment");
AddInput("InfNorm",
"(Tensor) "
"Input exponentially weighted infinity norm");
AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("MomentOut", "(Tensor) Output first moment");
AddOutput("InfNormOut",
"(Tensor) "
"Output exponentially weighted infinity norm");
AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
AddAttr<float>("beta1",
"(float, default 0.9) "
"Exponential decay rate for the "
"1st moment estimates.")
.SetDefault(0.9f);
AddAttr<float>("beta2",
"(float, default 0.999) "
"exponential decay rate for the weighted "
"infinity norm estimates.")
.SetDefault(0.999f);
AddAttr<float>("epsilon",
"(float, default 1.0e-8) "
"Constant for numerical stability")
.SetDefault(1.0e-8f);
AddComment(R"DOC(
Adamax Updates Operator.
This implements the Adamax optimizer from Section 7 of the Adam
paper[1]. Adamax is a variant of the
Adam algorithm based on the infinity norm.
Adamax updates:
moment_out = beta1 * moment + (1 - beta1) * grad
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad))
beta1_pow_out = beta1_pow * beta1
learning_rate_t = learning_rate/(1 - beta1_pow_out)
param_out = param - learning_rate_t * moment_out/inf_norm_out
The original paper does not have an epsilon attribute.
However, it is added here for numerical stability
by preventing divide by 0.
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(adamax, ops::AdamaxOp, ops::AdamaxOpMaker);
REGISTER_OP_CPU_KERNEL(adamax,
ops::AdamaxOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adamax_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(adamax,
ops::AdamaxOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class AdamaxOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
auto inf_norm_out_tensor = ctx.Output<framework::Tensor>("InfNormOut");
auto beta1_pow_out_tensor = ctx.Output<framework::Tensor>("Beta1PowOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
moment_out_tensor->mutable_data<T>(ctx.GetPlace());
inf_norm_out_tensor->mutable_data<T>(ctx.GetPlace());
beta1_pow_out_tensor->mutable_data<T>(ctx.GetPlace());
float beta1 = ctx.Attr<float>("beta1");
float beta2 = ctx.Attr<float>("beta2");
float epsilon = ctx.Attr<float>("epsilon");
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
auto moment = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment"));
auto inf_norm = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("InfNorm"));
auto lr = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("LearningRate"));
auto beta1_pow = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Beta1Pow"));
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
auto inf_norm_out =
framework::EigenVector<T>::Flatten(*inf_norm_out_tensor);
auto beta1_pow_out =
framework::EigenVector<T>::Flatten(*beta1_pow_out_tensor);
auto place = ctx.GetEigenDevice<Place>();
moment_out.device(place) = beta1 * moment + (1 - beta1) * grad;
inf_norm_out.device(place) =
grad.abs().cwiseMax((beta2 * inf_norm) + epsilon);
beta1_pow_out.device(place) = beta1_pow * beta1;
auto lr_t = lr / (1 - beta1_pow_out);
Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
param_out.device(place) =
param - lr_t.broadcast(m_dsize) * (moment_out / inf_norm_out);
}
};
} // namespace operators
} // namespace paddle
......@@ -22,7 +22,7 @@ class ClipOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ClipOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -61,7 +61,7 @@ class ClipOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
......
......@@ -24,7 +24,7 @@ class ConcatOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL,
"Inputs(X) of ConcatOp should be empty.")
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -83,7 +83,7 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
}
};
......
......@@ -27,7 +27,7 @@ class Conv2DOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"),
......@@ -106,7 +106,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
......
......@@ -24,7 +24,7 @@ class CosSimOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
// notnull check
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of CosSimOp should not be null.");
......@@ -98,7 +98,7 @@ class CosSimOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
// notnull check
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null.");
......
......@@ -25,7 +25,7 @@ class CropOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of CropOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -115,7 +115,7 @@ class CropOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
......
......@@ -22,7 +22,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
......@@ -60,7 +60,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
......
......@@ -24,7 +24,7 @@ class DropoutOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE_GE(ctx->Attrs().Get<float>("dropout_prob"), 0);
PADDLE_ENFORCE_LE(ctx->Attrs().Get<float>("dropout_prob"), 1);
......@@ -70,7 +70,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_training"), 1,
"GradOp is only callable when is_training is true");
......
......@@ -25,7 +25,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
protected:
using Tensor = framework::Tensor;
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of elementwise op should not be null");
PADDLE_ENFORCE(ctx->HasInput("Y"),
......@@ -106,7 +106,7 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
using Tensor = framework::Tensor;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
......
......@@ -22,7 +22,7 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of FillZerosLikeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"),
......
......@@ -23,7 +23,7 @@ class GatherOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of GatherOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Index"),
......@@ -51,7 +51,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
......
......@@ -43,7 +43,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of GaussianRandomOp should not be null.");
auto dims = ctx->Attrs().Get<std::vector<int>>("dims");
......
......@@ -22,7 +22,7 @@ class LookupTableOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("W"),
"Input(W) of LookupTableOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Ids"),
......@@ -70,7 +70,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
auto table_dims = ctx->GetInputDim("W");
ctx->SetOutputDim(framework::GradVarName("W"), table_dims);
}
......
......@@ -22,7 +22,7 @@ class LstmUnitOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of LSTM should not be null.");
PADDLE_ENFORCE(ctx->HasInput("C_prev"),
"Input(C_prev) of LSTM should not be null.");
......@@ -77,7 +77,7 @@ class LstmUnitGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("C")),
"Input(C@GRAD) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("H")),
......
......@@ -22,7 +22,7 @@ class MeanOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of MeanOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -47,7 +47,7 @@ class MeanGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
};
......
......@@ -26,7 +26,7 @@ class MinusOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of MinusOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"),
......
......@@ -22,7 +22,7 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "X must be initialized.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Y must be initialized.");
......@@ -74,7 +74,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "X must be initialized.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Y must be initialized.");
PADDLE_ENFORCE(ctx->HasInput("IntermediateVal"),
......
......@@ -24,7 +24,7 @@ class MulOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MulOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) of MulOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -97,7 +97,7 @@ class MulOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
......
......@@ -24,7 +24,7 @@ class MultiplexOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) shouldn't be null.");
PADDLE_ENFORCE(!ctx->Inputs("X").empty(),
"MultiInput(X) shouldn't be empty.");
......@@ -90,7 +90,7 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(!ctx->Inputs("X").empty(), "Input(X) should not be null.");
PADDLE_ENFORCE(!ctx->Outputs(framework::GradVarName("X")).empty(),
"Output(X@Grad) should not be null.");
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <set>
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/op_registry.h"
......
......@@ -24,7 +24,7 @@ class PadOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of PadOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of PadOp should not be null.");
......@@ -98,7 +98,7 @@ class PadOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
......
......@@ -27,7 +27,7 @@ class PoolOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"X(Input) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -74,7 +74,7 @@ class PoolOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"X(Input) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
......
......@@ -26,7 +26,7 @@ class PReluOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("Alpha"), "Input(Alpha) should not be null");
PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == 1,
......@@ -63,7 +63,7 @@ class PReluGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
......
......@@ -25,7 +25,7 @@ class RankLossOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
// input check
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null");
PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null");
......@@ -90,7 +90,7 @@ class RankLossGradOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null.");
......
......@@ -24,7 +24,7 @@ class ReduceOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ReduceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -58,7 +58,7 @@ class ReduceGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null.");
......@@ -168,36 +168,22 @@ namespace ops = paddle::operators;
REGISTER_OP(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker, reduce_sum_grad,
ops::ReduceGradOp);
REGISTER_OP_CPU_KERNEL(
reduce_sum,
ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::SumFunctor>);
REGISTER_OP_CPU_KERNEL(reduce_sum_grad,
ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
ops::SumGradFunctor>);
REGISTER_OP(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker,
reduce_mean_grad, ops::ReduceGradOp);
REGISTER_OP_CPU_KERNEL(
reduce_mean,
ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MeanFunctor>);
REGISTER_OP_CPU_KERNEL(reduce_mean_grad,
ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
ops::MeanGradFunctor>);
REGISTER_OP(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_max_grad,
ops::ReduceGradOp);
REGISTER_OP_CPU_KERNEL(
reduce_max,
ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MaxFunctor>);
REGISTER_OP_CPU_KERNEL(reduce_max_grad,
ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
ops::MaxOrMinGradFunctor>);
REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_min_grad,
REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker, reduce_min_grad,
ops::ReduceGradOp);
REGISTER_OP_CPU_KERNEL(
reduce_min,
ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MinFunctor>);
REGISTER_OP_CPU_KERNEL(reduce_min_grad,
ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
ops::MaxOrMinGradFunctor>);
#define REGISTER_REDUCE_CPU_KERNEL(reduce_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
reduce_type, \
ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::functor>); \
REGISTER_OP_CPU_KERNEL(reduce_type##_grad, \
ops::ReduceGradKernel<paddle::platform::CPUPlace, \
float, ops::grad_functor>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_REDUCE_CPU_KERNEL);
......@@ -17,30 +17,12 @@
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
reduce_sum,
ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::SumFunctor>);
REGISTER_OP_GPU_KERNEL(reduce_sum_grad,
ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
ops::SumGradFunctor>);
REGISTER_OP_GPU_KERNEL(
reduce_mean,
ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MeanFunctor>);
REGISTER_OP_GPU_KERNEL(reduce_mean_grad,
ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
ops::MeanGradFunctor>);
REGISTER_OP_GPU_KERNEL(
reduce_max,
ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MaxFunctor>);
REGISTER_OP_GPU_KERNEL(reduce_max_grad,
ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
ops::MaxOrMinGradFunctor>);
REGISTER_OP_GPU_KERNEL(
reduce_min,
ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MinFunctor>);
REGISTER_OP_GPU_KERNEL(reduce_min_grad,
ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
ops::MaxOrMinGradFunctor>);
#define REGISTER_REDUCE_GPU_KERNEL(reduce_type, functor, grad_functor) \
REGISTER_OP_GPU_KERNEL( \
reduce_type, \
ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::functor>); \
REGISTER_OP_GPU_KERNEL(reduce_type##_grad, \
ops::ReduceGradKernel<paddle::platform::GPUPlace, \
float, ops::grad_functor>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_REDUCE_GPU_KERNEL);
......@@ -198,3 +198,9 @@ class ReduceGradKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \
__macro(reduce_sum, SumFunctor, SumGradFunctor); \
__macro(reduce_mean, MeanFunctor, MeanGradFunctor); \
__macro(reduce_max, MaxFunctor, MaxOrMinGradFunctor); \
__macro(reduce_min, MinFunctor, MaxOrMinGradFunctor);
......@@ -26,7 +26,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
// input check
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ReshapeOp should not be null.");
......@@ -94,7 +94,7 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) shouldn't be null.");
......
......@@ -22,7 +22,7 @@ class RmspropOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("MeanSquare"),
......
......@@ -26,7 +26,7 @@ class ScaleOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ScaleOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......
......@@ -23,7 +23,7 @@ class ScatterOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Ref"),
"Input(Ref) of ScatterOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Index"),
......@@ -60,7 +60,7 @@ class ScatterGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
ctx->SetOutputDim(framework::GradVarName("Updates"),
ctx->GetInputDim("Updates"));
ctx->SetOutputDim(framework::GradVarName("Ref"), ctx->GetInputDim("Ref"));
......
......@@ -22,7 +22,7 @@ class SequencePoolOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequencePoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -74,7 +74,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Gradient of Out should not be null.");
PADDLE_ENFORCE(ctx->HasInput("X"), "The input X should not be null.");
......
......@@ -22,7 +22,7 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequenceSoftmaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -67,7 +67,7 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Out"),
"Input(Out) of SequenceSoftmaxGradOp should not be null.");
PADDLE_ENFORCE(
......
......@@ -22,7 +22,7 @@ class SGDOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of SGDOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
......
......@@ -24,7 +24,7 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Labels"),
"Input(Labels) should be not null.");
......@@ -53,7 +53,7 @@ class SigmoidCrossEntropyWithLogitsGradOp
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Labels"),
"Input(Labels) should be not null.");
......
......@@ -22,7 +22,7 @@ class SmoothL1LossOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "X must be initialized.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Y must be initialized.");
......@@ -94,7 +94,7 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("X");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
......
......@@ -22,7 +22,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SoftmaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"),
......@@ -69,7 +69,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
"Input(Y@GRAD) should be not null.");
......
......@@ -83,7 +83,7 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Logits"),
"Input(Logits) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
......@@ -128,7 +128,7 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")),
"Input(Loss@Grad) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Softmax"),
......
......@@ -24,7 +24,7 @@ class SplitOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SplitOp should not be null.");
PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
......
......@@ -22,7 +22,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SquaredL2DistanceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"),
......@@ -86,7 +86,7 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Gradient of Out should not be null");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
......
......@@ -22,7 +22,7 @@ class SumOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null");
auto x_dims = ctx->GetInputsDim("X");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......
......@@ -22,7 +22,7 @@ class TopkOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase *ctx) const override {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of TopkOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......
......@@ -24,7 +24,7 @@ class TransposeOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
auto x_dims = ctx->GetInputDim("X");
......@@ -93,7 +93,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
......
......@@ -47,7 +47,7 @@ class UniformRandomOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContextBase* ctx) const override {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of UniformRandomOp should not be null.");
......
......@@ -117,7 +117,6 @@ void BindProgramDesc(py::module &m) {
.def("append_block", &ProgramDescBind::AppendBlock,
py::return_value_policy::reference)
.def("block", &ProgramDescBind::Block, py::return_value_policy::reference)
.def("__str__", &ProgramDescBind::DebugString)
.def("num_blocks", &ProgramDescBind::Size);
}
......@@ -191,8 +190,6 @@ void BindOpDesc(py::module &m) {
.def("output", &OpDescBind::Output)
.def("output_names", &OpDescBind::OutputNames)
.def("set_output", &OpDescBind::SetOutput)
.def("__str__", &OpDescBind::DebugString)
.def("__repr__", &OpDescBind::DebugString)
.def("has_attr", &OpDescBind::HasAttr)
.def("attr_type", &OpDescBind::GetAttrType)
.def("attr_names", &OpDescBind::AttrNames)
......
......@@ -10,7 +10,8 @@ There are:
* EndPass
"""
__all__ = [
'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult'
'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult',
'EndForwardBackward'
]
......@@ -73,6 +74,17 @@ class BeginIteration(object):
self.batch_id = batch_id
class EndForwardBackward(object):
"""
Event On One Batch ForwardBackward Complete.
"""
def __init__(self, pass_id, batch_id, gm):
self.pass_id = pass_id
self.batch_id = batch_id
self.gm = gm
class EndIteration(WithMetric):
"""
Event On One Batch Training Complete.
......
......@@ -137,21 +137,26 @@ class TestBRelu(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestLeakyRelu(OpTest):
class TestRelu6(OpTest):
def setUp(self):
self.op_type = "leaky_relu"
alpha = 0.02
self.attrs = {'alpha': alpha}
self.inputs = {'X': np.random.uniform(-3, 3, [4, 4]).astype("float32")}
self.op_type = "relu6"
x = np.random.uniform(-1, 1, [4, 10]).astype("float32")
threshold = 6.0
# The same with TestAbs
x[np.abs(x) < 0.005] = 0.02
x[np.abs(x - threshold) < 0.005] = threshold + 0.02
self.inputs = {'X': x}
self.attrs = {'threshold': threshold}
self.outputs = {
'Y': np.maximum(self.inputs['X'], alpha * self.inputs['X'])
'Y': np.minimum(np.maximum(self.inputs['X'], 0), threshold)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.007)
self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestSoftRelu(OpTest):
......
import unittest
import numpy as np
from op_test import OpTest
class TestAdamaxOp1(OpTest):
def setUp(self):
'''Test Adamax Operator with supplied attributes
'''
self.op_type = "adamax"
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
moment = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The infinity norm is positive
inf_norm = np.random.random((102, 105)).astype("float32")
learning_rate = 0.002
beta1 = 0.78
beta2 = 0.899
epsilon = 1e-5
beta1_pow = beta1**10
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'InfNorm': inf_norm,
'LearningRate': np.array([learning_rate]).astype("float32"),
'Beta1Pow': np.array([beta1_pow]).astype("float32")
}
self.attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon}
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, self.attrs)
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'InfNormOut': inf_norm_out,
'Beta1PowOut': beta1_pow_out
}
def test_check_output(self):
self.check_output()
class TestAdamaxOp2(OpTest):
'''Test Adamax Operator with default attributes
'''
def setUp(self):
self.op_type = "adamax"
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
moment = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The infinity norm is positive
inf_norm = np.random.random((102, 105)).astype("float32")
learning_rate = 0.002
beta1 = 0.9
beta2 = 0.999
epsilon = 1e-8
beta1_pow = beta1**8
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'InfNorm': inf_norm,
'LearningRate': np.array([learning_rate]).astype("float32"),
'Beta1Pow': np.array([beta1_pow]).astype("float32")
}
attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon}
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, attrs)
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'InfNormOut': inf_norm_out,
'Beta1PowOut': beta1_pow_out
}
def test_check_output(self):
self.check_output()
class TestAdamaxOpMultipleSteps(OpTest):
def setUp(self):
'''Test Adamax Operator with supplied attributes
'''
self.op_type = "adamax"
self.num_steps = 10
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
moment = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The infinity norm is positive
inf_norm = np.random.random((102, 105)).astype("float32")
learning_rate = 0.002
beta1 = 0.8
beta2 = 0.99
epsilon = 1e-5
beta1_pow = 1
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'InfNorm': inf_norm,
'LearningRate': np.array([learning_rate]).astype("float32"),
'Beta1Pow': np.array([beta1_pow]).astype("float32")
}
self.attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon}
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, self.attrs)
def test_check_output(self):
for _ in range(self.num_steps):
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, self.attrs)
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'InfNormOut': inf_norm_out,
'Beta1PowOut': beta1_pow_out
}
# Verify output for this step
self.check_output()
# Output of this step becomes input for next step
self.inputs['Param'] = param_out
self.inputs['Moment'] = moment_out
self.inputs['InfNorm'] = inf_norm_out
self.inputs['Beta1Pow'] = beta1_pow_out
# Randomize gradient for next step
self.inputs['Grad'] = np.random.uniform(
-1, 1, (102, 105)).astype("float32")
def adamax_step(inputs, attributes):
'''
Simulate one step of the adamax optimizer
:param inputs: dict of inputs
:param attributes: dict of attributes
:return tuple: tuple of output param, moment, inf_norm and
beta1 power accumulator
'''
param = inputs['Param']
grad = inputs['Grad']
moment = inputs['Moment']
inf_norm = inputs['InfNorm']
lr = inputs['LearningRate']
beta1_pow = inputs['Beta1Pow']
beta1 = attributes['beta1']
beta2 = attributes['beta2']
epsilon = attributes['epsilon']
moment_out = beta1 * moment + (1 - beta1) * grad
inf_norm_out = np.maximum(beta2 * inf_norm + epsilon, np.abs(grad))
beta1_pow_out = beta1_pow * beta1
lr_t = (lr / (1 - beta1_pow_out))
param_out = param - lr_t * np.divide(moment_out, inf_norm_out)
return param_out, moment_out, inf_norm_out, beta1_pow_out
if __name__ == "__main__":
unittest.main()
......@@ -164,11 +164,18 @@ class SGD(object):
pass_type)
self.__gradient_machine__.eval(pass_evaluator)
self.__gradient_machine__.eval(batch_evaluator)
event_handler(
v2_event.EndForwardBackward(
pass_id=pass_id,
batch_id=batch_id,
gm=self.__gradient_machine__))
for each_param in self.__gradient_machine__.getNonStaticParameters(
):
self.__parameter_updater__.update(each_param)
cost_sum = out_args.sum()
cost = cost_sum / len(data_batch)
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
event_handler(
v2_event.EndIteration(
pass_id=pass_id,
......@@ -176,8 +183,6 @@ class SGD(object):
cost=cost,
evaluator=batch_evaluator,
gm=self.__gradient_machine__))
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
self.__parameter_updater__.finishPass()
pass_evaluator.finish()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册